1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  * Copyright 2012 Milan Jurik. All rights reserved.
  25  * Copyright (c) 2016 by Delphix. All rights reserved.
  26  * Copyright 2017 Joyent, Inc.
  27  */
  28 
  29 
  30 /*
  31  * Overview of the RSM Kernel Agent:
  32  * ---------------------------------
  33  *
  34  * rsm.c constitutes the implementation of the RSM kernel agent. The RSM
  35  * kernel agent is a pseudo device driver which makes use of the RSMPI
  36  * interface on behalf of the RSMAPI user library.
  37  *
  38  * The kernel agent functionality can be categorized into the following
  39  * components:
  40  * 1. Driver Infrastructure
  41  * 2. Export/Import Segment Management
  42  * 3. Internal resource allocation/deallocation
  43  *
  44  * The driver infrastructure includes the basic module loading entry points
  45  * like _init, _info, _fini to load, unload and report information about
  46  * the driver module. The driver infrastructure also includes the
  47  * autoconfiguration entry points namely, attach, detach and getinfo for
  48  * the device autoconfiguration.
  49  *
  50  * The kernel agent is a pseudo character device driver and exports
  51  * a cb_ops structure which defines the driver entry points for character
  52  * device access. This includes the open and close entry points. The
  53  * other entry points provided include ioctl, devmap and segmap and chpoll.
  54  * read and write entry points are not used since the device is memory
  55  * mapped. Also ddi_prop_op is used for the prop_op entry point.
  56  *
  57  * The ioctl entry point supports a number of commands, which are used by
  58  * the RSMAPI library in order to export and import segments. These
  59  * commands include commands for binding and rebinding the physical pages
  60  * allocated to the virtual address range, publishing the export segment,
  61  * unpublishing and republishing an export segment, creating an
  62  * import segment and a virtual connection from this import segment to
   63  * an export segment, performing scatter-gather data transfer, and barrier
  64  * operations.
  65  *
  66  *
  67  * Export and Import segments:
  68  * ---------------------------
  69  *
  70  * In order to create an RSM export segment a process allocates a range in its
  71  * virtual address space for the segment using standard Solaris interfaces.
  72  * The process then calls RSMAPI, which in turn makes an ioctl call to the
  73  * RSM kernel agent for an allocation of physical memory pages and for
  74  * creation of the export segment by binding these pages to the virtual
  75  * address range. These pages are locked in memory so that remote accesses
  76  * are always applied to the correct page. Then the RSM segment is published,
  77  * again via RSMAPI making an ioctl to the RSM kernel agent, and a segment id
  78  * is assigned to it.
  79  *
  80  * In order to import a published RSM segment, RSMAPI creates an import
  81  * segment and forms a virtual connection across the interconnect to the
  82  * export segment, via an ioctl into the kernel agent with the connect
  83  * command. The import segment setup is completed by mapping the
   84  * local device memory into the importer's virtual address space. The
  85  * mapping of the import segment is handled by the segmap/devmap
  86  * infrastructure described as follows.
  87  *
  88  * Segmap and Devmap interfaces:
  89  *
  90  * The RSM kernel agent allows device memory to be directly accessed by user
  91  * threads via memory mapping. In order to do so, the RSM kernel agent
  92  * supports the devmap and segmap entry points.
  93  *
  94  * The segmap entry point(rsm_segmap) is responsible for setting up a memory
  95  * mapping as requested by mmap. The devmap entry point(rsm_devmap) is
  96  * responsible for exporting the device memory to the user applications.
  97  * rsm_segmap calls RSMPI rsm_map to allocate device memory. Then the
   98  * control is transferred to the devmap_setup call which calls rsm_devmap.
  99  *
 100  * rsm_devmap validates the user mapping to the device or kernel memory
 101  * and passes the information to the system for setting up the mapping. The
 102  * actual setting up of the mapping is done by devmap_devmem_setup(for
 103  * device memory) or devmap_umem_setup(for kernel memory). Callbacks are
 104  * registered for device context management via the devmap_devmem_setup
 105  * or devmap_umem_setup calls. The callbacks are rsmmap_map, rsmmap_unmap,
 106  * rsmmap_access, rsmmap_dup. The callbacks are called when a new mapping
 107  * is created, a mapping is freed, a mapping is accessed or an existing
 108  * mapping is duplicated respectively. These callbacks allow the RSM kernel
 109  * agent to maintain state information associated with the mappings.
 110  * The state information is mainly in the form of a cookie list for the import
 111  * segment for which mapping has been done.
 112  *
 113  * Forced disconnect of import segments:
 114  *
 115  * When an exported segment is unpublished, the exporter sends a forced
 116  * disconnect message to all its importers. The importer segments are
 117  * unloaded and disconnected. This involves unloading the original
 118  * mappings and remapping to a preallocated kernel trash page. This is
 119  * done by devmap_umem_remap. The trash/dummy page is a kernel page,
 120  * preallocated by the kernel agent during attach using ddi_umem_alloc with
 121  * the DDI_UMEM_TRASH flag set. This avoids a core dump in the application
 122  * due to unloading of the original mappings.
 123  *
 124  * Additionally every segment has a mapping generation number associated
 125  * with it. This is an entry in the barrier generation page, created
 126  * during attach time. This mapping generation number for the import
 127  * segments is incremented on a force disconnect to notify the application
 128  * of the force disconnect. On this notification, the application needs
 129  * to reconnect the segment to establish a new legitimate mapping.
 130  *
 131  *
 132  * Locks used in the kernel agent:
 133  * -------------------------------
 134  *
 135  * The kernel agent uses a variety of mutexes and condition variables for
 136  * mutual exclusion of the shared data structures and for synchronization
 137  * between the various threads. Some of the locks are described as follows.
 138  *
  139  * Each resource structure, which represents an export or an import segment,
 140  * has a lock associated with it. The lock is the resource mutex, rsmrc_lock.
 141  * This is used directly by RSMRC_LOCK and RSMRC_UNLOCK macros and in the
 142  * rsmseglock_acquire and rsmseglock_release macros. An additional
 143  * lock called the rsmsi_lock is used for the shared import data structure
 144  * that is relevant for resources representing import segments. There is
 145  * also a condition variable associated with the resource called s_cv. This
 146  * is used to wait for events like the segment state change etc.
 147  *
 148  * The resource structures are allocated from a pool of resource structures,
 149  * called rsm_resource. This pool is protected via a reader-writer lock,
 150  * called rsmrc_lock.
 151  *
 152  * There are two separate hash tables, one for the export segments and
 153  * one for the import segments. The export segments are inserted into the
 154  * export segment hash table only after they have been published and the
 155  * import segments are inserted in the import segments list only after they
 156  * have successfully connected to an exported segment. These tables are
 157  * protected via reader-writer locks.
 158  *
 159  * Debug Support in the kernel agent:
 160  * ----------------------------------
 161  *
 162  * Debugging support in the kernel agent is provided by the following
 163  * macros.
 164  *
 165  * DBG_PRINTF((category, level, message)) is a macro which logs a debug
  166  * message to the kernel agent's debug buffer, rsmka_dbg. This debug buffer
 167  * can be viewed in kmdb as *rsmka_dbg/s. The message is logged based
 168  * on the definition of the category and level. All messages that belong to
 169  * the specified category(rsmdbg_category) and are of an equal or greater
 170  * severity than the specified level(rsmdbg_level) are logged. The message
 171  * is a string which uses the same formatting rules as the strings used in
 172  * printf.
 173  *
 174  * The category defines which component of the kernel agent has logged this
 175  * message. There are a number of categories that have been defined such as
 176  * RSM_KERNEL_AGENT, RSM_OPS, RSM_IMPORT, RSM_EXPORT etc. A macro,
 177  * DBG_ADDCATEGORY is used to add in another category to the currently
 178  * specified category value so that the component using this new category
 179  * can also effectively log debug messages. Thus, the category of a specific
 180  * message is some combination of the available categories and we can define
 181  * sub-categories if we want a finer level of granularity.
 182  *
 183  * The level defines the severity of the message. Different level values are
 184  * defined, with RSM_ERR being the most severe and RSM_DEBUG_VERBOSE being
 185  * the least severe(debug level is 0).
 186  *
 187  * DBG_DEFINE and DBG_DEFINE_STR are macros provided to declare a debug
 188  * variable or a string respectively.
 189  *
 190  *
 191  * NOTES:
 192  *
 193  * Special Fork and Exec Handling:
 194  * -------------------------------
 195  *
 196  * The backing physical pages of an exported segment are always locked down.
 197  * Thus, there are two cases in which a process having exported segments
 198  * will cause a cpu to hang: (1) the process invokes exec; (2) a process
 199  * forks and invokes exit before the duped file descriptors for the export
 200  * segments are closed in the child process. The hang is caused because the
 201  * address space release algorithm in Solaris VM subsystem is based on a
 202  * non-blocking loop which does not terminate while segments are locked
 203  * down. In addition to this, Solaris VM subsystem lacks a callback
 204  * mechanism to the rsm kernel agent to allow unlocking these export
 205  * segment pages.
 206  *
 207  * In order to circumvent this problem, the kernel agent does the following.
 208  * The Solaris VM subsystem keeps memory segments in increasing order of
  209  * virtual addresses. Thus a special page(special_exit_offset) is allocated
 210  * by the kernel agent and is mmapped into the heap area of the process address
 211  * space(the mmap is done by the RSMAPI library). During the mmap processing
 212  * of this special page by the devmap infrastructure, a callback(the same
 213  * devmap context management callbacks discussed above) is registered for an
 214  * unmap.
 215  *
 216  * As discussed above, this page is processed by the Solaris address space
 217  * release code before any of the exported segments pages(which are allocated
 218  * from high memory). It is during this processing that the unmap callback gets
 219  * called and this callback is responsible for force destroying the exported
 220  * segments and thus eliminating the problem of locked pages.
 221  *
 222  * Flow-control:
 223  * ------------
 224  *
 225  * A credit based flow control algorithm is used for messages whose
 226  * processing cannot be done in the interrupt context because it might
 227  * involve invoking rsmpi calls, or might take a long time to complete
 228  * or might need to allocate resources. The algorithm operates on a per
 229  * path basis. To send a message the pathend needs to have a credit and
 230  * it consumes one for every message that is flow controlled. On the
 231  * receiving pathend the message is put on a msgbuf_queue and a task is
 232  * dispatched on the worker thread - recv_taskq where it is processed.
 233  * After processing the message, the receiving pathend dequeues the message,
 234  * and if it has processed > RSMIPC_LOTSFREE_MSGBUFS messages sends
 235  * credits to the sender pathend.
 236  *
 237  * RSM_DRTEST:
 238  * -----------
 239  *
 240  * This is used to enable the DR testing using a test driver on test
  241  * platforms which do not support DR.
 242  *
 243  */
 244 
 245 #include <sys/types.h>
 246 #include <sys/param.h>
 247 #include <sys/user.h>
 248 #include <sys/buf.h>
 249 #include <sys/systm.h>
 250 #include <sys/cred.h>
 251 #include <sys/vm.h>
 252 #include <sys/uio.h>
 253 #include <vm/seg.h>
 254 #include <vm/page.h>
 255 #include <sys/stat.h>
 256 
 257 #include <sys/time.h>
 258 #include <sys/errno.h>
 259 
 260 #include <sys/file.h>
 261 #include <sys/uio.h>
 262 #include <sys/proc.h>
 263 #include <sys/mman.h>
 264 #include <sys/open.h>
 265 #include <sys/atomic.h>
 266 #include <sys/mem_config.h>
 267 
 268 
 269 #include <sys/ddi.h>
 270 #include <sys/devops.h>
 271 #include <sys/ddidevmap.h>
 272 #include <sys/sunddi.h>
 273 #include <sys/esunddi.h>
 274 #include <sys/ddi_impldefs.h>
 275 
 276 #include <sys/kmem.h>
 277 #include <sys/conf.h>
 278 #include <sys/devops.h>
 279 #include <sys/ddi_impldefs.h>
 280 
 281 #include <sys/modctl.h>
 282 
 283 #include <sys/policy.h>
 284 #include <sys/types.h>
 285 #include <sys/conf.h>
 286 #include <sys/param.h>
 287 
 288 #include <sys/taskq.h>
 289 
 290 #include <sys/rsm/rsm_common.h>
 291 #include <sys/rsm/rsmapi_common.h>
 292 #include <sys/rsm/rsm.h>
 293 #include <rsm_in.h>
 294 #include <sys/rsm/rsmka_path_int.h>
 295 #include <sys/rsm/rsmpi.h>
 296 
 297 #include <sys/modctl.h>
 298 #include <sys/debug.h>
 299 
 300 #include <sys/tuneable.h>
 301 
 302 #ifdef  RSM_DRTEST
 303 extern int rsm_kphysm_setup_func_register(kphysm_setup_vector_t *vec,
 304                 void *arg);
 305 extern void rsm_kphysm_setup_func_unregister(kphysm_setup_vector_t *vec,
 306                 void *arg);
 307 #endif
 308 
 309 extern void dbg_printf(int category, int level, char *fmt, ...);
 310 extern void rsmka_pathmanager_init();
 311 extern void rsmka_pathmanager_cleanup();
 312 extern void rele_sendq_token(sendq_token_t *);
 313 extern rsm_addr_t get_remote_hwaddr(adapter_t *, rsm_node_id_t);
 314 extern rsm_node_id_t get_remote_nodeid(adapter_t *, rsm_addr_t);
 315 extern int rsmka_topology_ioctl(caddr_t, int, int);
 316 
 317 extern pri_t maxclsyspri;
 318 extern work_queue_t work_queue;
 319 extern kmutex_t ipc_info_lock;
 320 extern kmutex_t ipc_info_cvlock;
 321 extern kcondvar_t ipc_info_cv;
 322 extern kmutex_t path_hold_cvlock;
 323 extern kcondvar_t path_hold_cv;
 324 
 325 extern kmutex_t rsmka_buf_lock;
 326 
 327 extern path_t *rsm_find_path(char *, int, rsm_addr_t);
 328 extern adapter_t *rsmka_lookup_adapter(char *, int);
 329 extern sendq_token_t *rsmka_get_sendq_token(rsm_node_id_t, sendq_token_t *);
 330 extern boolean_t rsmka_do_path_active(path_t *, int);
 331 extern boolean_t rsmka_check_node_alive(rsm_node_id_t);
 332 extern void rsmka_release_adapter(adapter_t *);
 333 extern void rsmka_enqueue_msgbuf(path_t *path, void *data);
 334 extern void rsmka_dequeue_msgbuf(path_t *path);
 335 extern msgbuf_elem_t *rsmka_gethead_msgbuf(path_t *path);
 336 /* lint -w2 */
 337 
 338 static int rsm_open(dev_t *, int, int, cred_t *);
 339 static int rsm_close(dev_t, int, int, cred_t *);
 340 static int rsm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
 341     cred_t *credp, int *rvalp);
 342 static int rsm_devmap(dev_t, devmap_cookie_t, offset_t, size_t, size_t *,
 343     uint_t);
 344 static int rsm_segmap(dev_t, off_t, struct as *, caddr_t *, off_t, uint_t,
 345     uint_t, uint_t, cred_t *);
 346 static int rsm_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
 347     struct pollhead **phpp);
 348 
 349 static int rsm_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
 350 static int rsm_attach(dev_info_t *, ddi_attach_cmd_t);
 351 static int rsm_detach(dev_info_t *, ddi_detach_cmd_t);
 352 
 353 static int rsmipc_send(rsm_node_id_t, rsmipc_request_t *, rsmipc_reply_t *);
 354 static void rsm_force_unload(rsm_node_id_t, rsm_memseg_id_t, boolean_t);
 355 static void rsm_send_importer_disconnects(rsm_memseg_id_t, rsm_node_id_t);
 356 static void rsm_send_republish(rsm_memseg_id_t, rsmapi_access_entry_t *, int,
 357                                 rsm_permission_t);
 358 static void rsm_export_force_destroy(ddi_umem_cookie_t *);
 359 static void rsmacl_free(rsmapi_access_entry_t *, int);
 360 static void rsmpiacl_free(rsm_access_entry_t *, int);
 361 
 362 static int rsm_inc_pgcnt(pgcnt_t);
 363 static void rsm_dec_pgcnt(pgcnt_t);
 364 static void rsm_free_mapinfo(rsm_mapinfo_t *mapinfop);
 365 static rsm_mapinfo_t *rsm_get_mapinfo(rsmseg_t *, off_t, size_t, off_t *,
 366                                         size_t *);
 367 static void exporter_quiesce();
 368 static void rsmseg_suspend(rsmseg_t *, int *);
 369 static void rsmsegshare_suspend(rsmseg_t *);
 370 static int rsmseg_resume(rsmseg_t *, void **);
 371 static int rsmsegshare_resume(rsmseg_t *);
 372 
 373 static struct cb_ops rsm_cb_ops = {
 374         rsm_open,               /* open */
 375         rsm_close,              /* close */
 376         nodev,                  /* strategy */
 377         nodev,                  /* print */
 378         nodev,                  /* dump */
 379         nodev,                  /* read */
 380         nodev,                  /* write */
 381         rsm_ioctl,              /* ioctl */
 382         rsm_devmap,             /* devmap */
 383         NULL,                   /* mmap */
 384         rsm_segmap,             /* segmap */
 385         rsm_chpoll,             /* poll */
 386         ddi_prop_op,            /* cb_prop_op */
 387         0,                      /* streamtab  */
 388         D_NEW|D_MP|D_DEVMAP,    /* Driver compatibility flag */
 389         0,
 390         0,
 391         0
 392 };
 393 
 394 static struct dev_ops rsm_ops = {
 395         DEVO_REV,               /* devo_rev, */
 396         0,                      /* refcnt  */
 397         rsm_info,               /* get_dev_info */
 398         nulldev,                /* identify */
 399         nulldev,                /* probe */
 400         rsm_attach,             /* attach */
 401         rsm_detach,             /* detach */
 402         nodev,                  /* reset */
 403         &rsm_cb_ops,                /* driver operations */
 404         (struct bus_ops *)0,    /* bus operations */
 405         0,
 406         ddi_quiesce_not_needed,         /* quiesce */
 407 };
 408 
 409 /*
 410  * Module linkage information for the kernel.
 411  */
 412 
 413 static struct modldrv modldrv = {
 414         &mod_driverops, /* Type of module.  This one is a pseudo driver */
 415         "Remote Shared Memory Driver",
 416         &rsm_ops,   /* driver ops */
 417 };
 418 
 419 static struct modlinkage modlinkage = {
 420         MODREV_1,
 421         (void *)&modldrv,
 422         0,
 423         0,
 424         0
 425 };
 426 
 427 static void rsm_dr_callback_post_add(void *arg, pgcnt_t delta);
 428 static int rsm_dr_callback_pre_del(void *arg, pgcnt_t delta);
 429 static void rsm_dr_callback_post_del(void *arg, pgcnt_t delta, int cancelled);
 430 
 431 static kphysm_setup_vector_t rsm_dr_callback_vec = {
 432         KPHYSM_SETUP_VECTOR_VERSION,
 433         rsm_dr_callback_post_add,
 434         rsm_dr_callback_pre_del,
 435         rsm_dr_callback_post_del
 436 };
 437 
 438 /* This flag can be changed to 0 to help with PIT testing */
 439 int rsmka_modunloadok = 1;
 440 int no_reply_cnt = 0;
 441 
 442 uint64_t rsm_ctrlmsg_errcnt = 0;
 443 uint64_t rsm_ipcsend_errcnt = 0;
 444 
 445 #define MAX_NODES 64
 446 
 447 static struct rsm_driver_data rsm_drv_data;
 448 static struct rsmresource_table rsm_resource;
 449 
 450 static void rsmresource_insert(minor_t, rsmresource_t *, rsm_resource_type_t);
 451 static void rsmresource_destroy(void);
 452 static int rsmresource_alloc(minor_t *);
 453 static rsmresource_t *rsmresource_free(minor_t rnum);
 454 static int rsm_closeconnection(rsmseg_t *seg, void **cookie);
 455 static int rsm_unpublish(rsmseg_t *seg, int mode);
 456 static int rsm_unbind(rsmseg_t *seg);
 457 static uint_t rsmhash(rsm_memseg_id_t key);
 458 static void rsmhash_alloc(rsmhash_table_t *rhash, int size);
 459 static void rsmhash_free(rsmhash_table_t *rhash, int size);
 460 static void *rsmhash_getbkt(rsmhash_table_t *rhash, uint_t hashval);
 461 static void **rsmhash_bktaddr(rsmhash_table_t *rhash, uint_t hashval);
 462 static int rsm_send_notimporting(rsm_node_id_t dest, rsm_memseg_id_t segid,
 463                                         void *cookie);
 464 int rsm_disconnect(rsmseg_t *seg);
 465 void rsmseg_unload(rsmseg_t *);
 466 void rsm_suspend_complete(rsm_node_id_t src_node, int flag);
 467 
 468 rsm_intr_hand_ret_t rsm_srv_func(rsm_controller_object_t *chd,
 469     rsm_intr_q_op_t opcode, rsm_addr_t src,
 470     void *data, size_t size, rsm_intr_hand_arg_t arg);
 471 
 472 static void rsm_intr_callback(void *, rsm_addr_t, rsm_intr_hand_arg_t);
 473 
 474 rsm_node_id_t my_nodeid;
 475 
 476 /* cookie, va, offsets and length for the barrier */
 477 static rsm_gnum_t               *bar_va;
 478 static ddi_umem_cookie_t        bar_cookie;
 479 static off_t                    barrier_offset;
 480 static size_t                   barrier_size;
 481 static int                      max_segs;
 482 
 483 /* cookie for the trash memory */
 484 static ddi_umem_cookie_t        remap_cookie;
 485 
 486 static rsm_memseg_id_t  rsm_nextavail_segmentid;
 487 
 488 extern taskq_t *work_taskq;
 489 extern char *taskq_name;
 490 
 491 static dev_info_t *rsm_dip;     /* private copy of devinfo pointer */
 492 
 493 static rsmhash_table_t rsm_export_segs;         /* list of exported segs */
 494 rsmhash_table_t rsm_import_segs;                /* list of imported segs */
 495 static rsmhash_table_t rsm_event_queues;        /* list of event queues */
 496 
 497 static  rsm_ipc_t       rsm_ipc;                /* ipc info */
 498 
 499 /* list of nodes to which RSMIPC_MSG_SUSPEND has been sent */
 500 static list_head_t      rsm_suspend_list;
 501 
 502 /* list of descriptors for remote importers */
 503 static importers_table_t importer_list;
 504 
 505 kmutex_t rsm_suspend_cvlock;
 506 kcondvar_t rsm_suspend_cv;
 507 
 508 static kmutex_t rsm_lock;
 509 
 510 adapter_t loopback_adapter;
 511 rsm_controller_attr_t loopback_attr;
 512 
 513 int rsmipc_send_controlmsg(path_t *path, int msgtype);
 514 
 515 void rsmka_init_loopback();
 516 
 517 int rsmka_null_seg_create(
 518     rsm_controller_handle_t,
 519     rsm_memseg_export_handle_t *,
 520     size_t,
 521     uint_t,
 522     rsm_memory_local_t *,
 523     rsm_resource_callback_t,
 524     rsm_resource_callback_arg_t);
 525 
 526 int rsmka_null_seg_destroy(
 527     rsm_memseg_export_handle_t);
 528 
 529 int rsmka_null_bind(
 530     rsm_memseg_export_handle_t,
 531     off_t,
 532     rsm_memory_local_t *,
 533     rsm_resource_callback_t,
 534     rsm_resource_callback_arg_t);
 535 
 536 int rsmka_null_unbind(
 537     rsm_memseg_export_handle_t,
 538     off_t,
 539     size_t);
 540 
 541 int rsmka_null_rebind(
 542     rsm_memseg_export_handle_t,
 543     off_t,
 544     rsm_memory_local_t *,
 545     rsm_resource_callback_t,
 546     rsm_resource_callback_arg_t);
 547 
 548 int rsmka_null_publish(
 549     rsm_memseg_export_handle_t,
 550     rsm_access_entry_t [],
 551     uint_t,
 552     rsm_memseg_id_t,
 553     rsm_resource_callback_t,
 554     rsm_resource_callback_arg_t);
 555 
 556 
 557 int rsmka_null_republish(
 558     rsm_memseg_export_handle_t,
 559     rsm_access_entry_t [],
 560     uint_t,
 561     rsm_resource_callback_t,
 562     rsm_resource_callback_arg_t);
 563 
 564 int rsmka_null_unpublish(
 565     rsm_memseg_export_handle_t);
 566 
 567 rsm_ops_t null_rsmpi_ops;
 568 
 569 /*
 570  * data and locks to keep track of total amount of exported memory
 571  */
 572 static  pgcnt_t         rsm_pgcnt;
 573 static  pgcnt_t         rsm_pgcnt_max;  /* max allowed */
 574 static  kmutex_t        rsm_pgcnt_lock;
 575 
 576 static  int             rsm_enable_dr;
 577 
 578 static  char            loopback_str[] = "loopback";
 579 
 580 int             rsm_hash_size;
 581 
 582 /*
 583  * The locking model is as follows:
 584  *
 585  * Local operations:
  586  *              find resource - grab reader lock on resource list
 587  *              insert rc     - grab writer lock
 588  *              delete rc     - grab writer lock and resource mutex
 589  *              read/write    - no lock
 590  *
 591  * Remote invocations:
 592  *              find resource - grab read lock and resource mutex
 593  *
 594  * State:
 595  *              resource state - grab resource mutex
 596  */
 597 
 598 int
 599 _init(void)
 600 {
 601         int e;
 602 
 603         e = mod_install(&modlinkage);
 604         if (e != 0) {
 605                 return (e);
 606         }
 607 
 608         mutex_init(&rsm_lock, NULL, MUTEX_DRIVER, NULL);
 609 
 610         mutex_init(&rsmka_buf_lock, NULL, MUTEX_DEFAULT, NULL);
 611 
 612 
 613         rw_init(&rsm_resource.rsmrc_lock, NULL, RW_DRIVER, NULL);
 614 
 615         rsm_hash_size = RSM_HASHSZ;
 616 
 617         rw_init(&rsm_export_segs.rsmhash_rw, NULL, RW_DRIVER, NULL);
 618 
 619         rw_init(&rsm_import_segs.rsmhash_rw, NULL, RW_DRIVER, NULL);
 620 
 621         mutex_init(&importer_list.lock, NULL, MUTEX_DRIVER, NULL);
 622 
 623         mutex_init(&rsm_ipc.lock, NULL, MUTEX_DRIVER, NULL);
 624         cv_init(&rsm_ipc.cv, NULL, CV_DRIVER, 0);
 625 
 626         mutex_init(&rsm_suspend_cvlock, NULL, MUTEX_DRIVER, NULL);
 627         cv_init(&rsm_suspend_cv, NULL, CV_DRIVER, 0);
 628 
 629         mutex_init(&rsm_drv_data.drv_lock, NULL, MUTEX_DRIVER, NULL);
 630         cv_init(&rsm_drv_data.drv_cv, NULL, CV_DRIVER, 0);
 631 
 632         rsm_ipc.count = RSMIPC_SZ;
 633         rsm_ipc.wanted = 0;
 634         rsm_ipc.sequence = 0;
 635 
 636         (void) mutex_init(&rsm_pgcnt_lock, NULL, MUTEX_DRIVER, NULL);
 637 
 638         for (e = 0; e < RSMIPC_SZ; e++) {
 639                 rsmipc_slot_t *slot = &rsm_ipc.slots[e];
 640 
 641                 RSMIPC_SET(slot, RSMIPC_FREE);
 642                 mutex_init(&slot->rsmipc_lock, NULL, MUTEX_DRIVER, NULL);
 643                 cv_init(&slot->rsmipc_cv, NULL, CV_DRIVER, 0);
 644         }
 645 
 646         /*
 647          * Initialize the suspend message list
 648          */
 649         rsm_suspend_list.list_head = NULL;
 650         mutex_init(&rsm_suspend_list.list_lock, NULL, MUTEX_DRIVER, NULL);
 651 
 652         /*
 653          * It is assumed here that configuration data is available
 654          * during system boot since _init may be called at that time.
 655          */
 656 
 657         rsmka_pathmanager_init();
 658 
 659         DBG_PRINTF((RSM_KERNEL_AGENT, RSM_DEBUG_VERBOSE,
 660             "rsm: _init done\n"));
 661 
 662         return (DDI_SUCCESS);
 663 
 664 }
 665 
 666 int
 667 _info(struct modinfo *modinfop)
 668 {
 669 
 670         return (mod_info(&modlinkage, modinfop));
 671 }
 672 
 673 int
 674 _fini(void)
 675 {
 676         int e;
 677 
 678         DBG_PRINTF((RSM_KERNEL_AGENT, RSM_DEBUG_VERBOSE,
 679             "rsm: _fini enter\n"));
 680 
 681         /*
 682          * The rsmka_modunloadok flag is simply used to help with
 683          * the PIT testing. Make this flag 0 to disallow modunload.
 684          */
 685         if (rsmka_modunloadok == 0)
 686                 return (EBUSY);
 687 
 688         /* rsm_detach will be called as a result of mod_remove */
 689         e = mod_remove(&modlinkage);
 690         if (e) {
 691                 DBG_PRINTF((RSM_KERNEL_AGENT, RSM_ERR,
 692                     "Unable to fini RSM %x\n", e));
 693                 return (e);
 694         }
 695 
 696         rsmka_pathmanager_cleanup();
 697 
 698         rw_destroy(&rsm_resource.rsmrc_lock);
 699 
 700         rw_destroy(&rsm_export_segs.rsmhash_rw);
 701         rw_destroy(&rsm_import_segs.rsmhash_rw);
 702         rw_destroy(&rsm_event_queues.rsmhash_rw);
 703 
 704         mutex_destroy(&importer_list.lock);
 705 
 706         mutex_destroy(&rsm_ipc.lock);
 707         cv_destroy(&rsm_ipc.cv);
 708 
 709         (void) mutex_destroy(&rsm_suspend_list.list_lock);
 710 
 711         (void) mutex_destroy(&rsm_pgcnt_lock);
 712 
 713         DBG_PRINTF((RSM_KERNEL_AGENT, RSM_DEBUG_VERBOSE, "_fini done\n"));
 714 
 715         return (DDI_SUCCESS);
 716 
 717 }
 718 
 719 /*ARGSUSED1*/
 720 static int
 721 rsm_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
 722 {
 723         minor_t rnum;
 724         int     percent;
 725         int     ret;
 726         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_DDI);
 727 
 728         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_attach enter\n"));
 729 
 730         switch (cmd) {
 731         case DDI_ATTACH:
 732                 break;
 733         case DDI_RESUME:
 734         default:
 735                 DBG_PRINTF((category, RSM_ERR,
 736                     "rsm:rsm_attach - cmd not supported\n"));
 737                 return (DDI_FAILURE);
 738         }
 739 
 740         if (rsm_dip != NULL) {
 741                 DBG_PRINTF((category, RSM_ERR,
 742                     "rsm:rsm_attach - supports only "
 743                     "one instance\n"));
 744                 return (DDI_FAILURE);
 745         }
 746 
 747         rsm_enable_dr = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
 748             DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
 749             "enable-dynamic-reconfiguration", 1);
 750 
 751         mutex_enter(&rsm_drv_data.drv_lock);
 752         rsm_drv_data.drv_state = RSM_DRV_REG_PROCESSING;
 753         mutex_exit(&rsm_drv_data.drv_lock);
 754 
 755         if (rsm_enable_dr) {
 756 #ifdef  RSM_DRTEST
 757                 ret = rsm_kphysm_setup_func_register(&rsm_dr_callback_vec,
 758                     (void *)NULL);
 759 #else
 760                 ret = kphysm_setup_func_register(&rsm_dr_callback_vec,
 761                     (void *)NULL);
 762 #endif
 763                 if (ret != 0) {
 764                         mutex_exit(&rsm_drv_data.drv_lock);
 765                         cmn_err(CE_CONT, "rsm:rsm_attach - Dynamic "
 766                             "reconfiguration setup failed\n");
 767                         return (DDI_FAILURE);
 768                 }
 769         }
 770 
 771         mutex_enter(&rsm_drv_data.drv_lock);
 772         ASSERT(rsm_drv_data.drv_state == RSM_DRV_REG_PROCESSING);
 773         rsm_drv_data.drv_state = RSM_DRV_OK;
 774         cv_broadcast(&rsm_drv_data.drv_cv);
 775         mutex_exit(&rsm_drv_data.drv_lock);
 776 
 777         /*
 778          * page_list_read_lock();
 779          * xx_setup();
 780          * page_list_read_unlock();
 781          */
 782 
 783         rsm_hash_size = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
 784             DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
 785             "segment-hashtable-size", RSM_HASHSZ);
 786         if (rsm_hash_size == 0) {
 787                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
 788                     "rsm: segment-hashtable-size in rsm.conf "
 789                     "must be greater than 0, defaulting to 128\n"));
 790                 rsm_hash_size = RSM_HASHSZ;
 791         }
 792 
 793         DBG_PRINTF((category, RSM_DEBUG, "rsm_attach rsm_hash_size: %d\n",
 794             rsm_hash_size));
 795 
 796         rsm_pgcnt = 0;
 797 
 798         percent = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
 799             DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
 800             "max-exported-memory", 0);
 801         if (percent < 0) {
 802                 DBG_PRINTF((category, RSM_ERR,
 803                     "rsm:rsm_attach not enough memory available to "
 804                     "export, or max-exported-memory set incorrectly.\n"));
 805                 return (DDI_FAILURE);
 806         }
 807         /* 0 indicates no fixed upper limit. maxmem is the max  */
 808         /* available pageable physical mem                      */
 809         rsm_pgcnt_max = (percent*maxmem)/100;
 810 
 811         if (rsm_pgcnt_max > 0) {
 812                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
 813                     "rsm: Available physical memory = %lu pages, "
 814                     "Max exportable memory = %lu pages",
 815                     maxmem, rsm_pgcnt_max));
 816         }
 817 
 818         /*
 819          * Create minor number
 820          */
 821         if (rsmresource_alloc(&rnum) != RSM_SUCCESS) {
 822                 DBG_PRINTF((category, RSM_ERR,
 823                     "rsm: rsm_attach - Unable to get "
 824                     "minor number\n"));
 825                 return (DDI_FAILURE);
 826         }
 827 
 828         ASSERT(rnum == RSM_DRIVER_MINOR);
 829 
 830         if (ddi_create_minor_node(devi, DRIVER_NAME, S_IFCHR,
 831             rnum, DDI_PSEUDO, NULL) == DDI_FAILURE) {
 832                 DBG_PRINTF((category, RSM_ERR,
 833                     "rsm: rsm_attach - unable to allocate "
 834                     "minor #\n"));
 835                 return (DDI_FAILURE);
 836         }
 837 
 838         rsm_dip = devi;
 839         /*
 840          * Allocate the hashtables
 841          */
 842         rsmhash_alloc(&rsm_export_segs, rsm_hash_size);
 843         rsmhash_alloc(&rsm_import_segs, rsm_hash_size);
 844 
 845         importer_list.bucket = (importing_token_t **)
 846             kmem_zalloc(rsm_hash_size * sizeof (importing_token_t *), KM_SLEEP);
 847 
 848         /*
 849          * Allocate a resource struct
 850          */
 851         {
 852                 rsmresource_t *p;
 853 
 854                 p = (rsmresource_t *)kmem_zalloc(sizeof (*p), KM_SLEEP);
 855 
 856                 mutex_init(&p->rsmrc_lock, NULL, MUTEX_DRIVER, (void *) NULL);
 857 
 858                 rsmresource_insert(rnum, p, RSM_RESOURCE_BAR);
 859         }
 860 
 861         /*
 862          * Based on the rsm.conf property max-segments, determine the maximum
 863          * number of segments that can be exported/imported. This is then used
 864          * to determine the size for barrier failure pages.
 865          */
 866 
 867         /* First get the max number of segments from the rsm.conf file */
 868         max_segs = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
 869             DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
 870             "max-segments", 0);
 871         if (max_segs == 0) {
 872                 /* Use default number of segments */
 873                 max_segs = RSM_MAX_NUM_SEG;
 874         }
 875 
 876         /*
 877          * Based on the max number of segments allowed, determine the barrier
 878          * page size. add 1 to max_segs since the barrier page itself uses
 879          * a slot
 880          */
 881         barrier_size = roundup((max_segs + 1) * sizeof (rsm_gnum_t),
 882             PAGESIZE);
 883 
 884         /*
 885          * allocation of the barrier failure page
 886          */
 887         bar_va = (rsm_gnum_t *)ddi_umem_alloc(barrier_size,
 888             DDI_UMEM_SLEEP, &bar_cookie);
 889 
 890         /*
 891          * Set the barrier_offset
 892          */
 893         barrier_offset = 0;
 894 
 895         /*
 896          * Allocate a trash memory and get a cookie for it. This will be used
 897          * when remapping segments during force disconnects. Allocate the
 898          * trash memory with a large size which is page aligned.
 899          */
 900         (void) ddi_umem_alloc((size_t)TRASHSIZE,
 901             DDI_UMEM_TRASH, &remap_cookie);
 902 
 903         /* initialize user segment id allocation variable */
 904         rsm_nextavail_segmentid = (rsm_memseg_id_t)RSM_USER_APP_ID_BASE;
 905 
 906         /*
 907          * initialize the null_rsmpi_ops vector and the loopback adapter
 908          */
 909         rsmka_init_loopback();
 910 
 911 
 912         ddi_report_dev(devi);
 913 
 914         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_attach done\n"));
 915 
 916         return (DDI_SUCCESS);
 917 }
 918 
 919 /*
 920  * The call to mod_remove in the _fine routine will cause the system
 921  * to call rsm_detach
 922  */
 923 /*ARGSUSED*/
 924 static int
 925 rsm_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
 926 {
 927         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_DDI);
 928 
 929         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_detach enter\n"));
 930 
 931         switch (cmd) {
 932         case DDI_DETACH:
 933                 break;
 934         default:
 935                 DBG_PRINTF((category, RSM_ERR,
 936                     "rsm:rsm_detach - cmd %x not supported\n",
 937                     cmd));
 938                 return (DDI_FAILURE);
 939         }
 940 
 941         mutex_enter(&rsm_drv_data.drv_lock);
 942         while (rsm_drv_data.drv_state != RSM_DRV_OK)
 943                 cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
 944         rsm_drv_data.drv_state = RSM_DRV_UNREG_PROCESSING;
 945         mutex_exit(&rsm_drv_data.drv_lock);
 946 
 947         /*
 948          * Unregister the DR callback functions
 949          */
 950         if (rsm_enable_dr) {
 951 #ifdef  RSM_DRTEST
 952                 rsm_kphysm_setup_func_unregister(&rsm_dr_callback_vec,
 953                     (void *)NULL);
 954 #else
 955                 kphysm_setup_func_unregister(&rsm_dr_callback_vec,
 956                     (void *)NULL);
 957 #endif
 958         }
 959 
 960         mutex_enter(&rsm_drv_data.drv_lock);
 961         ASSERT(rsm_drv_data.drv_state == RSM_DRV_UNREG_PROCESSING);
 962         rsm_drv_data.drv_state = RSM_DRV_NEW;
 963         mutex_exit(&rsm_drv_data.drv_lock);
 964 
 965         ASSERT(rsm_suspend_list.list_head == NULL);
 966 
 967         /*
 968          * Release all resources, seglist, controller, ...
 969          */
 970 
 971         /* remove intersend queues */
 972         /* remove registered services */
 973 
 974 
 975         ddi_remove_minor_node(dip, DRIVER_NAME);
 976         rsm_dip = NULL;
 977 
 978         /*
 979          * Free minor zero resource
 980          */
 981         {
 982                 rsmresource_t *p;
 983 
 984                 p = rsmresource_free(RSM_DRIVER_MINOR);
 985                 if (p) {
 986                         mutex_destroy(&p->rsmrc_lock);
 987                         kmem_free((void *)p, sizeof (*p));
 988                 }
 989         }
 990 
 991         /*
 992          * Free resource table
 993          */
 994 
 995         rsmresource_destroy();
 996 
 997         /*
 998          * Free the hash tables
 999          */
1000         rsmhash_free(&rsm_export_segs, rsm_hash_size);
1001         rsmhash_free(&rsm_import_segs, rsm_hash_size);
1002 
1003         kmem_free((void *)importer_list.bucket,
1004             rsm_hash_size * sizeof (importing_token_t *));
1005         importer_list.bucket = NULL;
1006 
1007 
1008         /* free barrier page */
1009         if (bar_cookie != NULL) {
1010                 ddi_umem_free(bar_cookie);
1011         }
1012         bar_va = NULL;
1013         bar_cookie = NULL;
1014 
1015         /*
1016          * Free the memory allocated for the trash
1017          */
1018         if (remap_cookie != NULL) {
1019                 ddi_umem_free(remap_cookie);
1020         }
1021         remap_cookie = NULL;
1022 
1023         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_detach done\n"));
1024 
1025         return (DDI_SUCCESS);
1026 }
1027 
1028 /*ARGSUSED*/
1029 static int
1030 rsm_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
1031 {
1032         register int error;
1033         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_DDI);
1034 
1035         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_info enter\n"));
1036 
1037         switch (infocmd) {
1038         case DDI_INFO_DEVT2DEVINFO:
1039                 if (rsm_dip == NULL)
1040                         error = DDI_FAILURE;
1041                 else {
1042                         *result = (void *)rsm_dip;
1043                         error = DDI_SUCCESS;
1044                 }
1045                 break;
1046         case DDI_INFO_DEVT2INSTANCE:
1047                 *result = (void *)0;
1048                 error = DDI_SUCCESS;
1049                 break;
1050         default:
1051                 error = DDI_FAILURE;
1052         }
1053 
1054         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_info done\n"));
1055         return (error);
1056 }
1057 
1058 adapter_t *
1059 rsm_getadapter(rsm_ioctlmsg_t *msg, int mode)
1060 {
1061         adapter_t *adapter;
1062         char adapter_devname[MAXNAMELEN];
1063         int instance;
1064         DBG_DEFINE(category,
1065             RSM_KERNEL_AGENT | RSM_IMPORT | RSM_EXPORT | RSM_IOCTL);
1066 
1067         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_getadapter enter\n"));
1068 
1069         instance = msg->cnum;
1070 
1071         if ((msg->cname_len <= 0) || (msg->cname_len > MAXNAMELEN)) {
1072                 return (NULL);
1073         }
1074 
1075         if (ddi_copyin(msg->cname, adapter_devname, msg->cname_len, mode))
1076                 return (NULL);
1077 
1078         if (strcmp(adapter_devname, "loopback") == 0)
1079                 return (&loopback_adapter);
1080 
1081         adapter = rsmka_lookup_adapter(adapter_devname, instance);
1082 
1083         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_getadapter done\n"));
1084 
1085         return (adapter);
1086 }
1087 
1088 
1089 /*
1090  * *********************** Resource Number Management ********************
1091  * All resources are stored in a simple hash table. The table is an array
1092  * of pointers to resource blks. Each blk contains:
1093  *      base    - base number of this blk
1094  *      used    - number of used slots in this blk.
1095  *      blks    - array of pointers to resource items.
1096  * An entry in a resource blk is empty if it's NULL.
1097  *
1098  * We start with no resource array. Each time we run out of slots, we
1099  * reallocate a new larger array and copy the pointer to the new array and
1100  * a new resource blk is allocated and added to the hash table.
1101  *
1102  * The resource control block contains:
1103  *      root    - array of pointer of resource blks
1104  *      sz      - current size of array.
1105  *      len     - last valid entry in array.
1106  *
1107  * A search operation based on a resource number is as follows:
1108  *      index = rnum / RESOURCE_BLKSZ;
1109  *      ASSERT(index < resource_block.len);
1110  *      ASSERT(index < resource_block.sz);
1111  *      offset = rnum % RESOURCE_BLKSZ;
1112  *      ASSERT(offset >= resource_block.root[index]->base);
1113  *      ASSERT(offset < resource_block.root[index]->base + RESOURCE_BLKSZ);
1114  *      return resource_block.root[index]->blks[offset];
1115  *
 * A resource blk is freed when its used count reaches zero.
1117  */
1118 static int
1119 rsmresource_alloc(minor_t *rnum)
1120 {
1121 
1122         /* search for available resource slot */
1123         int i, j, empty = -1;
1124         rsmresource_blk_t *blk;
1125 
1126         DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1127             "rsmresource_alloc enter\n"));
1128 
1129         rw_enter(&rsm_resource.rsmrc_lock, RW_WRITER);
1130 
1131         /* Try to find an empty slot */
1132         for (i = 0; i < rsm_resource.rsmrc_len; i++) {
1133                 blk = rsm_resource.rsmrc_root[i];
1134                 if (blk != NULL && blk->rsmrcblk_avail > 0) {
1135                         /* found an empty slot in this blk */
1136                         for (j = 0; j < RSMRC_BLKSZ; j++) {
1137                                 if (blk->rsmrcblk_blks[j] == NULL) {
1138                                         *rnum = (minor_t)
1139                                             (j + (i * RSMRC_BLKSZ));
1140                                         /*
1141                                          * obey gen page limits
1142                                          */
1143                                         if (*rnum >= max_segs + 1) {
1144                                                 if (empty < 0) {
1145                                                         rw_exit(&rsm_resource.
1146                                                             rsmrc_lock);
1147                                                         DBG_PRINTF((
1148                                                             RSM_KERNEL_ALL,
1149                                                             RSM_ERR,
1150                                                             "rsmresource"
1151                                                             "_alloc failed:"
1152                                                             "not enough res"
1153                                                             "%d\n", *rnum));
1154                                         return (RSMERR_INSUFFICIENT_RESOURCES);
1155                                                 } else {
1156                                                         /* use empty slot */
1157                                                         break;
1158                                                 }
1159 
1160                                         }
1161 
1162                                         blk->rsmrcblk_blks[j] = RSMRC_RESERVED;
1163                                         blk->rsmrcblk_avail--;
1164                                         rw_exit(&rsm_resource.rsmrc_lock);
1165                                         DBG_PRINTF((RSM_KERNEL_ALL,
1166                                             RSM_DEBUG_VERBOSE,
1167                                             "rsmresource_alloc done\n"));
1168                                         return (RSM_SUCCESS);
1169                                 }
1170                         }
1171                 } else if (blk == NULL && empty < 0) {
1172                         /* remember first empty slot */
1173                         empty = i;
1174                 }
1175         }
1176 
1177         /* Couldn't find anything, allocate a new blk */
1178         /*
1179          * Do we need to reallocate the root array
1180          */
1181         if (empty < 0) {
1182                 if (rsm_resource.rsmrc_len == rsm_resource.rsmrc_sz) {
1183                         /*
1184                          * Allocate new array and copy current stuff into it
1185                          */
1186                         rsmresource_blk_t       **p;
1187                         uint_t newsz = (uint_t)rsm_resource.rsmrc_sz +
1188                             RSMRC_BLKSZ;
1189                         /*
1190                          * Don't allocate more that max valid rnum
1191                          */
1192                         if (rsm_resource.rsmrc_len*RSMRC_BLKSZ >=
1193                             max_segs + 1) {
1194                                 rw_exit(&rsm_resource.rsmrc_lock);
1195                                 return (RSMERR_INSUFFICIENT_RESOURCES);
1196                         }
1197 
1198                         p = (rsmresource_blk_t **)kmem_zalloc(
1199                             newsz * sizeof (*p),
1200                             KM_SLEEP);
1201 
1202                         if (rsm_resource.rsmrc_root) {
1203                                 uint_t oldsz;
1204 
1205                                 oldsz = (uint_t)(rsm_resource.rsmrc_sz *
1206                                     (int)sizeof (*p));
1207 
1208                                 /*
1209                                  * Copy old data into new space and
1210                                  * free old stuff
1211                                  */
1212                                 bcopy(rsm_resource.rsmrc_root, p, oldsz);
1213                                 kmem_free(rsm_resource.rsmrc_root, oldsz);
1214                         }
1215 
1216                         rsm_resource.rsmrc_root = p;
1217                         rsm_resource.rsmrc_sz = (int)newsz;
1218                 }
1219 
1220                 empty = rsm_resource.rsmrc_len;
1221                 rsm_resource.rsmrc_len++;
1222         }
1223 
1224         /*
1225          * Allocate a new blk
1226          */
1227         blk = (rsmresource_blk_t *)kmem_zalloc(sizeof (*blk), KM_SLEEP);
1228         ASSERT(rsm_resource.rsmrc_root[empty] == NULL);
1229         rsm_resource.rsmrc_root[empty] = blk;
1230         blk->rsmrcblk_avail = RSMRC_BLKSZ - 1;
1231 
1232         /*
1233          * Allocate slot
1234          */
1235 
1236         *rnum = (minor_t)(empty * RSMRC_BLKSZ);
1237 
1238         /*
1239          * watch out not to exceed bounds of barrier page
1240          */
1241         if (*rnum >= max_segs + 1) {
1242                 rw_exit(&rsm_resource.rsmrc_lock);
1243                 DBG_PRINTF((RSM_KERNEL_ALL, RSM_ERR,
1244                     "rsmresource_alloc failed %d\n", *rnum));
1245 
1246                 return (RSMERR_INSUFFICIENT_RESOURCES);
1247         }
1248         blk->rsmrcblk_blks[0] = RSMRC_RESERVED;
1249 
1250 
1251         rw_exit(&rsm_resource.rsmrc_lock);
1252 
1253         DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1254             "rsmresource_alloc done\n"));
1255 
1256         return (RSM_SUCCESS);
1257 }
1258 
1259 static rsmresource_t *
1260 rsmresource_free(minor_t rnum)
1261 {
1262 
1263         /* search for available resource slot */
1264         int i, j;
1265         rsmresource_blk_t *blk;
1266         rsmresource_t *p;
1267 
1268         DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1269             "rsmresource_free enter\n"));
1270 
1271         i = (int)(rnum / RSMRC_BLKSZ);
1272         j = (int)(rnum % RSMRC_BLKSZ);
1273 
1274         if (i >= rsm_resource.rsmrc_len) {
1275                 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1276                     "rsmresource_free done\n"));
1277                 return (NULL);
1278         }
1279 
1280         rw_enter(&rsm_resource.rsmrc_lock, RW_WRITER);
1281 
1282         ASSERT(rsm_resource.rsmrc_root);
1283         ASSERT(i < rsm_resource.rsmrc_len);
1284         ASSERT(i < rsm_resource.rsmrc_sz);
1285         blk = rsm_resource.rsmrc_root[i];
1286         if (blk == NULL) {
1287                 rw_exit(&rsm_resource.rsmrc_lock);
1288                 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1289                     "rsmresource_free done\n"));
1290                 return (NULL);
1291         }
1292 
1293         ASSERT(blk->rsmrcblk_blks[j]); /* reserved or full */
1294 
1295         p = blk->rsmrcblk_blks[j];
1296         if (p == RSMRC_RESERVED) {
1297                 p = NULL;
1298         }
1299 
1300         blk->rsmrcblk_blks[j] = NULL;
1301         blk->rsmrcblk_avail++;
1302         if (blk->rsmrcblk_avail == RSMRC_BLKSZ) {
1303                 /* free this blk */
1304                 kmem_free(blk, sizeof (*blk));
1305                 rsm_resource.rsmrc_root[i] = NULL;
1306         }
1307 
1308         rw_exit(&rsm_resource.rsmrc_lock);
1309 
1310         DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1311             "rsmresource_free done\n"));
1312 
1313         return (p);
1314 }
1315 
1316 static rsmresource_t *
1317 rsmresource_lookup(minor_t rnum, int lock)
1318 {
1319         int i, j;
1320         rsmresource_blk_t *blk;
1321         rsmresource_t *p;
1322 
1323         DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1324             "rsmresource_lookup enter\n"));
1325 
1326         /* Find resource and lock it in READER mode */
1327         /* search for available resource slot */
1328 
1329         i = (int)(rnum / RSMRC_BLKSZ);
1330         j = (int)(rnum % RSMRC_BLKSZ);
1331 
1332         if (i >= rsm_resource.rsmrc_len) {
1333                 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1334                     "rsmresource_lookup done\n"));
1335                 return (NULL);
1336         }
1337 
1338         rw_enter(&rsm_resource.rsmrc_lock, RW_READER);
1339 
1340         blk = rsm_resource.rsmrc_root[i];
1341         if (blk != NULL) {
1342                 ASSERT(i < rsm_resource.rsmrc_len);
1343                 ASSERT(i < rsm_resource.rsmrc_sz);
1344 
1345                 p = blk->rsmrcblk_blks[j];
1346                 if (lock == RSM_LOCK) {
1347                         if (p != RSMRC_RESERVED) {
1348                                 mutex_enter(&p->rsmrc_lock);
1349                         } else {
1350                                 p = NULL;
1351                         }
1352                 }
1353         } else {
1354                 p = NULL;
1355         }
1356         rw_exit(&rsm_resource.rsmrc_lock);
1357 
1358         DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1359             "rsmresource_lookup done\n"));
1360 
1361         return (p);
1362 }
1363 
1364 static void
1365 rsmresource_insert(minor_t rnum, rsmresource_t *p, rsm_resource_type_t type)
1366 {
1367         /* Find resource and lock it in READER mode */
1368         /* Caller can upgrade if need be */
1369         /* search for available resource slot */
1370         int i, j;
1371         rsmresource_blk_t *blk;
1372 
1373         DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1374             "rsmresource_insert enter\n"));
1375 
1376         i = (int)(rnum / RSMRC_BLKSZ);
1377         j = (int)(rnum % RSMRC_BLKSZ);
1378 
1379         p->rsmrc_type = type;
1380         p->rsmrc_num = rnum;
1381 
1382         rw_enter(&rsm_resource.rsmrc_lock, RW_READER);
1383 
1384         ASSERT(rsm_resource.rsmrc_root);
1385         ASSERT(i < rsm_resource.rsmrc_len);
1386         ASSERT(i < rsm_resource.rsmrc_sz);
1387 
1388         blk = rsm_resource.rsmrc_root[i];
1389         ASSERT(blk);
1390 
1391         ASSERT(blk->rsmrcblk_blks[j] == RSMRC_RESERVED);
1392 
1393         blk->rsmrcblk_blks[j] = p;
1394 
1395         DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1396             "rsmresource_insert done\n"));
1397 
1398         rw_exit(&rsm_resource.rsmrc_lock);
1399 }
1400 
1401 static void
1402 rsmresource_destroy()
1403 {
1404         int i, j;
1405 
1406         DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1407             "rsmresource_destroy enter\n"));
1408 
1409         rw_enter(&rsm_resource.rsmrc_lock, RW_WRITER);
1410 
1411         for (i = 0; i < rsm_resource.rsmrc_len; i++) {
1412                 rsmresource_blk_t       *blk;
1413 
1414                 blk = rsm_resource.rsmrc_root[i];
1415                 if (blk == NULL) {
1416                         continue;
1417                 }
1418                 for (j = 0; j < RSMRC_BLKSZ; j++) {
1419                         if (blk->rsmrcblk_blks[j] != NULL) {
1420                                 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1421                                     "Not null slot %d, %lx\n", j,
1422                                     (size_t)blk->rsmrcblk_blks[j]));
1423                         }
1424                 }
1425                 kmem_free(blk, sizeof (*blk));
1426                 rsm_resource.rsmrc_root[i] = NULL;
1427         }
1428         if (rsm_resource.rsmrc_root) {
1429                 i = rsm_resource.rsmrc_sz * (int)sizeof (rsmresource_blk_t *);
1430                 kmem_free(rsm_resource.rsmrc_root, (uint_t)i);
1431                 rsm_resource.rsmrc_root = NULL;
1432                 rsm_resource.rsmrc_len = 0;
1433                 rsm_resource.rsmrc_sz = 0;
1434         }
1435 
1436         DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1437             "rsmresource_destroy done\n"));
1438 
1439         rw_exit(&rsm_resource.rsmrc_lock);
1440 }
1441 
1442 
1443 /* ******************** Generic Key Hash Table Management ********* */
1444 static rsmresource_t *
1445 rsmhash_lookup(rsmhash_table_t *rhash, rsm_memseg_id_t key,
1446     rsm_resource_state_t state)
1447 {
1448         rsmresource_t   *p;
1449         uint_t          hashval;
1450         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1451 
1452         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_lookup enter\n"));
1453 
1454         hashval = rsmhash(key);
1455 
1456         DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmhash_lookup %u=%d\n",
1457             key, hashval));
1458 
1459         rw_enter(&rhash->rsmhash_rw, RW_READER);
1460 
1461         p = (rsmresource_t *)rsmhash_getbkt(rhash, hashval);
1462 
1463         for (; p; p = p->rsmrc_next) {
1464                 if (p->rsmrc_key == key) {
1465                         /* acquire resource lock */
1466                         RSMRC_LOCK(p);
1467                         break;
1468                 }
1469         }
1470 
1471         rw_exit(&rhash->rsmhash_rw);
1472 
1473         if (p != NULL && p->rsmrc_state != state) {
1474                 /* state changed, release lock and return null */
1475                 RSMRC_UNLOCK(p);
1476                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
1477                     "rsmhash_lookup done: state changed\n"));
1478                 return (NULL);
1479         }
1480 
1481         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_lookup done\n"));
1482 
1483         return (p);
1484 }
1485 
1486 static void
1487 rsmhash_rm(rsmhash_table_t *rhash, rsmresource_t *rcelm)
1488 {
1489         rsmresource_t           *p, **back;
1490         uint_t                  hashval;
1491         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1492 
1493         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_rm enter\n"));
1494 
1495         hashval = rsmhash(rcelm->rsmrc_key);
1496 
1497         DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmhash_rm %u=%d\n",
1498             rcelm->rsmrc_key, hashval));
1499 
1500         /*
1501          * It's ok not to find the segment.
1502          */
1503         rw_enter(&rhash->rsmhash_rw, RW_WRITER);
1504 
1505         back = (rsmresource_t **)rsmhash_bktaddr(rhash, hashval);
1506 
1507         for (; (p = *back) != NULL;  back = &p->rsmrc_next) {
1508                 if (p == rcelm) {
1509                         *back = rcelm->rsmrc_next;
1510                         break;
1511                 }
1512         }
1513 
1514         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_rm done\n"));
1515 
1516         rw_exit(&rhash->rsmhash_rw);
1517 }
1518 
1519 static int
1520 rsmhash_add(rsmhash_table_t *rhash, rsmresource_t *new, rsm_memseg_id_t key,
1521     int dup_check, rsm_resource_state_t state)
1522 {
1523         rsmresource_t   *p = NULL, **bktp;
1524         uint_t          hashval;
1525         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1526 
1527         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_add enter\n"));
1528 
1529         /* lock table */
1530         rw_enter(&rhash->rsmhash_rw, RW_WRITER);
1531 
1532         /*
1533          * If the current resource state is other than the state passed in
1534          * then the resource is (probably) already on the list. eg. for an
1535          * import segment if the state is not RSM_STATE_NEW then it's on the
1536          * list already.
1537          */
1538         RSMRC_LOCK(new);
1539         if (new->rsmrc_state != state) {
1540                 RSMRC_UNLOCK(new);
1541                 rw_exit(&rhash->rsmhash_rw);
1542                 return (RSMERR_BAD_SEG_HNDL);
1543         }
1544 
1545         hashval = rsmhash(key);
1546         DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmhash_add %d\n", hashval));
1547 
1548         if (dup_check) {
1549                 /*
1550                  * Used for checking export segments; don't want to have
1551                  * the same key used for multiple segments.
1552                  */
1553 
1554                 p = (rsmresource_t *)rsmhash_getbkt(rhash, hashval);
1555 
1556                 for (; p; p = p->rsmrc_next) {
1557                         if (p->rsmrc_key == key) {
1558                                 RSMRC_UNLOCK(new);
1559                                 break;
1560                         }
1561                 }
1562         }
1563 
1564         if (p == NULL) {
1565                 /* Key doesn't exist, add it */
1566 
1567                 bktp = (rsmresource_t **)rsmhash_bktaddr(rhash, hashval);
1568 
1569                 new->rsmrc_key = key;
1570                 new->rsmrc_next = *bktp;
1571                 *bktp = new;
1572         }
1573 
1574         rw_exit(&rhash->rsmhash_rw);
1575 
1576         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_add done\n"));
1577 
1578         return (p == NULL ? RSM_SUCCESS : RSMERR_SEGID_IN_USE);
1579 }
1580 
1581 /*
1582  * XOR each byte of the key.
1583  */
1584 static uint_t
1585 rsmhash(rsm_memseg_id_t key)
1586 {
1587         uint_t  hash = key;
1588 
1589         hash ^=  (key >> 8);
1590         hash ^=  (key >> 16);
1591         hash ^=  (key >> 24);
1592 
1593         return (hash % rsm_hash_size);
1594 
1595 }
1596 
1597 /*
1598  * generic function to get a specific bucket
1599  */
1600 static void *
1601 rsmhash_getbkt(rsmhash_table_t *rhash, uint_t hashval)
1602 {
1603 
1604         if (rhash->bucket == NULL)
1605                 return (NULL);
1606         else
1607                 return ((void *)rhash->bucket[hashval]);
1608 }
1609 
1610 /*
1611  * generic function to get a specific bucket's address
1612  */
1613 static void **
1614 rsmhash_bktaddr(rsmhash_table_t *rhash, uint_t hashval)
1615 {
1616         if (rhash->bucket == NULL)
1617                 return (NULL);
1618         else
1619                 return ((void **)&(rhash->bucket[hashval]));
1620 }
1621 
1622 /*
1623  * generic function to alloc a hash table
1624  */
1625 static void
1626 rsmhash_alloc(rsmhash_table_t *rhash, int size)
1627 {
1628         rhash->bucket = (rsmresource_t **)
1629             kmem_zalloc(size * sizeof (rsmresource_t *), KM_SLEEP);
1630 }
1631 
1632 /*
1633  * generic function to free a hash table
1634  */
1635 static void
1636 rsmhash_free(rsmhash_table_t *rhash, int size)
1637 {
1638 
1639         kmem_free((void *)rhash->bucket, size * sizeof (caddr_t));
1640         rhash->bucket = NULL;
1641 
1642 }
1643 /* *********************** Exported Segment Key Management ************ */
1644 
/*
 * Export segments live in the rsm_export_segs hash table.
 *
 * rsmexport_add    - add a segment in state RSM_STATE_BIND under `key`,
 *                    with the duplicate key check enabled.
 * rsmexport_rm     - remove a segment from the table.
 * rsmexport_lookup - look up `key` among segments in RSM_STATE_EXPORT.
 *
 * Arguments and the cast expansion are parenthesized so the macros expand
 * correctly for any argument expression and inside larger expressions.
 */
#define rsmexport_add(new, key)         \
        rsmhash_add(&rsm_export_segs, (rsmresource_t *)(new), (key), 1, \
            RSM_STATE_BIND)

#define rsmexport_rm(arg)       \
        rsmhash_rm(&rsm_export_segs, (rsmresource_t *)(arg))

#define rsmexport_lookup(key)   \
        ((rsmseg_t *)rsmhash_lookup(&rsm_export_segs, (key), \
            RSM_STATE_EXPORT))
1654 
1655 /* ************************** Import Segment List Management ********** */
1656 
1657 /*
1658  *  Add segment to import list. This will be useful for paging and loopback
1659  * segment unloading.
1660  */
1661 #define rsmimport_add(arg, key) \
1662         rsmhash_add(&rsm_import_segs, (rsmresource_t *)(arg), (key), 0, \
1663             RSM_STATE_NEW)
1664 
1665 #define rsmimport_rm(arg)       \
1666         rsmhash_rm(&rsm_import_segs, (rsmresource_t *)(arg))
1667 
1668 /*
1669  *      #define rsmimport_lookup(key)   \
1670  *      (rsmseg_t *)rsmhash_lookup(&rsm_import_segs, (key), RSM_STATE_CONNECT)
1671  */
1672 
1673 /*
1674  * increase the ref count and make the import segment point to the
1675  * shared data structure. Return a pointer to the share data struct
1676  * and the shared data struct is locked upon return
1677  */
1678 static rsm_import_share_t *
1679 rsmshare_get(rsm_memseg_id_t key, rsm_node_id_t node, adapter_t *adapter,
1680     rsmseg_t *segp)
1681 {
1682         uint_t          hash;
1683         rsmresource_t           *p;
1684         rsm_import_share_t      *shdatap;
1685         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1686 
1687         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmshare_get enter\n"));
1688 
1689         hash = rsmhash(key);
1690         /* lock table */
1691         rw_enter(&rsm_import_segs.rsmhash_rw, RW_WRITER);
1692         DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmshare_get:key=%u, hash=%d\n",
1693             key, hash));
1694 
1695         p = (rsmresource_t *)rsmhash_getbkt(&rsm_import_segs, hash);
1696 
1697         for (; p; p = p->rsmrc_next) {
1698                 /*
1699                  * Look for an entry that is importing the same exporter
1700                  * with the share data structure allocated.
1701                  */
1702                 if ((p->rsmrc_key == key) &&
1703                     (p->rsmrc_node == node) &&
1704                     (p->rsmrc_adapter == adapter) &&
1705                     (((rsmseg_t *)p)->s_share != NULL)) {
1706                         shdatap = ((rsmseg_t *)p)->s_share;
1707                         break;
1708                 }
1709         }
1710 
1711         if (p == NULL) {
1712                 /* we are the first importer, create the shared data struct */
1713                 shdatap = kmem_zalloc(sizeof (rsm_import_share_t), KM_SLEEP);
1714                 shdatap->rsmsi_state = RSMSI_STATE_NEW;
1715                 shdatap->rsmsi_segid = key;
1716                 shdatap->rsmsi_node = node;
1717                 mutex_init(&shdatap->rsmsi_lock, NULL, MUTEX_DRIVER, NULL);
1718                 cv_init(&shdatap->rsmsi_cv, NULL, CV_DRIVER, 0);
1719         }
1720 
1721         rsmseglock_acquire(segp);
1722 
1723         /* we grab the shared lock before returning from this function */
1724         mutex_enter(&shdatap->rsmsi_lock);
1725 
1726         shdatap->rsmsi_refcnt++;
1727         segp->s_share = shdatap;
1728 
1729         rsmseglock_release(segp);
1730 
1731         rw_exit(&rsm_import_segs.rsmhash_rw);
1732 
1733         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmshare_get done\n"));
1734 
1735         return (shdatap);
1736 }
1737 
1738 /*
1739  * the shared data structure should be locked before calling
1740  * rsmsharecv_signal().
1741  * Change the state and signal any waiting segments.
1742  */
1743 void
1744 rsmsharecv_signal(rsmseg_t *seg, int oldstate, int newstate)
1745 {
1746         ASSERT(rsmsharelock_held(seg));
1747 
1748         if (seg->s_share->rsmsi_state == oldstate) {
1749                 seg->s_share->rsmsi_state = newstate;
1750                 cv_broadcast(&seg->s_share->rsmsi_cv);
1751         }
1752 }
1753 
1754 /*
1755  * Add to the hash table
1756  */
1757 static void
1758 importer_list_add(rsm_node_id_t node, rsm_memseg_id_t key, rsm_addr_t hwaddr,
1759     void *cookie)
1760 {
1761 
1762         importing_token_t       *head;
1763         importing_token_t       *new_token;
1764         int                     index;
1765 
1766         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1767 
1768         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_add enter\n"));
1769 
1770         new_token = kmem_zalloc(sizeof (importing_token_t), KM_SLEEP);
1771         new_token->importing_node = node;
1772         new_token->key = key;
1773         new_token->import_segment_cookie = cookie;
1774         new_token->importing_adapter_hwaddr = hwaddr;
1775 
1776         index = rsmhash(key);
1777 
1778         mutex_enter(&importer_list.lock);
1779 
1780         head = importer_list.bucket[index];
1781         importer_list.bucket[index] = new_token;
1782         new_token->next = head;
1783         mutex_exit(&importer_list.lock);
1784 
1785         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_add done\n"));
1786 }
1787 
1788 static void
1789 importer_list_rm(rsm_node_id_t node,  rsm_memseg_id_t key, void *cookie)
1790 {
1791 
1792         importing_token_t       *prev, *token = NULL;
1793         int                     index;
1794         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1795 
1796         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_rm enter\n"));
1797 
1798         index = rsmhash(key);
1799 
1800         mutex_enter(&importer_list.lock);
1801 
1802         token = importer_list.bucket[index];
1803 
1804         prev = token;
1805         while (token != NULL) {
1806                 if (token->importing_node == node &&
1807                     token->import_segment_cookie == cookie) {
1808                         if (prev == token)
1809                                 importer_list.bucket[index] = token->next;
1810                         else
1811                                 prev->next = token->next;
1812                         kmem_free((void *)token, sizeof (*token));
1813                         break;
1814                 } else {
1815                         prev = token;
1816                         token = token->next;
1817                 }
1818         }
1819 
1820         mutex_exit(&importer_list.lock);
1821 
1822         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_rm done\n"));
1823 
1824 
1825 }
1826 
1827 /* **************************Segment Structure Management ************* */
1828 
1829 /*
1830  * Free segment structure
1831  */
1832 static void
1833 rsmseg_free(rsmseg_t *seg)
1834 {
1835 
1836         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1837 
1838         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_free enter\n"));
1839 
1840         /* need to take seglock here to avoid race with rsmmap_unmap() */
1841         rsmseglock_acquire(seg);
1842         if (seg->s_ckl != NULL) {
1843                 /* Segment is still busy */
1844                 seg->s_state = RSM_STATE_END;
1845                 rsmseglock_release(seg);
1846                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
1847                     "rsmseg_free done\n"));
1848                 return;
1849         }
1850 
1851         rsmseglock_release(seg);
1852 
1853         ASSERT(seg->s_state == RSM_STATE_END || seg->s_state == RSM_STATE_NEW);
1854 
1855         /*
1856          * If it's an importer decrement the refcount
1857          * and if its down to zero free the shared data structure.
1858          * This is where failures during rsm_connect() are unrefcounted
1859          */
1860         if (seg->s_share != NULL) {
1861 
1862                 ASSERT(seg->s_type == RSM_RESOURCE_IMPORT_SEGMENT);
1863 
1864                 rsmsharelock_acquire(seg);
1865 
1866                 ASSERT(seg->s_share->rsmsi_refcnt > 0);
1867 
1868                 seg->s_share->rsmsi_refcnt--;
1869 
1870                 if (seg->s_share->rsmsi_refcnt == 0) {
1871                         rsmsharelock_release(seg);
1872                         mutex_destroy(&seg->s_share->rsmsi_lock);
1873                         cv_destroy(&seg->s_share->rsmsi_cv);
1874                         kmem_free((void *)(seg->s_share),
1875                             sizeof (rsm_import_share_t));
1876                 } else {
1877                         rsmsharelock_release(seg);
1878                 }
1879                 /*
1880                  * The following needs to be done after any
1881                  * rsmsharelock calls which use seg->s_share.
1882                  */
1883                 seg->s_share = NULL;
1884         }
1885 
1886         cv_destroy(&seg->s_cv);
1887         mutex_destroy(&seg->s_lock);
1888         rsmacl_free(seg->s_acl, seg->s_acl_len);
1889         rsmpiacl_free(seg->s_acl_in, seg->s_acl_len);
1890         if (seg->s_adapter)
1891                 rsmka_release_adapter(seg->s_adapter);
1892 
1893         kmem_free((void *)seg, sizeof (*seg));
1894 
1895         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_free done\n"));
1896 
1897 }
1898 
1899 
1900 static rsmseg_t *
1901 rsmseg_alloc(minor_t num, struct cred *cred)
1902 {
1903         rsmseg_t        *new;
1904         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1905 
1906         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_alloc enter\n"));
1907         /*
1908          * allocate memory for new segment. This should be a segkmem cache.
1909          */
1910         new = (rsmseg_t *)kmem_zalloc(sizeof (*new), KM_SLEEP);
1911 
1912         new->s_state = RSM_STATE_NEW;
1913         new->s_minor = num;
1914         new->s_acl_len       = 0;
1915         new->s_cookie = NULL;
1916         new->s_adapter = NULL;
1917 
1918         new->s_mode = 0777 & ~PTOU((ttoproc(curthread)))->u_cmask;
1919         /* we don't have a key yet, will set at export/connect */
1920         new->s_uid  = crgetuid(cred);
1921         new->s_gid  = crgetgid(cred);
1922 
1923         mutex_init(&new->s_lock, NULL, MUTEX_DRIVER, (void *)NULL);
1924         cv_init(&new->s_cv, NULL, CV_DRIVER, 0);
1925 
1926         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_alloc done\n"));
1927 
1928         return (new);
1929 }
1930 
1931 /* ******************************** Driver Open/Close/Poll *************** */
1932 
1933 /*ARGSUSED1*/
1934 static int
1935 rsm_open(dev_t *devp, int flag, int otyp, struct cred *cred)
1936 {
1937         minor_t rnum;
1938         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL| RSM_DDI);
1939 
1940         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_open enter\n"));
1941         /*
1942          * Char only
1943          */
1944         if (otyp != OTYP_CHR) {
1945                 DBG_PRINTF((category, RSM_ERR, "rsm_open: bad otyp\n"));
1946                 return (EINVAL);
1947         }
1948 
1949         /*
1950          * Only zero can be opened, clones are used for resources.
1951          */
1952         if (getminor(*devp) != RSM_DRIVER_MINOR) {
1953                 DBG_PRINTF((category, RSM_ERR,
1954                     "rsm_open: bad minor %d\n", getminor(*devp)));
1955                 return (ENODEV);
1956         }
1957 
1958         if ((flag & FEXCL) != 0 && secpolicy_excl_open(cred) != 0) {
1959                 DBG_PRINTF((category, RSM_ERR, "rsm_open: bad perm\n"));
1960                 return (EPERM);
1961         }
1962 
1963         if (!(flag & FWRITE)) {
1964                 /*
1965                  * The library function _rsm_librsm_init calls open for
1966                  * /dev/rsm with flag set to O_RDONLY.  We want a valid
1967                  * file descriptor to be returned for minor device zero.
1968                  */
1969 
1970                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
1971                     "rsm_open RDONLY done\n"));
1972                 return (DDI_SUCCESS);
1973         }
1974 
1975         /*
1976          * - allocate new minor number and segment.
1977          * - add segment to list of all segments.
1978          * - set minordev data to segment
1979          * - update devp argument to new device
1980          * - update s_cred to cred; make sure you do crhold(cred);
1981          */
1982 
1983         /* allocate a new resource number */
1984         if (rsmresource_alloc(&rnum) == RSM_SUCCESS) {
1985                 /*
1986                  * We will bind this minor to a specific resource in first
1987                  * ioctl
1988                  */
1989                 *devp = makedevice(getmajor(*devp), rnum);
1990         } else {
1991                 return (EAGAIN);
1992         }
1993 
1994         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_open done\n"));
1995         return (DDI_SUCCESS);
1996 }
1997 
1998 static void
1999 rsmseg_close(rsmseg_t *seg, int force_flag)
2000 {
2001         int e = RSM_SUCCESS;
2002 
2003         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL| RSM_DDI);
2004 
2005         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_close enter\n"));
2006 
2007         rsmseglock_acquire(seg);
2008         if (!force_flag && (seg->s_hdr.rsmrc_type ==
2009             RSM_RESOURCE_EXPORT_SEGMENT)) {
2010                 /*
2011                  * If we are processing rsm_close wait for force_destroy
2012                  * processing to complete since force_destroy processing
2013                  * needs to finish first before we can free the segment.
2014                  * force_destroy is only for export segments
2015                  */
2016                 while (seg->s_flags & RSM_FORCE_DESTROY_WAIT) {
2017                         cv_wait(&seg->s_cv, &seg->s_lock);
2018                 }
2019         }
2020         rsmseglock_release(seg);
2021 
2022         /* It's ok to read the state without a lock */
2023         switch (seg->s_state) {
2024         case RSM_STATE_EXPORT:
2025         case RSM_STATE_EXPORT_QUIESCING:
2026         case RSM_STATE_EXPORT_QUIESCED:
2027                 e = rsm_unpublish(seg, 1);
2028                 /* FALLTHRU */
2029         case RSM_STATE_BIND_QUIESCED:
2030                 /* FALLTHRU */
2031         case RSM_STATE_BIND:
2032                 e = rsm_unbind(seg);
2033                 if (e != RSM_SUCCESS && force_flag == 1)
2034                         return;
2035                 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_EXPORT_SEGMENT);
2036                 /* FALLTHRU */
2037         case RSM_STATE_NEW_QUIESCED:
2038                 rsmseglock_acquire(seg);
2039                 seg->s_state = RSM_STATE_NEW;
2040                 cv_broadcast(&seg->s_cv);
2041                 rsmseglock_release(seg);
2042                 break;
2043         case RSM_STATE_NEW:
2044                 break;
2045         case RSM_STATE_ZOMBIE:
2046                 /*
2047                  * Segments in this state have been removed off the
2048                  * exported segments list and have been unpublished
2049                  * and unbind. These segments have been removed during
2050                  * a callback to the rsm_export_force_destroy, which
2051                  * is called for the purpose of unlocking these
2052                  * exported memory segments when a process exits but
2053                  * leaves the segments locked down since rsm_close is
2054                  * is not called for the segments. This can happen
2055                  * when a process calls fork or exec and then exits.
2056                  * Once the segments are in the ZOMBIE state, all that
2057                  * remains is to destroy them when rsm_close is called.
2058                  * This is done here. Thus, for such segments the
2059                  * the state is changed to new so that later in this
2060                  * function rsmseg_free is called.
2061                  */
2062                 rsmseglock_acquire(seg);
2063                 seg->s_state = RSM_STATE_NEW;
2064                 rsmseglock_release(seg);
2065                 break;
2066         case RSM_STATE_MAP_QUIESCE:
2067         case RSM_STATE_ACTIVE:
2068                 /* Disconnect will handle the unmap */
2069         case RSM_STATE_CONN_QUIESCE:
2070         case RSM_STATE_CONNECT:
2071         case RSM_STATE_DISCONNECT:
2072                 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
2073                 (void) rsm_disconnect(seg);
2074                 break;
2075         case RSM_STATE_MAPPING:
2076                 /*FALLTHRU*/
2077         case RSM_STATE_END:
2078                 DBG_PRINTF((category, RSM_ERR,
2079                     "Invalid segment state %d in rsm_close\n", seg->s_state));
2080                 break;
2081         default:
2082                 DBG_PRINTF((category, RSM_ERR,
2083                     "Invalid segment state %d in rsm_close\n", seg->s_state));
2084                 break;
2085         }
2086 
2087         /*
2088          * check state.
2089          * - make sure you do crfree(s_cred);
2090          * release segment and minor number
2091          */
2092         ASSERT(seg->s_state == RSM_STATE_NEW);
2093 
2094         /*
2095          * The export_force_destroy callback is created to unlock
2096          * the exported segments of a process
2097          * when the process does a fork or exec and then exits calls this
2098          * function with the force flag set to 1 which indicates that the
2099          * segment state must be converted to ZOMBIE. This state means that the
2100          * segments still exist and have been unlocked and most importantly the
2101          * only operation allowed is to destroy them on an rsm_close.
2102          */
2103         if (force_flag) {
2104                 rsmseglock_acquire(seg);
2105                 seg->s_state = RSM_STATE_ZOMBIE;
2106                 rsmseglock_release(seg);
2107         } else {
2108                 rsmseg_free(seg);
2109         }
2110 
2111         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_close done\n"));
2112 }
2113 
2114 static int
2115 rsm_close(dev_t dev, int flag, int otyp, cred_t *cred)
2116 {
2117         minor_t rnum = getminor(dev);
2118         rsmresource_t *res;
2119         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL| RSM_DDI);
2120 
2121         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_close enter\n"));
2122 
2123         flag = flag; cred = cred;
2124 
2125         if (otyp != OTYP_CHR)
2126                 return (EINVAL);
2127 
2128         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rnum = %d\n", rnum));
2129 
2130         /*
2131          * At this point we are the last reference to the resource.
2132          * Free resource number from resource table.
2133          * It's ok to remove number before we free the segment.
2134          * We need to lock the resource to protect against remote calls.
2135          */
2136         if (rnum == RSM_DRIVER_MINOR ||
2137             (res = rsmresource_free(rnum)) == NULL) {
2138                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_close done\n"));
2139                 return (DDI_SUCCESS);
2140         }
2141 
2142         switch (res->rsmrc_type) {
2143         case RSM_RESOURCE_EXPORT_SEGMENT:
2144         case RSM_RESOURCE_IMPORT_SEGMENT:
2145                 rsmseg_close((rsmseg_t *)res, 0);
2146                 break;
2147         case RSM_RESOURCE_BAR:
2148                 DBG_PRINTF((category, RSM_ERR, "bad resource in rsm_close\n"));
2149                 break;
2150         default:
2151                 break;
2152         }
2153 
2154         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_close done\n"));
2155 
2156         return (DDI_SUCCESS);
2157 }
2158 
2159 /*
2160  * rsm_inc_pgcnt
2161  *
2162  * Description: increment rsm page counter.
2163  *
2164  * Parameters:  pgcnt_t pnum;   number of pages to be used
2165  *
2166  * Returns:     RSM_SUCCESS     if memory limit not exceeded
2167  *              ENOSPC          if memory limit exceeded. In this case, the
2168  *                              page counter remains unchanged.
2169  *
2170  */
2171 static int
2172 rsm_inc_pgcnt(pgcnt_t pnum)
2173 {
2174         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2175         if (rsm_pgcnt_max == 0) { /* no upper limit has been set */
2176                 return (RSM_SUCCESS);
2177         }
2178 
2179         mutex_enter(&rsm_pgcnt_lock);
2180 
2181         if (rsm_pgcnt + pnum > rsm_pgcnt_max) {
2182                 /* ensure that limits have not been exceeded */
2183                 mutex_exit(&rsm_pgcnt_lock);
2184                 return (RSMERR_INSUFFICIENT_MEM);
2185         }
2186 
2187         rsm_pgcnt += pnum;
2188         DBG_PRINTF((category, RSM_DEBUG, "rsm_pgcnt incr to %d.\n",
2189             rsm_pgcnt));
2190         mutex_exit(&rsm_pgcnt_lock);
2191 
2192         return (RSM_SUCCESS);
2193 }
2194 
2195 /*
2196  * rsm_dec_pgcnt
2197  *
2198  * Description: decrement rsm page counter.
2199  *
2200  * Parameters:  pgcnt_t pnum;   number of pages freed
2201  *
2202  */
2203 static void
2204 rsm_dec_pgcnt(pgcnt_t pnum)
2205 {
2206         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2207 
2208         if (rsm_pgcnt_max == 0) { /* no upper limit has been set */
2209                 return;
2210         }
2211 
2212         mutex_enter(&rsm_pgcnt_lock);
2213         ASSERT(rsm_pgcnt >= pnum);
2214         rsm_pgcnt -= pnum;
2215         DBG_PRINTF((category, RSM_DEBUG, "rsm_pgcnt decr to %d.\n",
2216             rsm_pgcnt));
2217         mutex_exit(&rsm_pgcnt_lock);
2218 }
2219 
2220 static struct umem_callback_ops rsm_as_ops = {
2221         UMEM_CALLBACK_VERSION, /* version number */
2222         rsm_export_force_destroy,
2223 };
2224 
2225 static int
2226 rsm_bind_pages(ddi_umem_cookie_t *cookie, caddr_t vaddr, size_t len,
2227     proc_t *procp)
2228 {
2229         int error = RSM_SUCCESS;
2230         ulong_t pnum;
2231         struct umem_callback_ops *callbackops = &rsm_as_ops;
2232 
2233         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2234 
2235         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind_pages enter\n"));
2236 
2237         /*
2238          * Make sure vaddr and len are aligned on a page boundary
2239          */
2240         if ((uintptr_t)vaddr & (PAGESIZE - 1)) {
2241                 return (RSMERR_BAD_ADDR);
2242         }
2243 
2244         if (len & (PAGESIZE - 1)) {
2245                 return (RSMERR_BAD_LENGTH);
2246         }
2247 
2248         /*
2249          * Find number of pages
2250          */
2251         pnum = btopr(len);
2252         error = rsm_inc_pgcnt(pnum);
2253         if (error != RSM_SUCCESS) {
2254                 DBG_PRINTF((category, RSM_ERR,
2255                     "rsm_bind_pages:mem limit exceeded\n"));
2256                 return (RSMERR_INSUFFICIENT_MEM);
2257         }
2258 
2259         error = umem_lockmemory(vaddr, len,
2260             DDI_UMEMLOCK_WRITE|DDI_UMEMLOCK_READ|DDI_UMEMLOCK_LONGTERM,
2261             cookie,
2262             callbackops, procp);
2263 
2264         if (error) {
2265                 rsm_dec_pgcnt(pnum);
2266                 DBG_PRINTF((category, RSM_ERR,
2267                     "rsm_bind_pages:ddi_umem_lock failed\n"));
2268                 /*
2269                  * ddi_umem_lock, in the case of failure, returns one of
2270                  * the following three errors. These are translated into
2271                  * the RSMERR namespace and returned.
2272                  */
2273                 if (error == EFAULT)
2274                         return (RSMERR_BAD_ADDR);
2275                 else if (error == EACCES)
2276                         return (RSMERR_PERM_DENIED);
2277                 else
2278                         return (RSMERR_INSUFFICIENT_MEM);
2279         }
2280 
2281         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind_pages done\n"));
2282 
2283         return (error);
2284 
2285 }
2286 
2287 static int
2288 rsm_unbind_pages(rsmseg_t *seg)
2289 {
2290         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2291 
2292         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind_pages enter\n"));
2293 
2294         ASSERT(rsmseglock_held(seg));
2295 
2296         if (seg->s_cookie != NULL) {
2297                 /* unlock address range */
2298                 ddi_umem_unlock(seg->s_cookie);
2299                 rsm_dec_pgcnt(btopr(seg->s_len));
2300                 seg->s_cookie = NULL;
2301         }
2302 
2303         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind_pages done\n"));
2304 
2305         return (RSM_SUCCESS);
2306 }
2307 
2308 
2309 static int
2310 rsm_bind(rsmseg_t *seg, rsm_ioctlmsg_t *msg, intptr_t dataptr, int mode)
2311 {
2312         int e;
2313         adapter_t *adapter;
2314         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2315 
2316         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind enter\n"));
2317 
2318         adapter = rsm_getadapter(msg, mode);
2319         if (adapter == NULL) {
2320                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2321                     "rsm_bind done:no adapter\n"));
2322                 return (RSMERR_CTLR_NOT_PRESENT);
2323         }
2324 
2325         /* lock address range */
2326         if (msg->vaddr == NULL) {
2327                 rsmka_release_adapter(adapter);
2328                 DBG_PRINTF((category, RSM_ERR,
2329                     "rsm: rsm_bind done: invalid vaddr\n"));
2330                 return (RSMERR_BAD_ADDR);
2331         }
2332         if (msg->len <= 0) {
2333                 rsmka_release_adapter(adapter);
2334                 DBG_PRINTF((category, RSM_ERR,
2335                     "rsm_bind: invalid length\n"));
2336                 return (RSMERR_BAD_LENGTH);
2337         }
2338 
2339         /* Lock segment */
2340         rsmseglock_acquire(seg);
2341 
2342         while (seg->s_state == RSM_STATE_NEW_QUIESCED) {
2343                 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
2344                         DBG_PRINTF((category, RSM_DEBUG,
2345                             "rsm_bind done: cv_wait INTERRUPTED"));
2346                         rsmka_release_adapter(adapter);
2347                         rsmseglock_release(seg);
2348                         return (RSMERR_INTERRUPTED);
2349                 }
2350         }
2351 
2352         ASSERT(seg->s_state == RSM_STATE_NEW);
2353 
2354         ASSERT(seg->s_cookie == NULL);
2355 
2356         e = rsm_bind_pages(&seg->s_cookie, msg->vaddr, msg->len, curproc);
2357         if (e == RSM_SUCCESS) {
2358                 seg->s_flags |= RSM_USER_MEMORY;
2359                 if (msg->perm & RSM_ALLOW_REBIND) {
2360                         seg->s_flags |= RSMKA_ALLOW_UNBIND_REBIND;
2361                 }
2362                 if (msg->perm & RSM_CREATE_SEG_DONTWAIT) {
2363                         seg->s_flags |= RSMKA_SET_RESOURCE_DONTWAIT;
2364                 }
2365                 seg->s_region.r_vaddr = msg->vaddr;
2366                 /*
2367                  * Set the s_pid value in the segment structure. This is used
2368                  * to identify exported segments belonging to a particular
2369                  * process so that when the process exits, these segments can
2370                  * be unlocked forcefully even if rsm_close is not called on
2371                  * process exit since there maybe other processes referencing
2372                  * them (for example on a fork or exec).
2373                  * The s_pid value is also used to authenticate the process
2374                  * doing a publish or unpublish on the export segment. Only
2375                  * the creator of the export segment has a right to do a
2376                  * publish or unpublish and unbind on the segment.
2377                  */
2378                 seg->s_pid = ddi_get_pid();
2379                 seg->s_len = msg->len;
2380                 seg->s_state = RSM_STATE_BIND;
2381                 seg->s_adapter = adapter;
2382                 seg->s_proc = curproc;
2383         } else {
2384                 rsmka_release_adapter(adapter);
2385                 DBG_PRINTF((category, RSM_WARNING,
2386                     "unable to lock down pages\n"));
2387         }
2388 
2389         msg->rnum = seg->s_minor;
2390         /* Unlock segment */
2391         rsmseglock_release(seg);
2392 
2393         if (e == RSM_SUCCESS) {
2394                 /* copyout the resource number */
2395 #ifdef _MULTI_DATAMODEL
2396                 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
2397                         rsm_ioctlmsg32_t msg32;
2398 
2399                         msg32.rnum = msg->rnum;
2400                         if (ddi_copyout((caddr_t)&msg32.rnum,
2401                             (caddr_t)&((rsm_ioctlmsg32_t *)dataptr)->rnum,
2402                             sizeof (minor_t), mode)) {
2403                                 rsmka_release_adapter(adapter);
2404                                 e = RSMERR_BAD_ADDR;
2405                         }
2406                 }
2407 #endif
2408                 if (ddi_copyout((caddr_t)&msg->rnum,
2409                     (caddr_t)&((rsm_ioctlmsg_t *)dataptr)->rnum,
2410                     sizeof (minor_t), mode)) {
2411                         rsmka_release_adapter(adapter);
2412                         e = RSMERR_BAD_ADDR;
2413                 }
2414         }
2415 
2416         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind done\n"));
2417 
2418         return (e);
2419 }
2420 
2421 static void
2422 rsm_remap_local_importers(rsm_node_id_t src_nodeid,
2423     rsm_memseg_id_t ex_segid, ddi_umem_cookie_t cookie)
2424 {
2425         rsmresource_t   *p = NULL;
2426         rsmhash_table_t *rhash = &rsm_import_segs;
2427         uint_t          index;
2428 
2429         DBG_PRINTF((RSM_KERNEL_AGENT | RSM_FUNC_ALL, RSM_DEBUG_VERBOSE,
2430             "rsm_remap_local_importers enter\n"));
2431 
2432         index = rsmhash(ex_segid);
2433 
2434         rw_enter(&rhash->rsmhash_rw, RW_READER);
2435 
2436         p = rsmhash_getbkt(rhash, index);
2437 
2438         for (; p; p = p->rsmrc_next) {
2439                 rsmseg_t *seg = (rsmseg_t *)p;
2440                 rsmseglock_acquire(seg);
2441                 /*
2442                  * Change the s_cookie value of only the local importers
2443                  * which have been mapped (in state RSM_STATE_ACTIVE).
2444                  * Note that there is no need to change the s_cookie value
2445                  * if the imported segment is in RSM_STATE_MAPPING since
2446                  * eventually the s_cookie will be updated via the mapping
2447                  * functionality.
2448                  */
2449                 if ((seg->s_segid == ex_segid) && (seg->s_node == src_nodeid) &&
2450                     (seg->s_state == RSM_STATE_ACTIVE)) {
2451                         seg->s_cookie = cookie;
2452                 }
2453                 rsmseglock_release(seg);
2454         }
2455         rw_exit(&rhash->rsmhash_rw);
2456 
2457         DBG_PRINTF((RSM_KERNEL_AGENT | RSM_FUNC_ALL, RSM_DEBUG_VERBOSE,
2458             "rsm_remap_local_importers done\n"));
2459 }
2460 
2461 static int
2462 rsm_rebind(rsmseg_t *seg, rsm_ioctlmsg_t *msg)
2463 {
2464         int e;
2465         adapter_t *adapter;
2466         ddi_umem_cookie_t cookie;
2467         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2468 
2469         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_rebind enter\n"));
2470 
2471         /* Check for permissions to rebind */
2472         if (!(seg->s_flags & RSMKA_ALLOW_UNBIND_REBIND)) {
2473                 return (RSMERR_REBIND_NOT_ALLOWED);
2474         }
2475 
2476         if (seg->s_pid != ddi_get_pid() &&
2477             ddi_get_pid() != 0) {
2478                 DBG_PRINTF((category, RSM_ERR, "rsm_rebind: Not owner\n"));
2479                 return (RSMERR_NOT_CREATOR);
2480         }
2481 
2482         /*
2483          * We will not be allowing partial rebind and hence length passed
2484          * in must be same as segment length
2485          */
2486         if (msg->vaddr == NULL) {
2487                 DBG_PRINTF((category, RSM_ERR,
2488                     "rsm_rebind done: null msg->vaddr\n"));
2489                 return (RSMERR_BAD_ADDR);
2490         }
2491         if (msg->len != seg->s_len) {
2492                 DBG_PRINTF((category, RSM_ERR,
2493                     "rsm_rebind: invalid length\n"));
2494                 return (RSMERR_BAD_LENGTH);
2495         }
2496 
2497         /* Lock segment */
2498         rsmseglock_acquire(seg);
2499 
2500         while ((seg->s_state == RSM_STATE_BIND_QUIESCED) ||
2501             (seg->s_state == RSM_STATE_EXPORT_QUIESCING) ||
2502             (seg->s_state == RSM_STATE_EXPORT_QUIESCED)) {
2503                 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
2504                         rsmseglock_release(seg);
2505                         DBG_PRINTF((category, RSM_DEBUG,
2506                             "rsm_rebind done: cv_wait INTERRUPTED"));
2507                         return (RSMERR_INTERRUPTED);
2508                 }
2509         }
2510 
2511         /* verify segment state */
2512         if ((seg->s_state != RSM_STATE_BIND) &&
2513             (seg->s_state != RSM_STATE_EXPORT)) {
2514                 /* Unlock segment */
2515                 rsmseglock_release(seg);
2516                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2517                     "rsm_rebind done: invalid state\n"));
2518                 return (RSMERR_BAD_SEG_HNDL);
2519         }
2520 
2521         ASSERT(seg->s_cookie != NULL);
2522 
2523         if (msg->vaddr == seg->s_region.r_vaddr) {
2524                 rsmseglock_release(seg);
2525                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_rebind done\n"));
2526                 return (RSM_SUCCESS);
2527         }
2528 
2529         e = rsm_bind_pages(&cookie, msg->vaddr, msg->len, curproc);
2530         if (e == RSM_SUCCESS) {
2531                 struct buf *xbuf;
2532                 dev_t sdev = 0;
2533                 rsm_memory_local_t mem;
2534 
2535                 xbuf = ddi_umem_iosetup(cookie, 0, msg->len, B_WRITE,
2536                     sdev, 0, NULL, DDI_UMEM_SLEEP);
2537                 ASSERT(xbuf != NULL);
2538 
2539                 mem.ms_type = RSM_MEM_BUF;
2540                 mem.ms_bp = xbuf;
2541 
2542                 adapter = seg->s_adapter;
2543                 e = adapter->rsmpi_ops->rsm_rebind(
2544                     seg->s_handle.out, 0, &mem,
2545                     RSM_RESOURCE_DONTWAIT, NULL);
2546 
2547                 if (e == RSM_SUCCESS) {
2548                         /*
2549                          * unbind the older pages, and unload local importers;
2550                          * but don't disconnect importers
2551                          */
2552                         (void) rsm_unbind_pages(seg);
2553                         seg->s_cookie = cookie;
2554                         seg->s_region.r_vaddr = msg->vaddr;
2555                         rsm_remap_local_importers(my_nodeid, seg->s_segid,
2556                             cookie);
2557                 } else {
2558                         /*
2559                          * Unbind the pages associated with "cookie" by the
2560                          * rsm_bind_pages calls prior to this. This is
2561                          * similar to what is done in the rsm_unbind_pages
2562                          * routine for the seg->s_cookie.
2563                          */
2564                         ddi_umem_unlock(cookie);
2565                         rsm_dec_pgcnt(btopr(msg->len));
2566                         DBG_PRINTF((category, RSM_ERR,
2567                             "rsm_rebind failed with %d\n", e));
2568                 }
2569                 /*
2570                  * At present there is no dependency on the existence of xbuf.
2571                  * So we can free it here. If in the future this changes, it can
2572                  * be freed sometime during the segment destroy.
2573                  */
2574                 freerbuf(xbuf);
2575         }
2576 
2577         /* Unlock segment */
2578         rsmseglock_release(seg);
2579 
2580         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_rebind done\n"));
2581 
2582         return (e);
2583 }
2584 
2585 static int
2586 rsm_unbind(rsmseg_t *seg)
2587 {
2588         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2589 
2590         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind enter\n"));
2591 
2592         rsmseglock_acquire(seg);
2593 
2594         /* verify segment state */
2595         if ((seg->s_state != RSM_STATE_BIND) &&
2596             (seg->s_state != RSM_STATE_BIND_QUIESCED)) {
2597                 rsmseglock_release(seg);
2598                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2599                     "rsm_unbind: invalid state\n"));
2600                 return (RSMERR_BAD_SEG_HNDL);
2601         }
2602 
2603         /* unlock current range */
2604         (void) rsm_unbind_pages(seg);
2605 
2606         if (seg->s_state == RSM_STATE_BIND) {
2607                 seg->s_state = RSM_STATE_NEW;
2608         } else if (seg->s_state == RSM_STATE_BIND_QUIESCED) {
2609                 seg->s_state = RSM_STATE_NEW_QUIESCED;
2610         }
2611 
2612         rsmseglock_release(seg);
2613 
2614         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind done\n"));
2615 
2616         return (RSM_SUCCESS);
2617 }
2618 
2619 /* **************************** Exporter Access List Management ******* */
2620 static void
2621 rsmacl_free(rsmapi_access_entry_t *acl, int acl_len)
2622 {
2623         int     acl_sz;
2624         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2625 
2626         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_free enter\n"));
2627 
2628         /* acl could be NULL */
2629 
2630         if (acl != NULL && acl_len > 0) {
2631                 acl_sz = acl_len * sizeof (rsmapi_access_entry_t);
2632                 kmem_free((void *)acl, acl_sz);
2633         }
2634 
2635         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_free done\n"));
2636 }
2637 
2638 static void
2639 rsmpiacl_free(rsm_access_entry_t *acl, int acl_len)
2640 {
2641         int     acl_sz;
2642         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2643 
2644         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_free enter\n"));
2645 
2646         if (acl != NULL && acl_len > 0) {
2647                 acl_sz = acl_len * sizeof (rsm_access_entry_t);
2648                 kmem_free((void *)acl, acl_sz);
2649         }
2650 
2651         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_free done\n"));
2652 
2653 }
2654 
2655 static int
2656 rsmacl_build(rsm_ioctlmsg_t *msg, int mode,
2657     rsmapi_access_entry_t **list, int *len, int loopback)
2658 {
2659         rsmapi_access_entry_t *acl;
2660         int     acl_len;
2661         int i;
2662         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2663 
2664         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_build enter\n"));
2665 
2666         *len = 0;
2667         *list = NULL;
2668 
2669         acl_len = msg->acl_len;
2670         if ((loopback && acl_len > 1) || (acl_len < 0) ||
2671             (acl_len > MAX_NODES)) {
2672                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2673                     "rsmacl_build done: acl invalid\n"));
2674                 return (RSMERR_BAD_ACL);
2675         }
2676 
2677         if (acl_len > 0 && acl_len <= MAX_NODES) {
2678                 size_t acl_size = acl_len * sizeof (rsmapi_access_entry_t);
2679 
2680                 acl = kmem_alloc(acl_size, KM_SLEEP);
2681 
2682                 if (ddi_copyin((caddr_t)msg->acl, (caddr_t)acl,
2683                     acl_size, mode)) {
2684                         kmem_free((void *) acl, acl_size);
2685                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2686                             "rsmacl_build done: BAD_ADDR\n"));
2687                         return (RSMERR_BAD_ADDR);
2688                 }
2689 
2690                 /*
2691                  * Verify access list
2692                  */
2693                 for (i = 0; i < acl_len; i++) {
2694                         if (acl[i].ae_node > MAX_NODES ||
2695                             (loopback && (acl[i].ae_node != my_nodeid)) ||
2696                             acl[i].ae_permission > RSM_ACCESS_TRUSTED) {
2697                                 /* invalid entry */
2698                                 kmem_free((void *) acl, acl_size);
2699                                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2700                                     "rsmacl_build done: EINVAL\n"));
2701                                 return (RSMERR_BAD_ACL);
2702                         }
2703                 }
2704 
2705                 *len = acl_len;
2706                 *list = acl;
2707         }
2708 
2709         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_build done\n"));
2710 
2711         return (DDI_SUCCESS);
2712 }
2713 
2714 static int
2715 rsmpiacl_create(rsmapi_access_entry_t *src, rsm_access_entry_t **dest,
2716     int acl_len, adapter_t *adapter)
2717 {
2718         rsm_access_entry_t *acl;
2719         rsm_addr_t hwaddr;
2720         int i;
2721         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2722 
2723         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_create enter\n"));
2724 
2725         if (src != NULL) {
2726                 size_t acl_size = acl_len * sizeof (rsm_access_entry_t);
2727                 acl = kmem_alloc(acl_size, KM_SLEEP);
2728 
2729                 /*
2730                  * translate access list
2731                  */
2732                 for (i = 0; i < acl_len; i++) {
2733                         if (src[i].ae_node == my_nodeid) {
2734                                 acl[i].ae_addr = adapter->hwaddr;
2735                         } else {
2736                                 hwaddr = get_remote_hwaddr(adapter,
2737                                     src[i].ae_node);
2738                                 if ((int64_t)hwaddr < 0) {
2739                                         /* invalid hwaddr */
2740                                         kmem_free((void *) acl, acl_size);
2741                                         DBG_PRINTF((category,
2742                                             RSM_DEBUG_VERBOSE,
2743                                             "rsmpiacl_create done:"
2744                                             "EINVAL hwaddr\n"));
2745                                         return (RSMERR_INTERNAL_ERROR);
2746                                 }
2747                                 acl[i].ae_addr = hwaddr;
2748                         }
2749                         /* rsmpi understands only RSM_PERM_XXXX */
2750                         acl[i].ae_permission =
2751                             src[i].ae_permission & RSM_PERM_RDWR;
2752                 }
2753                 *dest = acl;
2754         } else {
2755                 *dest = NULL;
2756         }
2757 
2758         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_create done\n"));
2759 
2760         return (RSM_SUCCESS);
2761 }
2762 
2763 static int
2764 rsmsegacl_validate(rsmipc_request_t *req, rsm_node_id_t rnode,
2765     rsmipc_reply_t *reply)
2766 {
2767 
2768         int             i;
2769         rsmseg_t        *seg;
2770         rsm_memseg_id_t key = req->rsmipc_key;
2771         rsm_permission_t perm = req->rsmipc_perm;
2772         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2773 
2774         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2775             "rsmsegacl_validate enter\n"));
2776 
2777         /*
2778          * Find segment and grab its lock. The reason why we grab the segment
2779          * lock in side the search is to avoid the race when the segment is
2780          * being deleted and we already have a pointer to it.
2781          */
2782         seg = rsmexport_lookup(key);
2783         if (!seg) {
2784                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2785                     "rsmsegacl_validate done: %u ENXIO\n", key));
2786                 return (RSMERR_SEG_NOT_PUBLISHED);
2787         }
2788 
2789         ASSERT(rsmseglock_held(seg));
2790         ASSERT(seg->s_state == RSM_STATE_EXPORT);
2791 
2792         /*
2793          * We implement a 2-level protection scheme.
2794          * First, we check if local/remote host has access rights.
2795          * Second, we check if the user has access rights.
2796          *
2797          * This routine only validates the rnode access_list
2798          */
2799         if (seg->s_acl_len > 0) {
2800                 /*
2801                  * Check host access list
2802                  */
2803                 ASSERT(seg->s_acl != NULL);
2804                 for (i = 0; i < seg->s_acl_len; i++) {
2805                         if (seg->s_acl[i].ae_node == rnode) {
2806                                 perm &= seg->s_acl[i].ae_permission;
2807                                 goto found;
2808                         }
2809                 }
2810                 /* rnode is not found in the list */
2811                 rsmseglock_release(seg);
2812                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2813                     "rsmsegacl_validate done: EPERM\n"));
2814                 return (RSMERR_SEG_NOT_PUBLISHED_TO_NODE);
2815         } else {
2816                 /* use default owner creation umask */
2817                 perm &= seg->s_mode;
2818         }
2819 
2820 found:
2821         /* update perm for this node */
2822         reply->rsmipc_mode = perm;
2823         reply->rsmipc_uid = seg->s_uid;
2824         reply->rsmipc_gid = seg->s_gid;
2825         reply->rsmipc_segid = seg->s_segid;
2826         reply->rsmipc_seglen = seg->s_len;
2827 
2828         /*
2829          * Perm of requesting node is valid; source will validate user
2830          */
2831         rsmseglock_release(seg);
2832 
2833         /*
2834          * Add the importer to the list right away, if connect fails
2835          * the importer will ask the exporter to remove it.
2836          */
2837         importer_list_add(rnode, key, req->rsmipc_adapter_hwaddr,
2838             req->rsmipc_segment_cookie);
2839 
2840         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmsegacl_validate done\n"));
2841 
2842         return (RSM_SUCCESS);
2843 }
2844 
2845 
2846 /* ************************** Exporter Calls ************************* */
2847 
2848 static int
2849 rsm_publish(rsmseg_t *seg, rsm_ioctlmsg_t *msg, intptr_t dataptr, int mode)
2850 {
2851         int                     e;
2852         int                     acl_len;
2853         rsmapi_access_entry_t   *acl;
2854         rsm_access_entry_t      *rsmpi_acl;
2855         rsm_memory_local_t      mem;
2856         struct buf              *xbuf;
2857         dev_t                   sdev = 0;
2858         adapter_t               *adapter;
2859         rsm_memseg_id_t         segment_id = 0;
2860         int                     loopback_flag = 0;
2861         int                     create_flags = 0;
2862         rsm_resource_callback_t callback_flag;
2863         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2864 
2865         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_publish enter\n"));
2866 
2867         if (seg->s_adapter == &loopback_adapter)
2868                 loopback_flag = 1;
2869 
2870         if (seg->s_pid != ddi_get_pid() &&
2871             ddi_get_pid() != 0) {
2872                 DBG_PRINTF((category, RSM_ERR,
2873                     "rsm_publish: Not creator\n"));
2874                 return (RSMERR_NOT_CREATOR);
2875         }
2876 
2877         /*
2878          * Get per node access list
2879          */
2880         e = rsmacl_build(msg, mode, &acl, &acl_len, loopback_flag);
2881         if (e != DDI_SUCCESS) {
2882                 DBG_PRINTF((category, RSM_ERR,
2883                     "rsm_publish done: rsmacl_build failed\n"));
2884                 return (e);
2885         }
2886 
2887         /*
2888          * The application provided msg->key is used for resolving a
2889          * segment id according to the following:
2890          *    key = 0                   Kernel Agent selects the segment id
2891          *    key <= RSM_DLPI_ID_END Reserved for system usage except
2892          *                              RSMLIB range
2893          *    key < RSM_USER_APP_ID_BASE segment id = key
2894          *    key >= RSM_USER_APP_ID_BASE Reserved for KA selections
2895          *
2896          * rsm_nextavail_segmentid is initialized to 0x80000000 and
2897          * overflows to zero after 0x80000000 allocations.
2898          * An algorithm is needed which allows reinitialization and provides
2899          * for reallocation after overflow.  For now, ENOMEM is returned
2900          * once the overflow condition has occurred.
2901          */
2902         if (msg->key == 0) {
2903                 mutex_enter(&rsm_lock);
2904                 segment_id = rsm_nextavail_segmentid;
2905                 if (segment_id != 0) {
2906                         rsm_nextavail_segmentid++;
2907                         mutex_exit(&rsm_lock);
2908                 } else {
2909                         mutex_exit(&rsm_lock);
2910                         DBG_PRINTF((category, RSM_ERR,
2911                             "rsm_publish done: no more keys avlbl\n"));
2912                         return (RSMERR_INSUFFICIENT_RESOURCES);
2913                 }
2914         } else  if BETWEEN(msg->key, RSM_RSMLIB_ID_BASE, RSM_RSMLIB_ID_END)
2915                 /* range reserved for internal use by base/ndi libraries */
2916                 segment_id = msg->key;
2917         else    if (msg->key <= RSM_DLPI_ID_END)
2918                 return (RSMERR_RESERVED_SEGID);
2919         else if (msg->key <= (uint_t)RSM_USER_APP_ID_BASE -1)
2920                 segment_id = msg->key;
2921         else {
2922                 DBG_PRINTF((category, RSM_ERR,
2923                     "rsm_publish done: invalid key %u\n", msg->key));
2924                 return (RSMERR_RESERVED_SEGID);
2925         }
2926 
2927         /* Add key to exportlist; The segment lock is held on success */
2928         e = rsmexport_add(seg, segment_id);
2929         if (e) {
2930                 rsmacl_free(acl, acl_len);
2931                 DBG_PRINTF((category, RSM_ERR,
2932                     "rsm_publish done: export_add failed: %d\n", e));
2933                 return (e);
2934         }
2935 
2936         seg->s_segid = segment_id;
2937 
2938         if ((seg->s_state != RSM_STATE_BIND) &&
2939             (seg->s_state != RSM_STATE_BIND_QUIESCED)) {
2940                 /* state changed since then, free acl and return */
2941                 rsmseglock_release(seg);
2942                 rsmexport_rm(seg);
2943                 rsmacl_free(acl, acl_len);
2944                 DBG_PRINTF((category, RSM_ERR,
2945                     "rsm_publish done: segment in wrong state: %d\n",
2946                     seg->s_state));
2947                 return (RSMERR_BAD_SEG_HNDL);
2948         }
2949 
2950         /*
2951          * If this is for a local memory handle and permissions are zero,
2952          * then the surrogate segment is very large and we want to skip
2953          * allocation of DVMA space.
2954          *
2955          * Careful!  If the user didn't use an ACL list, acl will be a NULL
2956          * pointer.  Check that before dereferencing it.
2957          */
2958         if (acl != (rsmapi_access_entry_t *)NULL) {
2959                 if (acl[0].ae_node == my_nodeid && acl[0].ae_permission == 0)
2960                         goto skipdriver;
2961         }
2962 
2963         /* create segment  */
2964         xbuf = ddi_umem_iosetup(seg->s_cookie, 0, seg->s_len, B_WRITE,
2965             sdev, 0, NULL, DDI_UMEM_SLEEP);
2966         ASSERT(xbuf != NULL);
2967 
2968         mem.ms_type = RSM_MEM_BUF;
2969         mem.ms_bp = xbuf;
2970 
2971         /* This call includes a bind operations */
2972 
2973         adapter = seg->s_adapter;
2974         /*
2975          * create a acl list with hwaddr for RSMPI publish
2976          */
2977         e = rsmpiacl_create(acl, &rsmpi_acl, acl_len, adapter);
2978 
2979         if (e != RSM_SUCCESS) {
2980                 rsmseglock_release(seg);
2981                 rsmexport_rm(seg);
2982                 rsmacl_free(acl, acl_len);
2983                 freerbuf(xbuf);
2984                 DBG_PRINTF((category, RSM_ERR,
2985                     "rsm_publish done: rsmpiacl_create failed: %d\n", e));
2986                 return (e);
2987         }
2988 
2989         if (seg->s_state == RSM_STATE_BIND) {
2990                 /* create segment  */
2991 
2992                 /* This call includes a bind operations */
2993 
2994                 if (seg->s_flags & RSMKA_ALLOW_UNBIND_REBIND) {
2995                         create_flags = RSM_ALLOW_UNBIND_REBIND;
2996                 }
2997 
2998                 if (seg->s_flags & RSMKA_SET_RESOURCE_DONTWAIT) {
2999                         callback_flag  = RSM_RESOURCE_DONTWAIT;
3000                 } else {
3001                         callback_flag  = RSM_RESOURCE_SLEEP;
3002                 }
3003 
3004                 e = adapter->rsmpi_ops->rsm_seg_create(
3005                     adapter->rsmpi_handle,
3006                     &seg->s_handle.out, seg->s_len,
3007                     create_flags, &mem,
3008                     callback_flag, NULL);
3009                 /*
3010                  * At present there is no dependency on the existence of xbuf.
3011                  * So we can free it here. If in the future this changes, it can
3012                  * be freed sometime during the segment destroy.
3013                  */
3014                 freerbuf(xbuf);
3015 
3016                 if (e != RSM_SUCCESS) {
3017                         rsmseglock_release(seg);
3018                         rsmexport_rm(seg);
3019                         rsmacl_free(acl, acl_len);
3020                         rsmpiacl_free(rsmpi_acl, acl_len);
3021                         DBG_PRINTF((category, RSM_ERR,
3022                             "rsm_publish done: export_create failed: %d\n", e));
3023                         /*
3024                          * The following assertion ensures that the two errors
3025                          * related to the length and its alignment do not occur
3026                          * since they have been checked during export_create
3027                          */
3028                         ASSERT(e != RSMERR_BAD_MEM_ALIGNMENT &&
3029                             e != RSMERR_BAD_LENGTH);
3030                         if (e == RSMERR_NOT_MEM)
3031                                 e = RSMERR_INSUFFICIENT_MEM;
3032 
3033                         return (e);
3034                 }
3035                 /* export segment, this should create an IMMU mapping */
3036                 e = adapter->rsmpi_ops->rsm_publish(
3037                     seg->s_handle.out,
3038                     rsmpi_acl, acl_len,
3039                     seg->s_segid,
3040                     RSM_RESOURCE_DONTWAIT, NULL);
3041 
3042                 if (e != RSM_SUCCESS) {
3043                         adapter->rsmpi_ops->rsm_seg_destroy(seg->s_handle.out);
3044                         rsmseglock_release(seg);
3045                         rsmexport_rm(seg);
3046                         rsmacl_free(acl, acl_len);
3047                         rsmpiacl_free(rsmpi_acl, acl_len);
3048                         DBG_PRINTF((category, RSM_ERR,
3049                             "rsm_publish done: export_publish failed: %d\n",
3050                             e));
3051                         return (e);
3052                 }
3053         }
3054 
3055         seg->s_acl_in = rsmpi_acl;
3056 
3057 skipdriver:
3058         /* defer s_acl/s_acl_len -> avoid crash in rsmseg_free */
3059         seg->s_acl_len       = acl_len;
3060         seg->s_acl   = acl;
3061 
3062         if (seg->s_state == RSM_STATE_BIND) {
3063                 seg->s_state = RSM_STATE_EXPORT;
3064         } else if (seg->s_state == RSM_STATE_BIND_QUIESCED) {
3065                 seg->s_state = RSM_STATE_EXPORT_QUIESCED;
3066                 cv_broadcast(&seg->s_cv);
3067         }
3068 
3069         rsmseglock_release(seg);
3070 
3071         /*
3072          * If the segment id was solicited, then return it in
3073          * the original incoming message.
3074          */
3075         if (msg->key == 0) {
3076                 msg->key = segment_id;
3077 #ifdef _MULTI_DATAMODEL
3078                 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
3079                         rsm_ioctlmsg32_t msg32;
3080 
3081                         msg32.key = msg->key;
3082                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3083                             "rsm_publish done\n"));
3084                         return (ddi_copyout((caddr_t)&msg32,
3085                             (caddr_t)dataptr, sizeof (msg32), mode));
3086                 }
3087 #endif
3088                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3089                     "rsm_publish done\n"));
3090                 return (ddi_copyout((caddr_t)msg,
3091                     (caddr_t)dataptr, sizeof (*msg), mode));
3092         }
3093 
3094         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_publish done\n"));
3095         return (DDI_SUCCESS);
3096 }
3097 
3098 /*
3099  * This function modifies the access control list of an already published
3100  * segment.  There is no effect on import segments which are already
3101  * connected.
3102  */
3103 static int
3104 rsm_republish(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int mode)
3105 {
3106         rsmapi_access_entry_t   *new_acl, *old_acl, *tmp_acl;
3107         rsm_access_entry_t      *rsmpi_new_acl, *rsmpi_old_acl;
3108         int                     new_acl_len, old_acl_len, tmp_acl_len;
3109         int                     e, i;
3110         adapter_t               *adapter;
3111         int                     loopback_flag = 0;
3112         rsm_memseg_id_t         key;
3113         rsm_permission_t        permission;
3114         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
3115 
3116         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_republish enter\n"));
3117 
3118         if ((seg->s_state != RSM_STATE_EXPORT) &&
3119             (seg->s_state != RSM_STATE_EXPORT_QUIESCED) &&
3120             (seg->s_state != RSM_STATE_EXPORT_QUIESCING))
3121                 return (RSMERR_SEG_NOT_PUBLISHED);
3122 
3123         if (seg->s_pid != ddi_get_pid() &&
3124             ddi_get_pid() != 0) {
3125                 DBG_PRINTF((category, RSM_ERR,
3126                     "rsm_republish: Not owner\n"));
3127                 return (RSMERR_NOT_CREATOR);
3128         }
3129 
3130         if (seg->s_adapter == &loopback_adapter)
3131                 loopback_flag = 1;
3132 
3133         /*
3134          * Build new list first
3135          */
3136         e = rsmacl_build(msg, mode, &new_acl, &new_acl_len, loopback_flag);
3137         if (e) {
3138                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3139                     "rsm_republish done: rsmacl_build failed %d", e));
3140                 return (e);
3141         }
3142 
3143         /* Lock segment */
3144         rsmseglock_acquire(seg);
3145         /*
3146          * a republish is in progress - REPUBLISH message is being
3147          * sent to the importers so wait for it to complete OR
3148          * wait till DR completes
3149          */
3150         while (((seg->s_state == RSM_STATE_EXPORT) &&
3151             (seg->s_flags & RSM_REPUBLISH_WAIT)) ||
3152             (seg->s_state == RSM_STATE_EXPORT_QUIESCED) ||
3153             (seg->s_state == RSM_STATE_EXPORT_QUIESCING)) {
3154                 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
3155                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3156                             "rsm_republish done: cv_wait  INTERRUPTED"));
3157                         rsmseglock_release(seg);
3158                         rsmacl_free(new_acl, new_acl_len);
3159                         return (RSMERR_INTERRUPTED);
3160                 }
3161         }
3162 
3163         /* recheck if state is valid */
3164         if (seg->s_state != RSM_STATE_EXPORT) {
3165                 rsmseglock_release(seg);
3166                 rsmacl_free(new_acl, new_acl_len);
3167                 return (RSMERR_SEG_NOT_PUBLISHED);
3168         }
3169 
3170         key = seg->s_key;
3171         old_acl = seg->s_acl;
3172         old_acl_len = seg->s_acl_len;
3173 
3174         seg->s_acl = new_acl;
3175         seg->s_acl_len = new_acl_len;
3176 
3177         /*
3178          * This call will only be meaningful if and when the interconnect
3179          * layer makes use of the access list
3180          */
3181         adapter = seg->s_adapter;
3182         /*
3183          * create a acl list with hwaddr for RSMPI publish
3184          */
3185         e = rsmpiacl_create(new_acl, &rsmpi_new_acl, new_acl_len, adapter);
3186 
3187         if (e != RSM_SUCCESS) {
3188                 seg->s_acl = old_acl;
3189                 seg->s_acl_len = old_acl_len;
3190                 rsmseglock_release(seg);
3191                 rsmacl_free(new_acl, new_acl_len);
3192                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3193                     "rsm_republish done: rsmpiacl_create failed %d", e));
3194                 return (e);
3195         }
3196         rsmpi_old_acl = seg->s_acl_in;
3197         seg->s_acl_in = rsmpi_new_acl;
3198 
3199         e = adapter->rsmpi_ops->rsm_republish(seg->s_handle.out,
3200             seg->s_acl_in, seg->s_acl_len,
3201             RSM_RESOURCE_DONTWAIT, NULL);
3202 
3203         if (e != RSM_SUCCESS) {
3204                 seg->s_acl = old_acl;
3205                 seg->s_acl_in = rsmpi_old_acl;
3206                 seg->s_acl_len = old_acl_len;
3207                 rsmseglock_release(seg);
3208                 rsmacl_free(new_acl, new_acl_len);
3209                 rsmpiacl_free(rsmpi_new_acl, new_acl_len);
3210 
3211                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3212                     "rsm_republish done: rsmpi republish failed %d\n", e));
3213                 return (e);
3214         }
3215 
3216         /* create a tmp copy of the new acl */
3217         tmp_acl_len = new_acl_len;
3218         if (tmp_acl_len > 0) {
3219                 tmp_acl = kmem_zalloc(new_acl_len*sizeof (*tmp_acl), KM_SLEEP);
3220                 for (i = 0; i < tmp_acl_len; i++) {
3221                         tmp_acl[i].ae_node = new_acl[i].ae_node;
3222                         tmp_acl[i].ae_permission = new_acl[i].ae_permission;
3223                 }
3224                 /*
3225                  * The default permission of a node which was in the old
3226                  * ACL but not in the new ACL is 0 ie no access.
3227                  */
3228                 permission = 0;
3229         } else {
3230                 /*
3231                  * NULL acl means all importers can connect and
3232                  * default permission will be owner creation umask
3233                  */
3234                 tmp_acl = NULL;
3235                 permission = seg->s_mode;
3236         }
3237 
3238         /* make other republishers to wait for republish to complete */
3239         seg->s_flags |= RSM_REPUBLISH_WAIT;
3240 
3241         rsmseglock_release(seg);
3242 
3243         /* send the new perms to the importing nodes */
3244         rsm_send_republish(key, tmp_acl, tmp_acl_len, permission);
3245 
3246         rsmseglock_acquire(seg);
3247         seg->s_flags &= ~RSM_REPUBLISH_WAIT;
3248         /* wake up any one waiting for republish to complete */
3249         cv_broadcast(&seg->s_cv);
3250         rsmseglock_release(seg);
3251 
3252         rsmacl_free(tmp_acl, tmp_acl_len);
3253         rsmacl_free(old_acl, old_acl_len);
3254         rsmpiacl_free(rsmpi_old_acl, old_acl_len);
3255 
3256         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_republish done\n"));
3257         return (DDI_SUCCESS);
3258 }
3259 
3260 static int
3261 rsm_unpublish(rsmseg_t *seg, int mode)
3262 {
3263         rsmapi_access_entry_t   *acl;
3264         rsm_access_entry_t      *rsmpi_acl;
3265         int                     acl_len;
3266         int                     e;
3267         adapter_t *adapter;
3268         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
3269 
3270         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unpublish enter\n"));
3271 
3272         if (seg->s_pid != ddi_get_pid() &&
3273             ddi_get_pid() != 0) {
3274                 DBG_PRINTF((category, RSM_ERR,
3275                     "rsm_unpublish: Not creator\n"));
3276                 return (RSMERR_NOT_CREATOR);
3277         }
3278 
3279         rsmseglock_acquire(seg);
3280         /*
3281          * wait for QUIESCING to complete here before rsmexport_rm
3282          * is called because the SUSPEND_COMPLETE mesg which changes
3283          * the seg state from EXPORT_QUIESCING to EXPORT_QUIESCED and
3284          * signals the cv_wait needs to find it in the hashtable.
3285          */
3286         while ((seg->s_state == RSM_STATE_EXPORT_QUIESCING) ||
3287             ((seg->s_state == RSM_STATE_EXPORT) && (seg->s_rdmacnt > 0))) {
3288                 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
3289                         rsmseglock_release(seg);
3290                         DBG_PRINTF((category, RSM_ERR,
3291                             "rsm_unpublish done: cv_wait INTR qscing"
3292                             "getv/putv in progress"));
3293                         return (RSMERR_INTERRUPTED);
3294                 }
3295         }
3296 
3297         /* verify segment state */
3298         if ((seg->s_state != RSM_STATE_EXPORT) &&
3299             (seg->s_state != RSM_STATE_EXPORT_QUIESCED)) {
3300                 rsmseglock_release(seg);
3301                 DBG_PRINTF((category, RSM_ERR,
3302                     "rsm_unpublish done: bad state %x\n", seg->s_state));
3303                 return (RSMERR_SEG_NOT_PUBLISHED);
3304         }
3305 
3306         rsmseglock_release(seg);
3307 
3308         rsmexport_rm(seg);
3309 
3310         rsm_send_importer_disconnects(seg->s_segid, my_nodeid);
3311 
3312         rsmseglock_acquire(seg);
3313         /*
3314          * wait for republish to complete
3315          */
3316         while ((seg->s_state == RSM_STATE_EXPORT) &&
3317             (seg->s_flags & RSM_REPUBLISH_WAIT)) {
3318                 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
3319                         DBG_PRINTF((category, RSM_ERR,
3320                             "rsm_unpublish done: cv_wait INTR repubing"));
3321                         rsmseglock_release(seg);
3322                         return (RSMERR_INTERRUPTED);
3323                 }
3324         }
3325 
3326         if ((seg->s_state != RSM_STATE_EXPORT) &&
3327             (seg->s_state != RSM_STATE_EXPORT_QUIESCED)) {
3328                 DBG_PRINTF((category, RSM_ERR,
3329                     "rsm_unpublish done: invalid state"));
3330                 rsmseglock_release(seg);
3331                 return (RSMERR_SEG_NOT_PUBLISHED);
3332         }
3333 
3334         /*
3335          * check for putv/get surrogate segment which was not published
3336          * to the driver.
3337          *
3338          * Be certain to see if there is an ACL first!  If this segment was
3339          * not published with an ACL, acl will be a null pointer.  Check
3340          * that before dereferencing it.
3341          */
3342         acl = seg->s_acl;
3343         if (acl != (rsmapi_access_entry_t *)NULL) {
3344                 if (acl[0].ae_node == my_nodeid && acl[0].ae_permission == 0)
3345                         goto bypass;
3346         }
3347 
3348         /* The RSMPI unpublish/destroy has been done if seg is QUIESCED */
3349         if (seg->s_state == RSM_STATE_EXPORT_QUIESCED)
3350                 goto bypass;
3351 
3352         adapter = seg->s_adapter;
3353         for (;;) {
3354                 if (seg->s_state != RSM_STATE_EXPORT) {
3355                         rsmseglock_release(seg);
3356                         DBG_PRINTF((category, RSM_ERR,
3357                             "rsm_unpublish done: bad state %x\n",
3358                             seg->s_state));
3359                         return (RSMERR_SEG_NOT_PUBLISHED);
3360                 }
3361 
3362                 /* unpublish from adapter */
3363                 e = adapter->rsmpi_ops->rsm_unpublish(seg->s_handle.out);
3364 
3365                 if (e == RSM_SUCCESS) {
3366                         break;
3367                 }
3368 
3369                 if (e == RSMERR_SEG_IN_USE && mode == 1) {
3370                         /*
3371                          * wait for unpublish to succeed, it's busy.
3372                          */
3373                         seg->s_flags |= RSM_EXPORT_WAIT;
3374 
3375                         /* wait for a max of 1 ms - this is an empirical */
3376                         /* value that was found by some minimal testing  */
3377                         /* can be fine tuned when we have better numbers */
3378                         /* A long term fix would be to send cv_signal    */
3379                         /* from the intr callback routine                */
3380                         /* currently nobody signals this wait            */
3381                         (void) cv_reltimedwait(&seg->s_cv, &seg->s_lock,
3382                             drv_usectohz(1000), TR_CLOCK_TICK);
3383 
3384                         DBG_PRINTF((category, RSM_ERR,
3385                             "rsm_unpublish: SEG_IN_USE\n"));
3386 
3387                         seg->s_flags &= ~RSM_EXPORT_WAIT;
3388                 } else {
3389                         if (mode == 1) {
3390                                 DBG_PRINTF((category, RSM_ERR,
3391                                     "rsm:rsmpi unpublish err %x\n", e));
3392                                 seg->s_state = RSM_STATE_BIND;
3393                         }
3394                         rsmseglock_release(seg);
3395                         return (e);
3396                 }
3397         }
3398 
3399         /* Free segment */
3400         e = adapter->rsmpi_ops->rsm_seg_destroy(seg->s_handle.out);
3401 
3402         if (e != RSM_SUCCESS) {
3403                 DBG_PRINTF((category, RSM_ERR,
3404                     "rsm_unpublish: rsmpi destroy key=%x failed %x\n",
3405                     seg->s_key, e));
3406         }
3407 
3408 bypass:
3409         acl = seg->s_acl;
3410         rsmpi_acl = seg->s_acl_in;
3411         acl_len = seg->s_acl_len;
3412 
3413         seg->s_acl = NULL;
3414         seg->s_acl_in = NULL;
3415         seg->s_acl_len = 0;
3416 
3417         if (seg->s_state == RSM_STATE_EXPORT) {
3418                 seg->s_state = RSM_STATE_BIND;
3419         } else if (seg->s_state == RSM_STATE_EXPORT_QUIESCED) {
3420                 seg->s_state = RSM_STATE_BIND_QUIESCED;
3421                 cv_broadcast(&seg->s_cv);
3422         }
3423 
3424         rsmseglock_release(seg);
3425 
3426         rsmacl_free(acl, acl_len);
3427         rsmpiacl_free(rsmpi_acl, acl_len);
3428 
3429         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unpublish done\n"));
3430 
3431         return (DDI_SUCCESS);
3432 }
3433 
3434 /*
3435  * Called from rsm_unpublish to force an unload and disconnection of all
3436  * importers of the unpublished segment.
3437  *
3438  * First build the list of segments requiring a force disconnect, then
3439  * send a request for each.
3440  */
3441 static void
3442 rsm_send_importer_disconnects(rsm_memseg_id_t ex_segid,
3443     rsm_node_id_t ex_nodeid)
3444 {
3445         rsmipc_request_t        request;
3446         importing_token_t       *prev_token, *token, *tmp_token, *tokp;
3447         importing_token_t       *force_disconnect_list = NULL;
3448         int                     index;
3449 
3450         DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
3451             "rsm_send_importer_disconnects enter\n"));
3452 
3453         index = rsmhash(ex_segid);
3454 
3455         mutex_enter(&importer_list.lock);
3456 
3457         prev_token = NULL;
3458         token = importer_list.bucket[index];
3459 
3460         while (token != NULL) {
3461                 if (token->key == ex_segid) {
3462                         /*
3463                          * take it off the importer list and add it
3464                          * to the force disconnect list.
3465                          */
3466                         if (prev_token == NULL)
3467                                 importer_list.bucket[index] = token->next;
3468                         else
3469                                 prev_token->next = token->next;
3470                         tmp_token = token;
3471                         token = token->next;
3472                         if (force_disconnect_list == NULL) {
3473                                 force_disconnect_list = tmp_token;
3474                                 tmp_token->next = NULL;
3475                         } else {
3476                                 tokp = force_disconnect_list;
3477                                 /*
3478                                  * make sure that the tmp_token's node
3479                                  * is not already on the force disconnect
3480                                  * list.
3481                                  */
3482                                 while (tokp != NULL) {
3483                                         if (tokp->importing_node ==
3484                                             tmp_token->importing_node) {
3485                                                 break;
3486                                         }
3487                                         tokp = tokp->next;
3488                                 }
3489                                 if (tokp == NULL) {
3490                                         tmp_token->next =
3491                                             force_disconnect_list;
3492                                         force_disconnect_list = tmp_token;
3493                                 } else {
3494                                         kmem_free((void *)tmp_token,
3495                                             sizeof (*token));
3496                                 }
3497                         }
3498 
3499                 } else {
3500                         prev_token = token;
3501                         token = token->next;
3502                 }
3503         }
3504         mutex_exit(&importer_list.lock);
3505 
3506         token = force_disconnect_list;
3507         while (token != NULL) {
3508                 if (token->importing_node == my_nodeid) {
3509                         rsm_force_unload(ex_nodeid, ex_segid,
3510                             DISCONNECT);
3511                 } else {
3512                         request.rsmipc_hdr.rsmipc_type =
3513                             RSMIPC_MSG_DISCONNECT;
3514                         request.rsmipc_key = token->key;
3515                         for (;;) {
3516                                 if (rsmipc_send(token->importing_node,
3517                                     &request,
3518                                     RSM_NO_REPLY) == RSM_SUCCESS) {
3519                                         break;
3520                                 } else {
3521                                         delay(drv_usectohz(10000));
3522                                 }
3523                         }
3524                 }
3525                 tmp_token = token;
3526                 token = token->next;
3527                 kmem_free((void *)tmp_token, sizeof (*token));
3528         }
3529 
3530         DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
3531             "rsm_send_importer_disconnects done\n"));
3532 }
3533 
3534 /*
3535  * This function is used as a callback for unlocking the pages locked
3536  * down by a process which then does a fork or an exec.
3537  * It marks the export segments corresponding to umem cookie given by
3538  * the *arg to be in a ZOMBIE state(by calling rsmseg_close to be
3539  * destroyed later when an rsm_close occurs).
3540  */
3541 static void
3542 rsm_export_force_destroy(ddi_umem_cookie_t *ck)
3543 {
3544         rsmresource_blk_t *blk;
3545         rsmresource_t *p;
3546         rsmseg_t *eseg = NULL;
3547         int i, j;
3548         int found = 0;
3549 
3550         DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
3551             "rsm_export_force_destroy enter\n"));
3552 
3553         /*
3554          * Walk the resource list and locate the export segment (either
3555          * in the BIND or the EXPORT state) which corresponds to the
3556          * ddi_umem_cookie_t being freed up, and call rsmseg_close.
3557          * Change the state to ZOMBIE by calling rsmseg_close with the
3558          * force_flag argument (the second argument) set to 1. Also,
3559          * unpublish and unbind the segment, but don't free it. Free it
3560          * only on a rsm_close call for the segment.
3561          */
3562         rw_enter(&rsm_resource.rsmrc_lock, RW_READER);
3563 
3564         for (i = 0; i < rsm_resource.rsmrc_len; i++) {
3565                 blk = rsm_resource.rsmrc_root[i];
3566                 if (blk == NULL) {
3567                         continue;
3568                 }
3569 
3570                 for (j = 0; j < RSMRC_BLKSZ; j++) {
3571                         p = blk->rsmrcblk_blks[j];
3572                         if ((p != NULL) && (p != RSMRC_RESERVED) &&
3573                             (p->rsmrc_type == RSM_RESOURCE_EXPORT_SEGMENT)) {
3574                                 eseg = (rsmseg_t *)p;
3575                                 if (eseg->s_cookie != ck)
3576                                         continue; /* continue searching */
3577                                 /*
3578                                  * Found the segment, set flag to indicate
3579                                  * force destroy processing is in progress
3580                                  */
3581                                 rsmseglock_acquire(eseg);
3582                                 eseg->s_flags |= RSM_FORCE_DESTROY_WAIT;
3583                                 rsmseglock_release(eseg);
3584                                 found = 1;
3585                                 break;
3586                         }
3587                 }
3588 
3589                 if (found)
3590                         break;
3591         }
3592 
3593         rw_exit(&rsm_resource.rsmrc_lock);
3594 
3595         if (found) {
3596                 ASSERT(eseg != NULL);
3597                 /* call rsmseg_close with force flag set to 1 */
3598                 rsmseg_close(eseg, 1);
3599                 /*
3600                  * force destroy processing done, clear flag and signal any
3601                  * thread waiting in rsmseg_close.
3602                  */
3603                 rsmseglock_acquire(eseg);
3604                 eseg->s_flags &= ~RSM_FORCE_DESTROY_WAIT;
3605                 cv_broadcast(&eseg->s_cv);
3606                 rsmseglock_release(eseg);
3607         }
3608 
3609         DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
3610             "rsm_export_force_destroy done\n"));
3611 }
3612 
3613 /* ******************************* Remote Calls *********************** */
3614 static void
3615 rsm_intr_segconnect(rsm_node_id_t src, rsmipc_request_t *req)
3616 {
3617         rsmipc_reply_t reply;
3618         DBG_DEFINE(category,
3619             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3620 
3621         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3622             "rsm_intr_segconnect enter\n"));
3623 
3624         reply.rsmipc_status = (short)rsmsegacl_validate(req, src, &reply);
3625 
3626         reply.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_REPLY;
3627         reply.rsmipc_hdr.rsmipc_cookie = req->rsmipc_hdr.rsmipc_cookie;
3628 
3629         (void) rsmipc_send(src, NULL, &reply);
3630 
3631         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3632             "rsm_intr_segconnect done\n"));
3633 }
3634 
3635 
3636 /*
3637  * When an exported segment is unpublished the exporter sends an ipc
3638  * message (RSMIPC_MSG_DISCONNECT) to all importers.  The recv ipc dispatcher
3639  * calls this function.  The import list is scanned; segments which match the
3640  * exported segment id are unloaded and disconnected.
3641  *
3642  * Will also be called from rsm_rebind with disconnect_flag FALSE.
3643  *
3644  */
3645 static void
3646 rsm_force_unload(rsm_node_id_t src_nodeid, rsm_memseg_id_t ex_segid,
3647     boolean_t disconnect_flag)
3648 {
3649         rsmresource_t   *p = NULL;
3650         rsmhash_table_t *rhash = &rsm_import_segs;
3651         uint_t          index;
3652         DBG_DEFINE(category,
3653             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3654 
3655         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_force_unload enter\n"));
3656 
3657         index = rsmhash(ex_segid);
3658 
3659         rw_enter(&rhash->rsmhash_rw, RW_READER);
3660 
3661         p = rsmhash_getbkt(rhash, index);
3662 
3663         for (; p; p = p->rsmrc_next) {
3664                 rsmseg_t *seg = (rsmseg_t *)p;
3665                 if ((seg->s_segid == ex_segid) && (seg->s_node == src_nodeid)) {
3666                         /*
3667                          * In order to make rsmseg_unload and rsm_force_unload
3668                          * thread safe, acquire the segment lock here.
3669                          * rsmseg_unload is responsible for releasing the lock.
3670                          * rsmseg_unload releases the lock just before a call
3671                          * to rsmipc_send or in case of an early exit which
3672                          * occurs if the segment was in the state
3673                          * RSM_STATE_CONNECTING or RSM_STATE_NEW.
3674                          */
3675                         rsmseglock_acquire(seg);
3676                         if (disconnect_flag)
3677                                 seg->s_flags |= RSM_FORCE_DISCONNECT;
3678                         rsmseg_unload(seg);
3679                 }
3680         }
3681         rw_exit(&rhash->rsmhash_rw);
3682 
3683         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_force_unload done\n"));
3684 }
3685 
3686 static void
3687 rsm_intr_reply(rsmipc_msghdr_t *msg)
3688 {
3689         /*
3690          * Find slot for cookie in reply.
3691          * Match sequence with sequence in cookie
3692          * If no match; return
3693          * Try to grap lock of slot, if locked return
3694          * copy data into reply slot area
3695          * signal waiter
3696          */
3697         rsmipc_slot_t   *slot;
3698         rsmipc_cookie_t *cookie;
3699         void *data = (void *) msg;
3700         size_t size = sizeof (rsmipc_reply_t);
3701         DBG_DEFINE(category,
3702             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3703 
3704         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_reply enter\n"));
3705 
3706         cookie = &msg->rsmipc_cookie;
3707         if (cookie->ic.index >= RSMIPC_SZ) {
3708                 DBG_PRINTF((category, RSM_ERR,
3709                     "rsm: rsm_intr_reply bad cookie %d\n", cookie->ic.index));
3710                 return;
3711         }
3712 
3713         ASSERT(cookie->ic.index < RSMIPC_SZ);
3714         slot = &rsm_ipc.slots[cookie->ic.index];
3715         mutex_enter(&slot->rsmipc_lock);
3716         if (slot->rsmipc_cookie.value == cookie->value) {
3717                 /* found a match */
3718                 if (RSMIPC_GET(slot, RSMIPC_PENDING)) {
3719                         bcopy(data, slot->rsmipc_data, size);
3720                         RSMIPC_CLEAR(slot, RSMIPC_PENDING);
3721                         cv_signal(&slot->rsmipc_cv);
3722                 }
3723         } else {
3724                 DBG_PRINTF((category, RSM_DEBUG,
3725                     "rsm: rsm_intr_reply mismatched reply %d\n",
3726                     cookie->ic.index));
3727         }
3728         mutex_exit(&slot->rsmipc_lock);
3729         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_reply done\n"));
3730 }
3731 
3732 /*
3733  * This function gets dispatched on the worker thread when we receive
3734  * the SQREADY message. This function sends the SQREADY_ACK message.
3735  */
3736 static void
3737 rsm_sqready_ack_deferred(void *arg)
3738 {
3739         path_t  *path = (path_t *)arg;
3740         DBG_DEFINE(category,
3741             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3742 
3743         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3744             "rsm_sqready_ack_deferred enter\n"));
3745 
3746         mutex_enter(&path->mutex);
3747 
3748         /*
3749          * If path is not active no point in sending the ACK
3750          * because the whole SQREADY protocol will again start
3751          * when the path becomes active.
3752          */
3753         if (path->state != RSMKA_PATH_ACTIVE) {
3754                 /*
3755                  * decrement the path refcnt incremented in rsm_proc_sqready
3756                  */
3757                 PATH_RELE_NOLOCK(path);
3758                 mutex_exit(&path->mutex);
3759                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3760                     "rsm_sqready_ack_deferred done:!ACTIVE\n"));
3761                 return;
3762         }
3763 
3764         /* send an SQREADY_ACK message */
3765         (void) rsmipc_send_controlmsg(path, RSMIPC_MSG_SQREADY_ACK);
3766 
3767         /* initialize credits to the max level */
3768         path->sendq_token.msgbuf_avail = RSMIPC_MAX_MESSAGES;
3769 
3770         /* wake up any send that is waiting for credits */
3771         cv_broadcast(&path->sendq_token.sendq_cv);
3772 
3773         /*
3774          * decrement the path refcnt since we incremented it in
3775          * rsm_proc_sqready
3776          */
3777         PATH_RELE_NOLOCK(path);
3778 
3779         mutex_exit(&path->mutex);
3780 
3781         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3782             "rsm_sqready_ack_deferred done\n"));
3783 }
3784 
3785 /*
3786  * Process the SQREADY message
3787  */
3788 static void
3789 rsm_proc_sqready(rsmipc_controlmsg_t *msg, rsm_addr_t src_hwaddr,
3790     rsm_intr_hand_arg_t arg)
3791 {
3792         rsmipc_msghdr_t         *msghdr = (rsmipc_msghdr_t *)msg;
3793         srv_handler_arg_t       *hdlr_argp = (srv_handler_arg_t *)arg;
3794         path_t                  *path;
3795         DBG_DEFINE(category,
3796             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3797 
3798         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_proc_sqready enter\n"));
3799 
3800         /* look up the path - incr the path refcnt */
3801         path = rsm_find_path(hdlr_argp->adapter_name,
3802             hdlr_argp->adapter_instance, src_hwaddr);
3803 
3804         /*
3805          * No path exists or path is not active - drop the message
3806          */
3807         if (path == NULL) {
3808                 DBG_PRINTF((category, RSM_DEBUG,
3809                     "rsm_proc_sqready done: msg dropped no path\n"));
3810                 return;
3811         }
3812 
3813         mutex_exit(&path->mutex);
3814 
3815         /* drain any tasks from the previous incarnation */
3816         taskq_wait(path->recv_taskq);
3817 
3818         mutex_enter(&path->mutex);
3819         /*
3820          * If we'd sent an SQREADY message and were waiting for SQREADY_ACK
3821          * in the meanwhile we received an SQREADY message, blindly reset
3822          * the WAIT_FOR_SQACK flag because we'll just send SQREADY_ACK
3823          * and forget about the SQREADY that we sent.
3824          */
3825         path->flags &= ~RSMKA_WAIT_FOR_SQACK;
3826 
3827         if (path->state != RSMKA_PATH_ACTIVE) {
3828                 /* decr refcnt and drop the mutex */
3829                 PATH_RELE_NOLOCK(path);
3830                 mutex_exit(&path->mutex);
3831                 DBG_PRINTF((category, RSM_DEBUG,
3832                     "rsm_proc_sqready done: msg dropped path !ACTIVE\n"));
3833                 return;
3834         }
3835 
3836         DBG_PRINTF((category, RSM_DEBUG, "rsm_proc_sqready:path=%lx "
3837             " src=%lx:%llx\n", path, msghdr->rsmipc_src, src_hwaddr));
3838 
3839         /*
3840          * The sender's local incarnation number is our remote incarnation
3841          * number save it in the path data structure
3842          */
3843         path->remote_incn = msg->rsmipc_local_incn;
3844         path->sendq_token.msgbuf_avail = 0;
3845         path->procmsg_cnt = 0;
3846 
3847         /*
3848          * path is active - dispatch task to send SQREADY_ACK - remember
3849          * RSMPI calls can't be done in interrupt context
3850          *
3851          * We can use the recv_taskq to send because the remote endpoint
3852          * cannot start sending messages till it receives SQREADY_ACK hence
3853          * at this point there are no tasks on recv_taskq.
3854          *
3855          * The path refcnt will be decremented in rsm_sqready_ack_deferred.
3856          */
3857         (void) taskq_dispatch(path->recv_taskq,
3858             rsm_sqready_ack_deferred, path, KM_NOSLEEP);
3859 
3860         mutex_exit(&path->mutex);
3861 
3862 
3863         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_proc_sqready done\n"));
3864 }
3865 
3866 /*
3867  * Process the SQREADY_ACK message
3868  */
3869 static void
3870 rsm_proc_sqready_ack(rsmipc_controlmsg_t *msg, rsm_addr_t src_hwaddr,
3871     rsm_intr_hand_arg_t arg)
3872 {
3873         rsmipc_msghdr_t         *msghdr = (rsmipc_msghdr_t *)msg;
3874         srv_handler_arg_t       *hdlr_argp = (srv_handler_arg_t *)arg;
3875         path_t                  *path;
3876         DBG_DEFINE(category,
3877             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3878 
3879         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3880             "rsm_proc_sqready_ack enter\n"));
3881 
3882         /* look up the path - incr the path refcnt */
3883         path = rsm_find_path(hdlr_argp->adapter_name,
3884             hdlr_argp->adapter_instance, src_hwaddr);
3885 
3886         /*
3887          * drop the message if - no path exists or path is not active
3888          * or if its not waiting for SQREADY_ACK message
3889          */
3890         if (path == NULL) {
3891                 DBG_PRINTF((category, RSM_DEBUG,
3892                     "rsm_proc_sqready_ack done: msg dropped no path\n"));
3893                 return;
3894         }
3895 
3896         if ((path->state != RSMKA_PATH_ACTIVE) ||
3897             !(path->flags & RSMKA_WAIT_FOR_SQACK)) {
3898                 /* decrement the refcnt */
3899                 PATH_RELE_NOLOCK(path);
3900                 mutex_exit(&path->mutex);
3901                 DBG_PRINTF((category, RSM_DEBUG,
3902                     "rsm_proc_sqready_ack done: msg dropped\n"));
3903                 return;
3904         }
3905 
3906         /*
3907          * Check if this message is in response to the last RSMIPC_MSG_SQREADY
3908          * sent, if not drop it.
3909          */
3910         if (path->local_incn != msghdr->rsmipc_incn) {
3911                 /* decrement the refcnt */
3912                 PATH_RELE_NOLOCK(path);
3913                 mutex_exit(&path->mutex);
3914                 DBG_PRINTF((category, RSM_DEBUG,
3915                     "rsm_proc_sqready_ack done: msg old incn %lld\n",
3916                     msghdr->rsmipc_incn));
3917                 return;
3918         }
3919 
3920         DBG_PRINTF((category, RSM_DEBUG, "rsm_proc_sqready_ack:path=%lx "
3921             " src=%lx:%llx\n", path, msghdr->rsmipc_src, src_hwaddr));
3922 
3923         /*
3924          * clear the WAIT_FOR_SQACK flag since we have recvd the ack
3925          */
3926         path->flags &= ~RSMKA_WAIT_FOR_SQACK;
3927 
3928         /* save the remote sendq incn number */
3929         path->remote_incn = msg->rsmipc_local_incn;
3930 
3931         /* initialize credits to the max level */
3932         path->sendq_token.msgbuf_avail = RSMIPC_MAX_MESSAGES;
3933 
3934         /* wake up any send that is waiting for credits */
3935         cv_broadcast(&path->sendq_token.sendq_cv);
3936 
3937         /* decrement the refcnt */
3938         PATH_RELE_NOLOCK(path);
3939 
3940         mutex_exit(&path->mutex);
3941 
3942         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3943             "rsm_proc_sqready_ack done\n"));
3944 }
3945 
3946 /*
3947  * process the RSMIPC_MSG_CREDIT message
3948  */
3949 static void
3950 rsm_add_credits(rsmipc_controlmsg_t *msg, rsm_addr_t src_hwaddr,
3951     rsm_intr_hand_arg_t arg)
3952 {
3953         rsmipc_msghdr_t         *msghdr = (rsmipc_msghdr_t *)msg;
3954         srv_handler_arg_t       *hdlr_argp = (srv_handler_arg_t *)arg;
3955         path_t                  *path;
3956         DBG_DEFINE(category,
3957             RSM_KERNEL_AGENT | RSM_FUNC_ALL |
3958             RSM_INTR_CALLBACK | RSM_FLOWCONTROL);
3959 
3960         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_add_credits enter\n"));
3961 
3962         /* look up the path - incr the path refcnt */
3963         path = rsm_find_path(hdlr_argp->adapter_name,
3964             hdlr_argp->adapter_instance, src_hwaddr);
3965 
3966         if (path == NULL) {
3967                 DBG_PRINTF((category, RSM_DEBUG,
3968                     "rsm_add_credits enter: path not found\n"));
3969                 return;
3970         }
3971 
3972         /* the path is not active - discard credits */
3973         if (path->state != RSMKA_PATH_ACTIVE) {
3974                 PATH_RELE_NOLOCK(path);
3975                 mutex_exit(&path->mutex);
3976                 DBG_PRINTF((category, RSM_DEBUG,
3977                     "rsm_add_credits enter:path=%lx !ACTIVE\n", path));
3978                 return;
3979         }
3980 
3981         /*
3982          * Check if these credits are for current incarnation of the path.
3983          */
3984         if (path->local_incn != msghdr->rsmipc_incn) {
3985                 /* decrement the refcnt */
3986                 PATH_RELE_NOLOCK(path);
3987                 mutex_exit(&path->mutex);
3988                 DBG_PRINTF((category, RSM_DEBUG,
3989                     "rsm_add_credits enter: old incn %lld\n",
3990                     msghdr->rsmipc_incn));
3991                 return;
3992         }
3993 
3994         DBG_PRINTF((category, RSM_DEBUG,
3995             "rsm_add_credits:path=%lx new-creds=%d "
3996             "curr credits=%d src=%lx:%llx\n", path, msg->rsmipc_credits,
3997             path->sendq_token.msgbuf_avail, msghdr->rsmipc_src,
3998             src_hwaddr));
3999 
4000 
4001         /* add credits to the path's sendq */
4002         path->sendq_token.msgbuf_avail += msg->rsmipc_credits;
4003 
4004         ASSERT(path->sendq_token.msgbuf_avail <= RSMIPC_MAX_MESSAGES);
4005 
4006         /* wake up any send that is waiting for credits */
4007         cv_broadcast(&path->sendq_token.sendq_cv);
4008 
4009         /* decrement the refcnt */
4010         PATH_RELE_NOLOCK(path);
4011 
4012         mutex_exit(&path->mutex);
4013 
4014         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_add_credits done\n"));
4015 }
4016 
4017 static void
4018 rsm_intr_event(rsmipc_request_t *msg)
4019 {
4020         rsmseg_t        *seg;
4021         rsmresource_t   *p;
4022         rsm_node_id_t   src_node;
4023         DBG_DEFINE(category,
4024             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4025 
4026         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_event enter\n"));
4027 
4028         src_node = msg->rsmipc_hdr.rsmipc_src;
4029 
4030         if ((seg = msg->rsmipc_segment_cookie) != NULL) {
4031                 /* This is for an import segment */
4032                 uint_t hashval = rsmhash(msg->rsmipc_key);
4033 
4034                 rw_enter(&rsm_import_segs.rsmhash_rw, RW_READER);
4035 
4036                 p = (rsmresource_t *)rsmhash_getbkt(&rsm_import_segs, hashval);
4037 
4038                 for (; p; p = p->rsmrc_next) {
4039                         if ((p->rsmrc_key == msg->rsmipc_key) &&
4040                             (p->rsmrc_node == src_node)) {
4041                                 seg = (rsmseg_t *)p;
4042                                 rsmseglock_acquire(seg);
4043 
4044                                 atomic_inc_32(&seg->s_pollevent);
4045 
4046                                 if (seg->s_pollflag & RSM_SEGMENT_POLL)
4047                                         pollwakeup(&seg->s_poll, POLLRDNORM);
4048 
4049                                 rsmseglock_release(seg);
4050                         }
4051                 }
4052 
4053                 rw_exit(&rsm_import_segs.rsmhash_rw);
4054         } else {
4055                 /* This is for an export segment */
4056                 seg = rsmexport_lookup(msg->rsmipc_key);
4057                 if (!seg) {
4058                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4059                             "rsm_intr_event done: exp seg not found\n"));
4060                         return;
4061                 }
4062 
4063                 ASSERT(rsmseglock_held(seg));
4064 
4065                 atomic_inc_32(&seg->s_pollevent);
4066 
4067                 /*
4068                  * We must hold the segment lock here, or else the segment
4069                  * can be freed while pollwakeup is using it. This implies
4070                  * that we MUST NOT grab the segment lock during rsm_chpoll,
4071                  * as outlined in the chpoll(2) man page.
4072                  */
4073                 if (seg->s_pollflag & RSM_SEGMENT_POLL)
4074                         pollwakeup(&seg->s_poll, POLLRDNORM);
4075 
4076                 rsmseglock_release(seg);
4077         }
4078 
4079         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_event done\n"));
4080 }
4081 
4082 /*
4083  * The exporter did a republish and changed the ACL - this change is only
4084  * visible to new importers.
4085  */
4086 static void
4087 importer_update(rsm_node_id_t src_node, rsm_memseg_id_t key,
4088     rsm_permission_t perm)
4089 {
4090 
4091         rsmresource_t   *p;
4092         rsmseg_t        *seg;
4093         uint_t          hashval = rsmhash(key);
4094         DBG_DEFINE(category,
4095             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4096 
4097         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_update enter\n"));
4098 
4099         rw_enter(&rsm_import_segs.rsmhash_rw, RW_READER);
4100 
4101         p = (rsmresource_t *)rsmhash_getbkt(&rsm_import_segs, hashval);
4102 
4103         for (; p; p = p->rsmrc_next) {
4104                 /*
4105                  * find the importer and update the permission in the shared
4106                  * data structure. Any new importers will use the new perms
4107                  */
4108                 if ((p->rsmrc_key == key) && (p->rsmrc_node == src_node)) {
4109                         seg = (rsmseg_t *)p;
4110 
4111                         rsmseglock_acquire(seg);
4112                         rsmsharelock_acquire(seg);
4113                         seg->s_share->rsmsi_mode = perm;
4114                         rsmsharelock_release(seg);
4115                         rsmseglock_release(seg);
4116 
4117                         break;
4118                 }
4119         }
4120 
4121         rw_exit(&rsm_import_segs.rsmhash_rw);
4122 
4123         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_update done\n"));
4124 }
4125 
4126 void
4127 rsm_suspend_complete(rsm_node_id_t src_node, int flag)
4128 {
4129         int             done = 1; /* indicate all SUSPENDS have been acked */
4130         list_element_t  *elem;
4131         DBG_DEFINE(category,
4132             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4133 
4134         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4135             "rsm_suspend_complete enter\n"));
4136 
4137         mutex_enter(&rsm_suspend_list.list_lock);
4138 
4139         if (rsm_suspend_list.list_head == NULL) {
4140                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4141                     "rsm_suspend_complete done: suspend_list is empty\n"));
4142                 mutex_exit(&rsm_suspend_list.list_lock);
4143                 return;
4144         }
4145 
4146         elem = rsm_suspend_list.list_head;
4147         while (elem != NULL) {
4148                 if (elem->nodeid == src_node) {
4149                         /* clear the pending flag for the node */
4150                         elem->flags &= ~RSM_SUSPEND_ACKPENDING;
4151                         elem->flags |= flag;
4152                 }
4153 
4154                 if (done && (elem->flags & RSM_SUSPEND_ACKPENDING))
4155                         done = 0; /* still some nodes have not yet ACKED */
4156 
4157                 elem = elem->next;
4158         }
4159 
4160         mutex_exit(&rsm_suspend_list.list_lock);
4161 
4162         if (!done) {
4163                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4164                     "rsm_suspend_complete done: acks pending\n"));
4165                 return;
4166         }
4167         /*
4168          * Now that we are done with suspending all the remote importers
4169          * time to quiesce the local exporters
4170          */
4171         exporter_quiesce();
4172 
4173         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4174             "rsm_suspend_complete done\n"));
4175 }
4176 
4177 static void
4178 exporter_quiesce()
4179 {
4180         int             i, e;
4181         rsmresource_t   *current;
4182         rsmseg_t        *seg;
4183         adapter_t       *adapter;
4184         DBG_DEFINE(category,
4185             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4186 
4187         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "exporter_quiesce enter\n"));
4188         /*
4189          * The importers send a SUSPEND_COMPLETE to the exporter node
4190          *      Unpublish, unbind the export segment and
4191          *      move the segments to the EXPORT_QUIESCED state
4192          */
4193 
4194         rw_enter(&rsm_export_segs.rsmhash_rw, RW_READER);
4195 
4196         for (i = 0; i < rsm_hash_size; i++) {
4197                 current = rsm_export_segs.bucket[i];
4198                 while (current != NULL) {
4199                         seg = (rsmseg_t *)current;
4200                         rsmseglock_acquire(seg);
4201                         if (current->rsmrc_state ==
4202                             RSM_STATE_EXPORT_QUIESCING) {
4203                                 adapter = seg->s_adapter;
4204                                 /*
4205                                  * some local memory handles are not published
4206                                  * check if it was published
4207                                  */
4208                                 if ((seg->s_acl == NULL) ||
4209                                     (seg->s_acl[0].ae_node != my_nodeid) ||
4210                                     (seg->s_acl[0].ae_permission != 0)) {
4211 
4212                                         e = adapter->rsmpi_ops->rsm_unpublish(
4213                                             seg->s_handle.out);
4214                                         DBG_PRINTF((category, RSM_DEBUG,
4215                                             "exporter_quiesce:unpub %d\n", e));
4216 
4217                                         e = adapter->rsmpi_ops->rsm_seg_destroy(
4218                                             seg->s_handle.out);
4219 
4220                                         DBG_PRINTF((category, RSM_DEBUG,
4221                                             "exporter_quiesce:destroy %d\n",
4222                                             e));
4223                                 }
4224 
4225                                 (void) rsm_unbind_pages(seg);
4226                                 seg->s_state = RSM_STATE_EXPORT_QUIESCED;
4227                                 cv_broadcast(&seg->s_cv);
4228                         }
4229                         rsmseglock_release(seg);
4230                         current = current->rsmrc_next;
4231                 }
4232         }
4233         rw_exit(&rsm_export_segs.rsmhash_rw);
4234 
4235         /*
4236          * All the local segments we are done with the pre-del processing
4237          * - time to move to PREDEL_COMPLETED.
4238          */
4239 
4240         mutex_enter(&rsm_drv_data.drv_lock);
4241 
4242         ASSERT(rsm_drv_data.drv_state == RSM_DRV_PREDEL_STARTED);
4243 
4244         rsm_drv_data.drv_state = RSM_DRV_PREDEL_COMPLETED;
4245 
4246         cv_broadcast(&rsm_drv_data.drv_cv);
4247 
4248         mutex_exit(&rsm_drv_data.drv_lock);
4249 
4250         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "exporter_quiesce done\n"));
4251 }
4252 
4253 static void
4254 importer_suspend(rsm_node_id_t src_node)
4255 {
4256         int             i;
4257         int             susp_flg; /* true means already suspended */
4258         int             num_importers;
4259         rsmresource_t   *p = NULL, *curp;
4260         rsmhash_table_t *rhash = &rsm_import_segs;
4261         rsmseg_t        *seg;
4262         rsmipc_request_t request;
4263         DBG_DEFINE(category,
4264             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4265 
4266         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_suspend enter\n"));
4267 
4268         rw_enter(&rhash->rsmhash_rw, RW_READER);
4269         for (i = 0; i < rsm_hash_size; i++) {
4270                 p = rhash->bucket[i];
4271 
4272                 /*
4273                  * Suspend all importers with same <node, key> pair.
4274                  * After the last one of the shared importers has been
4275                  * suspended - suspend the shared mappings/connection.
4276                  */
4277                 for (; p; p = p->rsmrc_next) {
4278                         rsmseg_t *first = (rsmseg_t *)p;
4279                         if ((first->s_node != src_node) ||
4280                             (first->s_state == RSM_STATE_DISCONNECT))
4281                                 continue; /* go to next entry */
4282                         /*
4283                          * search the rest of the bucket for
4284                          * other siblings (imprtrs with the same key)
4285                          * of "first" and suspend them.
4286                          * All importers with same key fall in
4287                          * the same bucket.
4288                          */
4289                         num_importers = 0;
4290                         for (curp = p; curp; curp = curp->rsmrc_next) {
4291                                 seg = (rsmseg_t *)curp;
4292 
4293                                 rsmseglock_acquire(seg);
4294 
4295                                 if ((seg->s_node != first->s_node) ||
4296                                     (seg->s_key != first->s_key) ||
4297                                     (seg->s_state == RSM_STATE_DISCONNECT)) {
4298                                         /*
4299                                          * either not a peer segment or its a
4300                                          * disconnected segment - skip it
4301                                          */
4302                                         rsmseglock_release(seg);
4303                                         continue;
4304                                 }
4305 
4306                                 rsmseg_suspend(seg, &susp_flg);
4307 
4308                                 if (susp_flg) { /* seg already suspended */
4309                                         rsmseglock_release(seg);
4310                                         break; /* the inner for loop */
4311                                 }
4312 
4313                                 num_importers++;
4314                                 rsmsharelock_acquire(seg);
4315                                 /*
4316                                  * we've processed all importers that are
4317                                  * siblings of "first"
4318                                  */
4319                                 if (num_importers ==
4320                                     seg->s_share->rsmsi_refcnt) {
4321                                         rsmsharelock_release(seg);
4322                                         rsmseglock_release(seg);
4323                                         break;
4324                                 }
4325                                 rsmsharelock_release(seg);
4326                                 rsmseglock_release(seg);
4327                         }
4328 
4329                         /*
4330                          * All the importers with the same key and
4331                          * nodeid as "first" have been suspended.
4332                          * Now suspend the shared connect/mapping.
4333                          * This is done only once.
4334                          */
4335                         if (!susp_flg) {
4336                                 rsmsegshare_suspend(seg);
4337                         }
4338                 }
4339         }
4340 
4341         rw_exit(&rhash->rsmhash_rw);
4342 
4343         /* send an ACK for SUSPEND message */
4344         request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_SUSPEND_DONE;
4345         (void) rsmipc_send(src_node, &request, RSM_NO_REPLY);
4346 
4347 
4348         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_suspend done\n"));
4349 
4350 }
4351 
4352 static void
4353 rsmseg_suspend(rsmseg_t *seg, int *susp_flg)
4354 {
4355         int             recheck_state;
4356         rsmcookie_t     *hdl;
4357         DBG_DEFINE(category,
4358             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4359 
4360         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4361             "rsmseg_suspend enter: key=%u\n", seg->s_key));
4362 
4363         *susp_flg = 0;
4364 
4365         ASSERT(rsmseglock_held(seg));
4366         /* wait if putv/getv is in progress */
4367         while (seg->s_rdmacnt > 0)
4368                 cv_wait(&seg->s_cv, &seg->s_lock);
4369 
4370         do {
4371                 recheck_state = 0;
4372 
4373                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4374                     "rsmseg_suspend:segment %x state=%d\n",
4375                     seg->s_key, seg->s_state));
4376 
4377                 switch (seg->s_state) {
4378                 case RSM_STATE_NEW:
4379                         /* not a valid state */
4380                         break;
4381                 case RSM_STATE_CONNECTING:
4382                         seg->s_state = RSM_STATE_ABORT_CONNECT;
4383                         break;
4384                 case RSM_STATE_ABORT_CONNECT:
4385                         break;
4386                 case RSM_STATE_CONNECT:
4387                         seg->s_handle.in = NULL;
4388                         seg->s_state = RSM_STATE_CONN_QUIESCE;
4389                         break;
4390                 case RSM_STATE_MAPPING:
4391                         /* wait until segment leaves the mapping state */
4392                         while (seg->s_state == RSM_STATE_MAPPING)
4393                                 cv_wait(&seg->s_cv, &seg->s_lock);
4394                         recheck_state = 1;
4395                         break;
4396                 case RSM_STATE_ACTIVE:
4397                         /* unload the mappings */
4398                         if (seg->s_ckl != NULL) {
4399                                 hdl = seg->s_ckl;
4400                                 for (; hdl != NULL; hdl = hdl->c_next) {
4401                                         (void) devmap_unload(hdl->c_dhp,
4402                                             hdl->c_off, hdl->c_len);
4403                                 }
4404                         }
4405                         seg->s_mapinfo = NULL;
4406                         seg->s_state = RSM_STATE_MAP_QUIESCE;
4407                         break;
4408                 case RSM_STATE_CONN_QUIESCE:
4409                         /* FALLTHRU */
4410                 case RSM_STATE_MAP_QUIESCE:
4411                         /* rsmseg_suspend already done for seg */
4412                         *susp_flg = 1;
4413                         break;
4414                 case RSM_STATE_DISCONNECT:
4415                         break;
4416                 default:
4417                         ASSERT(0); /* invalid state */
4418                 }
4419         } while (recheck_state);
4420 
4421         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_suspend done\n"));
4422 }
4423 
4424 static void
4425 rsmsegshare_suspend(rsmseg_t *seg)
4426 {
4427         int                     e;
4428         adapter_t               *adapter;
4429         rsm_import_share_t      *sharedp;
4430         DBG_DEFINE(category,
4431             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4432 
4433         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4434             "rsmsegshare_suspend enter\n"));
4435 
4436         rsmseglock_acquire(seg);
4437         rsmsharelock_acquire(seg);
4438 
4439         sharedp = seg->s_share;
4440         adapter = seg->s_adapter;
4441         switch (sharedp->rsmsi_state) {
4442         case RSMSI_STATE_NEW:
4443                 break;
4444         case RSMSI_STATE_CONNECTING:
4445                 sharedp->rsmsi_state = RSMSI_STATE_ABORT_CONNECT;
4446                 break;
4447         case RSMSI_STATE_ABORT_CONNECT:
4448                 break;
4449         case RSMSI_STATE_CONNECTED:
4450                 /* do the rsmpi disconnect */
4451                 if (sharedp->rsmsi_node != my_nodeid) {
4452                         e = adapter->rsmpi_ops->
4453                             rsm_disconnect(sharedp->rsmsi_handle);
4454 
4455                         DBG_PRINTF((category, RSM_DEBUG,
4456                             "rsm:rsmpi disconnect seg=%x:err=%d\n",
4457                             sharedp->rsmsi_segid, e));
4458                 }
4459 
4460                 sharedp->rsmsi_handle = NULL;
4461 
4462                 sharedp->rsmsi_state = RSMSI_STATE_CONN_QUIESCE;
4463                 break;
4464         case RSMSI_STATE_CONN_QUIESCE:
4465                 break;
4466         case RSMSI_STATE_MAPPED:
4467                 /* do the rsmpi unmap and disconnect */
4468                 if (sharedp->rsmsi_node != my_nodeid) {
4469                         e = adapter->rsmpi_ops->rsm_unmap(seg->s_handle.in);
4470 
4471                         DBG_PRINTF((category, RSM_DEBUG,
4472                             "rsmshare_suspend: rsmpi unmap %d\n", e));
4473 
4474                         e = adapter->rsmpi_ops->
4475                             rsm_disconnect(sharedp->rsmsi_handle);
4476                         DBG_PRINTF((category, RSM_DEBUG,
4477                             "rsm:rsmpi disconnect seg=%x:err=%d\n",
4478                             sharedp->rsmsi_segid, e));
4479                 }
4480 
4481                 sharedp->rsmsi_handle = NULL;
4482 
4483                 sharedp->rsmsi_state = RSMSI_STATE_MAP_QUIESCE;
4484                 break;
4485         case RSMSI_STATE_MAP_QUIESCE:
4486                 break;
4487         case RSMSI_STATE_DISCONNECTED:
4488                 break;
4489         default:
4490                 ASSERT(0); /* invalid state */
4491         }
4492 
4493         rsmsharelock_release(seg);
4494         rsmseglock_release(seg);
4495 
4496         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4497             "rsmsegshare_suspend done\n"));
4498 }
4499 
4500 /*
4501  * This should get called on receiving a RESUME message or from
4502  * the pathmanger if the node undergoing DR dies.
4503  */
4504 static void
4505 importer_resume(rsm_node_id_t src_node)
4506 {
4507         int             i;
4508         rsmresource_t   *p = NULL;
4509         rsmhash_table_t *rhash = &rsm_import_segs;
4510         void            *cookie;
4511         DBG_DEFINE(category,
4512             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4513 
4514         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_resume enter\n"));
4515 
4516         rw_enter(&rhash->rsmhash_rw, RW_READER);
4517 
4518         for (i = 0; i < rsm_hash_size; i++) {
4519                 p = rhash->bucket[i];
4520 
4521                 for (; p; p = p->rsmrc_next) {
4522                         rsmseg_t *seg = (rsmseg_t *)p;
4523 
4524                         rsmseglock_acquire(seg);
4525 
4526                         /* process only importers of node undergoing DR */
4527                         if (seg->s_node != src_node) {
4528                                 rsmseglock_release(seg);
4529                                 continue;
4530                         }
4531 
4532                         if (rsmseg_resume(seg, &cookie) != RSM_SUCCESS) {
4533                                 rsmipc_request_t        request;
4534                                 /*
4535                                  * rsmpi map/connect failed
4536                                  * inform the exporter so that it can
4537                                  * remove the importer.
4538                                  */
4539                                 request.rsmipc_hdr.rsmipc_type =
4540                                     RSMIPC_MSG_NOTIMPORTING;
4541                                 request.rsmipc_key = seg->s_segid;
4542                                 request.rsmipc_segment_cookie = cookie;
4543                                 rsmseglock_release(seg);
4544                                 (void) rsmipc_send(seg->s_node, &request,
4545                                     RSM_NO_REPLY);
4546                         } else {
4547                                 rsmseglock_release(seg);
4548                         }
4549                 }
4550         }
4551 
4552         rw_exit(&rhash->rsmhash_rw);
4553 
4554         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_resume done\n"));
4555 }
4556 
4557 static int
4558 rsmseg_resume(rsmseg_t *seg, void **cookie)
4559 {
4560         int                     e;
4561         int                     retc;
4562         off_t                   dev_offset;
4563         size_t                  maplen;
4564         uint_t                  maxprot;
4565         rsm_mapinfo_t           *p;
4566         rsmcookie_t             *hdl;
4567         rsm_import_share_t      *sharedp;
4568         DBG_DEFINE(category,
4569             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4570 
4571         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4572             "rsmseg_resume enter: key=%u\n", seg->s_key));
4573 
4574         *cookie = NULL;
4575 
4576         ASSERT(rsmseglock_held(seg));
4577 
4578         if ((seg->s_state != RSM_STATE_CONN_QUIESCE) &&
4579             (seg->s_state != RSM_STATE_MAP_QUIESCE)) {
4580                 return (RSM_SUCCESS);
4581         }
4582 
4583         sharedp = seg->s_share;
4584 
4585         rsmsharelock_acquire(seg);
4586 
4587         /* resume the shared connection and/or mapping */
4588         retc = rsmsegshare_resume(seg);
4589 
4590         if (seg->s_state == RSM_STATE_CONN_QUIESCE) {
4591                 /* shared state can either be connected or mapped */
4592                 if ((sharedp->rsmsi_state == RSMSI_STATE_CONNECTED) ||
4593                     (sharedp->rsmsi_state == RSMSI_STATE_MAPPED)) {
4594                         ASSERT(retc == RSM_SUCCESS);
4595                         seg->s_handle.in = sharedp->rsmsi_handle;
4596                         rsmsharelock_release(seg);
4597                         seg->s_state = RSM_STATE_CONNECT;
4598 
4599                 } else { /* error in rsmpi connect during resume */
4600                         seg->s_handle.in = NULL;
4601                         seg->s_state = RSM_STATE_DISCONNECT;
4602 
4603                         sharedp->rsmsi_refcnt--;
4604                         cookie = (void *)sharedp->rsmsi_cookie;
4605 
4606                         if (sharedp->rsmsi_refcnt == 0) {
4607                                 ASSERT(sharedp->rsmsi_mapcnt == 0);
4608                                 rsmsharelock_release(seg);
4609 
4610                                 /* clean up the shared data structure */
4611                                 mutex_destroy(&sharedp->rsmsi_lock);
4612                                 cv_destroy(&sharedp->rsmsi_cv);
4613                                 kmem_free((void *)(sharedp),
4614                                     sizeof (rsm_import_share_t));
4615 
4616                         } else {
4617                                 rsmsharelock_release(seg);
4618                         }
4619                         /*
4620                          * The following needs to be done after any
4621                          * rsmsharelock calls which use seg->s_share.
4622                          */
4623                         seg->s_share = NULL;
4624                 }
4625 
4626                 /* signal any waiting segment */
4627                 cv_broadcast(&seg->s_cv);
4628 
4629                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4630                     "rsmseg_resume done:state=%d\n", seg->s_state));
4631                 return (retc);
4632         }
4633 
4634         ASSERT(seg->s_state == RSM_STATE_MAP_QUIESCE);
4635 
4636         /* Setup protections for remap */
4637         maxprot = PROT_USER;
4638         if (seg->s_mode & RSM_PERM_READ) {
4639                 maxprot |= PROT_READ;
4640         }
4641         if (seg->s_mode & RSM_PERM_WRITE) {
4642                 maxprot |= PROT_WRITE;
4643         }
4644 
4645         if (sharedp->rsmsi_state != RSMSI_STATE_MAPPED) {
4646                 /* error in rsmpi connect or map during resume */
4647 
4648                 /* remap to trash page */
4649                 ASSERT(seg->s_ckl != NULL);
4650 
4651                 for (hdl = seg->s_ckl; hdl != NULL; hdl = hdl->c_next) {
4652                         e = devmap_umem_remap(hdl->c_dhp, rsm_dip,
4653                             remap_cookie, hdl->c_off, hdl->c_len,
4654                             maxprot, 0, NULL);
4655 
4656                         DBG_PRINTF((category, RSM_ERR,
4657                             "rsmseg_resume:remap=%d\n", e));
4658                 }
4659 
4660                 seg->s_handle.in = NULL;
4661                 seg->s_state = RSM_STATE_DISCONNECT;
4662 
4663                 sharedp->rsmsi_refcnt--;
4664 
4665                 sharedp->rsmsi_mapcnt--;
4666                 seg->s_mapinfo = NULL;
4667 
4668                 if (sharedp->rsmsi_refcnt == 0) {
4669                         ASSERT(sharedp->rsmsi_mapcnt == 0);
4670                         rsmsharelock_release(seg);
4671 
4672                         /* clean up the shared data structure */
4673                         mutex_destroy(&sharedp->rsmsi_lock);
4674                         cv_destroy(&sharedp->rsmsi_cv);
4675                         kmem_free((void *)(sharedp),
4676                             sizeof (rsm_import_share_t));
4677 
4678                 } else {
4679                         rsmsharelock_release(seg);
4680                 }
4681                 /*
4682                  * The following needs to be done after any
4683                  * rsmsharelock calls which use seg->s_share.
4684                  */
4685                 seg->s_share = NULL;
4686 
4687                 /* signal any waiting segment */
4688                 cv_broadcast(&seg->s_cv);
4689 
4690                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4691                     "rsmseg_resume done:seg=%x,err=%d\n",
4692                     seg->s_key, retc));
4693                 return (retc);
4694 
4695         }
4696 
4697         seg->s_handle.in = sharedp->rsmsi_handle;
4698 
4699         if (seg->s_node == my_nodeid) { /* loopback */
4700                 ASSERT(seg->s_mapinfo == NULL);
4701 
4702                 for (hdl = seg->s_ckl; hdl != NULL; hdl = hdl->c_next) {
4703                         e = devmap_umem_remap(hdl->c_dhp,
4704                             rsm_dip, seg->s_cookie,
4705                             hdl->c_off, hdl->c_len,
4706                             maxprot, 0, NULL);
4707 
4708                         DBG_PRINTF((category, RSM_ERR,
4709                             "rsmseg_resume:remap=%d\n", e));
4710                 }
4711         } else { /* remote exporter */
4712                 /* remap to the new rsmpi maps */
4713                 seg->s_mapinfo = sharedp->rsmsi_mapinfo;
4714 
4715                 for (hdl = seg->s_ckl; hdl != NULL; hdl = hdl->c_next) {
4716                         p = rsm_get_mapinfo(seg, hdl->c_off, hdl->c_len,
4717                             &dev_offset, &maplen);
4718                         e = devmap_devmem_remap(hdl->c_dhp,
4719                             p->dip, p->dev_register, dev_offset,
4720                             maplen, maxprot, 0, NULL);
4721 
4722                         DBG_PRINTF((category, RSM_ERR,
4723                             "rsmseg_resume:remap=%d\n", e));
4724                 }
4725         }
4726 
4727         rsmsharelock_release(seg);
4728 
4729         seg->s_state = RSM_STATE_ACTIVE;
4730         cv_broadcast(&seg->s_cv);
4731 
4732         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_resume done\n"));
4733 
4734         return (retc);
4735 }
4736 
4737 static int
4738 rsmsegshare_resume(rsmseg_t *seg)
4739 {
4740         int                     e = RSM_SUCCESS;
4741         adapter_t               *adapter;
4742         rsm_import_share_t      *sharedp;
4743         DBG_DEFINE(category,
4744             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4745 
4746         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmsegshare_resume enter\n"));
4747 
4748         ASSERT(rsmseglock_held(seg));
4749         ASSERT(rsmsharelock_held(seg));
4750 
4751         sharedp = seg->s_share;
4752 
        /*
         * If we are not in a xxxx_QUIESCE state that means shared
         * connect/mapping processing has already been done,
         * so return success.
         */
4758         if ((sharedp->rsmsi_state != RSMSI_STATE_CONN_QUIESCE) &&
4759             (sharedp->rsmsi_state != RSMSI_STATE_MAP_QUIESCE)) {
4760                 return (RSM_SUCCESS);
4761         }
4762 
4763         adapter = seg->s_adapter;
4764 
4765         if (sharedp->rsmsi_node != my_nodeid) {
4766                 rsm_addr_t      hwaddr;
4767                 hwaddr = get_remote_hwaddr(adapter, sharedp->rsmsi_node);
4768 
4769                 e = adapter->rsmpi_ops->rsm_connect(
4770                     adapter->rsmpi_handle, hwaddr,
4771                     sharedp->rsmsi_segid, &sharedp->rsmsi_handle);
4772 
4773                 DBG_PRINTF((category, RSM_DEBUG,
4774                     "rsmsegshare_resume:rsmpi connect seg=%x:err=%d\n",
4775                     sharedp->rsmsi_segid, e));
4776 
4777                 if (e != RSM_SUCCESS) {
4778                         /* when do we send the NOT_IMPORTING message */
4779                         sharedp->rsmsi_handle = NULL;
4780                         sharedp->rsmsi_state = RSMSI_STATE_DISCONNECTED;
4781                         /* signal any waiting segment */
4782                         cv_broadcast(&sharedp->rsmsi_cv);
4783                         return (e);
4784                 }
4785         }
4786 
4787         if (sharedp->rsmsi_state == RSMSI_STATE_CONN_QUIESCE) {
4788                 sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
4789                 /* signal any waiting segment */
4790                 cv_broadcast(&sharedp->rsmsi_cv);
4791                 return (e);
4792         }
4793 
4794         ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAP_QUIESCE);
4795 
4796         /* do the rsmpi map of the whole segment here */
4797         if (sharedp->rsmsi_node != my_nodeid) {
4798                 size_t mapped_len;
4799                 rsm_mapinfo_t *p;
4800 
4801                 /*
4802                  * We need to do rsmpi maps with <off, lens> identical to
4803                  * the old mapinfo list because the segment mapping handles
4804                  * dhp and such need the fragmentation of rsmpi maps to be
4805                  * identical to what it was during the mmap of the segment
4806                  */
4807                 p = sharedp->rsmsi_mapinfo;
4808 
4809                 while (p != NULL) {
4810                         mapped_len = 0;
4811 
4812                         e = adapter->rsmpi_ops->rsm_map(
4813                             sharedp->rsmsi_handle, p->start_offset,
4814                             p->individual_len, &mapped_len,
4815                             &p->dip, &p->dev_register, &p->dev_offset,
4816                             NULL, NULL);
4817 
4818                         if (e != 0) {
4819                                 DBG_PRINTF((category, RSM_ERR,
4820                                     "rsmsegshare_resume: rsmpi map err=%d\n",
4821                                     e));
4822                                 break;
4823                         }
4824 
4825                         if (mapped_len != p->individual_len) {
4826                                 DBG_PRINTF((category, RSM_ERR,
4827                                     "rsmsegshare_resume: rsmpi maplen"
4828                                     "< reqlen=%lx\n", mapped_len));
4829                                 e = RSMERR_BAD_LENGTH;
4830                                 break;
4831                         }
4832 
4833                         p = p->next;
4834 
4835                 }
4836 
4837 
4838                 if (e != RSM_SUCCESS) { /* rsmpi map failed */
4839                         int     err;
4840                         /* Check if this is the first rsm_map */
4841                         if (p != sharedp->rsmsi_mapinfo) {
4842                                 /*
4843                                  * A single rsm_unmap undoes multiple rsm_maps.
4844                                  */
4845                                 (void) seg->s_adapter->rsmpi_ops->
4846                                     rsm_unmap(sharedp->rsmsi_handle);
4847                         }
4848 
4849                         rsm_free_mapinfo(sharedp->rsmsi_mapinfo);
4850                         sharedp->rsmsi_mapinfo = NULL;
4851 
4852                         err = adapter->rsmpi_ops->
4853                             rsm_disconnect(sharedp->rsmsi_handle);
4854 
4855                         DBG_PRINTF((category, RSM_DEBUG,
4856                             "rsmsegshare_resume:disconn seg=%x:err=%d\n",
4857                             sharedp->rsmsi_segid, err));
4858 
4859                         sharedp->rsmsi_handle = NULL;
4860                         sharedp->rsmsi_state = RSMSI_STATE_DISCONNECTED;
4861 
4862                         /* signal the waiting segments */
4863                         cv_broadcast(&sharedp->rsmsi_cv);
4864                         DBG_PRINTF((category, RSM_DEBUG,
4865                             "rsmsegshare_resume done: rsmpi map err\n"));
4866                         return (e);
4867                 }
4868         }
4869 
4870         sharedp->rsmsi_state = RSMSI_STATE_MAPPED;
4871 
4872         /* signal any waiting segment */
4873         cv_broadcast(&sharedp->rsmsi_cv);
4874 
4875         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmsegshare_resume done\n"));
4876 
4877         return (e);
4878 }
4879 
4880 /*
4881  * this is the routine that gets called by recv_taskq which is the
4882  * thread that processes messages that are flow-controlled.
4883  */
4884 static void
4885 rsm_intr_proc_deferred(void *arg)
4886 {
4887         path_t                  *path = (path_t *)arg;
4888         rsmipc_request_t        *msg;
4889         rsmipc_msghdr_t         *msghdr;
4890         rsm_node_id_t           src_node;
4891         msgbuf_elem_t           *head;
4892         int                     e;
4893         DBG_DEFINE(category,
4894             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4895 
4896         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4897             "rsm_intr_proc_deferred enter\n"));
4898 
4899         mutex_enter(&path->mutex);
4900 
4901         /* use the head of the msgbuf_queue */
4902         head = rsmka_gethead_msgbuf(path);
4903 
4904         mutex_exit(&path->mutex);
4905 
4906         msg = (rsmipc_request_t *)&(head->msg);
4907         msghdr = (rsmipc_msghdr_t *)msg;
4908 
4909         src_node = msghdr->rsmipc_src;
4910 
4911         /*
4912          * messages that need to send a reply should check the message version
4913          * before processing the message. And all messages that need to
4914          * send a reply should be processed here by the worker thread.
4915          */
4916         switch (msghdr->rsmipc_type) {
4917         case RSMIPC_MSG_SEGCONNECT:
4918                 if (msghdr->rsmipc_version != RSM_VERSION) {
4919                         rsmipc_reply_t reply;
4920                         reply.rsmipc_status = RSMERR_BAD_DRIVER_VERSION;
4921                         reply.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_REPLY;
4922                         reply.rsmipc_hdr.rsmipc_cookie = msghdr->rsmipc_cookie;
4923                         (void) rsmipc_send(msghdr->rsmipc_src, NULL, &reply);
4924                 } else {
4925                         rsm_intr_segconnect(src_node, msg);
4926                 }
4927                 break;
4928         case RSMIPC_MSG_DISCONNECT:
4929                 rsm_force_unload(src_node, msg->rsmipc_key, DISCONNECT);
4930                 break;
4931         case RSMIPC_MSG_SUSPEND:
4932                 importer_suspend(src_node);
4933                 break;
4934         case RSMIPC_MSG_SUSPEND_DONE:
4935                 rsm_suspend_complete(src_node, 0);
4936                 break;
4937         case RSMIPC_MSG_RESUME:
4938                 importer_resume(src_node);
4939                 break;
4940         default:
4941                 ASSERT(0);
4942         }
4943 
4944         mutex_enter(&path->mutex);
4945 
4946         rsmka_dequeue_msgbuf(path);
4947 
4948         /* incr procmsg_cnt can be at most RSMIPC_MAX_MESSAGES */
4949         if (path->procmsg_cnt < RSMIPC_MAX_MESSAGES)
4950                 path->procmsg_cnt++;
4951 
4952         ASSERT(path->procmsg_cnt <= RSMIPC_MAX_MESSAGES);
4953 
4954         /* No need to send credits if path is going down */
4955         if ((path->state == RSMKA_PATH_ACTIVE) &&
4956             (path->procmsg_cnt >= RSMIPC_LOTSFREE_MSGBUFS)) {
4957                 /*
4958                  * send credits and reset procmsg_cnt if success otherwise
4959                  * credits will be sent after processing the next message
4960                  */
4961                 e = rsmipc_send_controlmsg(path, RSMIPC_MSG_CREDIT);
4962                 if (e == 0)
4963                         path->procmsg_cnt = 0;
4964                 else
4965                         DBG_PRINTF((category, RSM_ERR,
4966                             "rsm_intr_proc_deferred:send credits err=%d\n", e));
4967         }
4968 
4969         /*
4970          * decrement the path refcnt since we incremented it in
4971          * rsm_intr_callback_dispatch
4972          */
4973         PATH_RELE_NOLOCK(path);
4974 
4975         mutex_exit(&path->mutex);
4976 
4977         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4978             "rsm_intr_proc_deferred done\n"));
4979 }
4980 
4981 /*
4982  * Flow-controlled messages are enqueued and dispatched onto a taskq here
4983  */
4984 static void
4985 rsm_intr_callback_dispatch(void *data, rsm_addr_t src_hwaddr,
4986     rsm_intr_hand_arg_t arg)
4987 {
4988         srv_handler_arg_t       *hdlr_argp = (srv_handler_arg_t *)arg;
4989         path_t                  *path;
4990         rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)data;
4991         DBG_DEFINE(category,
4992             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4993 
4994         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4995             "rsm_intr_callback_dispatch enter\n"));
4996         ASSERT(data && hdlr_argp);
4997 
4998         /* look up the path - incr the path refcnt */
4999         path = rsm_find_path(hdlr_argp->adapter_name,
5000             hdlr_argp->adapter_instance, src_hwaddr);
5001 
5002         /* the path has been removed - drop this message */
5003         if (path == NULL) {
5004                 DBG_PRINTF((category, RSM_DEBUG,
5005                     "rsm_intr_callback_dispatch done: msg dropped\n"));
5006                 return;
5007         }
5008         /* the path is not active - don't accept new messages */
5009         if (path->state != RSMKA_PATH_ACTIVE) {
5010                 PATH_RELE_NOLOCK(path);
5011                 mutex_exit(&path->mutex);
5012                 DBG_PRINTF((category, RSM_DEBUG,
5013                     "rsm_intr_callback_dispatch done: msg dropped"
5014                     " path=%lx !ACTIVE\n", path));
5015                 return;
5016         }
5017 
5018         /*
5019          * Check if this message was sent to an older incarnation
5020          * of the path/sendq.
5021          */
5022         if (path->local_incn != msghdr->rsmipc_incn) {
5023                 /* decrement the refcnt */
5024                 PATH_RELE_NOLOCK(path);
5025                 mutex_exit(&path->mutex);
5026                 DBG_PRINTF((category, RSM_DEBUG,
5027                     "rsm_intr_callback_dispatch done: old incn %lld\n",
5028                     msghdr->rsmipc_incn));
5029                 return;
5030         }
5031 
5032         /* copy and enqueue msg on the path's msgbuf queue */
5033         rsmka_enqueue_msgbuf(path, data);
5034 
5035         /*
5036          * schedule task to process messages - ignore retval from
5037          * task_dispatch because we sender cannot send more than
5038          * what receiver can handle.
5039          */
5040         (void) taskq_dispatch(path->recv_taskq,
5041             rsm_intr_proc_deferred, path, KM_NOSLEEP);
5042 
5043         mutex_exit(&path->mutex);
5044 
5045         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5046             "rsm_intr_callback_dispatch done\n"));
5047 }
5048 
5049 /*
5050  * This procedure is called from rsm_srv_func when a remote node creates a
5051  * a send queue.  This event is used as a hint that an  earlier failed
5052  * attempt to create a send queue to that remote node may now succeed and
5053  * should be retried.  Indication of an earlier failed attempt is provided
5054  * by the RSMKA_SQCREATE_PENDING flag.
5055  */
5056 static void
5057 rsm_sqcreateop_callback(rsm_addr_t src_hwaddr, rsm_intr_hand_arg_t arg)
5058 {
5059         srv_handler_arg_t       *hdlr_argp = (srv_handler_arg_t *)arg;
5060         path_t                  *path;
5061         DBG_DEFINE(category,
5062             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
5063 
5064         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5065             "rsm_sqcreateop_callback enter\n"));
5066 
5067         /* look up the path - incr the path refcnt */
5068         path = rsm_find_path(hdlr_argp->adapter_name,
5069             hdlr_argp->adapter_instance, src_hwaddr);
5070 
5071         if (path == NULL) {
5072                 DBG_PRINTF((category, RSM_DEBUG,
5073                     "rsm_sqcreateop_callback done: no path\n"));
5074                 return;
5075         }
5076 
5077         if ((path->state == RSMKA_PATH_UP) &&
5078             (path->flags & RSMKA_SQCREATE_PENDING)) {
5079                 /*
5080                  * previous attempt to create sendq had failed, retry
5081                  * it and move to RSMKA_PATH_ACTIVE state if successful.
5082                  * the refcnt will be decremented in the do_deferred_work
5083                  */
5084                 (void) rsmka_do_path_active(path, RSMKA_NO_SLEEP);
5085         } else {
5086                 /* decrement the refcnt */
5087                 PATH_RELE_NOLOCK(path);
5088         }
5089         mutex_exit(&path->mutex);
5090 
5091         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5092             "rsm_sqcreateop_callback done\n"));
5093 }
5094 
5095 static void
5096 rsm_intr_callback(void *data, rsm_addr_t src_hwaddr, rsm_intr_hand_arg_t arg)
5097 {
5098         rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)data;
5099         rsmipc_request_t *msg = (rsmipc_request_t *)data;
5100         rsmipc_controlmsg_t *ctrlmsg = (rsmipc_controlmsg_t *)data;
5101         rsm_node_id_t src_node;
5102         DBG_DEFINE(category,
5103             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
5104 
5105         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_callback enter:"
5106             "src=%d, type=%d\n", msghdr->rsmipc_src,
5107             msghdr->rsmipc_type));
5108 
5109         /*
5110          * Check for the version number in the msg header. If it is not
5111          * RSM_VERSION, drop the message. In the future, we need to manage
5112          * incompatible version numbers in some way
5113          */
5114         if (msghdr->rsmipc_version != RSM_VERSION) {
5115                 DBG_PRINTF((category, RSM_ERR, "wrong KA version\n"));
5116                 /*
5117                  * Drop requests that don't have a reply right here
5118                  * Request with reply will send a BAD_VERSION reply
5119                  * when they get processed by the worker thread.
5120                  */
5121                 if (msghdr->rsmipc_type != RSMIPC_MSG_SEGCONNECT) {
5122                         return;
5123                 }
5124 
5125         }
5126 
5127         src_node = msghdr->rsmipc_src;
5128 
5129         switch (msghdr->rsmipc_type) {
5130         case RSMIPC_MSG_SEGCONNECT:
5131         case RSMIPC_MSG_DISCONNECT:
5132         case RSMIPC_MSG_SUSPEND:
5133         case RSMIPC_MSG_SUSPEND_DONE:
5134         case RSMIPC_MSG_RESUME:
5135                 /*
5136                  * These message types are handled by a worker thread using
5137                  * the flow-control algorithm.
5138                  * Any message processing that does one or more of the
5139                  * following should be handled in a worker thread.
5140                  *      - allocates resources and might sleep
5141                  *      - makes RSMPI calls down to the interconnect driver
5142                  *      this by defn include requests with reply.
5143                  *      - takes a long duration of time
5144                  */
5145                 rsm_intr_callback_dispatch(data, src_hwaddr, arg);
5146                 break;
5147         case RSMIPC_MSG_NOTIMPORTING:
5148                 importer_list_rm(src_node, msg->rsmipc_key,
5149                     msg->rsmipc_segment_cookie);
5150                 break;
5151         case RSMIPC_MSG_SQREADY:
5152                 rsm_proc_sqready(data, src_hwaddr, arg);
5153                 break;
5154         case RSMIPC_MSG_SQREADY_ACK:
5155                 rsm_proc_sqready_ack(data, src_hwaddr, arg);
5156                 break;
5157         case RSMIPC_MSG_CREDIT:
5158                 rsm_add_credits(ctrlmsg, src_hwaddr, arg);
5159                 break;
5160         case RSMIPC_MSG_REPLY:
5161                 rsm_intr_reply(msghdr);
5162                 break;
5163         case RSMIPC_MSG_BELL:
5164                 rsm_intr_event(msg);
5165                 break;
5166         case RSMIPC_MSG_IMPORTING:
5167                 importer_list_add(src_node, msg->rsmipc_key,
5168                     msg->rsmipc_adapter_hwaddr,
5169                     msg->rsmipc_segment_cookie);
5170                 break;
5171         case RSMIPC_MSG_REPUBLISH:
5172                 importer_update(src_node, msg->rsmipc_key, msg->rsmipc_perm);
5173                 break;
5174         default:
5175                 DBG_PRINTF((category, RSM_DEBUG,
5176                     "rsm_intr_callback: bad msg %lx type %d data %lx\n",
5177                     (size_t)msg, (int)(msghdr->rsmipc_type), (size_t)data));
5178         }
5179 
5180         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_callback done\n"));
5181 
5182 }
5183 
5184 rsm_intr_hand_ret_t rsm_srv_func(rsm_controller_object_t *chd,
5185     rsm_intr_q_op_t opcode, rsm_addr_t src,
5186     void *data, size_t size, rsm_intr_hand_arg_t arg)
5187 {
5188         DBG_DEFINE(category,
5189             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
5190 
5191         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_srv_func enter\n"));
5192 
5193         switch (opcode) {
5194         case RSM_INTR_Q_OP_CREATE:
5195                 DBG_PRINTF((category, RSM_DEBUG, "rsm_srv_func:OP_CREATE\n"));
5196                 rsm_sqcreateop_callback(src, arg);
5197                 break;
5198         case RSM_INTR_Q_OP_DESTROY:
5199                 DBG_PRINTF((category, RSM_DEBUG, "rsm_srv_func:OP_DESTROY\n"));
5200                 break;
5201         case RSM_INTR_Q_OP_RECEIVE:
5202                 rsm_intr_callback(data, src, arg);
5203                 break;
5204         default:
5205                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5206                     "rsm_srv_func: unknown opcode = %x\n", opcode));
5207         }
5208 
5209         chd = chd;
5210         size = size;
5211 
5212         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_srv_func done\n"));
5213 
5214         return (RSM_INTR_HAND_CLAIMED);
5215 }
5216 
5217 /* *************************** IPC slots ************************* */
5218 static rsmipc_slot_t *
5219 rsmipc_alloc()
5220 {
5221         int i;
5222         rsmipc_slot_t *slot;
5223         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
5224 
5225         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_alloc enter\n"));
5226 
5227         /* try to find a free slot, if not wait */
5228         mutex_enter(&rsm_ipc.lock);
5229 
5230         while (rsm_ipc.count == 0) {
5231                 rsm_ipc.wanted = 1;
5232                 cv_wait(&rsm_ipc.cv, &rsm_ipc.lock);
5233         }
5234 
5235         /* An empty slot is available, find it */
5236         slot = &rsm_ipc.slots[0];
5237         for (i = 0; i < RSMIPC_SZ; i++, slot++) {
5238                 if (RSMIPC_GET(slot, RSMIPC_FREE)) {
5239                         RSMIPC_CLEAR(slot, RSMIPC_FREE);
5240                         break;
5241                 }
5242         }
5243 
5244         ASSERT(i < RSMIPC_SZ);
5245         rsm_ipc.count--;        /* one less is available */
5246         rsm_ipc.sequence++; /* new sequence */
5247 
5248         slot->rsmipc_cookie.ic.sequence = (uint_t)rsm_ipc.sequence;
5249         slot->rsmipc_cookie.ic.index = (uint_t)i;
5250 
5251         mutex_exit(&rsm_ipc.lock);
5252 
5253         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_alloc done\n"));
5254 
5255         return (slot);
5256 }
5257 
5258 static void
5259 rsmipc_free(rsmipc_slot_t *slot)
5260 {
5261         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
5262 
5263         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_free enter\n"));
5264 
5265         ASSERT(MUTEX_HELD(&slot->rsmipc_lock));
5266         ASSERT(&rsm_ipc.slots[slot->rsmipc_cookie.ic.index] == slot);
5267 
5268         mutex_enter(&rsm_ipc.lock);
5269 
5270         RSMIPC_SET(slot, RSMIPC_FREE);
5271 
5272         slot->rsmipc_cookie.ic.sequence = 0;
5273 
5274         mutex_exit(&slot->rsmipc_lock);
5275         rsm_ipc.count++;
5276         ASSERT(rsm_ipc.count <= RSMIPC_SZ);
5277         if (rsm_ipc.wanted) {
5278                 rsm_ipc.wanted = 0;
5279                 cv_broadcast(&rsm_ipc.cv);
5280         }
5281 
5282         mutex_exit(&rsm_ipc.lock);
5283 
5284         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_free done\n"));
5285 }
5286 
5287 static int
5288 rsmipc_send(rsm_node_id_t dest, rsmipc_request_t *req, rsmipc_reply_t *reply)
5289 {
5290         int             e = 0;
5291         int             credit_check = 0;
5292         int             retry_cnt = 0;
5293         int             min_retry_cnt = 10;
5294         rsm_send_t      is;
5295         rsmipc_slot_t   *rslot;
5296         adapter_t       *adapter;
5297         path_t          *path;
5298         sendq_token_t   *sendq_token;
5299         sendq_token_t   *used_sendq_token = NULL;
5300         rsm_send_q_handle_t     ipc_handle;
5301         DBG_DEFINE(category,
5302             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
5303 
5304         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_send enter:dest=%d",
5305             dest));
5306 
5307         /*
5308          * Check if this is a local case
5309          */
5310         if (dest == my_nodeid) {
5311                 switch (req->rsmipc_hdr.rsmipc_type) {
5312                 case RSMIPC_MSG_SEGCONNECT:
5313                         reply->rsmipc_status = (short)rsmsegacl_validate(
5314                             req, dest, reply);
5315                         break;
5316                 case RSMIPC_MSG_BELL:
5317                         req->rsmipc_hdr.rsmipc_src = dest;
5318                         rsm_intr_event(req);
5319                         break;
5320                 case RSMIPC_MSG_IMPORTING:
5321                         importer_list_add(dest, req->rsmipc_key,
5322                             req->rsmipc_adapter_hwaddr,
5323                             req->rsmipc_segment_cookie);
5324                         break;
5325                 case RSMIPC_MSG_NOTIMPORTING:
5326                         importer_list_rm(dest, req->rsmipc_key,
5327                             req->rsmipc_segment_cookie);
5328                         break;
5329                 case RSMIPC_MSG_REPUBLISH:
5330                         importer_update(dest, req->rsmipc_key,
5331                             req->rsmipc_perm);
5332                         break;
5333                 case RSMIPC_MSG_SUSPEND:
5334                         importer_suspend(dest);
5335                         break;
5336                 case RSMIPC_MSG_SUSPEND_DONE:
5337                         rsm_suspend_complete(dest, 0);
5338                         break;
5339                 case RSMIPC_MSG_RESUME:
5340                         importer_resume(dest);
5341                         break;
5342                 default:
5343                         ASSERT(0);
5344                 }
5345                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5346                     "rsmipc_send done\n"));
5347                 return (0);
5348         }
5349 
5350         if (dest >= MAX_NODES) {
5351                 DBG_PRINTF((category, RSM_ERR,
5352                     "rsm: rsmipc_send bad node number %x\n", dest));
5353                 return (RSMERR_REMOTE_NODE_UNREACHABLE);
5354         }
5355 
5356         /*
5357          * Oh boy! we are going remote.
5358          */
5359 
5360         /*
5361          * identify if we need to have credits to send this message
5362          * - only selected requests are flow controlled
5363          */
5364         if (req != NULL) {
5365                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5366                     "rsmipc_send:request type=%d\n",
5367                     req->rsmipc_hdr.rsmipc_type));
5368 
5369                 switch (req->rsmipc_hdr.rsmipc_type) {
5370                 case RSMIPC_MSG_SEGCONNECT:
5371                 case RSMIPC_MSG_DISCONNECT:
5372                 case RSMIPC_MSG_IMPORTING:
5373                 case RSMIPC_MSG_SUSPEND:
5374                 case RSMIPC_MSG_SUSPEND_DONE:
5375                 case RSMIPC_MSG_RESUME:
5376                         credit_check = 1;
5377                         break;
5378                 default:
5379                         credit_check = 0;
5380                 }
5381         }
5382 
5383 again:
5384         if (retry_cnt++ == min_retry_cnt) {
5385                 /* backoff before further retries for 10ms */
5386                 delay(drv_usectohz(10000));
5387                 retry_cnt = 0; /* reset retry_cnt */
5388         }
5389         sendq_token = rsmka_get_sendq_token(dest, used_sendq_token);
5390         if (sendq_token == NULL) {
5391                 DBG_PRINTF((category, RSM_ERR,
5392                     "rsm: rsmipc_send no device to reach node %d\n", dest));
5393                 return (RSMERR_REMOTE_NODE_UNREACHABLE);
5394         }
5395 
5396         if ((sendq_token == used_sendq_token) &&
5397             ((e == RSMERR_CONN_ABORTED) || (e == RSMERR_TIMEOUT) ||
5398             (e == RSMERR_COMM_ERR_MAYBE_DELIVERED))) {
5399                 rele_sendq_token(sendq_token);
5400                 DBG_PRINTF((category, RSM_DEBUG, "rsmipc_send done=%d\n", e));
5401                 return (RSMERR_CONN_ABORTED);
5402         } else
5403                 used_sendq_token = sendq_token;
5404 
5405 /* lint -save -e413 */
5406         path = SQ_TOKEN_TO_PATH(sendq_token);
5407         adapter = path->local_adapter;
5408 /* lint -restore */
5409         ipc_handle = sendq_token->rsmpi_sendq_handle;
5410 
5411         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5412             "rsmipc_send: path=%lx sendq_hdl=%lx\n", path, ipc_handle));
5413 
5414         if (reply == NULL) {
5415                 /* Send request without ack */
5416                 /*
5417                  * Set the rsmipc_version number in the msghdr for KA
5418                  * communication versioning
5419                  */
5420                 req->rsmipc_hdr.rsmipc_version = RSM_VERSION;
5421                 req->rsmipc_hdr.rsmipc_src = my_nodeid;
5422                 /*
5423                  * remote endpoints incn should match the value in our
5424                  * path's remote_incn field. No need to grab any lock
5425                  * since we have refcnted the path in rsmka_get_sendq_token
5426                  */
5427                 req->rsmipc_hdr.rsmipc_incn = path->remote_incn;
5428 
5429                 is.is_data = (void *)req;
5430                 is.is_size = sizeof (*req);
5431                 is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP;
5432                 is.is_wait = 0;
5433 
5434                 if (credit_check) {
5435                         mutex_enter(&path->mutex);
5436                         /*
5437                          * wait till we recv credits or path goes down. If path
5438                          * goes down rsm_send will fail and we handle the error
5439                          * then
5440                          */
5441                         while ((sendq_token->msgbuf_avail == 0) &&
5442                             (path->state == RSMKA_PATH_ACTIVE)) {
5443                                 e = cv_wait_sig(&sendq_token->sendq_cv,
5444                                     &path->mutex);
5445                                 if (e == 0) {
5446                                         mutex_exit(&path->mutex);
5447                                         no_reply_cnt++;
5448                                         rele_sendq_token(sendq_token);
5449                                         DBG_PRINTF((category, RSM_DEBUG,
5450                                             "rsmipc_send done: "
5451                                             "cv_wait INTERRUPTED"));
5452                                         return (RSMERR_INTERRUPTED);
5453                                 }
5454                         }
5455 
5456                         /*
5457                          * path is not active retry on another path.
5458                          */
5459                         if (path->state != RSMKA_PATH_ACTIVE) {
5460                                 mutex_exit(&path->mutex);
5461                                 rele_sendq_token(sendq_token);
5462                                 e = RSMERR_CONN_ABORTED;
5463                                 DBG_PRINTF((category, RSM_ERR,
5464                                     "rsm: rsmipc_send: path !ACTIVE"));
5465                                 goto again;
5466                         }
5467 
5468                         ASSERT(sendq_token->msgbuf_avail > 0);
5469 
5470                         /*
5471                          * reserve a msgbuf
5472                          */
5473                         sendq_token->msgbuf_avail--;
5474 
5475                         mutex_exit(&path->mutex);
5476 
5477                         e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is,
5478                             NULL);
5479 
5480                         if (e != RSM_SUCCESS) {
5481                                 mutex_enter(&path->mutex);
5482                                 /*
5483                                  * release the reserved msgbuf since
5484                                  * the send failed
5485                                  */
5486                                 sendq_token->msgbuf_avail++;
5487                                 cv_broadcast(&sendq_token->sendq_cv);
5488                                 mutex_exit(&path->mutex);
5489                         }
5490                 } else
5491                         e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is,
5492                             NULL);
5493 
5494                 no_reply_cnt++;
5495                 rele_sendq_token(sendq_token);
5496                 if (e != RSM_SUCCESS) {
5497                         DBG_PRINTF((category, RSM_ERR,
5498                             "rsm: rsmipc_send no reply send"
5499                             " err = %d no reply count = %d\n",
5500                             e, no_reply_cnt));
5501                         ASSERT(e != RSMERR_QUEUE_FENCE_UP &&
5502                             e != RSMERR_BAD_BARRIER_HNDL);
5503                         atomic_inc_64(&rsm_ipcsend_errcnt);
5504                         goto again;
5505                 } else {
5506                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5507                             "rsmipc_send done\n"));
5508                         return (e);
5509                 }
5510 
5511         }
5512 
5513         if (req == NULL) {
5514                 /* Send reply - No flow control is done for reply */
5515                 /*
5516                  * Set the version in the msg header for KA communication
5517                  * versioning
5518                  */
5519                 reply->rsmipc_hdr.rsmipc_version = RSM_VERSION;
5520                 reply->rsmipc_hdr.rsmipc_src = my_nodeid;
5521                 /* incn number is not used for reply msgs currently */
5522                 reply->rsmipc_hdr.rsmipc_incn = path->remote_incn;
5523 
5524                 is.is_data = (void *)reply;
5525                 is.is_size = sizeof (*reply);
5526                 is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP;
5527                 is.is_wait = 0;
5528                 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is, NULL);
5529                 rele_sendq_token(sendq_token);
5530                 if (e != RSM_SUCCESS) {
5531                         DBG_PRINTF((category, RSM_ERR,
5532                             "rsm: rsmipc_send reply send"
5533                             " err = %d\n", e));
5534                         atomic_inc_64(&rsm_ipcsend_errcnt);
5535                         goto again;
5536                 } else {
5537                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5538                             "rsmipc_send done\n"));
5539                         return (e);
5540                 }
5541         }
5542 
5543         /* Reply needed */
5544         rslot = rsmipc_alloc(); /* allocate a new ipc slot */
5545 
5546         mutex_enter(&rslot->rsmipc_lock);
5547 
5548         rslot->rsmipc_data = (void *)reply;
5549         RSMIPC_SET(rslot, RSMIPC_PENDING);
5550 
5551         while (RSMIPC_GET(rslot, RSMIPC_PENDING)) {
5552                 /*
5553                  * Set the rsmipc_version number in the msghdr for KA
5554                  * communication versioning
5555                  */
5556                 req->rsmipc_hdr.rsmipc_version = RSM_VERSION;
5557                 req->rsmipc_hdr.rsmipc_src = my_nodeid;
5558                 req->rsmipc_hdr.rsmipc_cookie = rslot->rsmipc_cookie;
5559                 /*
5560                  * remote endpoints incn should match the value in our
5561                  * path's remote_incn field. No need to grab any lock
5562                  * since we have refcnted the path in rsmka_get_sendq_token
5563                  */
5564                 req->rsmipc_hdr.rsmipc_incn = path->remote_incn;
5565 
5566                 is.is_data = (void *)req;
5567                 is.is_size = sizeof (*req);
5568                 is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP;
5569                 is.is_wait = 0;
5570                 if (credit_check) {
5571 
5572                         mutex_enter(&path->mutex);
5573                         /*
5574                          * wait till we recv credits or path goes down. If path
5575                          * goes down rsm_send will fail and we handle the error
5576                          * then.
5577                          */
5578                         while ((sendq_token->msgbuf_avail == 0) &&
5579                             (path->state == RSMKA_PATH_ACTIVE)) {
5580                                 e = cv_wait_sig(&sendq_token->sendq_cv,
5581                                     &path->mutex);
5582                                 if (e == 0) {
5583                                         mutex_exit(&path->mutex);
5584                                         RSMIPC_CLEAR(rslot, RSMIPC_PENDING);
5585                                         rsmipc_free(rslot);
5586                                         rele_sendq_token(sendq_token);
5587                                         DBG_PRINTF((category, RSM_DEBUG,
5588                                             "rsmipc_send done: "
5589                                             "cv_wait INTERRUPTED"));
5590                                         return (RSMERR_INTERRUPTED);
5591                                 }
5592                         }
5593 
5594                         /*
5595                          * path is not active retry on another path.
5596                          */
5597                         if (path->state != RSMKA_PATH_ACTIVE) {
5598                                 mutex_exit(&path->mutex);
5599                                 RSMIPC_CLEAR(rslot, RSMIPC_PENDING);
5600                                 rsmipc_free(rslot);
5601                                 rele_sendq_token(sendq_token);
5602                                 e = RSMERR_CONN_ABORTED;
5603                                 DBG_PRINTF((category, RSM_ERR,
5604                                     "rsm: rsmipc_send: path !ACTIVE"));
5605                                 goto again;
5606                         }
5607 
5608                         ASSERT(sendq_token->msgbuf_avail > 0);
5609 
5610                         /*
5611                          * reserve a msgbuf
5612                          */
5613                         sendq_token->msgbuf_avail--;
5614 
5615                         mutex_exit(&path->mutex);
5616 
5617                         e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is,
5618                             NULL);
5619 
5620                         if (e != RSM_SUCCESS) {
5621                                 mutex_enter(&path->mutex);
5622                                 /*
5623                                  * release the reserved msgbuf since
5624                                  * the send failed
5625                                  */
5626                                 sendq_token->msgbuf_avail++;
5627                                 cv_broadcast(&sendq_token->sendq_cv);
5628                                 mutex_exit(&path->mutex);
5629                         }
5630                 } else
5631                         e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is,
5632                             NULL);
5633 
5634                 if (e != RSM_SUCCESS) {
5635                         DBG_PRINTF((category, RSM_ERR,
5636                             "rsm: rsmipc_send rsmpi send err = %d\n", e));
5637                         RSMIPC_CLEAR(rslot, RSMIPC_PENDING);
5638                         rsmipc_free(rslot);
5639                         rele_sendq_token(sendq_token);
5640                         atomic_inc_64(&rsm_ipcsend_errcnt);
5641                         goto again;
5642                 }
5643 
5644                 /* wait for a reply signal, a SIGINT, or 5 sec. timeout */
5645                 e = cv_reltimedwait_sig(&rslot->rsmipc_cv, &rslot->rsmipc_lock,
5646                     drv_usectohz(5000000), TR_CLOCK_TICK);
5647                 if (e < 0) {
5648                         /* timed out - retry */
5649                         e = RSMERR_TIMEOUT;
5650                 } else if (e == 0) {
5651                         /* signalled - return error */
5652                         e = RSMERR_INTERRUPTED;
5653                         break;
5654                 } else {
5655                         e = RSM_SUCCESS;
5656                 }
5657         }
5658 
5659         RSMIPC_CLEAR(rslot, RSMIPC_PENDING);
5660         rsmipc_free(rslot);
5661         rele_sendq_token(sendq_token);
5662 
5663         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_send done=%d\n", e));
5664         return (e);
5665 }
5666 
5667 static int
5668 rsm_send_notimporting(rsm_node_id_t dest, rsm_memseg_id_t segid,  void *cookie)
5669 {
5670         rsmipc_request_t request;
5671 
5672         /*
5673          *  inform the exporter to delete this importer
5674          */
5675         request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_NOTIMPORTING;
5676         request.rsmipc_key = segid;
5677         request.rsmipc_segment_cookie = cookie;
5678         return (rsmipc_send(dest, &request, RSM_NO_REPLY));
5679 }
5680 
5681 static void
5682 rsm_send_republish(rsm_memseg_id_t segid, rsmapi_access_entry_t *acl,
5683     int acl_len, rsm_permission_t default_permission)
5684 {
5685         int                     i;
5686         importing_token_t       *token;
5687         rsmipc_request_t        request;
5688         republish_token_t       *republish_list = NULL;
5689         republish_token_t       *rp;
5690         rsm_permission_t        permission;
5691         int                     index;
5692 
5693         /*
5694          * send the new access mode to all the nodes that have imported
5695          * this segment.
5696          * If the new acl does not have a node that was present in
5697          * the old acl a access permission of 0 is sent.
5698          */
5699 
5700         index = rsmhash(segid);
5701 
5702         /*
5703          * create a list of node/permissions to send the republish message
5704          */
5705         mutex_enter(&importer_list.lock);
5706 
5707         token = importer_list.bucket[index];
5708         while (token != NULL) {
5709                 if (segid == token->key) {
5710                         permission = default_permission;
5711 
5712                         for (i = 0; i < acl_len; i++) {
5713                                 if (token->importing_node == acl[i].ae_node) {
5714                                         permission = acl[i].ae_permission;
5715                                         break;
5716                                 }
5717                         }
5718                         rp = kmem_zalloc(sizeof (republish_token_t), KM_SLEEP);
5719 
5720                         rp->key = segid;
5721                         rp->importing_node = token->importing_node;
5722                         rp->permission = permission;
5723                         rp->next = republish_list;
5724                         republish_list = rp;
5725                 }
5726                 token = token->next;
5727         }
5728 
5729         mutex_exit(&importer_list.lock);
5730 
5731         request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_REPUBLISH;
5732         request.rsmipc_key = segid;
5733 
5734         while (republish_list != NULL) {
5735                 request.rsmipc_perm = republish_list->permission;
5736                 (void) rsmipc_send(republish_list->importing_node,
5737                     &request, RSM_NO_REPLY);
5738                 rp = republish_list;
5739                 republish_list = republish_list->next;
5740                 kmem_free(rp, sizeof (republish_token_t));
5741         }
5742 }
5743 
5744 static void
5745 rsm_send_suspend()
5746 {
5747         int                     i, e;
5748         rsmipc_request_t        request;
5749         list_element_t          *tokp;
5750         list_element_t          *head = NULL;
5751         importing_token_t       *token;
5752         DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
5753             "rsm_send_suspend enter\n"));
5754 
5755         /*
5756          * create a list of node to send the suspend message
5757          *
5758          * Currently the whole importer list is scanned and we obtain
5759          * all the nodes - this basically gets all nodes that at least
5760          * import one segment from the local node.
5761          *
5762          * no need to grab the rsm_suspend_list lock here since we are
5763          * single threaded when suspend is called.
5764          */
5765 
5766         mutex_enter(&importer_list.lock);
5767         for (i = 0; i < rsm_hash_size; i++) {
5768 
5769                 token = importer_list.bucket[i];
5770 
5771                 while (token != NULL) {
5772 
5773                         tokp = head;
5774 
5775                         /*
5776                          * make sure that the token's node
5777                          * is not already on the suspend list
5778                          */
5779                         while (tokp != NULL) {
5780                                 if (tokp->nodeid == token->importing_node) {
5781                                         break;
5782                                 }
5783                                 tokp = tokp->next;
5784                         }
5785 
5786                         if (tokp == NULL) { /* not in suspend list */
5787                                 tokp = kmem_zalloc(sizeof (list_element_t),
5788                                     KM_SLEEP);
5789                                 tokp->nodeid = token->importing_node;
5790                                 tokp->next = head;
5791                                 head = tokp;
5792                         }
5793 
5794                         token = token->next;
5795                 }
5796         }
5797         mutex_exit(&importer_list.lock);
5798 
5799         if (head == NULL) { /* no importers so go ahead and quiesce segments */
5800                 exporter_quiesce();
5801                 return;
5802         }
5803 
5804         mutex_enter(&rsm_suspend_list.list_lock);
5805         ASSERT(rsm_suspend_list.list_head == NULL);
5806         /*
5807          * update the suspend list righaway so that if a node dies the
5808          * pathmanager can set the NODE dead flag
5809          */
5810         rsm_suspend_list.list_head = head;
5811         mutex_exit(&rsm_suspend_list.list_lock);
5812 
5813         tokp = head;
5814 
5815         while (tokp != NULL) {
5816                 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_SUSPEND;
5817                 e = rsmipc_send(tokp->nodeid, &request, RSM_NO_REPLY);
5818                 /*
5819                  * Error in rsmipc_send currently happens due to inaccessibility
5820                  * of the remote node.
5821                  */
5822                 if (e == RSM_SUCCESS) { /* send failed - don't wait for ack */
5823                         tokp->flags |= RSM_SUSPEND_ACKPENDING;
5824                 }
5825 
5826                 tokp = tokp->next;
5827         }
5828 
5829         DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
5830             "rsm_send_suspend done\n"));
5831 
5832 }
5833 
5834 static void
5835 rsm_send_resume()
5836 {
5837         rsmipc_request_t        request;
5838         list_element_t          *elem, *head;
5839 
5840         /*
5841          * save the suspend list so that we know where to send
5842          * the resume messages and make the suspend list head
5843          * NULL.
5844          */
5845         mutex_enter(&rsm_suspend_list.list_lock);
5846         head = rsm_suspend_list.list_head;
5847         rsm_suspend_list.list_head = NULL;
5848         mutex_exit(&rsm_suspend_list.list_lock);
5849 
5850         while (head != NULL) {
5851                 elem = head;
5852                 head = head->next;
5853 
5854                 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_RESUME;
5855 
5856                 (void) rsmipc_send(elem->nodeid, &request, RSM_NO_REPLY);
5857 
5858                 kmem_free((void *)elem, sizeof (list_element_t));
5859 
5860         }
5861 
5862 }
5863 
5864 /*
5865  * This function takes path and sends a message using the sendq
5866  * corresponding to it. The RSMIPC_MSG_SQREADY, RSMIPC_MSG_SQREADY_ACK
5867  * and RSMIPC_MSG_CREDIT are sent using this function.
5868  */
5869 int
5870 rsmipc_send_controlmsg(path_t *path, int msgtype)
5871 {
5872         int                     e;
5873         int                     retry_cnt = 0;
5874         int                     min_retry_cnt = 10;
5875         adapter_t               *adapter;
5876         rsm_send_t              is;
5877         rsm_send_q_handle_t     ipc_handle;
5878         rsmipc_controlmsg_t     msg;
5879         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_FLOWCONTROL);
5880 
5881         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5882             "rsmipc_send_controlmsg enter\n"));
5883 
5884         ASSERT(MUTEX_HELD(&path->mutex));
5885 
5886         adapter = path->local_adapter;
5887 
5888         DBG_PRINTF((category, RSM_DEBUG, "rsmipc_send_controlmsg:path=%lx "
5889             "msgtype=%d %lx:%llx->%lx:%llx procmsg=%d\n", path, msgtype,
5890             my_nodeid, adapter->hwaddr, path->remote_node,
5891             path->remote_hwaddr, path->procmsg_cnt));
5892 
5893         if (path->state != RSMKA_PATH_ACTIVE) {
5894                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5895                     "rsmipc_send_controlmsg done: ! RSMKA_PATH_ACTIVE"));
5896                 return (1);
5897         }
5898 
5899         ipc_handle = path->sendq_token.rsmpi_sendq_handle;
5900 
5901         msg.rsmipc_hdr.rsmipc_version = RSM_VERSION;
5902         msg.rsmipc_hdr.rsmipc_src = my_nodeid;
5903         msg.rsmipc_hdr.rsmipc_type = msgtype;
5904         msg.rsmipc_hdr.rsmipc_incn = path->remote_incn;
5905 
5906         if (msgtype == RSMIPC_MSG_CREDIT)
5907                 msg.rsmipc_credits = path->procmsg_cnt;
5908 
5909         msg.rsmipc_local_incn = path->local_incn;
5910 
5911         msg.rsmipc_adapter_hwaddr = adapter->hwaddr;
5912         /* incr the sendq, path refcnt */
5913         PATH_HOLD_NOLOCK(path);
5914         SENDQ_TOKEN_HOLD(path);
5915 
5916         do {
5917                 /* drop the path lock before doing the rsm_send */
5918                 mutex_exit(&path->mutex);
5919 
5920                 is.is_data = (void *)&msg;
5921                 is.is_size = sizeof (msg);
5922                 is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP;
5923                 is.is_wait = 0;
5924 
5925                 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is, NULL);
5926 
5927                 ASSERT(e != RSMERR_QUEUE_FENCE_UP &&
5928                     e != RSMERR_BAD_BARRIER_HNDL);
5929 
5930                 mutex_enter(&path->mutex);
5931 
5932                 if (e == RSM_SUCCESS) {
5933                         break;
5934                 }
5935                 /* error counter for statistics */
5936                 atomic_inc_64(&rsm_ctrlmsg_errcnt);
5937 
5938                 DBG_PRINTF((category, RSM_ERR,
5939                     "rsmipc_send_controlmsg:rsm_send error=%d", e));
5940 
5941                 if (++retry_cnt == min_retry_cnt) { /* backoff before retry */
5942                         (void) cv_reltimedwait(&path->sendq_token.sendq_cv,
5943                             &path->mutex, drv_usectohz(10000), TR_CLOCK_TICK);
5944                         retry_cnt = 0;
5945                 }
5946         } while (path->state == RSMKA_PATH_ACTIVE);
5947 
5948         /* decrement the sendq,path refcnt that we incr before rsm_send */
5949         SENDQ_TOKEN_RELE(path);
5950         PATH_RELE_NOLOCK(path);
5951 
5952         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5953             "rsmipc_send_controlmsg done=%d", e));
5954         return (e);
5955 }
5956 
5957 /*
5958  * Called from rsm_force_unload and path_importer_disconnect. The memory
5959  * mapping for the imported segment is removed and the segment is
5960  * disconnected at the interconnect layer if disconnect_flag is TRUE.
5961  * rsm_force_unload will get disconnect_flag TRUE from rsm_intr_callback
5962  * and FALSE from rsm_rebind.
5963  *
5964  * When subsequent accesses cause page faulting, the dummy page is mapped
5965  * to resolve the fault, and the mapping generation number is incremented
5966  * so that the application can be notified on a close barrier operation.
5967  *
5968  * It is important to note that the caller of rsmseg_unload is responsible for
5969  * acquiring the segment lock before making a call to rsmseg_unload. This is
5970  * required to make the caller and rsmseg_unload thread safe. The segment lock
5971  * will be released by the rsmseg_unload function.
5972  */
5973 void
5974 rsmseg_unload(rsmseg_t *im_seg)
5975 {
5976         rsmcookie_t             *hdl;
5977         void                    *shared_cookie;
5978         rsmipc_request_t        request;
5979         uint_t                  maxprot;
5980 
5981         DBG_DEFINE(category,
5982             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
5983 
5984         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_unload enter\n"));
5985 
5986         ASSERT(im_seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
5987 
5988         /* wait until segment leaves the mapping state */
5989         while (im_seg->s_state == RSM_STATE_MAPPING)
5990                 cv_wait(&im_seg->s_cv, &im_seg->s_lock);
5991         /*
5992          * An unload is only necessary if the segment is connected. However,
5993          * if the segment was on the import list in state RSM_STATE_CONNECTING
5994          * then a connection was in progress. Change to RSM_STATE_NEW
5995          * here to cause an early exit from the connection process.
5996          */
5997         if (im_seg->s_state == RSM_STATE_NEW) {
5998                 rsmseglock_release(im_seg);
5999                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6000                     "rsmseg_unload done: RSM_STATE_NEW\n"));
6001                 return;
6002         } else if (im_seg->s_state == RSM_STATE_CONNECTING) {
6003                 im_seg->s_state = RSM_STATE_ABORT_CONNECT;
6004                 rsmsharelock_acquire(im_seg);
6005                 im_seg->s_share->rsmsi_state = RSMSI_STATE_ABORT_CONNECT;
6006                 rsmsharelock_release(im_seg);
6007                 rsmseglock_release(im_seg);
6008                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6009                     "rsmseg_unload done: RSM_STATE_CONNECTING\n"));
6010                 return;
6011         }
6012 
6013         if (im_seg->s_flags & RSM_FORCE_DISCONNECT) {
6014                 if (im_seg->s_ckl != NULL) {
6015                         int e;
6016                         /* Setup protections for remap */
6017                         maxprot = PROT_USER;
6018                         if (im_seg->s_mode & RSM_PERM_READ) {
6019                                 maxprot |= PROT_READ;
6020                         }
6021                         if (im_seg->s_mode & RSM_PERM_WRITE) {
6022                                 maxprot |= PROT_WRITE;
6023                         }
6024                         hdl = im_seg->s_ckl;
6025                         for (; hdl != NULL; hdl = hdl->c_next) {
6026                                 e = devmap_umem_remap(hdl->c_dhp, rsm_dip,
6027                                     remap_cookie,
6028                                     hdl->c_off, hdl->c_len,
6029                                     maxprot, 0, NULL);
6030 
6031                                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6032                                     "remap returns %d\n", e));
6033                         }
6034                 }
6035 
6036                 (void) rsm_closeconnection(im_seg, &shared_cookie);
6037 
6038                 if (shared_cookie != NULL) {
6039                         /*
6040                          * inform the exporting node so this import
6041                          * can be deleted from the list of importers.
6042                          */
6043                         request.rsmipc_hdr.rsmipc_type =
6044                             RSMIPC_MSG_NOTIMPORTING;
6045                         request.rsmipc_key = im_seg->s_segid;
6046                         request.rsmipc_segment_cookie = shared_cookie;
6047                         rsmseglock_release(im_seg);
6048                         (void) rsmipc_send(im_seg->s_node, &request,
6049                             RSM_NO_REPLY);
6050                 } else {
6051                         rsmseglock_release(im_seg);
6052                 }
6053         }
6054         else
6055                 rsmseglock_release(im_seg);
6056 
6057         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_unload done\n"));
6058 
6059 }
6060 
6061 /* ****************************** Importer Calls ************************ */
6062 
6063 static int
6064 rsm_access(uid_t owner, gid_t group, int perm, int mode, const struct cred *cr)
6065 {
6066         int shifts = 0;
6067 
6068         if (crgetuid(cr) != owner) {
6069                 shifts += 3;
6070                 if (!groupmember(group, cr))
6071                         shifts += 3;
6072         }
6073 
6074         mode &= ~(perm << shifts);
6075 
6076         if (mode == 0)
6077                 return (0);
6078 
6079         return (secpolicy_rsm_access(cr, owner, mode));
6080 }
6081 
6082 
6083 static int
6084 rsm_connect(rsmseg_t *seg, rsm_ioctlmsg_t *msg, cred_t *cred,
6085     intptr_t dataptr, int mode)
6086 {
6087         int e;
6088         int                     recheck_state = 0;
6089         void                    *shared_cookie;
6090         rsmipc_request_t        request;
6091         rsmipc_reply_t          reply;
6092         rsm_permission_t        access;
6093         adapter_t               *adapter;
6094         rsm_addr_t              addr = 0;
6095         rsm_import_share_t      *sharedp;
6096         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT);
6097 
6098         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_connect enter\n"));
6099 
6100         adapter = rsm_getadapter(msg, mode);
6101         if (adapter == NULL) {
6102                 DBG_PRINTF((category, RSM_ERR,
6103                     "rsm_connect done:ENODEV adapter=NULL\n"));
6104                 return (RSMERR_CTLR_NOT_PRESENT);
6105         }
6106 
6107         if ((adapter == &loopback_adapter) && (msg->nodeid != my_nodeid)) {
6108                 rsmka_release_adapter(adapter);
6109                 DBG_PRINTF((category, RSM_ERR,
6110                     "rsm_connect done:ENODEV loopback\n"));
6111                 return (RSMERR_CTLR_NOT_PRESENT);
6112         }
6113 
6114 
6115         ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
6116         ASSERT(seg->s_state == RSM_STATE_NEW);
6117 
6118         /*
6119          * Translate perm to access
6120          */
6121         if (msg->perm & ~RSM_PERM_RDWR) {
6122                 rsmka_release_adapter(adapter);
6123                 DBG_PRINTF((category, RSM_ERR,
6124                     "rsm_connect done:EINVAL invalid perms\n"));
6125                 return (RSMERR_BAD_PERMS);
6126         }
6127         access = 0;
6128         if (msg->perm & RSM_PERM_READ)
6129                 access |= RSM_ACCESS_READ;
6130         if (msg->perm & RSM_PERM_WRITE)
6131                 access |= RSM_ACCESS_WRITE;
6132 
6133         seg->s_node = msg->nodeid;
6134 
6135         /*
6136          * Adding to the import list locks the segment; release the segment
6137          * lock so we can get the reply for the send.
6138          */
6139         e = rsmimport_add(seg, msg->key);
6140         if (e) {
6141                 rsmka_release_adapter(adapter);
6142                 DBG_PRINTF((category, RSM_ERR,
6143                     "rsm_connect done:rsmimport_add failed %d\n", e));
6144                 return (e);
6145         }
6146         seg->s_state = RSM_STATE_CONNECTING;
6147 
6148         /*
6149          * Set the s_adapter field here so as to have a valid comparison of
6150          * the adapter and the s_adapter value during rsmshare_get. For
6151          * any error, set s_adapter to NULL before doing a release_adapter
6152          */
6153         seg->s_adapter = adapter;
6154 
6155         rsmseglock_release(seg);
6156 
6157         /*
6158          * get the pointer to the shared data structure; the
6159          * shared data is locked and refcount has been incremented
6160          */
6161         sharedp = rsmshare_get(msg->key, msg->nodeid, adapter, seg);
6162 
6163         ASSERT(rsmsharelock_held(seg));
6164 
6165         do {
6166                 /* flag indicates whether we need to recheck the state */
6167                 recheck_state = 0;
6168                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6169                     "rsm_connect:RSMSI_STATE=%d\n", sharedp->rsmsi_state));
6170                 switch (sharedp->rsmsi_state) {
6171                 case RSMSI_STATE_NEW:
6172                         sharedp->rsmsi_state = RSMSI_STATE_CONNECTING;
6173                         break;
6174                 case RSMSI_STATE_CONNECTING:
6175                         /* FALLTHRU */
6176                 case RSMSI_STATE_CONN_QUIESCE:
6177                         /* FALLTHRU */
6178                 case RSMSI_STATE_MAP_QUIESCE:
6179                         /* wait for the state to change */
6180                         while ((sharedp->rsmsi_state ==
6181                             RSMSI_STATE_CONNECTING) ||
6182                             (sharedp->rsmsi_state ==
6183                             RSMSI_STATE_CONN_QUIESCE) ||
6184                             (sharedp->rsmsi_state ==
6185                             RSMSI_STATE_MAP_QUIESCE)) {
6186                                 if (cv_wait_sig(&sharedp->rsmsi_cv,
6187                                     &sharedp->rsmsi_lock) == 0) {
6188                                         /* signalled - clean up and return */
6189                                         rsmsharelock_release(seg);
6190                                         rsmimport_rm(seg);
6191                                         seg->s_adapter = NULL;
6192                                         rsmka_release_adapter(adapter);
6193                                         seg->s_state = RSM_STATE_NEW;
6194                                         DBG_PRINTF((category, RSM_ERR,
6195                                             "rsm_connect done: INTERRUPTED\n"));
6196                                         return (RSMERR_INTERRUPTED);
6197                                 }
6198                         }
6199                         /*
6200                          * the state changed, loop back and check what it is
6201                          */
6202                         recheck_state = 1;
6203                         break;
6204                 case RSMSI_STATE_ABORT_CONNECT:
6205                         /* exit the loop and clean up further down */
6206                         break;
6207                 case RSMSI_STATE_CONNECTED:
6208                         /* already connected, good - fall through */
6209                 case RSMSI_STATE_MAPPED:
6210                         /* already mapped, wow - fall through */
6211                         /* access validation etc is done further down */
6212                         break;
6213                 case RSMSI_STATE_DISCONNECTED:
6214                         /* disconnected - so reconnect now */
6215                         sharedp->rsmsi_state = RSMSI_STATE_CONNECTING;
6216                         break;
6217                 default:
6218                         ASSERT(0); /* Invalid State */
6219                 }
6220         } while (recheck_state);
6221 
6222         if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTING) {
6223                 /* we are the first to connect */
6224                 rsmsharelock_release(seg);
6225 
6226                 if (msg->nodeid != my_nodeid) {
6227                         addr = get_remote_hwaddr(adapter, msg->nodeid);
6228 
6229                         if ((int64_t)addr < 0) {
6230                                 rsmsharelock_acquire(seg);
6231                                 rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING,
6232                                     RSMSI_STATE_NEW);
6233                                 rsmsharelock_release(seg);
6234                                 rsmimport_rm(seg);
6235                                 seg->s_adapter = NULL;
6236                                 rsmka_release_adapter(adapter);
6237                                 seg->s_state = RSM_STATE_NEW;
6238                                 DBG_PRINTF((category, RSM_ERR,
6239                                     "rsm_connect done: hwaddr<0\n"));
6240                                 return (RSMERR_INTERNAL_ERROR);
6241                         }
6242                 } else {
6243                         addr = adapter->hwaddr;
6244                 }
6245 
6246                 /*
6247                  * send request to node [src, dest, key, msgid] and get back
6248                  * [status, msgid, cookie]
6249                  */
6250                 request.rsmipc_key = msg->key;
6251                 /*
6252                  * we need the s_mode of the exporter so pass
6253                  * RSM_ACCESS_TRUSTED
6254                  */
6255                 request.rsmipc_perm = RSM_ACCESS_TRUSTED;
6256                 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_SEGCONNECT;
6257                 request.rsmipc_adapter_hwaddr = addr;
6258                 request.rsmipc_segment_cookie = sharedp;
6259 
6260                 e = (int)rsmipc_send(msg->nodeid, &request, &reply);
6261                 if (e) {
6262                         rsmsharelock_acquire(seg);
6263                         rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING,
6264                             RSMSI_STATE_NEW);
6265                         rsmsharelock_release(seg);
6266                         rsmimport_rm(seg);
6267                         seg->s_adapter = NULL;
6268                         rsmka_release_adapter(adapter);
6269                         seg->s_state = RSM_STATE_NEW;
6270                         DBG_PRINTF((category, RSM_ERR,
6271                             "rsm_connect done:rsmipc_send failed %d\n", e));
6272                         return (e);
6273                 }
6274 
6275                 if (reply.rsmipc_status != RSM_SUCCESS) {
6276                         rsmsharelock_acquire(seg);
6277                         rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING,
6278                             RSMSI_STATE_NEW);
6279                         rsmsharelock_release(seg);
6280                         rsmimport_rm(seg);
6281                         seg->s_adapter = NULL;
6282                         rsmka_release_adapter(adapter);
6283                         seg->s_state = RSM_STATE_NEW;
6284                         DBG_PRINTF((category, RSM_ERR,
6285                             "rsm_connect done:rsmipc_send reply err %d\n",
6286                             reply.rsmipc_status));
6287                         return (reply.rsmipc_status);
6288                 }
6289 
6290                 rsmsharelock_acquire(seg);
6291                 /* store the information recvd into the shared data struct */
6292                 sharedp->rsmsi_mode = reply.rsmipc_mode;
6293                 sharedp->rsmsi_uid = reply.rsmipc_uid;
6294                 sharedp->rsmsi_gid = reply.rsmipc_gid;
6295                 sharedp->rsmsi_seglen = reply.rsmipc_seglen;
6296                 sharedp->rsmsi_cookie = sharedp;
6297         }
6298 
6299         rsmsharelock_release(seg);
6300 
6301         /*
6302          * Get the segment lock and check for a force disconnect
6303          * from the export side which would have changed the state
6304          * back to RSM_STATE_NEW. Once the segment lock is acquired a
6305          * force disconnect will be held off until the connection
6306          * has completed.
6307          */
6308         rsmseglock_acquire(seg);
6309         rsmsharelock_acquire(seg);
6310         ASSERT(seg->s_state == RSM_STATE_CONNECTING ||
6311             seg->s_state == RSM_STATE_ABORT_CONNECT);
6312 
6313         shared_cookie = sharedp->rsmsi_cookie;
6314 
6315         if ((seg->s_state == RSM_STATE_ABORT_CONNECT) ||
6316             (sharedp->rsmsi_state == RSMSI_STATE_ABORT_CONNECT)) {
6317                 seg->s_state = RSM_STATE_NEW;
6318                 seg->s_adapter = NULL;
6319                 rsmsharelock_release(seg);
6320                 rsmseglock_release(seg);
6321                 rsmimport_rm(seg);
6322                 rsmka_release_adapter(adapter);
6323 
6324                 rsmsharelock_acquire(seg);
6325                 if (!(sharedp->rsmsi_flags & RSMSI_FLAGS_ABORTDONE)) {
6326                         /*
6327                          * set a flag indicating abort handling has been
6328                          * done
6329                          */
6330                         sharedp->rsmsi_flags |= RSMSI_FLAGS_ABORTDONE;
6331                         rsmsharelock_release(seg);
6332                         /* send a message to exporter - only once */
6333                         (void) rsm_send_notimporting(msg->nodeid,
6334                             msg->key, shared_cookie);
6335                         rsmsharelock_acquire(seg);
6336                         /*
6337                          * wake up any waiting importers and inform that
6338                          * connection has been aborted
6339                          */
6340                         cv_broadcast(&sharedp->rsmsi_cv);
6341                 }
6342                 rsmsharelock_release(seg);
6343 
6344                 DBG_PRINTF((category, RSM_ERR,
6345                     "rsm_connect done: RSM_STATE_ABORT_CONNECT\n"));
6346                 return (RSMERR_INTERRUPTED);
6347         }
6348 
6349 
6350         /*
6351          * We need to verify that this process has access
6352          */
6353         e = rsm_access(sharedp->rsmsi_uid, sharedp->rsmsi_gid,
6354             access & sharedp->rsmsi_mode,
6355             (int)(msg->perm & RSM_PERM_RDWR), cred);
6356         if (e) {
6357                 rsmsharelock_release(seg);
6358                 seg->s_state = RSM_STATE_NEW;
6359                 seg->s_adapter = NULL;
6360                 rsmseglock_release(seg);
6361                 rsmimport_rm(seg);
6362                 rsmka_release_adapter(adapter);
6363                 /*
6364                  * No need to lock segment it has been removed
6365                  * from the hash table
6366                  */
6367                 rsmsharelock_acquire(seg);
6368                 if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTING) {
6369                         rsmsharelock_release(seg);
6370                         /* this is the first importer */
6371 
6372                         (void) rsm_send_notimporting(msg->nodeid, msg->key,
6373                             shared_cookie);
6374                         rsmsharelock_acquire(seg);
6375                         sharedp->rsmsi_state = RSMSI_STATE_NEW;
6376                         cv_broadcast(&sharedp->rsmsi_cv);
6377                 }
6378                 rsmsharelock_release(seg);
6379 
6380                 DBG_PRINTF((category, RSM_ERR,
6381                     "rsm_connect done: ipcaccess failed\n"));
6382                 return (RSMERR_PERM_DENIED);
6383         }
6384 
6385         /* update state and cookie */
6386         seg->s_segid = sharedp->rsmsi_segid;
6387         seg->s_len = sharedp->rsmsi_seglen;
6388         seg->s_mode = access & sharedp->rsmsi_mode;
6389         seg->s_pid = ddi_get_pid();
6390         seg->s_mapinfo = NULL;
6391 
6392         if (seg->s_node != my_nodeid) {
6393                 if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTING) {
6394                         e = adapter->rsmpi_ops->rsm_connect(
6395                             adapter->rsmpi_handle,
6396                             addr, seg->s_segid, &sharedp->rsmsi_handle);
6397 
6398                         if (e != RSM_SUCCESS) {
6399                                 seg->s_state = RSM_STATE_NEW;
6400                                 seg->s_adapter = NULL;
6401                                 rsmsharelock_release(seg);
6402                                 rsmseglock_release(seg);
6403                                 rsmimport_rm(seg);
6404                                 rsmka_release_adapter(adapter);
6405                                 /*
6406                                  *  inform the exporter to delete this importer
6407                                  */
6408                                 (void) rsm_send_notimporting(msg->nodeid,
6409                                     msg->key, shared_cookie);
6410 
6411                                 /*
6412                                  * Now inform any waiting importers to
6413                                  * retry connect. This needs to be done
6414                                  * after sending notimporting so that
6415                                  * the notimporting is sent before a waiting
6416                                  * importer sends a segconnect while retrying
6417                                  *
6418                                  * No need to lock segment it has been removed
6419                                  * from the hash table
6420                                  */
6421 
6422                                 rsmsharelock_acquire(seg);
6423                                 sharedp->rsmsi_state = RSMSI_STATE_NEW;
6424                                 cv_broadcast(&sharedp->rsmsi_cv);
6425                                 rsmsharelock_release(seg);
6426 
6427                                 DBG_PRINTF((category, RSM_ERR,
6428                                     "rsm_connect error %d\n", e));
6429                                 if (e == RSMERR_SEG_NOT_PUBLISHED_TO_RSM_ADDR)
6430                                         return (
6431                                             RSMERR_SEG_NOT_PUBLISHED_TO_NODE);
6432                                 else if ((e == RSMERR_RSM_ADDR_UNREACHABLE) ||
6433                                     (e == RSMERR_UNKNOWN_RSM_ADDR))
6434                                         return (RSMERR_REMOTE_NODE_UNREACHABLE);
6435                                 else
6436                                         return (e);
6437                         }
6438 
6439                 }
6440                 seg->s_handle.in = sharedp->rsmsi_handle;
6441 
6442         }
6443 
6444         seg->s_state = RSM_STATE_CONNECT;
6445 
6446 
6447         seg->s_flags &= ~RSM_IMPORT_DUMMY;       /* clear dummy flag */
6448         if (bar_va) {
6449                 /* increment generation number on barrier page */
6450                 atomic_inc_16(bar_va + seg->s_hdr.rsmrc_num);
6451                 /* return user off into barrier page where status will be */
6452                 msg->off = (int)seg->s_hdr.rsmrc_num;
6453                 msg->gnum = bar_va[msg->off];     /* gnum race */
6454         } else {
6455                 msg->off = 0;
6456                 msg->gnum = 0;       /* gnum race */
6457         }
6458 
6459         msg->len = (int)sharedp->rsmsi_seglen;
6460         msg->rnum = seg->s_minor;
6461         rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING, RSMSI_STATE_CONNECTED);
6462         rsmsharelock_release(seg);
6463         rsmseglock_release(seg);
6464 
6465         /* Return back to user the segment size & perm in case it's needed */
6466 
6467 #ifdef _MULTI_DATAMODEL
6468         if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
6469                 rsm_ioctlmsg32_t msg32;
6470 
6471                 if (msg->len > UINT_MAX)
6472                         msg32.len = RSM_MAXSZ_PAGE_ALIGNED;
6473                 else
6474                         msg32.len = msg->len;
6475                 msg32.off = msg->off;
6476                 msg32.perm = msg->perm;
6477                 msg32.gnum = msg->gnum;
6478                 msg32.rnum = msg->rnum;
6479 
6480                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6481                     "rsm_connect done\n"));
6482 
6483                 if (ddi_copyout((caddr_t)&msg32, (caddr_t)dataptr,
6484                     sizeof (msg32), mode))
6485                         return (RSMERR_BAD_ADDR);
6486                 else
6487                         return (RSM_SUCCESS);
6488         }
6489 #endif
6490         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_connect done\n"));
6491 
6492         if (ddi_copyout((caddr_t)msg, (caddr_t)dataptr, sizeof (*msg),
6493             mode))
6494                 return (RSMERR_BAD_ADDR);
6495         else
6496                 return (RSM_SUCCESS);
6497 }
6498 
6499 static int
6500 rsm_unmap(rsmseg_t *seg)
6501 {
6502         int                     err;
6503         adapter_t               *adapter;
6504         rsm_import_share_t      *sharedp;
6505         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT);
6506 
6507         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6508             "rsm_unmap enter %u\n", seg->s_segid));
6509 
6510         ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
6511 
6512         /* assert seg is locked */
6513         ASSERT(rsmseglock_held(seg));
6514         ASSERT(seg->s_state != RSM_STATE_MAPPING);
6515 
6516         if ((seg->s_state != RSM_STATE_ACTIVE) &&
6517             (seg->s_state != RSM_STATE_MAP_QUIESCE)) {
6518                 /* segment unmap has already been done */
6519                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unmap done\n"));
6520                 return (RSM_SUCCESS);
6521         }
6522 
6523         sharedp = seg->s_share;
6524 
6525         rsmsharelock_acquire(seg);
6526 
6527         /*
6528          *      - shared data struct is in MAPPED or MAP_QUIESCE state
6529          */
6530 
6531         ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAPPED ||
6532             sharedp->rsmsi_state == RSMSI_STATE_MAP_QUIESCE);
6533 
6534         /*
6535          * Unmap pages - previously rsm_memseg_import_unmap was called only if
6536          * the segment cookie list was NULL; but it is always NULL when
6537          * called from rsmmap_unmap and won't be NULL when called for
6538          * a force disconnect - so the check for NULL cookie list was removed
6539          */
6540 
6541         ASSERT(sharedp->rsmsi_mapcnt > 0);
6542 
6543         sharedp->rsmsi_mapcnt--;
6544 
6545         if (sharedp->rsmsi_mapcnt == 0) {
6546                 if (sharedp->rsmsi_state == RSMSI_STATE_MAPPED) {
6547                         /* unmap the shared RSMPI mapping */
6548                         adapter = seg->s_adapter;
6549                         if (seg->s_node != my_nodeid) {
6550                                 ASSERT(sharedp->rsmsi_handle != NULL);
6551                                 err = adapter->rsmpi_ops->
6552                                     rsm_unmap(sharedp->rsmsi_handle);
6553                                 DBG_PRINTF((category, RSM_DEBUG,
6554                                     "rsm_unmap: rsmpi unmap %d\n", err));
6555                                 rsm_free_mapinfo(sharedp->rsmsi_mapinfo);
6556                                 sharedp->rsmsi_mapinfo = NULL;
6557                         }
6558                         sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
6559                 } else { /* MAP_QUIESCE --munmap()--> CONN_QUIESCE */
6560                         sharedp->rsmsi_state = RSMSI_STATE_CONN_QUIESCE;
6561                 }
6562         }
6563 
6564         rsmsharelock_release(seg);
6565 
6566         /*
6567          * The s_cookie field is used to store the cookie returned from the
6568          * ddi_umem_lock when binding the pages for an export segment. This
6569          * is the primary use of the s_cookie field and does not normally
6570          * pertain to any importing segment except in the loopback case.
6571          * For the loopback case, the import segment and export segment are
6572          * on the same node, the s_cookie field of the segment structure for
6573          * the importer is initialized to the s_cookie field in the exported
6574          * segment during the map operation and is used during the call to
6575          * devmap_umem_setup for the import mapping.
6576          * Thus, during unmap, we simply need to set s_cookie to NULL to
6577          * indicate that the mapping no longer exists.
6578          */
6579         seg->s_cookie = NULL;
6580 
6581         seg->s_mapinfo = NULL;
6582 
6583         if (seg->s_state == RSM_STATE_ACTIVE)
6584                 seg->s_state = RSM_STATE_CONNECT;
6585         else
6586                 seg->s_state = RSM_STATE_CONN_QUIESCE;
6587 
6588         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unmap done\n"));
6589 
6590         return (RSM_SUCCESS);
6591 }
6592 
6593 /*
6594  * cookie returned here if not null indicates that it is
6595  * the last importer and it can be used in the RSMIPC_NOT_IMPORTING
6596  * message.
6597  */
6598 static int
6599 rsm_closeconnection(rsmseg_t *seg, void **cookie)
6600 {
6601         int                     e;
6602         adapter_t               *adapter;
6603         rsm_import_share_t      *sharedp;
6604         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT);
6605 
6606         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6607             "rsm_closeconnection enter\n"));
6608 
6609         *cookie = (void *)NULL;
6610 
6611         ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
6612 
6613         /* assert seg is locked */
6614         ASSERT(rsmseglock_held(seg));
6615 
6616         if (seg->s_state == RSM_STATE_DISCONNECT) {
6617                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6618                     "rsm_closeconnection done: already disconnected\n"));
6619                 return (RSM_SUCCESS);
6620         }
6621 
6622         /* wait for all putv/getv ops to get done */
6623         while (seg->s_rdmacnt > 0) {
6624                 cv_wait(&seg->s_cv, &seg->s_lock);
6625         }
6626 
6627         (void) rsm_unmap(seg);
6628 
6629         ASSERT(seg->s_state == RSM_STATE_CONNECT ||
6630             seg->s_state == RSM_STATE_CONN_QUIESCE);
6631 
6632         adapter = seg->s_adapter;
6633         sharedp = seg->s_share;
6634 
6635         ASSERT(sharedp != NULL);
6636 
6637         rsmsharelock_acquire(seg);
6638 
6639         /*
6640          * Disconnect on adapter
6641          *
6642          * The current algorithm is stateless, I don't have to contact
6643          * server when I go away. It only gives me permissions. Of course,
6644          * the adapters will talk to terminate the connect.
6645          *
6646          * disconnect is needed only if we are CONNECTED not in CONN_QUIESCE
6647          */
6648         if ((sharedp->rsmsi_state == RSMSI_STATE_CONNECTED) &&
6649             (sharedp->rsmsi_node != my_nodeid)) {
6650 
6651                 if (sharedp->rsmsi_refcnt == 1) {
6652                         /* this is the last importer */
6653                         ASSERT(sharedp->rsmsi_mapcnt == 0);
6654 
6655                         e = adapter->rsmpi_ops->
6656                             rsm_disconnect(sharedp->rsmsi_handle);
6657                         if (e != RSM_SUCCESS) {
6658                                 DBG_PRINTF((category, RSM_DEBUG,
6659                                     "rsm:disconnect failed seg=%x:err=%d\n",
6660                                     seg->s_key, e));
6661                         }
6662                 }
6663         }
6664 
6665         seg->s_handle.in = NULL;
6666 
6667         sharedp->rsmsi_refcnt--;
6668 
6669         if (sharedp->rsmsi_refcnt == 0) {
6670                 *cookie = (void *)sharedp->rsmsi_cookie;
6671                 sharedp->rsmsi_state = RSMSI_STATE_DISCONNECTED;
6672                 sharedp->rsmsi_handle = NULL;
6673                 rsmsharelock_release(seg);
6674 
6675                 /* clean up the shared data structure */
6676                 mutex_destroy(&sharedp->rsmsi_lock);
6677                 cv_destroy(&sharedp->rsmsi_cv);
6678                 kmem_free((void *)(sharedp), sizeof (rsm_import_share_t));
6679 
6680         } else {
6681                 rsmsharelock_release(seg);
6682         }
6683 
6684         /* increment generation number on barrier page */
6685         if (bar_va) {
6686                 atomic_inc_16(bar_va + seg->s_hdr.rsmrc_num);
6687         }
6688 
6689         /*
6690          * The following needs to be done after any
6691          * rsmsharelock calls which use seg->s_share.
6692          */
6693         seg->s_share = NULL;
6694 
6695         seg->s_state = RSM_STATE_DISCONNECT;
6696         /* signal anyone waiting in the CONN_QUIESCE state */
6697         cv_broadcast(&seg->s_cv);
6698 
6699         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6700             "rsm_closeconnection done\n"));
6701 
6702         return (RSM_SUCCESS);
6703 }
6704 
6705 int
6706 rsm_disconnect(rsmseg_t *seg)
6707 {
6708         rsmipc_request_t        request;
6709         void                    *shared_cookie;
6710         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT);
6711 
6712         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_disconnect enter\n"));
6713 
6714         ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
6715 
6716         /* assert seg isn't locked */
6717         ASSERT(!rsmseglock_held(seg));
6718 
6719 
6720         /* Remove segment from imported list */
6721         rsmimport_rm(seg);
6722 
6723         /* acquire the segment */
6724         rsmseglock_acquire(seg);
6725 
6726         /* wait until segment leaves the mapping state */
6727         while (seg->s_state == RSM_STATE_MAPPING)
6728                 cv_wait(&seg->s_cv, &seg->s_lock);
6729 
6730         if (seg->s_state == RSM_STATE_DISCONNECT) {
6731                 seg->s_state = RSM_STATE_NEW;
6732                 rsmseglock_release(seg);
6733                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6734                     "rsm_disconnect done: already disconnected\n"));
6735                 return (RSM_SUCCESS);
6736         }
6737 
6738         (void) rsm_closeconnection(seg, &shared_cookie);
6739 
6740         /* update state */
6741         seg->s_state = RSM_STATE_NEW;
6742 
6743         if (shared_cookie != NULL) {
6744                 /*
6745                  *  This is the last importer so inform the exporting node
6746                  *  so this import can be deleted from the list of importers.
6747                  */
6748                 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_NOTIMPORTING;
6749                 request.rsmipc_key = seg->s_segid;
6750                 request.rsmipc_segment_cookie = shared_cookie;
6751                 rsmseglock_release(seg);
6752                 (void) rsmipc_send(seg->s_node, &request, RSM_NO_REPLY);
6753         } else {
6754                 rsmseglock_release(seg);
6755         }
6756 
6757         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_disconnect done\n"));
6758 
6759         return (DDI_SUCCESS);
6760 }
6761 
6762 static int
6763 rsm_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
6764     struct pollhead **phpp)
6765 {
6766         minor_t         rnum;
6767         rsmresource_t   *res;
6768         rsmseg_t        *seg;
6769         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
6770 
6771         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_chpoll enter\n"));
6772 
6773         /* find minor, no lock */
6774         rnum = getminor(dev);
6775         res = rsmresource_lookup(rnum, RSM_NOLOCK);
6776 
6777         /* poll is supported only for export/import segments */
6778         if ((res == NULL) || (res == RSMRC_RESERVED) ||
6779             (res->rsmrc_type == RSM_RESOURCE_BAR)) {
6780                 return (ENXIO);
6781         }
6782 
6783         /*
6784          * An exported segment must be in state RSM_STATE_EXPORT; an
6785          * imported segment must be in state RSM_STATE_ACTIVE.
6786          */
6787         seg = (rsmseg_t *)res;
6788 
6789         if (seg->s_pollevent) {
6790                 *reventsp = POLLRDNORM;
6791         } else {
6792                 *reventsp = 0;
6793         }
6794 
6795         if ((*reventsp == 0 && !anyyet) || (events & POLLET)) {
6796                 /* cannot take segment lock here */
6797                 *phpp = &seg->s_poll;
6798                 seg->s_pollflag |= RSM_SEGMENT_POLL;
6799         }
6800         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_chpoll done\n"));
6801         return (0);
6802 }
6803 
6804 
6805 
6806 /* ************************* IOCTL Commands ********************* */
6807 
6808 static rsmseg_t *
6809 rsmresource_seg(rsmresource_t *res, minor_t rnum, cred_t *credp,
6810     rsm_resource_type_t type)
6811 {
6812         /* get segment from resource handle */
6813         rsmseg_t *seg;
6814         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL);
6815 
6816         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmresource_seg enter\n"));
6817 
6818 
6819         if (res != RSMRC_RESERVED) {
6820                 seg = (rsmseg_t *)res;
6821         } else {
6822                 /* Allocate segment now and bind it */
6823                 seg = rsmseg_alloc(rnum, credp);
6824 
6825                 /*
6826                  * if DR pre-processing is going on or DR is in progress
6827                  * then the new export segments should be in the NEW_QSCD state
6828                  */
6829                 if (type == RSM_RESOURCE_EXPORT_SEGMENT) {
6830                         mutex_enter(&rsm_drv_data.drv_lock);
6831                         if ((rsm_drv_data.drv_state ==
6832                             RSM_DRV_PREDEL_STARTED) ||
6833                             (rsm_drv_data.drv_state ==
6834                             RSM_DRV_PREDEL_COMPLETED) ||
6835                             (rsm_drv_data.drv_state ==
6836                             RSM_DRV_DR_IN_PROGRESS)) {
6837                                 seg->s_state = RSM_STATE_NEW_QUIESCED;
6838                         }
6839                         mutex_exit(&rsm_drv_data.drv_lock);
6840                 }
6841 
6842                 rsmresource_insert(rnum, (rsmresource_t *)seg, type);
6843         }
6844 
6845         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmresource_seg done\n"));
6846 
6847         return (seg);
6848 }
6849 
6850 static int
6851 rsmexport_ioctl(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int cmd, intptr_t arg,
6852     int mode, cred_t *credp)
6853 {
6854         int error;
6855         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT | RSM_IOCTL);
6856 
6857         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmexport_ioctl enter\n"));
6858 
6859         arg = arg;
6860         credp = credp;
6861 
6862         ASSERT(seg != NULL);
6863 
6864         switch (cmd) {
6865         case RSM_IOCTL_BIND:
6866                 error = rsm_bind(seg, msg, arg, mode);
6867                 break;
6868         case RSM_IOCTL_REBIND:
6869                 error = rsm_rebind(seg, msg);
6870                 break;
6871         case RSM_IOCTL_UNBIND:
6872                 error = ENOTSUP;
6873                 break;
6874         case RSM_IOCTL_PUBLISH:
6875                 error = rsm_publish(seg, msg, arg, mode);
6876                 break;
6877         case RSM_IOCTL_REPUBLISH:
6878                 error = rsm_republish(seg, msg, mode);
6879                 break;
6880         case RSM_IOCTL_UNPUBLISH:
6881                 error = rsm_unpublish(seg, 1);
6882                 break;
6883         default:
6884                 error = EINVAL;
6885                 break;
6886         }
6887 
6888         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmexport_ioctl done: %d\n",
6889             error));
6890 
6891         return (error);
6892 }
6893 static int
6894 rsmimport_ioctl(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int cmd, intptr_t arg,
6895     int mode, cred_t *credp)
6896 {
6897         int error;
6898         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
6899 
6900         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmimport_ioctl enter\n"));
6901 
6902         ASSERT(seg);
6903 
6904         switch (cmd) {
6905         case RSM_IOCTL_CONNECT:
6906                 error = rsm_connect(seg, msg, credp, arg, mode);
6907                 break;
6908         default:
6909                 error = EINVAL;
6910                 break;
6911         }
6912 
6913         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmimport_ioctl done: %d\n",
6914             error));
6915         return (error);
6916 }
6917 
6918 static int
6919 rsmbar_ioctl(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int cmd, intptr_t arg,
6920     int mode)
6921 {
6922         int e;
6923         adapter_t *adapter;
6924         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
6925 
6926         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmbar_ioctl enter\n"));
6927 
6928 
6929         if ((seg->s_flags & RSM_IMPORT_DUMMY) != 0) {
6930                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6931                     "rsmbar_ioctl done: RSM_IMPORT_DUMMY\n"));
6932                 return (RSMERR_CONN_ABORTED);
6933         } else if (seg->s_node == my_nodeid) {
6934                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6935                     "rsmbar_ioctl done: loopback\n"));
6936                 return (RSM_SUCCESS);
6937         }
6938 
6939         adapter = seg->s_adapter;
6940 
6941         switch (cmd) {
6942         case RSM_IOCTL_BAR_CHECK:
6943                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6944                     "rsmbar_ioctl done: RSM_BAR_CHECK %d\n", bar_va));
6945                 return (bar_va ? RSM_SUCCESS : EINVAL);
6946         case RSM_IOCTL_BAR_OPEN:
6947                 e = adapter->rsmpi_ops->
6948                     rsm_open_barrier_ctrl(adapter->rsmpi_handle, &msg->bar);
6949                 break;
6950         case RSM_IOCTL_BAR_ORDER:
6951                 e = adapter->rsmpi_ops->rsm_order_barrier(&msg->bar);
6952                 break;
6953         case RSM_IOCTL_BAR_CLOSE:
6954                 e = adapter->rsmpi_ops->rsm_close_barrier(&msg->bar);
6955                 break;
6956         default:
6957                 e = EINVAL;
6958                 break;
6959         }
6960 
6961         if (e == RSM_SUCCESS) {
6962 #ifdef _MULTI_DATAMODEL
6963                 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
6964                         rsm_ioctlmsg32_t msg32;
6965                         int i;
6966 
6967                         for (i = 0; i < 4; i++) {
6968                                 msg32.bar.comp[i].u64 = msg->bar.comp[i].u64;
6969                         }
6970 
6971                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6972                             "rsmbar_ioctl done\n"));
6973                         if (ddi_copyout((caddr_t)&msg32, (caddr_t)arg,
6974                             sizeof (msg32), mode))
6975                                 return (RSMERR_BAD_ADDR);
6976                         else
6977                                 return (RSM_SUCCESS);
6978                 }
6979 #endif
6980                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6981                     "rsmbar_ioctl done\n"));
6982                 if (ddi_copyout((caddr_t)&msg->bar, (caddr_t)arg,
6983                     sizeof (*msg), mode))
6984                         return (RSMERR_BAD_ADDR);
6985                 else
6986                         return (RSM_SUCCESS);
6987         }
6988 
6989         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6990             "rsmbar_ioctl done: error=%d\n", e));
6991 
6992         return (e);
6993 }
6994 
6995 /*
6996  * Ring the doorbell of the export segment to which this segment is
6997  * connected.
6998  */
6999 static int
7000 exportbell_ioctl(rsmseg_t *seg, int cmd /*ARGSUSED*/)
7001 {
7002         int e = 0;
7003         rsmipc_request_t request;
7004 
7005         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
7006 
7007         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "exportbell_ioctl enter\n"));
7008 
7009         request.rsmipc_key = seg->s_segid;
7010         request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_BELL;
7011         request.rsmipc_segment_cookie = NULL;
7012         e = rsmipc_send(seg->s_node, &request, RSM_NO_REPLY);
7013 
7014         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7015             "exportbell_ioctl done: %d\n", e));
7016 
7017         return (e);
7018 }
7019 
7020 /*
7021  * Ring the doorbells of all segments importing this segment
7022  */
7023 static int
7024 importbell_ioctl(rsmseg_t *seg, int cmd /*ARGSUSED*/)
7025 {
7026         importing_token_t       *token = NULL;
7027         rsmipc_request_t        request;
7028         int                     index;
7029 
7030         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT | RSM_IOCTL);
7031 
7032         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importbell_ioctl enter\n"));
7033 
7034         ASSERT(seg->s_state != RSM_STATE_NEW &&
7035             seg->s_state != RSM_STATE_NEW_QUIESCED);
7036 
7037         request.rsmipc_key = seg->s_segid;
7038         request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_BELL;
7039 
7040         index = rsmhash(seg->s_segid);
7041 
7042         token = importer_list.bucket[index];
7043 
7044         while (token != NULL) {
7045                 if (seg->s_key == token->key) {
7046                         request.rsmipc_segment_cookie =
7047                             token->import_segment_cookie;
7048                         (void) rsmipc_send(token->importing_node,
7049                             &request, RSM_NO_REPLY);
7050                 }
7051                 token = token->next;
7052         }
7053 
7054         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7055             "importbell_ioctl done\n"));
7056         return (RSM_SUCCESS);
7057 }
7058 
7059 static int
7060 rsm_consumeevent_copyin(caddr_t arg, rsm_consume_event_msg_t *msgp,
7061     rsm_poll_event_t **eventspp, int mode)
7062 {
7063         rsm_poll_event_t        *evlist = NULL;
7064         size_t                  evlistsz;
7065         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IOCTL);
7066 
7067 #ifdef _MULTI_DATAMODEL
7068         if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7069                 int i;
7070                 rsm_consume_event_msg32_t cemsg32 = {0};
7071                 rsm_poll_event32_t      event32[RSM_MAX_POLLFDS];
7072                 rsm_poll_event32_t      *evlist32;
7073                 size_t                  evlistsz32;
7074 
7075                 /* copyin the ioctl message */
7076                 if (ddi_copyin(arg, (caddr_t)&cemsg32,
7077                     sizeof (rsm_consume_event_msg32_t), mode)) {
7078                         DBG_PRINTF((category, RSM_ERR,
7079                             "consumeevent_copyin msgp: RSMERR_BAD_ADDR\n"));
7080                         return (RSMERR_BAD_ADDR);
7081                 }
7082                 msgp->seglist = (caddr_t)(uintptr_t)cemsg32.seglist;
7083                 msgp->numents = (int)cemsg32.numents;
7084 
7085                 evlistsz32 = sizeof (rsm_poll_event32_t) * msgp->numents;
7086                 /*
7087                  * If numents is large alloc events list on heap otherwise
7088                  * use the address of array that was passed in.
7089                  */
7090                 if (msgp->numents > RSM_MAX_POLLFDS) {
7091                         if (msgp->numents > max_segs) { /* validate numents */
7092                                 DBG_PRINTF((category, RSM_ERR,
7093                                     "consumeevent_copyin: "
7094                                     "RSMERR_BAD_ARGS_ERRORS\n"));
7095                                 return (RSMERR_BAD_ARGS_ERRORS);
7096                         }
7097                         evlist32 = kmem_zalloc(evlistsz32, KM_SLEEP);
7098                 } else {
7099                         evlist32 = event32;
7100                 }
7101 
7102                 /* copyin the seglist into the rsm_poll_event32_t array */
7103                 if (ddi_copyin((caddr_t)msgp->seglist, (caddr_t)evlist32,
7104                     evlistsz32, mode)) {
7105                         if ((msgp->numents > RSM_MAX_POLLFDS) && evlist32) {
7106                                 kmem_free(evlist32, evlistsz32);
7107                         }
7108                         DBG_PRINTF((category, RSM_ERR,
7109                             "consumeevent_copyin evlist: RSMERR_BAD_ADDR\n"));
7110                         return (RSMERR_BAD_ADDR);
7111                 }
7112 
7113                 /* evlist and evlistsz are based on rsm_poll_event_t type */
7114                 evlistsz = sizeof (rsm_poll_event_t)* msgp->numents;
7115 
7116                 if (msgp->numents > RSM_MAX_POLLFDS) {
7117                         evlist = kmem_zalloc(evlistsz, KM_SLEEP);
7118                         *eventspp = evlist;
7119                 } else {
7120                         evlist = *eventspp;
7121                 }
7122                 /*
7123                  * copy the rsm_poll_event32_t array to the rsm_poll_event_t
7124                  * array
7125                  */
7126                 for (i = 0; i < msgp->numents; i++) {
7127                         evlist[i].rnum = evlist32[i].rnum;
7128                         evlist[i].fdsidx = evlist32[i].fdsidx;
7129                         evlist[i].revent = evlist32[i].revent;
7130                 }
7131                 /* free the temp 32-bit event list */
7132                 if ((msgp->numents > RSM_MAX_POLLFDS) && evlist32) {
7133                         kmem_free(evlist32, evlistsz32);
7134                 }
7135 
7136                 return (RSM_SUCCESS);
7137         }
7138 #endif
7139         /* copyin the ioctl message */
7140         if (ddi_copyin(arg, (caddr_t)msgp, sizeof (rsm_consume_event_msg_t),
7141             mode)) {
7142                 DBG_PRINTF((category, RSM_ERR,
7143                     "consumeevent_copyin msgp: RSMERR_BAD_ADDR\n"));
7144                 return (RSMERR_BAD_ADDR);
7145         }
7146         /*
7147          * If numents is large alloc events list on heap otherwise
7148          * use the address of array that was passed in.
7149          */
7150         if (msgp->numents > RSM_MAX_POLLFDS) {
7151                 if (msgp->numents > max_segs) { /* validate numents */
7152                         DBG_PRINTF((category, RSM_ERR,
7153                             "consumeevent_copyin: RSMERR_BAD_ARGS_ERRORS\n"));
7154                         return (RSMERR_BAD_ARGS_ERRORS);
7155                 }
7156                 evlistsz = sizeof (rsm_poll_event_t)*msgp->numents;
7157                 evlist = kmem_zalloc(evlistsz, KM_SLEEP);
7158                 *eventspp  = evlist;
7159         }
7160 
7161         /* copyin the seglist */
7162         if (ddi_copyin((caddr_t)msgp->seglist, (caddr_t)(*eventspp),
7163             sizeof (rsm_poll_event_t)*msgp->numents, mode)) {
7164                 if (evlist) {
7165                         kmem_free(evlist, evlistsz);
7166                         *eventspp = NULL;
7167                 }
7168                 DBG_PRINTF((category, RSM_ERR,
7169                     "consumeevent_copyin evlist: RSMERR_BAD_ADDR\n"));
7170                 return (RSMERR_BAD_ADDR);
7171         }
7172 
7173         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7174             "consumeevent_copyin done\n"));
7175         return (RSM_SUCCESS);
7176 }
7177 
7178 static int
7179 rsm_consumeevent_copyout(rsm_consume_event_msg_t *msgp,
7180     rsm_poll_event_t *eventsp, int mode)
7181 {
7182         size_t                  evlistsz;
7183         int                     err = RSM_SUCCESS;
7184         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IOCTL);
7185 
7186         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7187             "consumeevent_copyout enter: numents(%d) eventsp(%p)\n",
7188             msgp->numents, eventsp));
7189 
7190 #ifdef _MULTI_DATAMODEL
7191         if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7192                 int i;
7193                 rsm_poll_event32_t      event32[RSM_MAX_POLLFDS];
7194                 rsm_poll_event32_t      *evlist32;
7195                 size_t                  evlistsz32;
7196 
7197                 evlistsz32 = sizeof (rsm_poll_event32_t)*msgp->numents;
7198                 if (msgp->numents > RSM_MAX_POLLFDS) {
7199                         evlist32 = kmem_zalloc(evlistsz32, KM_SLEEP);
7200                 } else {
7201                         evlist32 = event32;
7202                 }
7203 
7204                 /*
7205                  * copy the rsm_poll_event_t array to the rsm_poll_event32_t
7206                  * array
7207                  */
7208                 for (i = 0; i < msgp->numents; i++) {
7209                         evlist32[i].rnum = eventsp[i].rnum;
7210                         evlist32[i].fdsidx = eventsp[i].fdsidx;
7211                         evlist32[i].revent = eventsp[i].revent;
7212                 }
7213 
7214                 if (ddi_copyout((caddr_t)evlist32, (caddr_t)msgp->seglist,
7215                     evlistsz32, mode)) {
7216                         err = RSMERR_BAD_ADDR;
7217                 }
7218 
7219                 if (msgp->numents > RSM_MAX_POLLFDS) {
7220                         if (evlist32) { /* free the temp 32-bit event list */
7221                                 kmem_free(evlist32, evlistsz32);
7222                         }
7223                         /*
7224                          * eventsp and evlistsz are based on rsm_poll_event_t
7225                          * type
7226                          */
7227                         evlistsz = sizeof (rsm_poll_event_t)*msgp->numents;
7228                         /* event list on the heap and needs to be freed here */
7229                         if (eventsp) {
7230                                 kmem_free(eventsp, evlistsz);
7231                         }
7232                 }
7233 
7234                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7235                     "consumeevent_copyout done: err=%d\n", err));
7236                 return (err);
7237         }
7238 #endif
7239         evlistsz = sizeof (rsm_poll_event_t)*msgp->numents;
7240 
7241         if (ddi_copyout((caddr_t)eventsp, (caddr_t)msgp->seglist, evlistsz,
7242             mode)) {
7243                 err = RSMERR_BAD_ADDR;
7244         }
7245 
7246         if ((msgp->numents > RSM_MAX_POLLFDS) && eventsp) {
7247                 /* event list on the heap and needs to be freed here */
7248                 kmem_free(eventsp, evlistsz);
7249         }
7250 
7251         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7252             "consumeevent_copyout done: err=%d\n", err));
7253         return (err);
7254 }
7255 
7256 static int
7257 rsm_consumeevent_ioctl(caddr_t arg, int mode)
7258 {
7259         int     rc;
7260         int     i;
7261         minor_t rnum;
7262         rsm_consume_event_msg_t msg = {0};
7263         rsmseg_t                *seg;
7264         rsm_poll_event_t        *event_list;
7265         rsm_poll_event_t        events[RSM_MAX_POLLFDS];
7266         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IOCTL);
7267 
7268         event_list = events;
7269 
7270         if ((rc = rsm_consumeevent_copyin(arg, &msg, &event_list, mode)) !=
7271             RSM_SUCCESS) {
7272                 return (rc);
7273         }
7274 
7275         for (i = 0; i < msg.numents; i++) {
7276                 rnum = event_list[i].rnum;
7277                 event_list[i].revent = 0;
7278                 /* get the segment structure */
7279                 seg = (rsmseg_t *)rsmresource_lookup(rnum, RSM_LOCK);
7280                 if (seg) {
7281                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7282                             "consumeevent_ioctl: rnum(%d) seg(%p)\n", rnum,
7283                             seg));
7284                         if (seg->s_pollevent) {
7285                                 /* consume the event */
7286                                 atomic_dec_32(&seg->s_pollevent);
7287                                 event_list[i].revent = POLLRDNORM;
7288                         }
7289                         rsmseglock_release(seg);
7290                 }
7291         }
7292 
7293         if ((rc = rsm_consumeevent_copyout(&msg, event_list, mode)) !=
7294             RSM_SUCCESS) {
7295                 return (rc);
7296         }
7297 
7298         return (RSM_SUCCESS);
7299 }
7300 
7301 static int
7302 iovec_copyin(caddr_t user_vec, rsmka_iovec_t *iovec, int count, int mode)
7303 {
7304         int size;
7305         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
7306 
7307         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "iovec_copyin enter\n"));
7308 
7309 #ifdef _MULTI_DATAMODEL
7310         if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7311                 rsmka_iovec32_t *iovec32, *iovec32_base;
7312                 int i;
7313 
7314                 size = count * sizeof (rsmka_iovec32_t);
7315                 iovec32_base = iovec32 = kmem_zalloc(size, KM_SLEEP);
7316                 if (ddi_copyin((caddr_t)user_vec,
7317                     (caddr_t)iovec32, size, mode)) {
7318                         kmem_free(iovec32, size);
7319                         DBG_PRINTF((category, RSM_DEBUG,
7320                             "iovec_copyin: returning RSMERR_BAD_ADDR\n"));
7321                         return (RSMERR_BAD_ADDR);
7322                 }
7323 
7324                 for (i = 0; i < count; i++, iovec++, iovec32++) {
7325                         iovec->io_type = (int)iovec32->io_type;
7326                         if (iovec->io_type == RSM_HANDLE_TYPE)
7327                                 iovec->local.segid = (rsm_memseg_id_t)
7328                                     iovec32->local;
7329                         else
7330                                 iovec->local.vaddr =
7331                                     (caddr_t)(uintptr_t)iovec32->local;
7332                         iovec->local_offset = (size_t)iovec32->local_offset;
7333                         iovec->remote_offset = (size_t)iovec32->remote_offset;
7334                         iovec->transfer_len = (size_t)iovec32->transfer_len;
7335 
7336                 }
7337                 kmem_free(iovec32_base, size);
7338                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7339                     "iovec_copyin done\n"));
7340                 return (DDI_SUCCESS);
7341         }
7342 #endif
7343 
7344         size = count * sizeof (rsmka_iovec_t);
7345         if (ddi_copyin((caddr_t)user_vec, (caddr_t)iovec, size, mode)) {
7346                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7347                     "iovec_copyin done: RSMERR_BAD_ADDR\n"));
7348                 return (RSMERR_BAD_ADDR);
7349         }
7350 
7351         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "iovec_copyin done\n"));
7352 
7353         return (DDI_SUCCESS);
7354 }
7355 
7356 
7357 static int
7358 sgio_copyin(caddr_t arg, rsmka_scat_gath_t *sg_io, int mode)
7359 {
7360         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
7361 
7362         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "sgio_copyin enter\n"));
7363 
7364 #ifdef _MULTI_DATAMODEL
7365         if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7366                 rsmka_scat_gath32_t sg_io32;
7367 
7368                 if (ddi_copyin(arg, (caddr_t)&sg_io32, sizeof (sg_io32),
7369                     mode)) {
7370                         DBG_PRINTF((category, RSM_DEBUG,
7371                             "sgio_copyin done: returning EFAULT\n"));
7372                         return (RSMERR_BAD_ADDR);
7373                 }
7374                 sg_io->local_nodeid = (rsm_node_id_t)sg_io32.local_nodeid;
7375                 sg_io->io_request_count =  (size_t)sg_io32.io_request_count;
7376                 sg_io->io_residual_count = (size_t)sg_io32.io_residual_count;
7377                 sg_io->flags = (size_t)sg_io32.flags;
7378                 sg_io->remote_handle = (rsm_memseg_import_handle_t)
7379                     (uintptr_t)sg_io32.remote_handle;
7380                 sg_io->iovec = (rsmka_iovec_t *)(uintptr_t)sg_io32.iovec;
7381                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7382                     "sgio_copyin done\n"));
7383                 return (DDI_SUCCESS);
7384         }
7385 #endif
7386         if (ddi_copyin(arg, (caddr_t)sg_io, sizeof (rsmka_scat_gath_t),
7387             mode)) {
7388                 DBG_PRINTF((category, RSM_DEBUG,
7389                     "sgio_copyin done: returning EFAULT\n"));
7390                 return (RSMERR_BAD_ADDR);
7391         }
7392         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "sgio_copyin done\n"));
7393         return (DDI_SUCCESS);
7394 }
7395 
7396 static int
7397 sgio_resid_copyout(caddr_t arg, rsmka_scat_gath_t *sg_io, int mode)
7398 {
7399         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
7400 
7401         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7402             "sgio_resid_copyout enter\n"));
7403 
7404 #ifdef _MULTI_DATAMODEL
7405         if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7406                 rsmka_scat_gath32_t sg_io32;
7407 
7408                 sg_io32.io_residual_count = sg_io->io_residual_count;
7409                 sg_io32.flags = sg_io->flags;
7410 
7411                 if (ddi_copyout((caddr_t)&sg_io32.io_residual_count,
7412                     (caddr_t)&((rsmka_scat_gath32_t *)arg)->io_residual_count,
7413                     sizeof (uint32_t), mode)) {
7414 
7415                         DBG_PRINTF((category, RSM_ERR,
7416                             "sgio_resid_copyout error: rescnt\n"));
7417                         return (RSMERR_BAD_ADDR);
7418                 }
7419 
7420                 if (ddi_copyout((caddr_t)&sg_io32.flags,
7421                     (caddr_t)&((rsmka_scat_gath32_t *)arg)->flags,
7422                     sizeof (uint32_t), mode)) {
7423 
7424                         DBG_PRINTF((category, RSM_ERR,
7425                             "sgio_resid_copyout error: flags\n"));
7426                         return (RSMERR_BAD_ADDR);
7427                 }
7428                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7429                     "sgio_resid_copyout done\n"));
7430                 return (DDI_SUCCESS);
7431         }
7432 #endif
7433         if (ddi_copyout((caddr_t)&sg_io->io_residual_count,
7434             (caddr_t)&((rsmka_scat_gath_t *)arg)->io_residual_count,
7435             sizeof (ulong_t), mode)) {
7436 
7437                 DBG_PRINTF((category, RSM_ERR,
7438                     "sgio_resid_copyout error:rescnt\n"));
7439                 return (RSMERR_BAD_ADDR);
7440         }
7441 
7442         if (ddi_copyout((caddr_t)&sg_io->flags,
7443             (caddr_t)&((rsmka_scat_gath_t *)arg)->flags,
7444             sizeof (uint_t), mode)) {
7445 
7446                 DBG_PRINTF((category, RSM_ERR,
7447                     "sgio_resid_copyout error:flags\n"));
7448                 return (RSMERR_BAD_ADDR);
7449         }
7450 
7451         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "sgio_resid_copyout done\n"));
7452         return (DDI_SUCCESS);
7453 }
7454 
7455 
7456 static int
7457 rsm_iovec_ioctl(dev_t dev, caddr_t arg, int cmd, int mode, cred_t *credp)
7458 {
7459         rsmka_scat_gath_t       sg_io;
7460         rsmka_iovec_t           ka_iovec_arr[RSM_MAX_IOVLEN];
7461         rsmka_iovec_t           *ka_iovec;
7462         rsmka_iovec_t           *ka_iovec_start;
7463         rsmpi_scat_gath_t       rsmpi_sg_io;
7464         rsmpi_iovec_t           iovec_arr[RSM_MAX_IOVLEN];
7465         rsmpi_iovec_t           *iovec;
7466         rsmpi_iovec_t           *iovec_start = NULL;
7467         rsmapi_access_entry_t   *acl;
7468         rsmresource_t           *res;
7469         minor_t                 rnum;
7470         rsmseg_t                *im_seg, *ex_seg;
7471         int                     e;
7472         int                     error = 0;
7473         uint_t                  i;
7474         uint_t                  iov_proc = 0; /* num of iovecs processed */
7475         size_t                  size = 0;
7476         size_t                  ka_size;
7477 
7478         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
7479 
7480         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_iovec_ioctl enter\n"));
7481 
7482         credp = credp;
7483 
7484         /*
7485          * Copyin the scatter/gather structure  and build new structure
7486          * for rsmpi.
7487          */
7488         e = sgio_copyin(arg, &sg_io, mode);
7489         if (e != DDI_SUCCESS) {
7490                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7491                     "rsm_iovec_ioctl done: sgio_copyin %d\n", e));
7492                 return (e);
7493         }
7494 
7495         if (sg_io.io_request_count > RSM_MAX_SGIOREQS) {
7496                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7497                     "rsm_iovec_ioctl done: request_count(%d) too large\n",
7498                     sg_io.io_request_count));
7499                 return (RSMERR_BAD_SGIO);
7500         }
7501 
7502         rsmpi_sg_io.io_request_count = sg_io.io_request_count;
7503         rsmpi_sg_io.io_residual_count = sg_io.io_request_count;
7504         rsmpi_sg_io.io_segflg = 0;
7505 
7506         /* Allocate memory and copyin io vector array  */
7507         if (sg_io.io_request_count > RSM_MAX_IOVLEN) {
7508                 ka_size =  sg_io.io_request_count * sizeof (rsmka_iovec_t);
7509                 ka_iovec_start = ka_iovec = kmem_zalloc(ka_size, KM_SLEEP);
7510         } else {
7511                 ka_iovec_start = ka_iovec = ka_iovec_arr;
7512         }
7513         e = iovec_copyin((caddr_t)sg_io.iovec, ka_iovec,
7514             sg_io.io_request_count, mode);
7515         if (e != DDI_SUCCESS) {
7516                 if (sg_io.io_request_count > RSM_MAX_IOVLEN)
7517                         kmem_free(ka_iovec, ka_size);
7518                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7519                     "rsm_iovec_ioctl done: iovec_copyin %d\n", e));
7520                 return (e);
7521         }
7522 
7523         /* get the import segment descriptor */
7524         rnum = getminor(dev);
7525         res = rsmresource_lookup(rnum, RSM_LOCK);
7526 
7527         /*
7528          * The following sequence of locking may (or MAY NOT) cause a
7529          * deadlock but this is currently not addressed here since the
7530          * implementation will be changed to incorporate the use of
7531          * reference counting for both the import and the export segments.
7532          */
7533 
7534         /* rsmseglock_acquire(im_seg) done in rsmresource_lookup */
7535 
7536         im_seg = (rsmseg_t *)res;
7537 
7538         if (im_seg == NULL) {
7539                 if (sg_io.io_request_count > RSM_MAX_IOVLEN)
7540                         kmem_free(ka_iovec, ka_size);
7541                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7542                     "rsm_iovec_ioctl done: rsmresource_lookup failed\n"));
7543                 return (EINVAL);
7544         }
7545         /* putv/getv supported is supported only on import segments */
7546         if (im_seg->s_type != RSM_RESOURCE_IMPORT_SEGMENT) {
7547                 rsmseglock_release(im_seg);
7548                 if (sg_io.io_request_count > RSM_MAX_IOVLEN)
7549                         kmem_free(ka_iovec, ka_size);
7550                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7551                     "rsm_iovec_ioctl done: not an import segment\n"));
7552                 return (EINVAL);
7553         }
7554 
7555         /*
7556          * wait for a remote DR to complete ie. for segments to get UNQUIESCED
7557          * as well as wait for a local DR to complete.
7558          */
7559         while ((im_seg->s_state == RSM_STATE_CONN_QUIESCE) ||
7560             (im_seg->s_state == RSM_STATE_MAP_QUIESCE) ||
7561             (im_seg->s_flags & RSM_DR_INPROGRESS)) {
7562                 if (cv_wait_sig(&im_seg->s_cv, &im_seg->s_lock) == 0) {
7563                         DBG_PRINTF((category, RSM_DEBUG,
7564                             "rsm_iovec_ioctl done: cv_wait INTR"));
7565                         rsmseglock_release(im_seg);
7566                         return (RSMERR_INTERRUPTED);
7567                 }
7568         }
7569 
7570         if ((im_seg->s_state != RSM_STATE_CONNECT) &&
7571             (im_seg->s_state != RSM_STATE_ACTIVE)) {
7572 
7573                 ASSERT(im_seg->s_state == RSM_STATE_DISCONNECT ||
7574                     im_seg->s_state == RSM_STATE_NEW);
7575 
7576                 DBG_PRINTF((category, RSM_DEBUG,
7577                     "rsm_iovec_ioctl done: im_seg not conn/map"));
7578                 rsmseglock_release(im_seg);
7579                 e = RSMERR_BAD_SGIO;
7580                 goto out;
7581         }
7582 
7583         im_seg->s_rdmacnt++;
7584         rsmseglock_release(im_seg);
7585 
7586         /*
7587          * Allocate and set up the io vector for rsmpi
7588          */
7589         if (sg_io.io_request_count > RSM_MAX_IOVLEN) {
7590                 size = sg_io.io_request_count * sizeof (rsmpi_iovec_t);
7591                 iovec_start = iovec = kmem_zalloc(size, KM_SLEEP);
7592         } else {
7593                 iovec_start = iovec = iovec_arr;
7594         }
7595 
7596         rsmpi_sg_io.iovec = iovec;
7597         for (iov_proc = 0; iov_proc < sg_io.io_request_count; iov_proc++) {
7598                 if (ka_iovec->io_type == RSM_HANDLE_TYPE) {
7599                         ex_seg = rsmexport_lookup(ka_iovec->local.segid);
7600 
7601                         if (ex_seg == NULL) {
7602                                 e = RSMERR_BAD_SGIO;
7603                                 break;
7604                         }
7605                         ASSERT(ex_seg->s_state == RSM_STATE_EXPORT);
7606 
7607                         acl = ex_seg->s_acl;
7608                         if (acl[0].ae_permission == 0) {
7609                                 struct buf *xbuf;
7610                                 dev_t sdev = 0;
7611 
7612                                 xbuf = ddi_umem_iosetup(ex_seg->s_cookie,
7613                                     0, ex_seg->s_len, B_WRITE,
7614                                     sdev, 0, NULL, DDI_UMEM_SLEEP);
7615 
7616                                 ASSERT(xbuf != NULL);
7617 
7618                                 iovec->local_mem.ms_type = RSM_MEM_BUF;
7619                                 iovec->local_mem.ms_memory.bp = xbuf;
7620                         } else {
7621                                 iovec->local_mem.ms_type = RSM_MEM_HANDLE;
7622                                 iovec->local_mem.ms_memory.handle =
7623                                     ex_seg->s_handle.out;
7624                         }
7625                         ex_seg->s_rdmacnt++; /* refcnt the handle */
7626                         rsmseglock_release(ex_seg);
7627                 } else {
7628                         iovec->local_mem.ms_type = RSM_MEM_VADDR;
7629                         iovec->local_mem.ms_memory.vr.vaddr =
7630                             ka_iovec->local.vaddr;
7631                 }
7632 
7633                 iovec->local_offset = ka_iovec->local_offset;
7634                 iovec->remote_handle = im_seg->s_handle.in;
7635                 iovec->remote_offset = ka_iovec->remote_offset;
7636                 iovec->transfer_length = ka_iovec->transfer_len;
7637                 iovec++;
7638                 ka_iovec++;
7639         }
7640 
7641         if (iov_proc <  sg_io.io_request_count) {
7642                 /* error while processing handle */
7643                 rsmseglock_acquire(im_seg);
7644                 im_seg->s_rdmacnt--;   /* decrement the refcnt for importseg */
7645                 if (im_seg->s_rdmacnt == 0) {
7646                         cv_broadcast(&im_seg->s_cv);
7647                 }
7648                 rsmseglock_release(im_seg);
7649                 goto out;
7650         }
7651 
7652         /* call rsmpi */
7653         if (cmd == RSM_IOCTL_PUTV)
7654                 e = im_seg->s_adapter->rsmpi_ops->rsm_memseg_import_putv(
7655                     im_seg->s_adapter->rsmpi_handle,
7656                     &rsmpi_sg_io);
7657         else if (cmd == RSM_IOCTL_GETV)
7658                 e = im_seg->s_adapter->rsmpi_ops->rsm_memseg_import_getv(
7659                     im_seg->s_adapter->rsmpi_handle,
7660                     &rsmpi_sg_io);
7661         else {
7662                 e = EINVAL;
7663                 DBG_PRINTF((category, RSM_DEBUG,
7664                     "iovec_ioctl: bad command = %x\n", cmd));
7665         }
7666 
7667 
7668         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7669             "rsm_iovec_ioctl RSMPI oper done %d\n", e));
7670 
7671         sg_io.io_residual_count = rsmpi_sg_io.io_residual_count;
7672 
7673         /*
7674          * Check for implicit signal post flag and do the signal
7675          * post if needed
7676          */
7677         if (sg_io.flags & RSM_IMPLICIT_SIGPOST &&
7678             e == RSM_SUCCESS) {
7679                 rsmipc_request_t request;
7680 
7681                 request.rsmipc_key = im_seg->s_segid;
7682                 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_BELL;
7683                 request.rsmipc_segment_cookie = NULL;
7684                 e = rsmipc_send(im_seg->s_node, &request, RSM_NO_REPLY);
7685                 /*
7686                  * Reset the implicit signal post flag to 0 to indicate
7687                  * that the signal post has been done and need not be
7688                  * done in the RSMAPI library
7689                  */
7690                 sg_io.flags &= ~RSM_IMPLICIT_SIGPOST;
7691         }
7692 
7693         rsmseglock_acquire(im_seg);
7694         im_seg->s_rdmacnt--;
7695         if (im_seg->s_rdmacnt == 0) {
7696                 cv_broadcast(&im_seg->s_cv);
7697         }
7698         rsmseglock_release(im_seg);
7699         error = sgio_resid_copyout(arg, &sg_io, mode);
7700 out:
7701         iovec = iovec_start;
7702         ka_iovec = ka_iovec_start;
7703         for (i = 0; i < iov_proc; i++) {
7704                 if (ka_iovec->io_type == RSM_HANDLE_TYPE) {
7705                         ex_seg = rsmexport_lookup(ka_iovec->local.segid);
7706 
7707                         ASSERT(ex_seg != NULL);
7708                         ASSERT(ex_seg->s_state == RSM_STATE_EXPORT);
7709 
7710                         ex_seg->s_rdmacnt--; /* unrefcnt the handle */
7711                         if (ex_seg->s_rdmacnt == 0) {
7712                                 cv_broadcast(&ex_seg->s_cv);
7713                         }
7714                         rsmseglock_release(ex_seg);
7715                 }
7716 
7717                 ASSERT(iovec != NULL); /* true if iov_proc > 0 */
7718 
7719                 /*
7720                  * At present there is no dependency on the existence of xbufs
7721                  * created by ddi_umem_iosetup for each of the iovecs. So we
7722                  * can these xbufs here.
7723                  */
7724                 if (iovec->local_mem.ms_type == RSM_MEM_BUF) {
7725                         freerbuf(iovec->local_mem.ms_memory.bp);
7726                 }
7727 
7728                 iovec++;
7729                 ka_iovec++;
7730         }
7731 
7732         if (sg_io.io_request_count > RSM_MAX_IOVLEN) {
7733                 if (iovec_start)
7734                         kmem_free(iovec_start, size);
7735                 kmem_free(ka_iovec_start, ka_size);
7736         }
7737 
7738         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7739             "rsm_iovec_ioctl done %d\n", e));
7740         /* if RSMPI call fails return that else return copyout's retval */
7741         return ((e != RSM_SUCCESS) ? e : error);
7742 
7743 }
7744 
7745 
7746 static int
7747 rsmaddr_ioctl(int cmd, rsm_ioctlmsg_t *msg, int mode)
7748 {
7749         adapter_t       *adapter;
7750         rsm_addr_t      addr;
7751         rsm_node_id_t   node;
7752         int             rval = DDI_SUCCESS;
7753         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL);
7754 
7755         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmaddr_ioctl enter\n"));
7756 
7757         adapter =  rsm_getadapter(msg, mode);
7758         if (adapter == NULL) {
7759                 DBG_PRINTF((category, RSM_DEBUG,
7760                     "rsmaddr_ioctl done: adapter not found\n"));
7761                 return (RSMERR_CTLR_NOT_PRESENT);
7762         }
7763 
7764         switch (cmd) {
7765         case RSM_IOCTL_MAP_TO_ADDR: /* nodeid to hwaddr mapping */
7766                 /* returns the hwaddr in msg->hwaddr */
7767                 if (msg->nodeid == my_nodeid) {
7768                         msg->hwaddr = adapter->hwaddr;
7769                 } else {
7770                         addr = get_remote_hwaddr(adapter, msg->nodeid);
7771                         if ((int64_t)addr < 0) {
7772                                 rval = RSMERR_INTERNAL_ERROR;
7773                         } else {
7774                                 msg->hwaddr = addr;
7775                         }
7776                 }
7777                 break;
7778         case RSM_IOCTL_MAP_TO_NODEID: /* hwaddr to nodeid mapping */
7779                 /* returns the nodeid in msg->nodeid */
7780                 if (msg->hwaddr == adapter->hwaddr) {
7781                         msg->nodeid = my_nodeid;
7782                 } else {
7783                         node = get_remote_nodeid(adapter, msg->hwaddr);
7784                         if ((int)node < 0) {
7785                                 rval = RSMERR_INTERNAL_ERROR;
7786                         } else {
7787                                 msg->nodeid = (rsm_node_id_t)node;
7788                         }
7789                 }
7790                 break;
7791         default:
7792                 rval = EINVAL;
7793                 break;
7794         }
7795 
7796         rsmka_release_adapter(adapter);
7797         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7798             "rsmaddr_ioctl done: %d\n", rval));
7799         return (rval);
7800 }
7801 
7802 static int
7803 rsm_ddi_copyin(caddr_t arg, rsm_ioctlmsg_t *msg, int mode)
7804 {
7805         DBG_DEFINE(category,
7806             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL | RSM_DDI);
7807 
7808         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ddi_copyin enter\n"));
7809 
7810 #ifdef _MULTI_DATAMODEL
7811 
7812         if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7813                 rsm_ioctlmsg32_t msg32;
7814                 int i;
7815 
7816                 if (ddi_copyin(arg, (caddr_t)&msg32, sizeof (msg32), mode)) {
7817                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7818                             "rsm_ddi_copyin done: EFAULT\n"));
7819                         return (RSMERR_BAD_ADDR);
7820                 }
7821                 msg->len = msg32.len;
7822                 msg->vaddr = (caddr_t)(uintptr_t)msg32.vaddr;
7823                 msg->arg = (caddr_t)(uintptr_t)msg32.arg;
7824                 msg->key = msg32.key;
7825                 msg->acl_len = msg32.acl_len;
7826                 msg->acl = (rsmapi_access_entry_t *)(uintptr_t)msg32.acl;
7827                 msg->cnum = msg32.cnum;
7828                 msg->cname = (caddr_t)(uintptr_t)msg32.cname;
7829                 msg->cname_len = msg32.cname_len;
7830                 msg->nodeid = msg32.nodeid;
7831                 msg->hwaddr = msg32.hwaddr;
7832                 msg->perm = msg32.perm;
7833                 for (i = 0; i < 4; i++) {
7834                         msg->bar.comp[i].u64 = msg32.bar.comp[i].u64;
7835                 }
7836                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7837                     "rsm_ddi_copyin done\n"));
7838                 return (RSM_SUCCESS);
7839         }
7840 #endif
7841         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ddi_copyin done\n"));
7842         if (ddi_copyin(arg, (caddr_t)msg, sizeof (*msg), mode))
7843                 return (RSMERR_BAD_ADDR);
7844         else
7845                 return (RSM_SUCCESS);
7846 }
7847 
7848 static int
7849 rsmattr_ddi_copyout(adapter_t *adapter, caddr_t arg, int mode)
7850 {
7851         rsmka_int_controller_attr_t     rsm_cattr;
7852         DBG_DEFINE(category,
7853             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL | RSM_DDI);
7854 
7855         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7856             "rsmattr_ddi_copyout enter\n"));
7857         /*
7858          * need to copy appropriate data from rsm_controller_attr_t
7859          * to rsmka_int_controller_attr_t
7860          */
7861 #ifdef  _MULTI_DATAMODEL
7862         if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7863                 rsmka_int_controller_attr32_t rsm_cattr32;
7864 
7865                 rsm_cattr32.attr_direct_access_sizes =
7866                     adapter->rsm_attr.attr_direct_access_sizes;
7867                 rsm_cattr32.attr_atomic_sizes =
7868                     adapter->rsm_attr.attr_atomic_sizes;
7869                 rsm_cattr32.attr_page_size =
7870                     adapter->rsm_attr.attr_page_size;
7871                 if (adapter->rsm_attr.attr_max_export_segment_size >
7872                     UINT_MAX)
7873                         rsm_cattr32.attr_max_export_segment_size =
7874                             RSM_MAXSZ_PAGE_ALIGNED;
7875                 else
7876                         rsm_cattr32.attr_max_export_segment_size =
7877                             adapter->rsm_attr.attr_max_export_segment_size;
7878                 if (adapter->rsm_attr.attr_tot_export_segment_size >
7879                     UINT_MAX)
7880                         rsm_cattr32.attr_tot_export_segment_size =
7881                             RSM_MAXSZ_PAGE_ALIGNED;
7882                 else
7883                         rsm_cattr32.attr_tot_export_segment_size =
7884                             adapter->rsm_attr.attr_tot_export_segment_size;
7885                 if (adapter->rsm_attr.attr_max_export_segments >
7886                     UINT_MAX)
7887                         rsm_cattr32.attr_max_export_segments =
7888                             UINT_MAX;
7889                 else
7890                         rsm_cattr32.attr_max_export_segments =
7891                             adapter->rsm_attr.attr_max_export_segments;
7892                 if (adapter->rsm_attr.attr_max_import_map_size >
7893                     UINT_MAX)
7894                         rsm_cattr32.attr_max_import_map_size =
7895                             RSM_MAXSZ_PAGE_ALIGNED;
7896                 else
7897                         rsm_cattr32.attr_max_import_map_size =
7898                             adapter->rsm_attr.attr_max_import_map_size;
7899                 if (adapter->rsm_attr.attr_tot_import_map_size >
7900                     UINT_MAX)
7901                         rsm_cattr32.attr_tot_import_map_size =
7902                             RSM_MAXSZ_PAGE_ALIGNED;
7903                 else
7904                         rsm_cattr32.attr_tot_import_map_size =
7905                             adapter->rsm_attr.attr_tot_import_map_size;
7906                 if (adapter->rsm_attr.attr_max_import_segments >
7907                     UINT_MAX)
7908                         rsm_cattr32.attr_max_import_segments =
7909                             UINT_MAX;
7910                 else
7911                         rsm_cattr32.attr_max_import_segments =
7912                             adapter->rsm_attr.attr_max_import_segments;
7913                 rsm_cattr32.attr_controller_addr =
7914                     adapter->rsm_attr.attr_controller_addr;
7915 
7916                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7917                     "rsmattr_ddi_copyout done\n"));
7918                 if (ddi_copyout((caddr_t)&rsm_cattr32, arg,
7919                     sizeof (rsmka_int_controller_attr32_t), mode)) {
7920                         return (RSMERR_BAD_ADDR);
7921                 }
7922                 else
7923                         return (RSM_SUCCESS);
7924         }
7925 #endif
7926         rsm_cattr.attr_direct_access_sizes =
7927             adapter->rsm_attr.attr_direct_access_sizes;
7928         rsm_cattr.attr_atomic_sizes =
7929             adapter->rsm_attr.attr_atomic_sizes;
7930         rsm_cattr.attr_page_size =
7931             adapter->rsm_attr.attr_page_size;
7932         rsm_cattr.attr_max_export_segment_size =
7933             adapter->rsm_attr.attr_max_export_segment_size;
7934         rsm_cattr.attr_tot_export_segment_size =
7935             adapter->rsm_attr.attr_tot_export_segment_size;
7936         rsm_cattr.attr_max_export_segments =
7937             adapter->rsm_attr.attr_max_export_segments;
7938         rsm_cattr.attr_max_import_map_size =
7939             adapter->rsm_attr.attr_max_import_map_size;
7940         rsm_cattr.attr_tot_import_map_size =
7941             adapter->rsm_attr.attr_tot_import_map_size;
7942         rsm_cattr.attr_max_import_segments =
7943             adapter->rsm_attr.attr_max_import_segments;
7944         rsm_cattr.attr_controller_addr =
7945             adapter->rsm_attr.attr_controller_addr;
7946         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7947             "rsmattr_ddi_copyout done\n"));
7948         if (ddi_copyout((caddr_t)&rsm_cattr, arg,
7949             sizeof (rsmka_int_controller_attr_t), mode)) {
7950                 return (RSMERR_BAD_ADDR);
7951         }
7952         else
7953                 return (RSM_SUCCESS);
7954 }
7955 
7956 /*ARGSUSED*/
7957 static int
7958 rsm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
7959     int *rvalp)
7960 {
7961         rsmseg_t *seg;
7962         rsmresource_t   *res;
7963         minor_t         rnum;
7964         rsm_ioctlmsg_t msg = {0};
7965         int error;
7966         adapter_t *adapter;
7967         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL);
7968 
7969         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ioctl enter\n"));
7970 
7971         if (cmd == RSM_IOCTL_CONSUMEEVENT) {
7972                 error = rsm_consumeevent_ioctl((caddr_t)arg, mode);
7973                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7974                     "rsm_ioctl RSM_IOCTL_CONSUMEEVENT done: %d\n", error));
7975                 return (error);
7976         }
7977 
7978         /* topology cmd does not use the arg common to other cmds */
7979         if (RSM_IOCTL_CMDGRP(cmd) == RSM_IOCTL_TOPOLOGY) {
7980                 error = rsmka_topology_ioctl((caddr_t)arg, cmd, mode);
7981                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7982                     "rsm_ioctl done: %d\n", error));
7983                 return (error);
7984         }
7985 
7986         if (RSM_IOCTL_CMDGRP(cmd) == RSM_IOCTL_IOVEC) {
7987                 error = rsm_iovec_ioctl(dev, (caddr_t)arg, cmd, mode, credp);
7988                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7989                     "rsm_ioctl done: %d\n", error));
7990                 return (error);
7991         }
7992 
7993         /*
7994          * try to load arguments
7995          */
7996         if (cmd != RSM_IOCTL_RING_BELL &&
7997             rsm_ddi_copyin((caddr_t)arg, &msg, mode)) {
7998                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7999                     "rsm_ioctl done: EFAULT\n"));
8000                 return (RSMERR_BAD_ADDR);
8001         }
8002 
8003         if (cmd == RSM_IOCTL_ATTR) {
8004                 adapter =  rsm_getadapter(&msg, mode);
8005                 if (adapter == NULL) {
8006                         DBG_PRINTF((category, RSM_DEBUG,
8007                             "rsm_ioctl done: ENODEV\n"));
8008                         return (RSMERR_CTLR_NOT_PRESENT);
8009                 }
8010                 error = rsmattr_ddi_copyout(adapter, msg.arg, mode);
8011                 rsmka_release_adapter(adapter);
8012                 DBG_PRINTF((category, RSM_DEBUG,
8013                     "rsm_ioctl:after copyout %d\n", error));
8014                 return (error);
8015         }
8016 
8017         if (cmd == RSM_IOCTL_BAR_INFO) {
8018                 /* Return library off,len of barrier page */
8019                 msg.off = barrier_offset;
8020                 msg.len = (int)barrier_size;
8021 #ifdef _MULTI_DATAMODEL
8022                 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
8023                         rsm_ioctlmsg32_t msg32;
8024 
8025                         if (msg.len > UINT_MAX)
8026                                 msg.len = RSM_MAXSZ_PAGE_ALIGNED;
8027                         else
8028                                 msg32.len = (int32_t)msg.len;
8029                         msg32.off = (int32_t)msg.off;
8030                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8031                             "rsm_ioctl done\n"));
8032                         if (ddi_copyout((caddr_t)&msg32, (caddr_t)arg,
8033                             sizeof (msg32), mode))
8034                                 return (RSMERR_BAD_ADDR);
8035                         else
8036                                 return (RSM_SUCCESS);
8037                 }
8038 #endif
8039                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8040                     "rsm_ioctl done\n"));
8041                 if (ddi_copyout((caddr_t)&msg, (caddr_t)arg,
8042                     sizeof (msg), mode))
8043                         return (RSMERR_BAD_ADDR);
8044                 else
8045                         return (RSM_SUCCESS);
8046         }
8047 
8048         if (RSM_IOCTL_CMDGRP(cmd) == RSM_IOCTL_MAP_ADDR) {
8049                 /* map the nodeid or hwaddr */
8050                 error = rsmaddr_ioctl(cmd, &msg, mode);
8051                 if (error == RSM_SUCCESS) {
8052 #ifdef _MULTI_DATAMODEL
8053                         if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
8054                                 rsm_ioctlmsg32_t msg32;
8055 
8056                                 msg32.hwaddr = (uint64_t)msg.hwaddr;
8057                                 msg32.nodeid = (uint32_t)msg.nodeid;
8058 
8059                                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8060                                     "rsm_ioctl done\n"));
8061                                 if (ddi_copyout((caddr_t)&msg32, (caddr_t)arg,
8062                                     sizeof (msg32), mode))
8063                                         return (RSMERR_BAD_ADDR);
8064                                 else
8065                                         return (RSM_SUCCESS);
8066                         }
8067 #endif
8068                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8069                             "rsm_ioctl done\n"));
8070                         if (ddi_copyout((caddr_t)&msg, (caddr_t)arg,
8071                             sizeof (msg), mode))
8072                                 return (RSMERR_BAD_ADDR);
8073                         else
8074                                 return (RSM_SUCCESS);
8075                 }
8076                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8077                     "rsm_ioctl done: %d\n", error));
8078                 return (error);
8079         }
8080 
8081         /* Find resource and look it in read mode */
8082         rnum = getminor(dev);
8083         res = rsmresource_lookup(rnum, RSM_NOLOCK);
8084         ASSERT(res != NULL);
8085 
8086         /*
8087          * Find command group
8088          */
8089         switch (RSM_IOCTL_CMDGRP(cmd)) {
8090         case RSM_IOCTL_EXPORT_SEG:
8091                 /*
8092                  * Export list is searched during publish, loopback and
8093                  * remote lookup call.
8094                  */
8095                 seg = rsmresource_seg(res, rnum, credp,
8096                     RSM_RESOURCE_EXPORT_SEGMENT);
8097                 if (seg->s_type == RSM_RESOURCE_EXPORT_SEGMENT) {
8098                         error = rsmexport_ioctl(seg, &msg, cmd, arg, mode,
8099                             credp);
8100                 } else { /* export ioctl on an import/barrier resource */
8101                         error = RSMERR_BAD_SEG_HNDL;
8102                 }
8103                 break;
8104         case RSM_IOCTL_IMPORT_SEG:
8105                 /* Import list is searched during remote unmap call. */
8106                 seg = rsmresource_seg(res, rnum, credp,
8107                     RSM_RESOURCE_IMPORT_SEGMENT);
8108                 if (seg->s_type == RSM_RESOURCE_IMPORT_SEGMENT) {
8109                         error = rsmimport_ioctl(seg, &msg, cmd, arg, mode,
8110                             credp);
8111                 } else  { /* import ioctl on an export/barrier resource */
8112                         error = RSMERR_BAD_SEG_HNDL;
8113                 }
8114                 break;
8115         case RSM_IOCTL_BAR:
8116                 if (res != RSMRC_RESERVED &&
8117                     res->rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT) {
8118                         error = rsmbar_ioctl((rsmseg_t *)res, &msg, cmd, arg,
8119                             mode);
8120                 } else { /* invalid res value */
8121                         error = RSMERR_BAD_SEG_HNDL;
8122                 }
8123                 break;
8124         case RSM_IOCTL_BELL:
8125                 if (res != RSMRC_RESERVED) {
8126                         if (res->rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT)
8127                                 error = exportbell_ioctl((rsmseg_t *)res, cmd);
8128                         else if (res->rsmrc_type == RSM_RESOURCE_EXPORT_SEGMENT)
8129                                 error = importbell_ioctl((rsmseg_t *)res, cmd);
8130                         else /* RSM_RESOURCE_BAR */
8131                                 error = RSMERR_BAD_SEG_HNDL;
8132                 } else { /* invalid res value */
8133                         error = RSMERR_BAD_SEG_HNDL;
8134                 }
8135                 break;
8136         default:
8137                 error = EINVAL;
8138         }
8139 
8140         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ioctl done: %d\n",
8141             error));
8142         return (error);
8143 }
8144 
8145 
8146 /* **************************** Segment Mapping Operations ********* */
8147 static rsm_mapinfo_t *
8148 rsm_get_mapinfo(rsmseg_t *seg, off_t off, size_t len, off_t *dev_offset,
8149     size_t *map_len)
8150 {
8151         rsm_mapinfo_t   *p;
8152         /*
8153          * Find the correct mapinfo structure to use during the mapping
8154          * from the seg->s_mapinfo list.
8155          * The seg->s_mapinfo list contains in reverse order the mappings
8156          * as returned by the RSMPI rsm_map. In rsm_devmap, we need to
8157          * access the correct entry within this list for the mapping
8158          * requested.
8159          *
8160          * The algorithm for selecting a list entry is as follows:
8161          *
8162          * When start_offset of an entry <= off we have found the entry
8163          * we were looking for. Adjust the dev_offset and map_len (needs
8164          * to be PAGESIZE aligned).
8165          */
8166         p = seg->s_mapinfo;
8167         for (; p; p = p->next) {
8168                 if (p->start_offset <= off) {
8169                         *dev_offset = p->dev_offset + off - p->start_offset;
8170                         *map_len = (len > p->individual_len) ?
8171                             p->individual_len : ptob(btopr(len));
8172                         return (p);
8173                 }
8174                 p = p->next;
8175         }
8176 
8177         return (NULL);
8178 }
8179 
8180 static void
8181 rsm_free_mapinfo(rsm_mapinfo_t  *mapinfo)
8182 {
8183         rsm_mapinfo_t *p;
8184 
8185         while (mapinfo != NULL) {
8186                 p = mapinfo;
8187                 mapinfo = mapinfo->next;
8188                 kmem_free(p, sizeof (*p));
8189         }
8190 }
8191 
8192 static int
8193 rsmmap_map(devmap_cookie_t dhp, dev_t dev, uint_t flags, offset_t off,
8194     size_t len, void **pvtp)
8195 {
8196         rsmcookie_t     *p;
8197         rsmresource_t   *res;
8198         rsmseg_t        *seg;
8199         minor_t rnum;
8200         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8201 
8202         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_map enter\n"));
8203 
8204         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8205             "rsmmap_map: dhp = %x\n", dhp));
8206 
8207         flags = flags;
8208 
8209         rnum = getminor(dev);
8210         res = (rsmresource_t *)rsmresource_lookup(rnum, RSM_NOLOCK);
8211         ASSERT(res != NULL);
8212 
8213         seg = (rsmseg_t *)res;
8214 
8215         rsmseglock_acquire(seg);
8216 
8217         ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
8218 
8219         /*
8220          * Allocate structure and add cookie to segment list
8221          */
8222         p = kmem_alloc(sizeof (*p), KM_SLEEP);
8223 
8224         p->c_dhp = dhp;
8225         p->c_off = off;
8226         p->c_len = len;
8227         p->c_next = seg->s_ckl;
8228         seg->s_ckl = p;
8229 
8230         *pvtp = (void *)seg;
8231 
8232         rsmseglock_release(seg);
8233 
8234         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_map done\n"));
8235         return (DDI_SUCCESS);
8236 }
8237 
8238 /*
8239  * Page fault handling is done here. The prerequisite mapping setup
8240  * has been done in rsm_devmap with calls to ddi_devmem_setup or
8241  * ddi_umem_setup
8242  */
8243 static int
8244 rsmmap_access(devmap_cookie_t dhp, void *pvt, offset_t offset, size_t len,
8245     uint_t type, uint_t rw)
8246 {
8247         int e;
8248         rsmseg_t *seg = (rsmseg_t *)pvt;
8249         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8250 
8251         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_access enter\n"));
8252 
8253         rsmseglock_acquire(seg);
8254 
8255         ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
8256 
8257         while (seg->s_state == RSM_STATE_MAP_QUIESCE) {
8258                 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
8259                         DBG_PRINTF((category, RSM_DEBUG,
8260                             "rsmmap_access done: cv_wait INTR"));
8261                         rsmseglock_release(seg);
8262                         return (RSMERR_INTERRUPTED);
8263                 }
8264         }
8265 
8266         ASSERT(seg->s_state == RSM_STATE_DISCONNECT ||
8267             seg->s_state == RSM_STATE_ACTIVE);
8268 
8269         if (seg->s_state == RSM_STATE_DISCONNECT)
8270                 seg->s_flags |= RSM_IMPORT_DUMMY;
8271 
8272         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8273             "rsmmap_access: dhp = %x\n", dhp));
8274 
8275         rsmseglock_release(seg);
8276 
8277         if (e = devmap_load(dhp, offset, len, type, rw)) {
8278                 DBG_PRINTF((category, RSM_ERR, "devmap_load failed\n"));
8279         }
8280 
8281 
8282         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_access done\n"));
8283 
8284         return (e);
8285 }
8286 
8287 static int
8288 rsmmap_dup(devmap_cookie_t dhp, void *oldpvt, devmap_cookie_t new_dhp,
8289     void **newpvt)
8290 {
8291         rsmseg_t        *seg = (rsmseg_t *)oldpvt;
8292         rsmcookie_t     *p, *old;
8293         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8294 
8295         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_dup enter\n"));
8296 
8297         /*
8298          * Same as map, create an entry to hold cookie and add it to
8299          * connect segment list. The oldpvt is a pointer to segment.
8300          * Return segment pointer in newpvt.
8301          */
8302         rsmseglock_acquire(seg);
8303 
8304         ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
8305 
8306         /*
8307          * Find old cookie
8308          */
8309         for (old = seg->s_ckl; old != NULL; old = old->c_next) {
8310                 if (old->c_dhp == dhp) {
8311                         break;
8312                 }
8313         }
8314         if (old == NULL) {
8315                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8316                     "rsmmap_dup done: EINVAL\n"));
8317                 rsmseglock_release(seg);
8318                 return (EINVAL);
8319         }
8320 
8321         p = kmem_alloc(sizeof (*p), KM_SLEEP);
8322 
8323         p->c_dhp = new_dhp;
8324         p->c_off = old->c_off;
8325         p->c_len = old->c_len;
8326         p->c_next = seg->s_ckl;
8327         seg->s_ckl = p;
8328 
8329         *newpvt = (void *)seg;
8330 
8331         rsmseglock_release(seg);
8332 
8333         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_dup done\n"));
8334 
8335         return (DDI_SUCCESS);
8336 }
8337 
8338 static void
8339 rsmmap_unmap(devmap_cookie_t dhp, void *pvtp, offset_t off, size_t len,
8340     devmap_cookie_t new_dhp1, void **pvtp1,
8341     devmap_cookie_t new_dhp2, void **pvtp2)
8342 {
8343         /*
8344          * Remove pvtp structure from segment list.
8345          */
8346         rsmseg_t        *seg = (rsmseg_t *)pvtp;
8347         int freeflag;
8348 
8349         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8350 
8351         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_unmap enter\n"));
8352 
8353         off = off; len = len;
8354         pvtp1 = pvtp1; pvtp2 = pvtp2;
8355 
8356         rsmseglock_acquire(seg);
8357 
8358         ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
8359 
8360         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8361             "rsmmap_unmap: dhp = %x\n", dhp));
8362         /*
8363          * We can go ahead and remove the dhps even if we are in
8364          * the MAPPING state because the dhps being removed here
8365          * belong to a different mmap and we are holding the segment
8366          * lock.
8367          */
8368         if (new_dhp1 == NULL && new_dhp2 == NULL) {
8369                 /* find and remove dhp handle */
8370                 rsmcookie_t *tmp, **back = &seg->s_ckl;
8371 
8372                 while (*back != NULL) {
8373                         tmp = *back;
8374                         if (tmp->c_dhp == dhp) {
8375                                 *back = tmp->c_next;
8376                                 kmem_free(tmp, sizeof (*tmp));
8377                                 break;
8378                         }
8379                         back = &tmp->c_next;
8380                 }
8381         } else {
8382                 DBG_PRINTF((category, RSM_DEBUG_LVL2,
8383                     "rsmmap_unmap:parital unmap"
8384                     "new_dhp1 %lx, new_dhp2 %lx\n",
8385                     (size_t)new_dhp1, (size_t)new_dhp2));
8386         }
8387 
8388         /*
8389          * rsmmap_unmap is called for each mapping cookie on the list.
8390          * When the list becomes empty and we are not in the MAPPING
8391          * state then unmap in the rsmpi driver.
8392          */
8393         if ((seg->s_ckl == NULL) && (seg->s_state != RSM_STATE_MAPPING))
8394                 (void) rsm_unmap(seg);
8395 
8396         if (seg->s_state == RSM_STATE_END && seg->s_ckl == NULL) {
8397                 freeflag = 1;
8398         } else {
8399                 freeflag = 0;
8400         }
8401 
8402         rsmseglock_release(seg);
8403 
8404         if (freeflag) {
8405                 /* Free the segment structure */
8406                 rsmseg_free(seg);
8407         }
8408         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_unmap done\n"));
8409 
8410 }
8411 
8412 static struct devmap_callback_ctl rsmmap_ops = {
8413         DEVMAP_OPS_REV, /* devmap_ops version number    */
8414         rsmmap_map,     /* devmap_ops map routine */
8415         rsmmap_access,  /* devmap_ops access routine */
8416         rsmmap_dup,             /* devmap_ops dup routine               */
8417         rsmmap_unmap,   /* devmap_ops unmap routine */
8418 };
8419 
8420 static int
8421 rsm_devmap(dev_t dev, devmap_cookie_t dhc, offset_t off, size_t len,
8422     size_t *maplen, uint_t model /*ARGSUSED*/)
8423 {
8424         struct devmap_callback_ctl *callbackops = &rsmmap_ops;
8425         int             err;
8426         uint_t          maxprot;
8427         minor_t         rnum;
8428         rsmseg_t        *seg;
8429         off_t           dev_offset;
8430         size_t          cur_len;
8431         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8432 
8433         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_devmap enter\n"));
8434 
8435         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8436             "rsm_devmap: off = %lx, len = %lx\n", off, len));
8437         rnum = getminor(dev);
8438         seg = (rsmseg_t *)rsmresource_lookup(rnum, RSM_NOLOCK);
8439         ASSERT(seg != NULL);
8440 
8441         if (seg->s_hdr.rsmrc_type == RSM_RESOURCE_BAR) {
8442                 if ((off == barrier_offset) &&
8443                     (len == barrier_size)) {
8444 
8445                         ASSERT(bar_va != NULL && bar_cookie != NULL);
8446 
8447                         /*
8448                          * The offset argument in devmap_umem_setup represents
8449                          * the offset within the kernel memory defined by the
8450                          * cookie. We use this offset as barrier_offset.
8451                          */
8452                         err = devmap_umem_setup(dhc, rsm_dip, NULL, bar_cookie,
8453                             barrier_offset, len, PROT_USER|PROT_READ,
8454                             DEVMAP_DEFAULTS, 0);
8455 
8456                         if (err != 0) {
8457                                 DBG_PRINTF((category, RSM_ERR,
8458                                     "rsm_devmap done: %d\n", err));
8459                                 return (RSMERR_MAP_FAILED);
8460                         }
8461                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8462                             "rsm_devmap done: %d\n", err));
8463 
8464                         *maplen = barrier_size;
8465 
8466                         return (err);
8467                 } else {
8468                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8469                             "rsm_devmap done: %d\n", err));
8470                         return (RSMERR_MAP_FAILED);
8471                 }
8472         }
8473 
8474         ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
8475         ASSERT(seg->s_state == RSM_STATE_MAPPING);
8476 
8477         /*
8478          * Make sure we still have permission for the map operation.
8479          */
8480         maxprot = PROT_USER;
8481         if (seg->s_mode & RSM_PERM_READ) {
8482                 maxprot |= PROT_READ;
8483         }
8484 
8485         if (seg->s_mode & RSM_PERM_WRITE) {
8486                 maxprot |= PROT_WRITE;
8487         }
8488 
8489         /*
8490          * For each devmap call, rsmmap_map is called. This maintains driver
8491          * private information for the mapping. Thus, if there are multiple
8492          * devmap calls there will be multiple rsmmap_map calls and for each
8493          * call, the mapping information will be stored.
8494          * In case of an error during the processing of the devmap call, error
8495          * will be returned. This error return causes the caller of rsm_devmap
8496          * to undo all the mappings by calling rsmmap_unmap for each one.
8497          * rsmmap_unmap will free up the private information for the requested
8498          * mapping.
8499          */
8500         if (seg->s_node != my_nodeid) {
8501                 rsm_mapinfo_t *p;
8502 
8503                 p = rsm_get_mapinfo(seg, off, len, &dev_offset, &cur_len);
8504                 if (p == NULL) {
8505                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8506                             "rsm_devmap: incorrect mapping info\n"));
8507                         return (RSMERR_MAP_FAILED);
8508                 }
8509                 err = devmap_devmem_setup(dhc, p->dip,
8510                     callbackops, p->dev_register,
8511                     dev_offset, cur_len, maxprot,
8512                     DEVMAP_ALLOW_REMAP | DEVMAP_DEFAULTS, 0);
8513 
8514                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8515                     "rsm_devmap: dip=%lx,dreg=%lu,doff=%lx,"
8516                     "off=%lx,len=%lx\n",
8517                     p->dip, p->dev_register, dev_offset, off, cur_len));
8518 
8519                 if (err != 0) {
8520                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8521                             "rsm_devmap: devmap_devmem_setup failed %d\n",
8522                             err));
8523                         return (RSMERR_MAP_FAILED);
8524                 }
8525                 /* cur_len is always an integral multiple pagesize */
8526                 ASSERT((cur_len & (PAGESIZE-1)) == 0);
8527                 *maplen = cur_len;
8528                 return (err);
8529 
8530         } else {
8531                 err = devmap_umem_setup(dhc, rsm_dip, callbackops,
8532                     seg->s_cookie, off, len, maxprot,
8533                     DEVMAP_ALLOW_REMAP|DEVMAP_DEFAULTS, 0);
8534                 if (err != 0) {
8535                         DBG_PRINTF((category, RSM_DEBUG,
8536                             "rsm_devmap: devmap_umem_setup failed %d\n",
8537                             err));
8538                         return (RSMERR_MAP_FAILED);
8539                 }
8540                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8541                     "rsm_devmap: loopback done\n"));
8542 
8543                 *maplen = ptob(btopr(len));
8544 
8545                 return (err);
8546         }
8547 }
8548 
8549 /*
8550  * We can use the devmap framework for mapping device memory to user space by
8551  * specifying this routine in the rsm_cb_ops structure. The kernel mmap
8552  * processing calls this entry point and devmap_setup is called within this
8553  * function, which eventually calls rsm_devmap
8554  */
8555 static int
8556 rsm_segmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp, off_t len,
8557     uint_t prot, uint_t maxprot, uint_t flags, struct cred *cred)
8558 {
8559         int                     error = 0;
8560         int                     old_state;
8561         minor_t                 rnum;
8562         rsmseg_t                *seg, *eseg;
8563         adapter_t               *adapter;
8564         rsm_import_share_t      *sharedp;
8565         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8566 
8567         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_segmap enter\n"));
8568 
8569         /*
8570          * find segment
8571          */
8572         rnum = getminor(dev);
8573         seg = (rsmseg_t *)rsmresource_lookup(rnum, RSM_LOCK);
8574 
8575         if (seg == NULL) {
8576                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8577                     "rsm_segmap done: invalid segment\n"));
8578                 return (EINVAL);
8579         }
8580 
8581         /*
8582          * the user is trying to map a resource that has not been
8583          * defined yet. The library uses this to map in the
8584          * barrier page.
8585          */
8586         if (seg->s_hdr.rsmrc_type == RSM_RESOURCE_BAR) {
8587                 rsmseglock_release(seg);
8588 
8589                 /*
8590                  * The mapping for the barrier page is identified
8591                  * by the special offset barrier_offset
8592                  */
8593 
8594                 if (off == (off_t)barrier_offset ||
8595                     len == (off_t)barrier_size) {
8596                         if (bar_cookie == NULL || bar_va == NULL) {
8597                                 DBG_PRINTF((category, RSM_DEBUG,
8598                                     "rsm_segmap: bar cookie/va is NULL\n"));
8599                                 return (EINVAL);
8600                         }
8601 
8602                         error = devmap_setup(dev, (offset_t)off, as, addrp,
8603                             (size_t)len, prot, maxprot, flags,  cred);
8604 
8605                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8606                             "rsm_segmap done: %d\n", error));
8607                         return (error);
8608                 } else {
8609                         DBG_PRINTF((category, RSM_DEBUG,
8610                             "rsm_segmap: bad offset/length\n"));
8611                         return (EINVAL);
8612                 }
8613         }
8614 
8615         /* Make sure you can only map imported segments */
8616         if (seg->s_hdr.rsmrc_type != RSM_RESOURCE_IMPORT_SEGMENT) {
8617                 rsmseglock_release(seg);
8618                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8619                     "rsm_segmap done: not an import segment\n"));
8620                 return (EINVAL);
8621         }
8622         /* check means library is broken */
8623         ASSERT(seg->s_hdr.rsmrc_num == rnum);
8624 
8625         /* wait for the segment to become unquiesced */
8626         while (seg->s_state == RSM_STATE_CONN_QUIESCE) {
8627                 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
8628                         rsmseglock_release(seg);
8629                         DBG_PRINTF((category, RSM_DEBUG,
8630                             "rsm_segmap done: cv_wait INTR"));
8631                         return (ENODEV);
8632                 }
8633         }
8634 
8635         /* wait until segment leaves the mapping state */
8636         while (seg->s_state == RSM_STATE_MAPPING)
8637                 cv_wait(&seg->s_cv, &seg->s_lock);
8638 
8639         /*
8640          * we allow multiple maps of the same segment in the KA
8641          * and it works because we do an rsmpi map of the whole
8642          * segment during the first map and all the device mapping
8643          * information needed in rsm_devmap is in the mapinfo list.
8644          */
8645         if ((seg->s_state != RSM_STATE_CONNECT) &&
8646             (seg->s_state != RSM_STATE_ACTIVE)) {
8647                 rsmseglock_release(seg);
8648                 DBG_PRINTF((category, RSM_DEBUG,
8649                     "rsm_segmap done: segment not connected\n"));
8650                 return (ENODEV);
8651         }
8652 
8653         /*
8654          * Make sure we are not mapping a larger segment than what's
8655          * exported
8656          */
8657         if ((size_t)off + ptob(btopr(len)) > seg->s_len) {
8658                 rsmseglock_release(seg);
8659                 DBG_PRINTF((category, RSM_DEBUG,
8660                     "rsm_segmap done: off+len>seg size\n"));
8661                 return (ENXIO);
8662         }
8663 
8664         /*
8665          * Make sure we still have permission for the map operation.
8666          */
8667         maxprot = PROT_USER;
8668         if (seg->s_mode & RSM_PERM_READ) {
8669                 maxprot |= PROT_READ;
8670         }
8671 
8672         if (seg->s_mode & RSM_PERM_WRITE) {
8673                 maxprot |= PROT_WRITE;
8674         }
8675 
8676         if ((prot & maxprot) != prot) {
8677                 /* No permission */
8678                 rsmseglock_release(seg);
8679                 DBG_PRINTF((category, RSM_DEBUG,
8680                     "rsm_segmap done: no permission\n"));
8681                 return (EACCES);
8682         }
8683 
8684         old_state = seg->s_state;
8685 
8686         ASSERT(seg->s_share != NULL);
8687 
8688         rsmsharelock_acquire(seg);
8689 
8690         sharedp = seg->s_share;
8691 
8692         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8693             "rsm_segmap:RSMSI_STATE=%d\n", sharedp->rsmsi_state));
8694 
8695         if ((sharedp->rsmsi_state != RSMSI_STATE_CONNECTED) &&
8696             (sharedp->rsmsi_state != RSMSI_STATE_MAPPED)) {
8697                 rsmsharelock_release(seg);
8698                 rsmseglock_release(seg);
8699                 DBG_PRINTF((category, RSM_DEBUG,
8700                     "rsm_segmap done:RSMSI_STATE %d invalid\n",
8701                     sharedp->rsmsi_state));
8702                 return (ENODEV);
8703         }
8704 
8705         /*
8706          * Do the map - since we want importers to share mappings
8707          * we do the rsmpi map for the whole segment
8708          */
8709         if (seg->s_node != my_nodeid) {
8710                 uint_t dev_register;
8711                 off_t dev_offset;
8712                 dev_info_t *dip;
8713                 size_t tmp_len;
8714                 size_t total_length_mapped = 0;
8715                 size_t length_to_map = seg->s_len;
8716                 off_t tmp_off = 0;
8717                 rsm_mapinfo_t *p;
8718 
8719                 /*
8720                  * length_to_map = seg->s_len is always an integral
8721                  * multiple of PAGESIZE. Length mapped in each entry in mapinfo
8722                  * list is a multiple of PAGESIZE - RSMPI map ensures this
8723                  */
8724 
8725                 adapter = seg->s_adapter;
8726                 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_CONNECTED ||
8727                     sharedp->rsmsi_state == RSMSI_STATE_MAPPED);
8728 
8729                 if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTED) {
8730                         error = 0;
8731                         /* map the whole segment */
8732                         while (total_length_mapped < seg->s_len) {
8733                                 tmp_len = 0;
8734 
8735                                 error = adapter->rsmpi_ops->rsm_map(
8736                                     seg->s_handle.in, tmp_off,
8737                                     length_to_map, &tmp_len,
8738                                     &dip, &dev_register, &dev_offset,
8739                                     NULL, NULL);
8740 
8741                                 if (error != 0)
8742                                         break;
8743 
8744                                 /*
8745                                  * Store the mapping info obtained from rsm_map
8746                                  */
8747                                 p = kmem_alloc(sizeof (*p), KM_SLEEP);
8748                                 p->dev_register = dev_register;
8749                                 p->dev_offset = dev_offset;
8750                                 p->dip = dip;
8751                                 p->individual_len = tmp_len;
8752                                 p->start_offset = tmp_off;
8753                                 p->next = sharedp->rsmsi_mapinfo;
8754                                 sharedp->rsmsi_mapinfo = p;
8755 
8756                                 total_length_mapped += tmp_len;
8757                                 length_to_map -= tmp_len;
8758                                 tmp_off += tmp_len;
8759                         }
8760                         seg->s_mapinfo = sharedp->rsmsi_mapinfo;
8761 
8762                         if (error != RSM_SUCCESS) {
8763                                 /* Check if this is the the first rsm_map */
8764                                 if (sharedp->rsmsi_mapinfo != NULL) {
8765                                         /*
8766                                          * A single rsm_unmap undoes
8767                                          * multiple rsm_maps.
8768                                          */
8769                                         (void) seg->s_adapter->rsmpi_ops->
8770                                             rsm_unmap(sharedp->rsmsi_handle);
8771                                         rsm_free_mapinfo(sharedp->
8772                                             rsmsi_mapinfo);
8773                                 }
8774                                 sharedp->rsmsi_mapinfo = NULL;
8775                                 sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
8776                                 rsmsharelock_release(seg);
8777                                 rsmseglock_release(seg);
8778                                 DBG_PRINTF((category, RSM_DEBUG,
8779                                     "rsm_segmap done: rsmpi map err %d\n",
8780                                     error));
8781                                 ASSERT(error != RSMERR_BAD_LENGTH &&
8782                                     error != RSMERR_BAD_MEM_ALIGNMENT &&
8783                                     error != RSMERR_BAD_SEG_HNDL);
8784                                 if (error == RSMERR_UNSUPPORTED_OPERATION)
8785                                         return (ENOTSUP);
8786                                 else if (error == RSMERR_INSUFFICIENT_RESOURCES)
8787                                         return (EAGAIN);
8788                                 else if (error == RSMERR_CONN_ABORTED)
8789                                         return (ENODEV);
8790                                 else
8791                                         return (error);
8792                         } else {
8793                                 sharedp->rsmsi_state = RSMSI_STATE_MAPPED;
8794                         }
8795                 } else {
8796                         seg->s_mapinfo = sharedp->rsmsi_mapinfo;
8797                 }
8798 
8799                 sharedp->rsmsi_mapcnt++;
8800 
8801                 rsmsharelock_release(seg);
8802 
8803                 /* move to an intermediate mapping state */
8804                 seg->s_state = RSM_STATE_MAPPING;
8805                 rsmseglock_release(seg);
8806 
8807                 error = devmap_setup(dev, (offset_t)off, as, addrp,
8808                     len, prot, maxprot, flags, cred);
8809 
8810                 rsmseglock_acquire(seg);
8811                 ASSERT(seg->s_state == RSM_STATE_MAPPING);
8812 
8813                 if (error == DDI_SUCCESS) {
8814                         seg->s_state = RSM_STATE_ACTIVE;
8815                 } else {
8816                         rsmsharelock_acquire(seg);
8817 
8818                         ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAPPED);
8819 
8820                         sharedp->rsmsi_mapcnt--;
8821                         if (sharedp->rsmsi_mapcnt == 0) {
8822                                 /* unmap the shared RSMPI mapping */
8823                                 ASSERT(sharedp->rsmsi_handle != NULL);
8824                                 (void) adapter->rsmpi_ops->
8825                                     rsm_unmap(sharedp->rsmsi_handle);
8826                                 rsm_free_mapinfo(sharedp->rsmsi_mapinfo);
8827                                 sharedp->rsmsi_mapinfo = NULL;
8828                                 sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
8829                         }
8830 
8831                         rsmsharelock_release(seg);
8832                         seg->s_state = old_state;
8833                         DBG_PRINTF((category, RSM_ERR,
8834                             "rsm: devmap_setup failed %d\n", error));
8835                 }
8836                 cv_broadcast(&seg->s_cv);
8837                 rsmseglock_release(seg);
8838                 DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsm_segmap done: %d\n",
8839                     error));
8840                 return (error);
8841         } else {
8842                 /*
8843                  * For loopback, the export segment mapping cookie (s_cookie)
8844                  * is also used as the s_cookie value for its import segments
8845                  * during mapping.
8846                  * Note that reference counting for s_cookie of the export
8847                  * segment is not required due to the following:
8848                  * We never have a case of the export segment being destroyed,
8849                  * leaving the import segments with a stale value for the
8850                  * s_cookie field, since a force disconnect is done prior to a
8851                  * destroy of an export segment. The force disconnect causes
8852                  * the s_cookie value to be reset to NULL. Also for the
8853                  * rsm_rebind operation, we change the s_cookie value of the
8854                  * export segment as well as of all its local (loopback)
8855                  * importers.
8856                  */
8857                 DBG_ADDCATEGORY(category, RSM_LOOPBACK);
8858 
8859                 rsmsharelock_release(seg);
8860                 /*
8861                  * In order to maintain the lock ordering between the export
8862                  * and import segment locks, we need to acquire the export
8863                  * segment lock first and only then acquire the import
8864                  * segment lock.
8865                  * The above is necessary to avoid any deadlock scenarios
8866                  * with rsm_rebind which also acquires both the export
8867                  * and import segment locks in the above mentioned order.
8868                  * Based on code inspection, there seem to be no other
8869                  * situations in which both the export and import segment
8870                  * locks are acquired either in the same or opposite order
8871                  * as mentioned above.
8872                  * Thus in order to conform to the above lock order, we
8873                  * need to change the state of the import segment to
8874                  * RSM_STATE_MAPPING, release the lock. Once this is done we
8875                  * can now safely acquire the export segment lock first
8876                  * followed by the import segment lock which is as per
8877                  * the lock order mentioned above.
8878                  */
8879                 /* move to an intermediate mapping state */
8880                 seg->s_state = RSM_STATE_MAPPING;
8881                 rsmseglock_release(seg);
8882 
8883                 eseg = rsmexport_lookup(seg->s_key);
8884 
8885                 if (eseg == NULL) {
8886                         rsmseglock_acquire(seg);
8887                         /*
8888                          * Revert to old_state and signal any waiters
8889                          * The shared state is not changed
8890                          */
8891 
8892                         seg->s_state = old_state;
8893                         cv_broadcast(&seg->s_cv);
8894                         rsmseglock_release(seg);
8895                         DBG_PRINTF((category, RSM_DEBUG,
8896                             "rsm_segmap done: key %d not found\n", seg->s_key));
8897                         return (ENODEV);
8898                 }
8899 
8900                 rsmsharelock_acquire(seg);
8901                 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_CONNECTED ||
8902                     sharedp->rsmsi_state == RSMSI_STATE_MAPPED);
8903 
8904                 sharedp->rsmsi_mapcnt++;
8905                 sharedp->rsmsi_state = RSMSI_STATE_MAPPED;
8906                 rsmsharelock_release(seg);
8907 
8908                 ASSERT(eseg->s_cookie != NULL);
8909 
8910                 /*
8911                  * It is not required or necessary to acquire the import
8912                  * segment lock here to change the value of s_cookie since
8913                  * no one will touch the import segment as long as it is
8914                  * in the RSM_STATE_MAPPING state.
8915                  */
8916                 seg->s_cookie = eseg->s_cookie;
8917 
8918                 rsmseglock_release(eseg);
8919 
8920                 error = devmap_setup(dev, (offset_t)off, as, addrp, (size_t)len,
8921                     prot, maxprot, flags, cred);
8922 
8923                 rsmseglock_acquire(seg);
8924                 ASSERT(seg->s_state == RSM_STATE_MAPPING);
8925                 if (error == 0) {
8926                         seg->s_state = RSM_STATE_ACTIVE;
8927                 } else {
8928                         rsmsharelock_acquire(seg);
8929 
8930                         ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAPPED);
8931 
8932                         sharedp->rsmsi_mapcnt--;
8933                         if (sharedp->rsmsi_mapcnt == 0) {
8934                                 sharedp->rsmsi_mapinfo = NULL;
8935                                 sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
8936                         }
8937                         rsmsharelock_release(seg);
8938                         seg->s_state = old_state;
8939                         seg->s_cookie = NULL;
8940                 }
8941                 cv_broadcast(&seg->s_cv);
8942                 rsmseglock_release(seg);
8943                 DBG_PRINTF((category, RSM_DEBUG_LVL2,
8944                     "rsm_segmap done: %d\n", error));
8945                 return (error);
8946         }
8947 }
8948 
8949 int
8950 rsmka_null_seg_create(
8951     rsm_controller_handle_t argcp,
8952     rsm_memseg_export_handle_t *handle,
8953     size_t size,
8954     uint_t flags,
8955     rsm_memory_local_t *memory,
8956     rsm_resource_callback_t callback,
8957     rsm_resource_callback_arg_t callback_arg    /*ARGSUSED*/)
8958 {
8959         return (RSM_SUCCESS);
8960 }
8961 
8962 
8963 int
8964 rsmka_null_seg_destroy(
8965     rsm_memseg_export_handle_t argmemseg        /*ARGSUSED*/)
8966 {
8967         return (RSM_SUCCESS);
8968 }
8969 
8970 
8971 int
8972 rsmka_null_bind(
8973     rsm_memseg_export_handle_t argmemseg,
8974     off_t offset,
8975     rsm_memory_local_t *argmemory,
8976     rsm_resource_callback_t callback,
8977     rsm_resource_callback_arg_t callback_arg    /*ARGSUSED*/)
8978 {
8979         return (RSM_SUCCESS);
8980 }
8981 
8982 
8983 int
8984 rsmka_null_unbind(
8985     rsm_memseg_export_handle_t argmemseg,
8986     off_t offset,
8987     size_t length       /*ARGSUSED*/)
8988 {
8989         return (DDI_SUCCESS);
8990 }
8991 
8992 int
8993 rsmka_null_rebind(
8994     rsm_memseg_export_handle_t argmemseg,
8995     off_t offset,
8996     rsm_memory_local_t *memory,
8997     rsm_resource_callback_t callback,
8998     rsm_resource_callback_arg_t callback_arg    /*ARGSUSED*/)
8999 {
9000         return (RSM_SUCCESS);
9001 }
9002 
9003 int
9004 rsmka_null_publish(
9005     rsm_memseg_export_handle_t argmemseg,
9006     rsm_access_entry_t access_list[],
9007     uint_t access_list_length,
9008     rsm_memseg_id_t segment_id,
9009     rsm_resource_callback_t callback,
9010     rsm_resource_callback_arg_t callback_arg    /*ARGSUSED*/)
9011 {
9012         return (RSM_SUCCESS);
9013 }
9014 
9015 
9016 int
9017 rsmka_null_republish(
9018     rsm_memseg_export_handle_t memseg,
9019     rsm_access_entry_t access_list[],
9020     uint_t access_list_length,
9021     rsm_resource_callback_t callback,
9022     rsm_resource_callback_arg_t callback_arg    /*ARGSUSED*/)
9023 {
9024         return (RSM_SUCCESS);
9025 }
9026 
9027 int
9028 rsmka_null_unpublish(
9029     rsm_memseg_export_handle_t argmemseg        /*ARGSUSED*/)
9030 {
9031         return (RSM_SUCCESS);
9032 }
9033 
9034 
9035 void
9036 rsmka_init_loopback()
9037 {
9038         rsm_ops_t       *ops = &null_rsmpi_ops;
9039         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_LOOPBACK);
9040 
9041         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9042             "rsmka_init_loopback enter\n"));
9043 
9044         /* initialize null ops vector */
9045         ops->rsm_seg_create = rsmka_null_seg_create;
9046         ops->rsm_seg_destroy = rsmka_null_seg_destroy;
9047         ops->rsm_bind = rsmka_null_bind;
9048         ops->rsm_unbind = rsmka_null_unbind;
9049         ops->rsm_rebind = rsmka_null_rebind;
9050         ops->rsm_publish = rsmka_null_publish;
9051         ops->rsm_unpublish = rsmka_null_unpublish;
9052         ops->rsm_republish = rsmka_null_republish;
9053 
9054         /* initialize attributes for loopback adapter */
9055         loopback_attr.attr_name = loopback_str;
9056         loopback_attr.attr_page_size = 0x8; /* 8K */
9057 
9058         /* initialize loopback adapter */
9059         loopback_adapter.rsm_attr = loopback_attr;
9060         loopback_adapter.rsmpi_ops = &null_rsmpi_ops;
9061         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9062             "rsmka_init_loopback done\n"));
9063 }
9064 
9065 /* ************** DR functions ********************************** */
9066 static void
9067 rsm_quiesce_exp_seg(rsmresource_t *resp)
9068 {
9069         int             recheck_state;
9070         rsmseg_t        *segp = (rsmseg_t *)resp;
9071         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9072         DBG_DEFINE_STR(function, "rsm_unquiesce_exp_seg");
9073 
9074         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9075             "%s enter: key=%u\n", function, segp->s_key));
9076 
9077         rsmseglock_acquire(segp);
9078         do {
9079                 recheck_state = 0;
9080                 if ((segp->s_state == RSM_STATE_NEW_QUIESCED) ||
9081                     (segp->s_state == RSM_STATE_BIND_QUIESCED) ||
9082                     (segp->s_state == RSM_STATE_EXPORT_QUIESCING) ||
9083                     (segp->s_state == RSM_STATE_EXPORT_QUIESCED)) {
9084                         rsmseglock_release(segp);
9085                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9086                             "%s done:state =%d\n", function,
9087                             segp->s_state));
9088                         return;
9089                 }
9090 
9091                 if (segp->s_state == RSM_STATE_NEW) {
9092                         segp->s_state = RSM_STATE_NEW_QUIESCED;
9093                         rsmseglock_release(segp);
9094                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9095                             "%s done:state =%d\n", function,
9096                             segp->s_state));
9097                         return;
9098                 }
9099 
9100                 if (segp->s_state == RSM_STATE_BIND) {
9101                         /* unbind */
9102                         (void) rsm_unbind_pages(segp);
9103                         segp->s_state = RSM_STATE_BIND_QUIESCED;
9104                         rsmseglock_release(segp);
9105                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9106                             "%s done:state =%d\n", function,
9107                             segp->s_state));
9108                         return;
9109                 }
9110 
9111                 if (segp->s_state == RSM_STATE_EXPORT) {
9112                         /*
9113                          * wait for putv/getv to complete if the segp is
9114                          * a local memory handle
9115                          */
9116                         while ((segp->s_state == RSM_STATE_EXPORT) &&
9117                             (segp->s_rdmacnt != 0)) {
9118                                 cv_wait(&segp->s_cv, &segp->s_lock);
9119                         }
9120 
9121                         if (segp->s_state != RSM_STATE_EXPORT) {
9122                                 /*
9123                                  * state changed need to see what it
9124                                  * should be changed to.
9125                                  */
9126                                 recheck_state = 1;
9127                                 continue;
9128                         }
9129 
9130                         segp->s_state = RSM_STATE_EXPORT_QUIESCING;
9131                         rsmseglock_release(segp);
9132                         /*
9133                          * send SUSPEND messages - currently it will be
9134                          * done at the end
9135                          */
9136                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9137                             "%s done:state =%d\n", function,
9138                             segp->s_state));
9139                         return;
9140                 }
9141         } while (recheck_state);
9142 
9143         rsmseglock_release(segp);
9144 }
9145 
9146 static void
9147 rsm_unquiesce_exp_seg(rsmresource_t *resp)
9148 {
9149         int                     ret;
9150         rsmseg_t                *segp = (rsmseg_t *)resp;
9151         rsmapi_access_entry_t   *acl;
9152         rsm_access_entry_t      *rsmpi_acl;
9153         int                     acl_len;
9154         int                     create_flags = 0;
9155         struct buf              *xbuf;
9156         rsm_memory_local_t      mem;
9157         adapter_t               *adapter;
9158         dev_t                   sdev = 0;
9159         rsm_resource_callback_t callback_flag;
9160         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9161         DBG_DEFINE_STR(function, "rsm_unquiesce_exp_seg");
9162 
9163         rsmseglock_acquire(segp);
9164 
9165         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9166             "%s enter: key=%u, state=%d\n", function, segp->s_key,
9167             segp->s_state));
9168 
9169         if ((segp->s_state == RSM_STATE_NEW) ||
9170             (segp->s_state == RSM_STATE_BIND) ||
9171             (segp->s_state == RSM_STATE_EXPORT)) {
9172                 rsmseglock_release(segp);
9173                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done:state=%d\n",
9174                     function, segp->s_state));
9175                 return;
9176         }
9177 
9178         if (segp->s_state == RSM_STATE_NEW_QUIESCED) {
9179                 segp->s_state = RSM_STATE_NEW;
9180                 cv_broadcast(&segp->s_cv);
9181                 rsmseglock_release(segp);
9182                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done:state=%d\n",
9183                     function, segp->s_state));
9184                 return;
9185         }
9186 
9187         if (segp->s_state == RSM_STATE_BIND_QUIESCED) {
9188                 /* bind the segment */
9189                 ret = rsm_bind_pages(&segp->s_cookie, segp->s_region.r_vaddr,
9190                     segp->s_len, segp->s_proc);
9191                 if (ret == RSM_SUCCESS) { /* bind successful */
9192                         segp->s_state = RSM_STATE_BIND;
9193                 } else { /* bind failed - resource unavailable */
9194                         segp->s_state = RSM_STATE_NEW;
9195                 }
9196                 cv_broadcast(&segp->s_cv);
9197                 rsmseglock_release(segp);
9198                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9199                     "%s done: bind_qscd bind = %d\n", function, ret));
9200                 return;
9201         }
9202 
9203         while (segp->s_state == RSM_STATE_EXPORT_QUIESCING) {
9204                 /* wait for the segment to move to EXPORT_QUIESCED state */
9205                 cv_wait(&segp->s_cv, &segp->s_lock);
9206         }
9207 
9208         if (segp->s_state == RSM_STATE_EXPORT_QUIESCED) {
9209                 /* bind the segment */
9210                 ret = rsm_bind_pages(&segp->s_cookie, segp->s_region.r_vaddr,
9211                     segp->s_len, segp->s_proc);
9212 
9213                 if (ret != RSM_SUCCESS) {
9214                         /* bind failed - resource unavailable */
9215                         acl_len = segp->s_acl_len;
9216                         acl = segp->s_acl;
9217                         rsmpi_acl = segp->s_acl_in;
9218                         segp->s_acl_len = 0;
9219                         segp->s_acl = NULL;
9220                         segp->s_acl_in = NULL;
9221                         rsmseglock_release(segp);
9222 
9223                         rsmexport_rm(segp);
9224                         rsmacl_free(acl, acl_len);
9225                         rsmpiacl_free(rsmpi_acl, acl_len);
9226 
9227                         rsmseglock_acquire(segp);
9228                         segp->s_state = RSM_STATE_NEW;
9229                         cv_broadcast(&segp->s_cv);
9230                         rsmseglock_release(segp);
9231                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9232                             "%s done: exp_qscd bind failed = %d\n",
9233                             function, ret));
9234                         return;
9235                 }
9236                 /*
9237                  * publish the segment
9238                  * if  successful
9239                  *   segp->s_state = RSM_STATE_EXPORT;
9240                  * else failed
9241                  *   segp->s_state = RSM_STATE_BIND;
9242                  */
9243 
9244                 /* check whether it is a local_memory_handle */
9245                 if (segp->s_acl != (rsmapi_access_entry_t *)NULL) {
9246                         if ((segp->s_acl[0].ae_node == my_nodeid) &&
9247                             (segp->s_acl[0].ae_permission == 0)) {
9248                                 segp->s_state = RSM_STATE_EXPORT;
9249                                 cv_broadcast(&segp->s_cv);
9250                                 rsmseglock_release(segp);
9251                                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9252                                     "%s done:exp_qscd\n", function));
9253                                 return;
9254                         }
9255                 }
9256                 xbuf = ddi_umem_iosetup(segp->s_cookie, 0, segp->s_len, B_WRITE,
9257                     sdev, 0, NULL, DDI_UMEM_SLEEP);
9258                 ASSERT(xbuf != NULL);
9259 
9260                 mem.ms_type = RSM_MEM_BUF;
9261                 mem.ms_bp = xbuf;
9262 
9263                 adapter = segp->s_adapter;
9264 
9265                 if (segp->s_flags & RSMKA_ALLOW_UNBIND_REBIND) {
9266                         create_flags = RSM_ALLOW_UNBIND_REBIND;
9267                 }
9268 
9269                 if (segp->s_flags & RSMKA_SET_RESOURCE_DONTWAIT) {
9270                         callback_flag  = RSM_RESOURCE_DONTWAIT;
9271                 } else {
9272                         callback_flag  = RSM_RESOURCE_SLEEP;
9273                 }
9274 
9275                 ret = adapter->rsmpi_ops->rsm_seg_create(
9276                     adapter->rsmpi_handle, &segp->s_handle.out,
9277                     segp->s_len, create_flags, &mem,
9278                     callback_flag, NULL);
9279 
9280                 if (ret != RSM_SUCCESS) {
9281                         acl_len = segp->s_acl_len;
9282                         acl = segp->s_acl;
9283                         rsmpi_acl = segp->s_acl_in;
9284                         segp->s_acl_len = 0;
9285                         segp->s_acl = NULL;
9286                         segp->s_acl_in = NULL;
9287                         rsmseglock_release(segp);
9288 
9289                         rsmexport_rm(segp);
9290                         rsmacl_free(acl, acl_len);
9291                         rsmpiacl_free(rsmpi_acl, acl_len);
9292 
9293                         rsmseglock_acquire(segp);
9294                         segp->s_state = RSM_STATE_BIND;
9295                         cv_broadcast(&segp->s_cv);
9296                         rsmseglock_release(segp);
9297                         DBG_PRINTF((category, RSM_ERR,
9298                             "%s done: exp_qscd create failed = %d\n",
9299                             function, ret));
9300                         return;
9301                 }
9302 
9303                 ret = adapter->rsmpi_ops->rsm_publish(
9304                     segp->s_handle.out, segp->s_acl_in, segp->s_acl_len,
9305                     segp->s_segid, RSM_RESOURCE_DONTWAIT, NULL);
9306 
9307                 if (ret != RSM_SUCCESS) {
9308                         acl_len = segp->s_acl_len;
9309                         acl = segp->s_acl;
9310                         rsmpi_acl = segp->s_acl_in;
9311                         segp->s_acl_len = 0;
9312                         segp->s_acl = NULL;
9313                         segp->s_acl_in = NULL;
9314                         adapter->rsmpi_ops->rsm_seg_destroy(segp->s_handle.out);
9315                         rsmseglock_release(segp);
9316 
9317                         rsmexport_rm(segp);
9318                         rsmacl_free(acl, acl_len);
9319                         rsmpiacl_free(rsmpi_acl, acl_len);
9320 
9321                         rsmseglock_acquire(segp);
9322                         segp->s_state = RSM_STATE_BIND;
9323                         cv_broadcast(&segp->s_cv);
9324                         rsmseglock_release(segp);
9325                         DBG_PRINTF((category, RSM_ERR,
9326                             "%s done: exp_qscd publish failed = %d\n",
9327                             function, ret));
9328                         return;
9329                 }
9330 
9331                 segp->s_state = RSM_STATE_EXPORT;
9332                 cv_broadcast(&segp->s_cv);
9333                 rsmseglock_release(segp);
9334                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done: exp_qscd\n",
9335                     function));
9336                 return;
9337         }
9338 
9339         rsmseglock_release(segp);
9340 
9341         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done\n", function));
9342 }
9343 
9344 static void
9345 rsm_quiesce_imp_seg(rsmresource_t *resp)
9346 {
9347         rsmseg_t        *segp = (rsmseg_t *)resp;
9348         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9349         DBG_DEFINE_STR(function, "rsm_quiesce_imp_seg");
9350 
9351         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9352             "%s enter: key=%u\n", function, segp->s_key));
9353 
9354         rsmseglock_acquire(segp);
9355         segp->s_flags |= RSM_DR_INPROGRESS;
9356 
9357         while (segp->s_rdmacnt != 0) {
9358                 /* wait for the RDMA to complete */
9359                 cv_wait(&segp->s_cv, &segp->s_lock);
9360         }
9361 
9362         rsmseglock_release(segp);
9363 
9364         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done\n", function));
9365 
9366 }
9367 
9368 static void
9369 rsm_unquiesce_imp_seg(rsmresource_t *resp)
9370 {
9371         rsmseg_t        *segp = (rsmseg_t *)resp;
9372         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9373         DBG_DEFINE_STR(function, "rsm_unquiesce_imp_seg");
9374 
9375         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9376             "%s enter: key=%u\n", function, segp->s_key));
9377 
9378         rsmseglock_acquire(segp);
9379 
9380         segp->s_flags &= ~RSM_DR_INPROGRESS;
9381         /* wake up any waiting putv/getv ops */
9382         cv_broadcast(&segp->s_cv);
9383 
9384         rsmseglock_release(segp);
9385 
9386         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done\n", function));
9387 
9388 
9389 }
9390 
9391 static void
9392 rsm_process_exp_seg(rsmresource_t *resp, int event)
9393 {
9394         if (event == RSM_DR_QUIESCE)
9395                 rsm_quiesce_exp_seg(resp);
9396         else /* UNQUIESCE */
9397                 rsm_unquiesce_exp_seg(resp);
9398 }
9399 
9400 static void
9401 rsm_process_imp_seg(rsmresource_t *resp, int event)
9402 {
9403         if (event == RSM_DR_QUIESCE)
9404                 rsm_quiesce_imp_seg(resp);
9405         else /* UNQUIESCE */
9406                 rsm_unquiesce_imp_seg(resp);
9407 }
9408 
9409 static void
9410 rsm_dr_process_local_segments(int event)
9411 {
9412 
9413         int i, j;
9414         rsmresource_blk_t       *blk;
9415         rsmresource_t           *p;
9416         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9417 
9418         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9419             "rsm_dr_process_local_segments enter\n"));
9420 
9421         /* iterate through the resource structure */
9422 
9423         rw_enter(&rsm_resource.rsmrc_lock, RW_READER);
9424 
9425         for (i = 0; i < rsm_resource.rsmrc_len; i++) {
9426                 blk = rsm_resource.rsmrc_root[i];
9427                 if (blk != NULL) {
9428                         for (j = 0; j < RSMRC_BLKSZ; j++) {
9429                                 p = blk->rsmrcblk_blks[j];
9430                                 if ((p != NULL) && (p != RSMRC_RESERVED)) {
9431                                         /* valid resource */
9432                                         if (p->rsmrc_type ==
9433                                             RSM_RESOURCE_EXPORT_SEGMENT)
9434                                                 rsm_process_exp_seg(p, event);
9435                                         else if (p->rsmrc_type ==
9436                                             RSM_RESOURCE_IMPORT_SEGMENT)
9437                                                 rsm_process_imp_seg(p, event);
9438                                 }
9439                         }
9440                 }
9441         }
9442 
9443         rw_exit(&rsm_resource.rsmrc_lock);
9444 
9445         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9446             "rsm_dr_process_local_segments done\n"));
9447 }
9448 
9449 /* *************** DR callback functions ************ */
9450 static void
9451 rsm_dr_callback_post_add(void *arg, pgcnt_t delta /* ARGSUSED */)
9452 {
9453         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9454         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9455             "rsm_dr_callback_post_add is a no-op\n"));
9456         /* Noop */
9457 }
9458 
9459 static int
9460 rsm_dr_callback_pre_del(void *arg, pgcnt_t delta /* ARGSUSED */)
9461 {
9462         int     recheck_state = 0;
9463         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9464 
9465         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9466             "rsm_dr_callback_pre_del enter\n"));
9467 
9468         mutex_enter(&rsm_drv_data.drv_lock);
9469 
9470         do {
9471                 recheck_state = 0;
9472                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9473                     "rsm_dr_callback_pre_del:state=%d\n",
9474                     rsm_drv_data.drv_state));
9475 
9476                 switch (rsm_drv_data.drv_state) {
9477                 case RSM_DRV_NEW:
9478                         /*
9479                          * The state should usually never be RSM_DRV_NEW
9480                          * since in this state the callbacks have not yet
9481                          * been registered. So, ASSERT.
9482                          */
9483                         ASSERT(0);
9484                         return (0);
9485                 case RSM_DRV_REG_PROCESSING:
9486                         /*
9487                          * The driver is in the process of registering
9488                          * with the DR framework. So, wait till the
9489                          * registration process is complete.
9490                          */
9491                         recheck_state = 1;
9492                         cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
9493                         break;
9494                 case RSM_DRV_UNREG_PROCESSING:
9495                         /*
9496                          * If the state is RSM_DRV_UNREG_PROCESSING, the
9497                          * module is in the process of detaching and
9498                          * unregistering the callbacks from the DR
9499                          * framework. So, simply return.
9500                          */
9501                         mutex_exit(&rsm_drv_data.drv_lock);
9502                         DBG_PRINTF((category, RSM_DEBUG,
9503                             "rsm_dr_callback_pre_del:"
9504                             "pre-del on NEW/UNREG\n"));
9505                         return (0);
9506                 case RSM_DRV_OK:
9507                         rsm_drv_data.drv_state = RSM_DRV_PREDEL_STARTED;
9508                         break;
9509                 case RSM_DRV_PREDEL_STARTED:
9510                         /* FALLTHRU */
9511                 case RSM_DRV_PREDEL_COMPLETED:
9512                         /* FALLTHRU */
9513                 case RSM_DRV_POSTDEL_IN_PROGRESS:
9514                         recheck_state = 1;
9515                         cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
9516                         break;
9517                 case RSM_DRV_DR_IN_PROGRESS:
9518                         rsm_drv_data.drv_memdel_cnt++;
9519                         mutex_exit(&rsm_drv_data.drv_lock);
9520                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9521                             "rsm_dr_callback_pre_del done\n"));
9522                         return (0);
9523                         /* break; */
9524                 default:
9525                         ASSERT(0);
9526                         break;
9527                 }
9528 
9529         } while (recheck_state);
9530 
9531         rsm_drv_data.drv_memdel_cnt++;
9532 
9533         mutex_exit(&rsm_drv_data.drv_lock);
9534 
9535         /* Do all the quiescing stuff here */
9536         DBG_PRINTF((category, RSM_DEBUG,
9537             "rsm_dr_callback_pre_del: quiesce things now\n"));
9538 
9539         rsm_dr_process_local_segments(RSM_DR_QUIESCE);
9540 
9541         /*
9542          * now that all local segments have been quiesced lets inform
9543          * the importers
9544          */
9545         rsm_send_suspend();
9546 
9547         /*
9548          * In response to the suspend message the remote node(s) will process
9549          * the segments and send a suspend_complete message. Till all
9550          * the nodes send the suspend_complete message we wait in the
9551          * RSM_DRV_PREDEL_STARTED state. In the exporter_quiesce
9552          * function we transition to the RSM_DRV_PREDEL_COMPLETED state.
9553          */
9554         mutex_enter(&rsm_drv_data.drv_lock);
9555 
9556         while (rsm_drv_data.drv_state == RSM_DRV_PREDEL_STARTED) {
9557                 cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
9558         }
9559 
9560         ASSERT(rsm_drv_data.drv_state == RSM_DRV_PREDEL_COMPLETED);
9561 
9562         rsm_drv_data.drv_state = RSM_DRV_DR_IN_PROGRESS;
9563         cv_broadcast(&rsm_drv_data.drv_cv);
9564 
9565         mutex_exit(&rsm_drv_data.drv_lock);
9566 
9567         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9568             "rsm_dr_callback_pre_del done\n"));
9569 
9570         return (0);
9571 }
9572 
9573 static void
9574 rsm_dr_callback_post_del(void *arg, pgcnt_t delta, int cancelled /* ARGSUSED */)
9575 {
9576         int     recheck_state = 0;
9577         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9578 
9579         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9580             "rsm_dr_callback_post_del enter\n"));
9581 
9582         mutex_enter(&rsm_drv_data.drv_lock);
9583 
9584         do {
9585                 recheck_state = 0;
9586                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9587                     "rsm_dr_callback_post_del:state=%d\n",
9588                     rsm_drv_data.drv_state));
9589 
9590                 switch (rsm_drv_data.drv_state) {
9591                 case RSM_DRV_NEW:
9592                         /*
9593                          * The driver state cannot not be RSM_DRV_NEW
9594                          * since in this state the callbacks have not
9595                          * yet been registered.
9596                          */
9597                         ASSERT(0);
9598                         return;
9599                 case RSM_DRV_REG_PROCESSING:
9600                         /*
9601                          * The driver is in the process of registering with
9602                          * the DR framework. Wait till the registration is
9603                          * complete.
9604                          */
9605                         recheck_state = 1;
9606                         cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
9607                         break;
9608                 case RSM_DRV_UNREG_PROCESSING:
9609                         /*
9610                          * RSM_DRV_UNREG_PROCESSING state means the module
9611                          * is detaching and unregistering the callbacks
9612                          * from the DR framework. So simply return.
9613                          */
9614                         /* FALLTHRU */
9615                 case RSM_DRV_OK:
9616                         /*
9617                          * RSM_DRV_OK means we missed the pre-del
9618                          * corresponding to this post-del coz we had not
9619                          * registered yet, so simply return.
9620                          */
9621                         mutex_exit(&rsm_drv_data.drv_lock);
9622                         DBG_PRINTF((category, RSM_DEBUG,
9623                             "rsm_dr_callback_post_del:"
9624                             "post-del on OK/UNREG\n"));
9625                         return;
9626                         /* break; */
9627                 case RSM_DRV_PREDEL_STARTED:
9628                         /* FALLTHRU */
9629                 case RSM_DRV_PREDEL_COMPLETED:
9630                         /* FALLTHRU */
9631                 case RSM_DRV_POSTDEL_IN_PROGRESS:
9632                         recheck_state = 1;
9633                         cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
9634                         break;
9635                 case RSM_DRV_DR_IN_PROGRESS:
9636                         rsm_drv_data.drv_memdel_cnt--;
9637                         if (rsm_drv_data.drv_memdel_cnt > 0) {
9638                                 mutex_exit(&rsm_drv_data.drv_lock);
9639                                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9640                                     "rsm_dr_callback_post_del done:\n"));
9641                                 return;
9642                         }
9643                         rsm_drv_data.drv_state = RSM_DRV_POSTDEL_IN_PROGRESS;
9644                         break;
9645                 default:
9646                         ASSERT(0);
9647                         return;
9648                         /* break; */
9649                 }
9650         } while (recheck_state);
9651 
9652         mutex_exit(&rsm_drv_data.drv_lock);
9653 
9654         /* Do all the unquiescing stuff here */
9655         DBG_PRINTF((category, RSM_DEBUG,
9656             "rsm_dr_callback_post_del: unquiesce things now\n"));
9657 
9658         rsm_dr_process_local_segments(RSM_DR_UNQUIESCE);
9659 
9660         /*
9661          * now that all local segments have been unquiesced lets inform
9662          * the importers
9663          */
9664         rsm_send_resume();
9665 
9666         mutex_enter(&rsm_drv_data.drv_lock);
9667 
9668         rsm_drv_data.drv_state = RSM_DRV_OK;
9669 
9670         cv_broadcast(&rsm_drv_data.drv_cv);
9671 
9672         mutex_exit(&rsm_drv_data.drv_lock);
9673 
9674         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9675             "rsm_dr_callback_post_del done\n"));
9676 
9677         return;
9678 
9679 }