1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 * Copyright 2012 Milan Jurik. All rights reserved.
25 * Copyright (c) 2016 by Delphix. All rights reserved.
26 */
27
28
29 /*
30 * Overview of the RSM Kernel Agent:
31 * ---------------------------------
32 *
33 * rsm.c constitutes the implementation of the RSM kernel agent. The RSM
34 * kernel agent is a pseudo device driver which makes use of the RSMPI
35 * interface on behalf of the RSMAPI user library.
36 *
37 * The kernel agent functionality can be categorized into the following
38 * components:
39 * 1. Driver Infrastructure
40 * 2. Export/Import Segment Management
41 * 3. Internal resource allocation/deallocation
42 *
43 * The driver infrastructure includes the basic module loading entry points
44 * like _init, _info, _fini to load, unload and report information about
45 * the driver module. The driver infrastructure also includes the
46 * autoconfiguration entry points namely, attach, detach and getinfo for
47 * the device autoconfiguration.
48 *
49 * The kernel agent is a pseudo character device driver and exports
50 * a cb_ops structure which defines the driver entry points for character
51 * device access. This includes the open and close entry points. The
52 * other entry points provided include ioctl, devmap and segmap and chpoll.
53 * read and write entry points are not used since the device is memory
54 * mapped. Also ddi_prop_op is used for the prop_op entry point.
55 *
56 * The ioctl entry point supports a number of commands, which are used by
57 * the RSMAPI library in order to export and import segments. These
58 * commands include commands for binding and rebinding the physical pages
59 * allocated to the virtual address range, publishing the export segment,
60 * unpublishing and republishing an export segment, creating an
61 * import segment and a virtual connection from this import segment to
62 * an export segment, performing scatter-gather data transfer, barrier
63 * operations.
64 *
65 *
66 * Export and Import segments:
67 * ---------------------------
68 *
69 * In order to create an RSM export segment a process allocates a range in its
70 * virtual address space for the segment using standard Solaris interfaces.
71 * The process then calls RSMAPI, which in turn makes an ioctl call to the
72 * RSM kernel agent for an allocation of physical memory pages and for
73 * creation of the export segment by binding these pages to the virtual
74 * address range. These pages are locked in memory so that remote accesses
75 * are always applied to the correct page. Then the RSM segment is published,
76 * again via RSMAPI making an ioctl to the RSM kernel agent, and a segment id
77 * is assigned to it.
78 *
79 * In order to import a published RSM segment, RSMAPI creates an import
80 * segment and forms a virtual connection across the interconnect to the
81 * export segment, via an ioctl into the kernel agent with the connect
82 * command. The import segment setup is completed by mapping the
 * local device memory into the importer's virtual address space. The
84 * mapping of the import segment is handled by the segmap/devmap
85 * infrastructure described as follows.
86 *
87 * Segmap and Devmap interfaces:
88 *
89 * The RSM kernel agent allows device memory to be directly accessed by user
90 * threads via memory mapping. In order to do so, the RSM kernel agent
91 * supports the devmap and segmap entry points.
92 *
93 * The segmap entry point(rsm_segmap) is responsible for setting up a memory
94 * mapping as requested by mmap. The devmap entry point(rsm_devmap) is
95 * responsible for exporting the device memory to the user applications.
96 * rsm_segmap calls RSMPI rsm_map to allocate device memory. Then the
 * control is transferred to the devmap_setup call which calls rsm_devmap.
98 *
99 * rsm_devmap validates the user mapping to the device or kernel memory
100 * and passes the information to the system for setting up the mapping. The
101 * actual setting up of the mapping is done by devmap_devmem_setup(for
102 * device memory) or devmap_umem_setup(for kernel memory). Callbacks are
103 * registered for device context management via the devmap_devmem_setup
104 * or devmap_umem_setup calls. The callbacks are rsmmap_map, rsmmap_unmap,
105 * rsmmap_access, rsmmap_dup. The callbacks are called when a new mapping
106 * is created, a mapping is freed, a mapping is accessed or an existing
107 * mapping is duplicated respectively. These callbacks allow the RSM kernel
108 * agent to maintain state information associated with the mappings.
109 * The state information is mainly in the form of a cookie list for the import
110 * segment for which mapping has been done.
111 *
112 * Forced disconnect of import segments:
113 *
114 * When an exported segment is unpublished, the exporter sends a forced
115 * disconnect message to all its importers. The importer segments are
116 * unloaded and disconnected. This involves unloading the original
117 * mappings and remapping to a preallocated kernel trash page. This is
118 * done by devmap_umem_remap. The trash/dummy page is a kernel page,
119 * preallocated by the kernel agent during attach using ddi_umem_alloc with
120 * the DDI_UMEM_TRASH flag set. This avoids a core dump in the application
121 * due to unloading of the original mappings.
122 *
123 * Additionally every segment has a mapping generation number associated
124 * with it. This is an entry in the barrier generation page, created
125 * during attach time. This mapping generation number for the import
126 * segments is incremented on a force disconnect to notify the application
127 * of the force disconnect. On this notification, the application needs
128 * to reconnect the segment to establish a new legitimate mapping.
129 *
130 *
131 * Locks used in the kernel agent:
132 * -------------------------------
133 *
134 * The kernel agent uses a variety of mutexes and condition variables for
135 * mutual exclusion of the shared data structures and for synchronization
136 * between the various threads. Some of the locks are described as follows.
137 *
 * Each resource structure, which represents either an export or an import
 * segment, has a lock associated with it: the resource mutex, rsmrc_lock.
140 * This is used directly by RSMRC_LOCK and RSMRC_UNLOCK macros and in the
141 * rsmseglock_acquire and rsmseglock_release macros. An additional
142 * lock called the rsmsi_lock is used for the shared import data structure
143 * that is relevant for resources representing import segments. There is
144 * also a condition variable associated with the resource called s_cv. This
145 * is used to wait for events like the segment state change etc.
146 *
147 * The resource structures are allocated from a pool of resource structures,
148 * called rsm_resource. This pool is protected via a reader-writer lock,
149 * called rsmrc_lock.
150 *
151 * There are two separate hash tables, one for the export segments and
152 * one for the import segments. The export segments are inserted into the
153 * export segment hash table only after they have been published and the
154 * import segments are inserted in the import segments list only after they
155 * have successfully connected to an exported segment. These tables are
156 * protected via reader-writer locks.
157 *
158 * Debug Support in the kernel agent:
159 * ----------------------------------
160 *
161 * Debugging support in the kernel agent is provided by the following
162 * macros.
163 *
164 * DBG_PRINTF((category, level, message)) is a macro which logs a debug
 * message to the kernel agent's debug buffer, rsmka_dbg. This debug buffer
166 * can be viewed in kmdb as *rsmka_dbg/s. The message is logged based
167 * on the definition of the category and level. All messages that belong to
168 * the specified category(rsmdbg_category) and are of an equal or greater
169 * severity than the specified level(rsmdbg_level) are logged. The message
170 * is a string which uses the same formatting rules as the strings used in
171 * printf.
172 *
173 * The category defines which component of the kernel agent has logged this
174 * message. There are a number of categories that have been defined such as
175 * RSM_KERNEL_AGENT, RSM_OPS, RSM_IMPORT, RSM_EXPORT etc. A macro,
176 * DBG_ADDCATEGORY is used to add in another category to the currently
177 * specified category value so that the component using this new category
178 * can also effectively log debug messages. Thus, the category of a specific
179 * message is some combination of the available categories and we can define
180 * sub-categories if we want a finer level of granularity.
181 *
182 * The level defines the severity of the message. Different level values are
183 * defined, with RSM_ERR being the most severe and RSM_DEBUG_VERBOSE being
184 * the least severe(debug level is 0).
185 *
186 * DBG_DEFINE and DBG_DEFINE_STR are macros provided to declare a debug
187 * variable or a string respectively.
188 *
189 *
190 * NOTES:
191 *
192 * Special Fork and Exec Handling:
193 * -------------------------------
194 *
195 * The backing physical pages of an exported segment are always locked down.
196 * Thus, there are two cases in which a process having exported segments
197 * will cause a cpu to hang: (1) the process invokes exec; (2) a process
198 * forks and invokes exit before the duped file descriptors for the export
199 * segments are closed in the child process. The hang is caused because the
200 * address space release algorithm in Solaris VM subsystem is based on a
201 * non-blocking loop which does not terminate while segments are locked
202 * down. In addition to this, Solaris VM subsystem lacks a callback
203 * mechanism to the rsm kernel agent to allow unlocking these export
204 * segment pages.
205 *
206 * In order to circumvent this problem, the kernel agent does the following.
207 * The Solaris VM subsystem keeps memory segments in increasing order of
 * virtual addresses. Thus a special page(special_exit_offset) is allocated
209 * by the kernel agent and is mmapped into the heap area of the process address
210 * space(the mmap is done by the RSMAPI library). During the mmap processing
211 * of this special page by the devmap infrastructure, a callback(the same
212 * devmap context management callbacks discussed above) is registered for an
213 * unmap.
214 *
215 * As discussed above, this page is processed by the Solaris address space
216 * release code before any of the exported segments pages(which are allocated
217 * from high memory). It is during this processing that the unmap callback gets
218 * called and this callback is responsible for force destroying the exported
219 * segments and thus eliminating the problem of locked pages.
220 *
221 * Flow-control:
222 * ------------
223 *
224 * A credit based flow control algorithm is used for messages whose
225 * processing cannot be done in the interrupt context because it might
226 * involve invoking rsmpi calls, or might take a long time to complete
227 * or might need to allocate resources. The algorithm operates on a per
228 * path basis. To send a message the pathend needs to have a credit and
229 * it consumes one for every message that is flow controlled. On the
230 * receiving pathend the message is put on a msgbuf_queue and a task is
231 * dispatched on the worker thread - recv_taskq where it is processed.
232 * After processing the message, the receiving pathend dequeues the message,
233 * and if it has processed > RSMIPC_LOTSFREE_MSGBUFS messages sends
234 * credits to the sender pathend.
235 *
236 * RSM_DRTEST:
237 * -----------
238 *
 * This is used to enable the DR testing using a test driver on test
 * platforms which do not support DR.
241 *
242 */
243
244 #include <sys/types.h>
245 #include <sys/param.h>
246 #include <sys/user.h>
247 #include <sys/buf.h>
248 #include <sys/systm.h>
249 #include <sys/cred.h>
250 #include <sys/vm.h>
251 #include <sys/uio.h>
252 #include <vm/seg.h>
253 #include <vm/page.h>
254 #include <sys/stat.h>
255
256 #include <sys/time.h>
257 #include <sys/errno.h>
258
259 #include <sys/file.h>
260 #include <sys/uio.h>
261 #include <sys/proc.h>
262 #include <sys/mman.h>
263 #include <sys/open.h>
264 #include <sys/atomic.h>
265 #include <sys/mem_config.h>
266
267
268 #include <sys/ddi.h>
269 #include <sys/devops.h>
270 #include <sys/ddidevmap.h>
271 #include <sys/sunddi.h>
272 #include <sys/esunddi.h>
273 #include <sys/ddi_impldefs.h>
274
275 #include <sys/kmem.h>
276 #include <sys/conf.h>
277 #include <sys/devops.h>
278 #include <sys/ddi_impldefs.h>
279
280 #include <sys/modctl.h>
281
282 #include <sys/policy.h>
283 #include <sys/types.h>
284 #include <sys/conf.h>
285 #include <sys/param.h>
286
287 #include <sys/taskq.h>
288
289 #include <sys/rsm/rsm_common.h>
290 #include <sys/rsm/rsmapi_common.h>
291 #include <sys/rsm/rsm.h>
292 #include <rsm_in.h>
293 #include <sys/rsm/rsmka_path_int.h>
294 #include <sys/rsm/rsmpi.h>
295
296 #include <sys/modctl.h>
297 #include <sys/debug.h>
298
299 #include <sys/tuneable.h>
300
/*
 * DR test hooks.  When RSM_DRTEST is defined, rsm_attach() and rsm_detach()
 * register and unregister rsm_dr_callback_vec through these routines
 * instead of kphysm_setup_func_register()/kphysm_setup_func_unregister().
 */
#ifdef	RSM_DRTEST
extern int rsm_kphysm_setup_func_register(kphysm_setup_vector_t *vec,
    void *arg);
extern void rsm_kphysm_setup_func_unregister(kphysm_setup_vector_t *vec,
    void *arg);
#endif
307
/*
 * Routines and data that are declared extern here, i.e. defined outside
 * this file.
 */

/* NOTE(review): presumably the routine behind DBG_PRINTF - confirm */
extern void dbg_printf(int category, int level, char *fmt, ...);
/* called from _init() and _fini() respectively */
extern void rsmka_pathmanager_init();
extern void rsmka_pathmanager_cleanup();
extern void rele_sendq_token(sendq_token_t *);
extern rsm_addr_t get_remote_hwaddr(adapter_t *, rsm_node_id_t);
extern rsm_node_id_t get_remote_nodeid(adapter_t *, rsm_addr_t);
extern int rsmka_topology_ioctl(caddr_t, int, int);

/* locks, condition variables and other data shared with other files */
extern pri_t maxclsyspri;
extern work_queue_t work_queue;
extern kmutex_t ipc_info_lock;
extern kmutex_t ipc_info_cvlock;
extern kcondvar_t ipc_info_cv;
extern kmutex_t path_hold_cvlock;
extern kcondvar_t path_hold_cv;

/* initialized by _init() in this file */
extern kmutex_t rsmka_buf_lock;

/* path, adapter, sendq token and message buffer routines */
extern path_t *rsm_find_path(char *, int, rsm_addr_t);
extern adapter_t *rsmka_lookup_adapter(char *, int);
extern sendq_token_t *rsmka_get_sendq_token(rsm_node_id_t, sendq_token_t *);
extern boolean_t rsmka_do_path_active(path_t *, int);
extern boolean_t rsmka_check_node_alive(rsm_node_id_t);
extern void rsmka_release_adapter(adapter_t *);
extern void rsmka_enqueue_msgbuf(path_t *path, void *data);
extern void rsmka_dequeue_msgbuf(path_t *path);
extern msgbuf_elem_t *rsmka_gethead_msgbuf(path_t *path);
335 /* lint -w2 */
336
/*
 * Character device entry points; these are installed in rsm_cb_ops.
 */
static int rsm_open(dev_t *, int, int, cred_t *);
static int rsm_close(dev_t, int, int, cred_t *);
static int rsm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
    cred_t *credp, int *rvalp);
static int rsm_devmap(dev_t, devmap_cookie_t, offset_t, size_t, size_t *,
    uint_t);
static int rsm_segmap(dev_t, off_t, struct as *, caddr_t *, off_t, uint_t,
    uint_t, uint_t, cred_t *);
static int rsm_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
    struct pollhead **phpp);

/*
 * Autoconfiguration entry points; these are installed in rsm_ops.
 */
static int rsm_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int rsm_attach(dev_info_t *, ddi_attach_cmd_t);
static int rsm_detach(dev_info_t *, ddi_detach_cmd_t);

/*
 * Forward declarations of file-local helpers (IPC, forced unload and
 * disconnect, republish, ACL cleanup).
 */
static int rsmipc_send(rsm_node_id_t, rsmipc_request_t *, rsmipc_reply_t *);
static void rsm_force_unload(rsm_node_id_t, rsm_memseg_id_t, boolean_t);
static void rsm_send_importer_disconnects(rsm_memseg_id_t, rsm_node_id_t);
static void rsm_send_republish(rsm_memseg_id_t, rsmapi_access_entry_t *, int,
    rsm_permission_t);
static void rsm_export_force_destroy(ddi_umem_cookie_t *);
static void rsmacl_free(rsmapi_access_entry_t *, int);
static void rsmpiacl_free(rsm_access_entry_t *, int);

/*
 * Forward declarations of file-local helpers (page accounting, mapinfo
 * handling, quiesce and segment suspend/resume).
 */
static int rsm_inc_pgcnt(pgcnt_t);
static void rsm_dec_pgcnt(pgcnt_t);
static void rsm_free_mapinfo(rsm_mapinfo_t *mapinfop);
static rsm_mapinfo_t *rsm_get_mapinfo(rsmseg_t *, off_t, size_t, off_t *,
    size_t *);
static void exporter_quiesce();
static void rsmseg_suspend(rsmseg_t *, int *);
static void rsmsegshare_suspend(rsmseg_t *);
static int rsmseg_resume(rsmseg_t *, void **);
static int rsmsegshare_resume(rsmseg_t *);
371
/*
 * Character/block entry point table for the RSM pseudo driver.  Only open,
 * close, ioctl, devmap, segmap and chpoll are provided by this file; the
 * other entry point slots hold nodev or NULL.  The property operation is
 * the generic ddi_prop_op.
 */
static struct cb_ops rsm_cb_ops = {
	rsm_open,		/* open */
	rsm_close,		/* close */
	nodev,			/* strategy */
	nodev,			/* print */
	nodev,			/* dump */
	nodev,			/* read */
	nodev,			/* write */
	rsm_ioctl,		/* ioctl */
	rsm_devmap,		/* devmap */
	NULL,			/* mmap */
	rsm_segmap,		/* segmap */
	rsm_chpoll,		/* poll */
	ddi_prop_op,		/* cb_prop_op */
	0,			/* streamtab */
	D_NEW|D_MP|D_DEVMAP,	/* Driver compatibility flag */
	0,	/* NOTE(review): presumably cb_rev per cb_ops(9S) - confirm */
	0,	/* NOTE(review): presumably cb_aread - confirm */
	0	/* NOTE(review): presumably cb_awrite - confirm */
};
392
/*
 * Device operations table.  It supplies the getinfo/attach/detach
 * autoconfiguration entry points and points at rsm_cb_ops for character
 * device access.  This table is handed to the kernel through modldrv.
 */
static struct dev_ops rsm_ops = {
	DEVO_REV,		/* devo_rev, */
	0,			/* refcnt */
	rsm_info,		/* get_dev_info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	rsm_attach,		/* attach */
	rsm_detach,		/* detach */
	nodev,			/* reset */
	&rsm_cb_ops,		/* driver operations */
	(struct bus_ops *)0,	/* bus operations */
	0,	/* NOTE(review): presumably devo_power per dev_ops(9S) - confirm */
	ddi_quiesce_not_needed,		/* quiesce */
};
407
408 /*
409 * Module linkage information for the kernel.
410 */
411
/*
 * Driver linkage structure: names the module type, the descriptive string
 * for the driver and its dev_ops table (rsm_ops).
 */
static struct modldrv modldrv = {
	&mod_driverops,	/* Type of module.  This one is a pseudo driver */
	"Remote Shared Memory Driver",	/* descriptive name of the driver */
	&rsm_ops,	/* driver ops */
};
417
/*
 * Module linkage passed to mod_install(), mod_info() and mod_remove() by
 * _init(), _info() and _fini().  modldrv is the only linkage element; the
 * remaining slots are zero.
 */
static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modldrv,
	0,
	0,
	0
};
425
/*
 * Dynamic reconfiguration (DR) callbacks: post-add, pre-delete and
 * post-delete.
 */
static void rsm_dr_callback_post_add(void *arg, pgcnt_t delta);
static int rsm_dr_callback_pre_del(void *arg, pgcnt_t delta);
static void rsm_dr_callback_post_del(void *arg, pgcnt_t delta, int cancelled);

/*
 * Callback vector that rsm_attach() registers (and rsm_detach()
 * unregisters) when the "enable-dynamic-reconfiguration" property is
 * non-zero.
 */
static kphysm_setup_vector_t rsm_dr_callback_vec = {
	KPHYSM_SETUP_VECTOR_VERSION,
	rsm_dr_callback_post_add,
	rsm_dr_callback_pre_del,
	rsm_dr_callback_post_del
};
436
/*
 * This flag can be changed to 0 to help with PIT testing.  While it is 0,
 * _fini() returns EBUSY and the module cannot be unloaded.
 */
int rsmka_modunloadok = 1;
/* NOTE(review): presumably counts requests that got no reply - confirm */
int no_reply_cnt = 0;

/* NOTE(review): presumably error counters, updated elsewhere - confirm */
uint64_t rsm_ctrlmsg_errcnt = 0;
uint64_t rsm_ipcsend_errcnt = 0;
443
#define	MAX_NODES 64

/*
 * rsm_drv_data carries the driver state (drv_state) together with the
 * drv_lock/drv_cv pair used by rsm_attach() and rsm_detach() to change it.
 * rsm_resource is the resource table; its rsmrc_lock is the reader-writer
 * lock initialized in _init().
 */
static struct rsm_driver_data rsm_drv_data;
static struct rsmresource_table rsm_resource;

/* resource table management */
static void rsmresource_insert(minor_t, rsmresource_t *, rsm_resource_type_t);
static void rsmresource_destroy(void);
static int rsmresource_alloc(minor_t *);
static rsmresource_t *rsmresource_free(minor_t rnum);
/* segment teardown */
static int rsm_closeconnection(rsmseg_t *seg, void **cookie);
static int rsm_unpublish(rsmseg_t *seg, int mode);
static int rsm_unbind(rsmseg_t *seg);
/* segment hash table helpers */
static uint_t rsmhash(rsm_memseg_id_t key);
static void rsmhash_alloc(rsmhash_table_t *rhash, int size);
static void rsmhash_free(rsmhash_table_t *rhash, int size);
static void *rsmhash_getbkt(rsmhash_table_t *rhash, uint_t hashval);
static void **rsmhash_bktaddr(rsmhash_table_t *rhash, uint_t hashval);
static int rsm_send_notimporting(rsm_node_id_t dest, rsm_memseg_id_t segid,
    void *cookie);
/* the following routines are not static, i.e. have external linkage */
int rsm_disconnect(rsmseg_t *seg);
void rsmseg_unload(rsmseg_t *);
void rsm_suspend_complete(rsm_node_id_t src_node, int flag);

rsm_intr_hand_ret_t rsm_srv_func(rsm_controller_object_t *chd,
    rsm_intr_q_op_t opcode, rsm_addr_t src,
    void *data, size_t size, rsm_intr_hand_arg_t arg);

static void rsm_intr_callback(void *, rsm_addr_t, rsm_intr_hand_arg_t);

/* NOTE(review): presumably the RSM node id of the local node - confirm */
rsm_node_id_t my_nodeid;
474
/*
 * cookie, va, offsets and length for the barrier.  rsm_attach() sizes the
 * barrier area as (max_segs + 1) generation numbers rounded up to a page
 * and allocates it with ddi_umem_alloc().
 */
static rsm_gnum_t *bar_va;
static ddi_umem_cookie_t bar_cookie;
static off_t barrier_offset;
static size_t barrier_size;
/* from the "max-segments" property; RSM_MAX_NUM_SEG when that is 0 */
static int max_segs;

/* cookie for the trash memory, allocated in rsm_attach() */
static ddi_umem_cookie_t remap_cookie;

/* set to RSM_USER_APP_ID_BASE in rsm_attach() */
static rsm_memseg_id_t rsm_nextavail_segmentid;

extern taskq_t *work_taskq;
extern char *taskq_name;

/* non-NULL while an instance is attached; only one instance is supported */
static dev_info_t *rsm_dip;	/* private copy of devinfo pointer */

static rsmhash_table_t rsm_export_segs;	/* list of exported segs */
rsmhash_table_t rsm_import_segs;	/* list of imported segs */
static rsmhash_table_t rsm_event_queues;	/* list of event queues */

static rsm_ipc_t rsm_ipc;	/* ipc info */

/* list of nodes to which RSMIPC_MSG_SUSPEND has been sent */
static list_head_t rsm_suspend_list;

/* list of descriptors for remote importers */
static importers_table_t importer_list;

/* lock and condition variable pair, initialized in _init() */
kmutex_t rsm_suspend_cvlock;
kcondvar_t rsm_suspend_cv;

static kmutex_t rsm_lock;

/* loopback adapter state; rsm_attach() calls rsmka_init_loopback() */
adapter_t loopback_adapter;
rsm_controller_attr_t loopback_attr;
511
int rsmipc_send_controlmsg(path_t *path, int msgtype);

/*
 * Called from rsm_attach() to initialize the null_rsmpi_ops vector and the
 * loopback adapter.
 */
void rsmka_init_loopback();

/*
 * Prototypes of the rsmka_null_* export-side operations.
 * NOTE(review): presumably these are the entries installed in
 * null_rsmpi_ops for the loopback adapter - confirm against
 * rsmka_init_loopback().
 */
int rsmka_null_seg_create(
    rsm_controller_handle_t,
    rsm_memseg_export_handle_t *,
    size_t,
    uint_t,
    rsm_memory_local_t *,
    rsm_resource_callback_t,
    rsm_resource_callback_arg_t);

int rsmka_null_seg_destroy(
    rsm_memseg_export_handle_t);

int rsmka_null_bind(
    rsm_memseg_export_handle_t,
    off_t,
    rsm_memory_local_t *,
    rsm_resource_callback_t,
    rsm_resource_callback_arg_t);

int rsmka_null_unbind(
    rsm_memseg_export_handle_t,
    off_t,
    size_t);

int rsmka_null_rebind(
    rsm_memseg_export_handle_t,
    off_t,
    rsm_memory_local_t *,
    rsm_resource_callback_t,
    rsm_resource_callback_arg_t);

int rsmka_null_publish(
    rsm_memseg_export_handle_t,
    rsm_access_entry_t [],
    uint_t,
    rsm_memseg_id_t,
    rsm_resource_callback_t,
    rsm_resource_callback_arg_t);


int rsmka_null_republish(
    rsm_memseg_export_handle_t,
    rsm_access_entry_t [],
    uint_t,
    rsm_resource_callback_t,
    rsm_resource_callback_arg_t);

int rsmka_null_unpublish(
    rsm_memseg_export_handle_t);

/* RSMPI operations vector initialized by rsmka_init_loopback() */
rsm_ops_t null_rsmpi_ops;
567
/*
 * data and locks to keep track of total amount of exported memory
 *
 * rsm_attach() resets rsm_pgcnt to 0 and sets rsm_pgcnt_max to the
 * "max-exported-memory" percentage of maxmem; a value of 0 indicates no
 * fixed upper limit.
 */
static pgcnt_t rsm_pgcnt;
static pgcnt_t rsm_pgcnt_max;	/* max allowed */
static kmutex_t rsm_pgcnt_lock;

/* value of the "enable-dynamic-reconfiguration" property (default 1) */
static int rsm_enable_dr;

static char loopback_str[] = "loopback";

/*
 * Number of buckets used for the segment hash tables.  Set to RSM_HASHSZ
 * in _init() and overridden by the "segment-hashtable-size" property in
 * rsm_attach().
 */
int rsm_hash_size;
580
581 /*
582 * The locking model is as follows:
583 *
584 * Local operations:
 * find resource - grab reader lock on resource list
586 * insert rc - grab writer lock
587 * delete rc - grab writer lock and resource mutex
588 * read/write - no lock
589 *
590 * Remote invocations:
591 * find resource - grab read lock and resource mutex
592 *
593 * State:
594 * resource state - grab resource mutex
595 */
596
597 int
598 _init(void)
599 {
600 int e;
601
602 e = mod_install(&modlinkage);
603 if (e != 0) {
604 return (e);
605 }
606
607 mutex_init(&rsm_lock, NULL, MUTEX_DRIVER, NULL);
608
609 mutex_init(&rsmka_buf_lock, NULL, MUTEX_DEFAULT, NULL);
610
611
612 rw_init(&rsm_resource.rsmrc_lock, NULL, RW_DRIVER, NULL);
613
614 rsm_hash_size = RSM_HASHSZ;
615
616 rw_init(&rsm_export_segs.rsmhash_rw, NULL, RW_DRIVER, NULL);
617
618 rw_init(&rsm_import_segs.rsmhash_rw, NULL, RW_DRIVER, NULL);
619
620 mutex_init(&importer_list.lock, NULL, MUTEX_DRIVER, NULL);
621
622 mutex_init(&rsm_ipc.lock, NULL, MUTEX_DRIVER, NULL);
623 cv_init(&rsm_ipc.cv, NULL, CV_DRIVER, 0);
624
625 mutex_init(&rsm_suspend_cvlock, NULL, MUTEX_DRIVER, NULL);
626 cv_init(&rsm_suspend_cv, NULL, CV_DRIVER, 0);
627
628 mutex_init(&rsm_drv_data.drv_lock, NULL, MUTEX_DRIVER, NULL);
629 cv_init(&rsm_drv_data.drv_cv, NULL, CV_DRIVER, 0);
630
631 rsm_ipc.count = RSMIPC_SZ;
632 rsm_ipc.wanted = 0;
633 rsm_ipc.sequence = 0;
634
635 (void) mutex_init(&rsm_pgcnt_lock, NULL, MUTEX_DRIVER, NULL);
636
637 for (e = 0; e < RSMIPC_SZ; e++) {
638 rsmipc_slot_t *slot = &rsm_ipc.slots[e];
639
640 RSMIPC_SET(slot, RSMIPC_FREE);
641 mutex_init(&slot->rsmipc_lock, NULL, MUTEX_DRIVER, NULL);
642 cv_init(&slot->rsmipc_cv, NULL, CV_DRIVER, 0);
643 }
644
645 /*
646 * Initialize the suspend message list
647 */
648 rsm_suspend_list.list_head = NULL;
649 mutex_init(&rsm_suspend_list.list_lock, NULL, MUTEX_DRIVER, NULL);
650
651 /*
652 * It is assumed here that configuration data is available
653 * during system boot since _init may be called at that time.
654 */
655
656 rsmka_pathmanager_init();
657
658 DBG_PRINTF((RSM_KERNEL_AGENT, RSM_DEBUG_VERBOSE,
659 "rsm: _init done\n"));
660
661 return (DDI_SUCCESS);
662
663 }
664
665 int
666 _info(struct modinfo *modinfop)
667 {
668
669 return (mod_info(&modlinkage, modinfop));
670 }
671
672 int
673 _fini(void)
674 {
675 int e;
676
677 DBG_PRINTF((RSM_KERNEL_AGENT, RSM_DEBUG_VERBOSE,
678 "rsm: _fini enter\n"));
679
680 /*
681 * The rsmka_modunloadok flag is simply used to help with
682 * the PIT testing. Make this flag 0 to disallow modunload.
683 */
684 if (rsmka_modunloadok == 0)
685 return (EBUSY);
686
687 /* rsm_detach will be called as a result of mod_remove */
688 e = mod_remove(&modlinkage);
689 if (e) {
690 DBG_PRINTF((RSM_KERNEL_AGENT, RSM_ERR,
691 "Unable to fini RSM %x\n", e));
692 return (e);
693 }
694
695 rsmka_pathmanager_cleanup();
696
697 rw_destroy(&rsm_resource.rsmrc_lock);
698
699 rw_destroy(&rsm_export_segs.rsmhash_rw);
700 rw_destroy(&rsm_import_segs.rsmhash_rw);
701 rw_destroy(&rsm_event_queues.rsmhash_rw);
702
703 mutex_destroy(&importer_list.lock);
704
705 mutex_destroy(&rsm_ipc.lock);
706 cv_destroy(&rsm_ipc.cv);
707
708 (void) mutex_destroy(&rsm_suspend_list.list_lock);
709
710 (void) mutex_destroy(&rsm_pgcnt_lock);
711
712 DBG_PRINTF((RSM_KERNEL_AGENT, RSM_DEBUG_VERBOSE, "_fini done\n"));
713
714 return (DDI_SUCCESS);
715
716 }
717
718 /*ARGSUSED1*/
719 static int
720 rsm_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
721 {
722 minor_t rnum;
723 int percent;
724 int ret;
725 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_DDI);
726
727 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_attach enter\n"));
728
729 switch (cmd) {
730 case DDI_ATTACH:
731 break;
732 case DDI_RESUME:
733 default:
734 DBG_PRINTF((category, RSM_ERR,
735 "rsm:rsm_attach - cmd not supported\n"));
736 return (DDI_FAILURE);
737 }
738
739 if (rsm_dip != NULL) {
740 DBG_PRINTF((category, RSM_ERR,
741 "rsm:rsm_attach - supports only "
742 "one instance\n"));
743 return (DDI_FAILURE);
744 }
745
746 rsm_enable_dr = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
747 DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
748 "enable-dynamic-reconfiguration", 1);
749
750 mutex_enter(&rsm_drv_data.drv_lock);
751 rsm_drv_data.drv_state = RSM_DRV_REG_PROCESSING;
752 mutex_exit(&rsm_drv_data.drv_lock);
753
754 if (rsm_enable_dr) {
755 #ifdef RSM_DRTEST
756 ret = rsm_kphysm_setup_func_register(&rsm_dr_callback_vec,
757 (void *)NULL);
758 #else
759 ret = kphysm_setup_func_register(&rsm_dr_callback_vec,
760 (void *)NULL);
761 #endif
762 if (ret != 0) {
763 mutex_exit(&rsm_drv_data.drv_lock);
764 cmn_err(CE_CONT, "rsm:rsm_attach - Dynamic "
765 "reconfiguration setup failed\n");
766 return (DDI_FAILURE);
767 }
768 }
769
770 mutex_enter(&rsm_drv_data.drv_lock);
771 ASSERT(rsm_drv_data.drv_state == RSM_DRV_REG_PROCESSING);
772 rsm_drv_data.drv_state = RSM_DRV_OK;
773 cv_broadcast(&rsm_drv_data.drv_cv);
774 mutex_exit(&rsm_drv_data.drv_lock);
775
776 /*
777 * page_list_read_lock();
778 * xx_setup();
779 * page_list_read_unlock();
780 */
781
782 rsm_hash_size = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
783 DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
784 "segment-hashtable-size", RSM_HASHSZ);
785 if (rsm_hash_size == 0) {
786 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
787 "rsm: segment-hashtable-size in rsm.conf "
788 "must be greater than 0, defaulting to 128\n"));
789 rsm_hash_size = RSM_HASHSZ;
790 }
791
792 DBG_PRINTF((category, RSM_DEBUG, "rsm_attach rsm_hash_size: %d\n",
793 rsm_hash_size));
794
795 rsm_pgcnt = 0;
796
797 percent = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
798 DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
799 "max-exported-memory", 0);
800 if (percent < 0) {
801 DBG_PRINTF((category, RSM_ERR,
802 "rsm:rsm_attach not enough memory available to "
803 "export, or max-exported-memory set incorrectly.\n"));
804 return (DDI_FAILURE);
805 }
806 /* 0 indicates no fixed upper limit. maxmem is the max */
807 /* available pageable physical mem */
808 rsm_pgcnt_max = (percent*maxmem)/100;
809
810 if (rsm_pgcnt_max > 0) {
811 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
812 "rsm: Available physical memory = %lu pages, "
813 "Max exportable memory = %lu pages",
814 maxmem, rsm_pgcnt_max));
815 }
816
817 /*
818 * Create minor number
819 */
820 if (rsmresource_alloc(&rnum) != RSM_SUCCESS) {
821 DBG_PRINTF((category, RSM_ERR,
822 "rsm: rsm_attach - Unable to get "
823 "minor number\n"));
824 return (DDI_FAILURE);
825 }
826
827 ASSERT(rnum == RSM_DRIVER_MINOR);
828
829 if (ddi_create_minor_node(devi, DRIVER_NAME, S_IFCHR,
830 rnum, DDI_PSEUDO, NULL) == DDI_FAILURE) {
831 DBG_PRINTF((category, RSM_ERR,
832 "rsm: rsm_attach - unable to allocate "
833 "minor #\n"));
834 return (DDI_FAILURE);
835 }
836
837 rsm_dip = devi;
838 /*
839 * Allocate the hashtables
840 */
841 rsmhash_alloc(&rsm_export_segs, rsm_hash_size);
842 rsmhash_alloc(&rsm_import_segs, rsm_hash_size);
843
844 importer_list.bucket = (importing_token_t **)
845 kmem_zalloc(rsm_hash_size * sizeof (importing_token_t *), KM_SLEEP);
846
847 /*
848 * Allocate a resource struct
849 */
850 {
851 rsmresource_t *p;
852
853 p = (rsmresource_t *)kmem_zalloc(sizeof (*p), KM_SLEEP);
854
855 mutex_init(&p->rsmrc_lock, NULL, MUTEX_DRIVER, (void *) NULL);
856
857 rsmresource_insert(rnum, p, RSM_RESOURCE_BAR);
858 }
859
860 /*
861 * Based on the rsm.conf property max-segments, determine the maximum
862 * number of segments that can be exported/imported. This is then used
863 * to determine the size for barrier failure pages.
864 */
865
866 /* First get the max number of segments from the rsm.conf file */
867 max_segs = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
868 DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
869 "max-segments", 0);
870 if (max_segs == 0) {
871 /* Use default number of segments */
872 max_segs = RSM_MAX_NUM_SEG;
873 }
874
875 /*
876 * Based on the max number of segments allowed, determine the barrier
877 * page size. add 1 to max_segs since the barrier page itself uses
878 * a slot
879 */
880 barrier_size = roundup((max_segs + 1) * sizeof (rsm_gnum_t),
881 PAGESIZE);
882
883 /*
884 * allocation of the barrier failure page
885 */
886 bar_va = (rsm_gnum_t *)ddi_umem_alloc(barrier_size,
887 DDI_UMEM_SLEEP, &bar_cookie);
888
889 /*
890 * Set the barrier_offset
891 */
892 barrier_offset = 0;
893
894 /*
895 * Allocate a trash memory and get a cookie for it. This will be used
896 * when remapping segments during force disconnects. Allocate the
897 * trash memory with a large size which is page aligned.
898 */
899 (void) ddi_umem_alloc((size_t)TRASHSIZE,
900 DDI_UMEM_TRASH, &remap_cookie);
901
902 /* initialize user segment id allocation variable */
903 rsm_nextavail_segmentid = (rsm_memseg_id_t)RSM_USER_APP_ID_BASE;
904
905 /*
906 * initialize the null_rsmpi_ops vector and the loopback adapter
907 */
908 rsmka_init_loopback();
909
910
911 ddi_report_dev(devi);
912
913 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_attach done\n"));
914
915 return (DDI_SUCCESS);
916 }
917
/*
 * The call to mod_remove in the _fini routine will cause the system
 * to call rsm_detach
 */
922 /*ARGSUSED*/
923 static int
924 rsm_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
925 {
926 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_DDI);
927
928 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_detach enter\n"));
929
930 switch (cmd) {
931 case DDI_DETACH:
932 break;
933 default:
934 DBG_PRINTF((category, RSM_ERR,
935 "rsm:rsm_detach - cmd %x not supported\n",
936 cmd));
937 return (DDI_FAILURE);
938 }
939
940 mutex_enter(&rsm_drv_data.drv_lock);
941 while (rsm_drv_data.drv_state != RSM_DRV_OK)
942 cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
943 rsm_drv_data.drv_state = RSM_DRV_UNREG_PROCESSING;
944 mutex_exit(&rsm_drv_data.drv_lock);
945
946 /*
947 * Unregister the DR callback functions
948 */
949 if (rsm_enable_dr) {
950 #ifdef RSM_DRTEST
951 rsm_kphysm_setup_func_unregister(&rsm_dr_callback_vec,
952 (void *)NULL);
953 #else
954 kphysm_setup_func_unregister(&rsm_dr_callback_vec,
955 (void *)NULL);
956 #endif
957 }
958
959 mutex_enter(&rsm_drv_data.drv_lock);
960 ASSERT(rsm_drv_data.drv_state == RSM_DRV_UNREG_PROCESSING);
961 rsm_drv_data.drv_state = RSM_DRV_NEW;
962 mutex_exit(&rsm_drv_data.drv_lock);
963
964 ASSERT(rsm_suspend_list.list_head == NULL);
965
966 /*
967 * Release all resources, seglist, controller, ...
968 */
969
970 /* remove intersend queues */
971 /* remove registered services */
972
973
974 ddi_remove_minor_node(dip, DRIVER_NAME);
975 rsm_dip = NULL;
976
977 /*
978 * Free minor zero resource
979 */
980 {
981 rsmresource_t *p;
982
983 p = rsmresource_free(RSM_DRIVER_MINOR);
984 if (p) {
985 mutex_destroy(&p->rsmrc_lock);
986 kmem_free((void *)p, sizeof (*p));
987 }
988 }
989
990 /*
991 * Free resource table
992 */
993
994 rsmresource_destroy();
995
996 /*
997 * Free the hash tables
998 */
999 rsmhash_free(&rsm_export_segs, rsm_hash_size);
1000 rsmhash_free(&rsm_import_segs, rsm_hash_size);
1001
1002 kmem_free((void *)importer_list.bucket,
1003 rsm_hash_size * sizeof (importing_token_t *));
1004 importer_list.bucket = NULL;
1005
1006
1007 /* free barrier page */
1008 if (bar_cookie != NULL) {
1009 ddi_umem_free(bar_cookie);
1010 }
1011 bar_va = NULL;
1012 bar_cookie = NULL;
1013
1014 /*
1015 * Free the memory allocated for the trash
1016 */
1017 if (remap_cookie != NULL) {
1018 ddi_umem_free(remap_cookie);
1019 }
1020 remap_cookie = NULL;
1021
1022 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_detach done\n"));
1023
1024 return (DDI_SUCCESS);
1025 }
1026
1027 /*ARGSUSED*/
1028 static int
1029 rsm_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
1030 {
1031 register int error;
1032 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_DDI);
1033
1034 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_info enter\n"));
1035
1036 switch (infocmd) {
1037 case DDI_INFO_DEVT2DEVINFO:
1038 if (rsm_dip == NULL)
1039 error = DDI_FAILURE;
1040 else {
1041 *result = (void *)rsm_dip;
1042 error = DDI_SUCCESS;
1043 }
1044 break;
1045 case DDI_INFO_DEVT2INSTANCE:
1046 *result = (void *)0;
1047 error = DDI_SUCCESS;
1048 break;
1049 default:
1050 error = DDI_FAILURE;
1051 }
1052
1053 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_info done\n"));
1054 return (error);
1055 }
1056
1057 adapter_t *
1058 rsm_getadapter(rsm_ioctlmsg_t *msg, int mode)
1059 {
1060 adapter_t *adapter;
1061 char adapter_devname[MAXNAMELEN];
1062 int instance;
1063 DBG_DEFINE(category,
1064 RSM_KERNEL_AGENT | RSM_IMPORT | RSM_EXPORT | RSM_IOCTL);
1065
1066 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_getadapter enter\n"));
1067
1068 instance = msg->cnum;
1069
1070 if ((msg->cname_len <= 0) || (msg->cname_len > MAXNAMELEN)) {
1071 return (NULL);
1072 }
1073
1074 if (ddi_copyin(msg->cname, adapter_devname, msg->cname_len, mode))
1075 return (NULL);
1076
1077 if (strcmp(adapter_devname, "loopback") == 0)
1078 return (&loopback_adapter);
1079
1080 adapter = rsmka_lookup_adapter(adapter_devname, instance);
1081
1082 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_getadapter done\n"));
1083
1084 return (adapter);
1085 }
1086
1087
1088 /*
1089 * *********************** Resource Number Management ********************
1090 * All resources are stored in a simple hash table. The table is an array
1091 * of pointers to resource blks. Each blk contains:
1092 * base - base number of this blk
1093 * used - number of used slots in this blk.
1094 * blks - array of pointers to resource items.
1095 * An entry in a resource blk is empty if it's NULL.
1096 *
1097 * We start with no resource array. Each time we run out of slots, we
1098 * reallocate a new larger array and copy the pointer to the new array and
1099 * a new resource blk is allocated and added to the hash table.
1100 *
1101 * The resource control block contains:
1102 * root - array of pointer of resource blks
1103 * sz - current size of array.
1104 * len - last valid entry in array.
1105 *
1106 * A search operation based on a resource number is as follows:
1107 * index = rnum / RESOURCE_BLKSZ;
1108 * ASSERT(index < resource_block.len);
1109 * ASSERT(index < resource_block.sz);
1110 * offset = rnum % RESOURCE_BLKSZ;
1111 * ASSERT(offset >= resource_block.root[index]->base);
1112 * ASSERT(offset < resource_block.root[index]->base + RESOURCE_BLKSZ);
1113 * return resource_block.root[index]->blks[offset];
1114 *
 * A resource blk is freed when its used count reaches zero.
1116 */
1117 static int
1118 rsmresource_alloc(minor_t *rnum)
1119 {
1120
1121 /* search for available resource slot */
1122 int i, j, empty = -1;
1123 rsmresource_blk_t *blk;
1124
1125 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1126 "rsmresource_alloc enter\n"));
1127
1128 rw_enter(&rsm_resource.rsmrc_lock, RW_WRITER);
1129
1130 /* Try to find an empty slot */
1131 for (i = 0; i < rsm_resource.rsmrc_len; i++) {
1132 blk = rsm_resource.rsmrc_root[i];
1133 if (blk != NULL && blk->rsmrcblk_avail > 0) {
1134 /* found an empty slot in this blk */
1135 for (j = 0; j < RSMRC_BLKSZ; j++) {
1136 if (blk->rsmrcblk_blks[j] == NULL) {
1137 *rnum = (minor_t)
1138 (j + (i * RSMRC_BLKSZ));
1139 /*
1140 * obey gen page limits
1141 */
1142 if (*rnum >= max_segs + 1) {
1143 if (empty < 0) {
1144 rw_exit(&rsm_resource.
1145 rsmrc_lock);
1146 DBG_PRINTF((
1147 RSM_KERNEL_ALL,
1148 RSM_ERR,
1149 "rsmresource"
1150 "_alloc failed:"
1151 "not enough res"
1152 "%d\n", *rnum));
1153 return (RSMERR_INSUFFICIENT_RESOURCES);
1154 } else {
1155 /* use empty slot */
1156 break;
1157 }
1158
1159 }
1160
1161 blk->rsmrcblk_blks[j] = RSMRC_RESERVED;
1162 blk->rsmrcblk_avail--;
1163 rw_exit(&rsm_resource.rsmrc_lock);
1164 DBG_PRINTF((RSM_KERNEL_ALL,
1165 RSM_DEBUG_VERBOSE,
1166 "rsmresource_alloc done\n"));
1167 return (RSM_SUCCESS);
1168 }
1169 }
1170 } else if (blk == NULL && empty < 0) {
1171 /* remember first empty slot */
1172 empty = i;
1173 }
1174 }
1175
1176 /* Couldn't find anything, allocate a new blk */
1177 /*
1178 * Do we need to reallocate the root array
1179 */
1180 if (empty < 0) {
1181 if (rsm_resource.rsmrc_len == rsm_resource.rsmrc_sz) {
1182 /*
1183 * Allocate new array and copy current stuff into it
1184 */
1185 rsmresource_blk_t **p;
1186 uint_t newsz = (uint_t)rsm_resource.rsmrc_sz +
1187 RSMRC_BLKSZ;
1188 /*
1189 * Don't allocate more that max valid rnum
1190 */
1191 if (rsm_resource.rsmrc_len*RSMRC_BLKSZ >=
1192 max_segs + 1) {
1193 rw_exit(&rsm_resource.rsmrc_lock);
1194 return (RSMERR_INSUFFICIENT_RESOURCES);
1195 }
1196
1197 p = (rsmresource_blk_t **)kmem_zalloc(
1198 newsz * sizeof (*p),
1199 KM_SLEEP);
1200
1201 if (rsm_resource.rsmrc_root) {
1202 uint_t oldsz;
1203
1204 oldsz = (uint_t)(rsm_resource.rsmrc_sz *
1205 (int)sizeof (*p));
1206
1207 /*
1208 * Copy old data into new space and
1209 * free old stuff
1210 */
1211 bcopy(rsm_resource.rsmrc_root, p, oldsz);
1212 kmem_free(rsm_resource.rsmrc_root, oldsz);
1213 }
1214
1215 rsm_resource.rsmrc_root = p;
1216 rsm_resource.rsmrc_sz = (int)newsz;
1217 }
1218
1219 empty = rsm_resource.rsmrc_len;
1220 rsm_resource.rsmrc_len++;
1221 }
1222
1223 /*
1224 * Allocate a new blk
1225 */
1226 blk = (rsmresource_blk_t *)kmem_zalloc(sizeof (*blk), KM_SLEEP);
1227 ASSERT(rsm_resource.rsmrc_root[empty] == NULL);
1228 rsm_resource.rsmrc_root[empty] = blk;
1229 blk->rsmrcblk_avail = RSMRC_BLKSZ - 1;
1230
1231 /*
1232 * Allocate slot
1233 */
1234
1235 *rnum = (minor_t)(empty * RSMRC_BLKSZ);
1236
1237 /*
1238 * watch out not to exceed bounds of barrier page
1239 */
1240 if (*rnum >= max_segs + 1) {
1241 rw_exit(&rsm_resource.rsmrc_lock);
1242 DBG_PRINTF((RSM_KERNEL_ALL, RSM_ERR,
1243 "rsmresource_alloc failed %d\n", *rnum));
1244
1245 return (RSMERR_INSUFFICIENT_RESOURCES);
1246 }
1247 blk->rsmrcblk_blks[0] = RSMRC_RESERVED;
1248
1249
1250 rw_exit(&rsm_resource.rsmrc_lock);
1251
1252 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1253 "rsmresource_alloc done\n"));
1254
1255 return (RSM_SUCCESS);
1256 }
1257
1258 static rsmresource_t *
1259 rsmresource_free(minor_t rnum)
1260 {
1261
1262 /* search for available resource slot */
1263 int i, j;
1264 rsmresource_blk_t *blk;
1265 rsmresource_t *p;
1266
1267 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1268 "rsmresource_free enter\n"));
1269
1270 i = (int)(rnum / RSMRC_BLKSZ);
1271 j = (int)(rnum % RSMRC_BLKSZ);
1272
1273 if (i >= rsm_resource.rsmrc_len) {
1274 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1275 "rsmresource_free done\n"));
1276 return (NULL);
1277 }
1278
1279 rw_enter(&rsm_resource.rsmrc_lock, RW_WRITER);
1280
1281 ASSERT(rsm_resource.rsmrc_root);
1282 ASSERT(i < rsm_resource.rsmrc_len);
1283 ASSERT(i < rsm_resource.rsmrc_sz);
1284 blk = rsm_resource.rsmrc_root[i];
1285 if (blk == NULL) {
1286 rw_exit(&rsm_resource.rsmrc_lock);
1287 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1288 "rsmresource_free done\n"));
1289 return (NULL);
1290 }
1291
1292 ASSERT(blk->rsmrcblk_blks[j]); /* reserved or full */
1293
1294 p = blk->rsmrcblk_blks[j];
1295 if (p == RSMRC_RESERVED) {
1296 p = NULL;
1297 }
1298
1299 blk->rsmrcblk_blks[j] = NULL;
1300 blk->rsmrcblk_avail++;
1301 if (blk->rsmrcblk_avail == RSMRC_BLKSZ) {
1302 /* free this blk */
1303 kmem_free(blk, sizeof (*blk));
1304 rsm_resource.rsmrc_root[i] = NULL;
1305 }
1306
1307 rw_exit(&rsm_resource.rsmrc_lock);
1308
1309 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1310 "rsmresource_free done\n"));
1311
1312 return (p);
1313 }
1314
1315 static rsmresource_t *
1316 rsmresource_lookup(minor_t rnum, int lock)
1317 {
1318 int i, j;
1319 rsmresource_blk_t *blk;
1320 rsmresource_t *p;
1321
1322 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1323 "rsmresource_lookup enter\n"));
1324
1325 /* Find resource and lock it in READER mode */
1326 /* search for available resource slot */
1327
1328 i = (int)(rnum / RSMRC_BLKSZ);
1329 j = (int)(rnum % RSMRC_BLKSZ);
1330
1331 if (i >= rsm_resource.rsmrc_len) {
1332 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1333 "rsmresource_lookup done\n"));
1334 return (NULL);
1335 }
1336
1337 rw_enter(&rsm_resource.rsmrc_lock, RW_READER);
1338
1339 blk = rsm_resource.rsmrc_root[i];
1340 if (blk != NULL) {
1341 ASSERT(i < rsm_resource.rsmrc_len);
1342 ASSERT(i < rsm_resource.rsmrc_sz);
1343
1344 p = blk->rsmrcblk_blks[j];
1345 if (lock == RSM_LOCK) {
1346 if (p != RSMRC_RESERVED) {
1347 mutex_enter(&p->rsmrc_lock);
1348 } else {
1349 p = NULL;
1350 }
1351 }
1352 } else {
1353 p = NULL;
1354 }
1355 rw_exit(&rsm_resource.rsmrc_lock);
1356
1357 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1358 "rsmresource_lookup done\n"));
1359
1360 return (p);
1361 }
1362
1363 static void
1364 rsmresource_insert(minor_t rnum, rsmresource_t *p, rsm_resource_type_t type)
1365 {
1366 /* Find resource and lock it in READER mode */
1367 /* Caller can upgrade if need be */
1368 /* search for available resource slot */
1369 int i, j;
1370 rsmresource_blk_t *blk;
1371
1372 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1373 "rsmresource_insert enter\n"));
1374
1375 i = (int)(rnum / RSMRC_BLKSZ);
1376 j = (int)(rnum % RSMRC_BLKSZ);
1377
1378 p->rsmrc_type = type;
1379 p->rsmrc_num = rnum;
1380
1381 rw_enter(&rsm_resource.rsmrc_lock, RW_READER);
1382
1383 ASSERT(rsm_resource.rsmrc_root);
1384 ASSERT(i < rsm_resource.rsmrc_len);
1385 ASSERT(i < rsm_resource.rsmrc_sz);
1386
1387 blk = rsm_resource.rsmrc_root[i];
1388 ASSERT(blk);
1389
1390 ASSERT(blk->rsmrcblk_blks[j] == RSMRC_RESERVED);
1391
1392 blk->rsmrcblk_blks[j] = p;
1393
1394 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1395 "rsmresource_insert done\n"));
1396
1397 rw_exit(&rsm_resource.rsmrc_lock);
1398 }
1399
1400 static void
1401 rsmresource_destroy()
1402 {
1403 int i, j;
1404
1405 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1406 "rsmresource_destroy enter\n"));
1407
1408 rw_enter(&rsm_resource.rsmrc_lock, RW_WRITER);
1409
1410 for (i = 0; i < rsm_resource.rsmrc_len; i++) {
1411 rsmresource_blk_t *blk;
1412
1413 blk = rsm_resource.rsmrc_root[i];
1414 if (blk == NULL) {
1415 continue;
1416 }
1417 for (j = 0; j < RSMRC_BLKSZ; j++) {
1418 if (blk->rsmrcblk_blks[j] != NULL) {
1419 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1420 "Not null slot %d, %lx\n", j,
1421 (size_t)blk->rsmrcblk_blks[j]));
1422 }
1423 }
1424 kmem_free(blk, sizeof (*blk));
1425 rsm_resource.rsmrc_root[i] = NULL;
1426 }
1427 if (rsm_resource.rsmrc_root) {
1428 i = rsm_resource.rsmrc_sz * (int)sizeof (rsmresource_blk_t *);
1429 kmem_free(rsm_resource.rsmrc_root, (uint_t)i);
1430 rsm_resource.rsmrc_root = NULL;
1431 rsm_resource.rsmrc_len = 0;
1432 rsm_resource.rsmrc_sz = 0;
1433 }
1434
1435 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1436 "rsmresource_destroy done\n"));
1437
1438 rw_exit(&rsm_resource.rsmrc_lock);
1439 }
1440
1441
1442 /* ******************** Generic Key Hash Table Management ********* */
1443 static rsmresource_t *
1444 rsmhash_lookup(rsmhash_table_t *rhash, rsm_memseg_id_t key,
1445 rsm_resource_state_t state)
1446 {
1447 rsmresource_t *p;
1448 uint_t hashval;
1449 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1450
1451 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_lookup enter\n"));
1452
1453 hashval = rsmhash(key);
1454
1455 DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmhash_lookup %u=%d\n",
1456 key, hashval));
1457
1458 rw_enter(&rhash->rsmhash_rw, RW_READER);
1459
1460 p = (rsmresource_t *)rsmhash_getbkt(rhash, hashval);
1461
1462 for (; p; p = p->rsmrc_next) {
1463 if (p->rsmrc_key == key) {
1464 /* acquire resource lock */
1465 RSMRC_LOCK(p);
1466 break;
1467 }
1468 }
1469
1470 rw_exit(&rhash->rsmhash_rw);
1471
1472 if (p != NULL && p->rsmrc_state != state) {
1473 /* state changed, release lock and return null */
1474 RSMRC_UNLOCK(p);
1475 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
1476 "rsmhash_lookup done: state changed\n"));
1477 return (NULL);
1478 }
1479
1480 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_lookup done\n"));
1481
1482 return (p);
1483 }
1484
1485 static void
1486 rsmhash_rm(rsmhash_table_t *rhash, rsmresource_t *rcelm)
1487 {
1488 rsmresource_t *p, **back;
1489 uint_t hashval;
1490 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1491
1492 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_rm enter\n"));
1493
1494 hashval = rsmhash(rcelm->rsmrc_key);
1495
1496 DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmhash_rm %u=%d\n",
1497 rcelm->rsmrc_key, hashval));
1498
1499 /*
1500 * It's ok not to find the segment.
1501 */
1502 rw_enter(&rhash->rsmhash_rw, RW_WRITER);
1503
1504 back = (rsmresource_t **)rsmhash_bktaddr(rhash, hashval);
1505
1506 for (; (p = *back) != NULL; back = &p->rsmrc_next) {
1507 if (p == rcelm) {
1508 *back = rcelm->rsmrc_next;
1509 break;
1510 }
1511 }
1512
1513 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_rm done\n"));
1514
1515 rw_exit(&rhash->rsmhash_rw);
1516 }
1517
1518 static int
1519 rsmhash_add(rsmhash_table_t *rhash, rsmresource_t *new, rsm_memseg_id_t key,
1520 int dup_check, rsm_resource_state_t state)
1521 {
1522 rsmresource_t *p = NULL, **bktp;
1523 uint_t hashval;
1524 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1525
1526 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_add enter\n"));
1527
1528 /* lock table */
1529 rw_enter(&rhash->rsmhash_rw, RW_WRITER);
1530
1531 /*
1532 * If the current resource state is other than the state passed in
1533 * then the resource is (probably) already on the list. eg. for an
1534 * import segment if the state is not RSM_STATE_NEW then it's on the
1535 * list already.
1536 */
1537 RSMRC_LOCK(new);
1538 if (new->rsmrc_state != state) {
1539 RSMRC_UNLOCK(new);
1540 rw_exit(&rhash->rsmhash_rw);
1541 return (RSMERR_BAD_SEG_HNDL);
1542 }
1543
1544 hashval = rsmhash(key);
1545 DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmhash_add %d\n", hashval));
1546
1547 if (dup_check) {
1548 /*
1549 * Used for checking export segments; don't want to have
1550 * the same key used for multiple segments.
1551 */
1552
1553 p = (rsmresource_t *)rsmhash_getbkt(rhash, hashval);
1554
1555 for (; p; p = p->rsmrc_next) {
1556 if (p->rsmrc_key == key) {
1557 RSMRC_UNLOCK(new);
1558 break;
1559 }
1560 }
1561 }
1562
1563 if (p == NULL) {
1564 /* Key doesn't exist, add it */
1565
1566 bktp = (rsmresource_t **)rsmhash_bktaddr(rhash, hashval);
1567
1568 new->rsmrc_key = key;
1569 new->rsmrc_next = *bktp;
1570 *bktp = new;
1571 }
1572
1573 rw_exit(&rhash->rsmhash_rw);
1574
1575 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_add done\n"));
1576
1577 return (p == NULL ? RSM_SUCCESS : RSMERR_SEGID_IN_USE);
1578 }
1579
1580 /*
1581 * XOR each byte of the key.
1582 */
1583 static uint_t
1584 rsmhash(rsm_memseg_id_t key)
1585 {
1586 uint_t hash = key;
1587
1588 hash ^= (key >> 8);
1589 hash ^= (key >> 16);
1590 hash ^= (key >> 24);
1591
1592 return (hash % rsm_hash_size);
1593
1594 }
1595
1596 /*
1597 * generic function to get a specific bucket
1598 */
1599 static void *
1600 rsmhash_getbkt(rsmhash_table_t *rhash, uint_t hashval)
1601 {
1602
1603 if (rhash->bucket == NULL)
1604 return (NULL);
1605 else
1606 return ((void *)rhash->bucket[hashval]);
1607 }
1608
1609 /*
1610 * generic function to get a specific bucket's address
1611 */
1612 static void **
1613 rsmhash_bktaddr(rsmhash_table_t *rhash, uint_t hashval)
1614 {
1615 if (rhash->bucket == NULL)
1616 return (NULL);
1617 else
1618 return ((void **)&(rhash->bucket[hashval]));
1619 }
1620
1621 /*
1622 * generic function to alloc a hash table
1623 */
1624 static void
1625 rsmhash_alloc(rsmhash_table_t *rhash, int size)
1626 {
1627 rhash->bucket = (rsmresource_t **)
1628 kmem_zalloc(size * sizeof (rsmresource_t *), KM_SLEEP);
1629 }
1630
1631 /*
1632 * generic function to free a hash table
1633 */
1634 static void
1635 rsmhash_free(rsmhash_table_t *rhash, int size)
1636 {
1637
1638 kmem_free((void *)rhash->bucket, size * sizeof (caddr_t));
1639 rhash->bucket = NULL;
1640
1641 }
1642 /* *********************** Exported Segment Key Management ************ */
1643
/*
 * Export segment table wrappers.  Arguments and the lookup expansion are
 * parenthesized so the macros are safe with arbitrary expressions.
 *
 * rsmexport_add: insert with duplicate-key checking; the segment must be
 *	in RSM_STATE_BIND.
 * rsmexport_rm: unlink a segment from the export table.
 * rsmexport_lookup: find a segment by key; only a segment in
 *	RSM_STATE_EXPORT is returned.
 */
#define	rsmexport_add(new, key)		\
	rsmhash_add(&rsm_export_segs, (rsmresource_t *)(new), (key), 1, \
	    RSM_STATE_BIND)

#define	rsmexport_rm(arg)	\
	rsmhash_rm(&rsm_export_segs, (rsmresource_t *)(arg))

#define	rsmexport_lookup(key)	\
	((rsmseg_t *)rsmhash_lookup(&rsm_export_segs, (key), \
	    RSM_STATE_EXPORT))
1653
1654 /* ************************** Import Segment List Management ********** */
1655
/*
 * Import segment table wrappers.
 *
 * rsmimport_add puts a segment on the import list (useful for paging and
 * loopback segment unloading).  No duplicate-key check is done and the
 * segment must be in RSM_STATE_NEW.  rsmimport_rm unlinks it again.
 */
#define	rsmimport_add(arg, key)		\
	rsmhash_add(&rsm_import_segs, (rsmresource_t *)(arg), (key), \
	    0, RSM_STATE_NEW)

#define	rsmimport_rm(arg)		\
	rsmhash_rm(&rsm_import_segs, (rsmresource_t *)(arg))
1666
1667 /*
1668 * #define rsmimport_lookup(key) \
1669 * (rsmseg_t *)rsmhash_lookup(&rsm_import_segs, (key), RSM_STATE_CONNECT)
1670 */
1671
1672 /*
1673 * increase the ref count and make the import segment point to the
1674 * shared data structure. Return a pointer to the share data struct
1675 * and the shared data struct is locked upon return
1676 */
1677 static rsm_import_share_t *
1678 rsmshare_get(rsm_memseg_id_t key, rsm_node_id_t node, adapter_t *adapter,
1679 rsmseg_t *segp)
1680 {
1681 uint_t hash;
1682 rsmresource_t *p;
1683 rsm_import_share_t *shdatap;
1684 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1685
1686 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmshare_get enter\n"));
1687
1688 hash = rsmhash(key);
1689 /* lock table */
1690 rw_enter(&rsm_import_segs.rsmhash_rw, RW_WRITER);
1691 DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmshare_get:key=%u, hash=%d\n",
1692 key, hash));
1693
1694 p = (rsmresource_t *)rsmhash_getbkt(&rsm_import_segs, hash);
1695
1696 for (; p; p = p->rsmrc_next) {
1697 /*
1698 * Look for an entry that is importing the same exporter
1699 * with the share data structure allocated.
1700 */
1701 if ((p->rsmrc_key == key) &&
1702 (p->rsmrc_node == node) &&
1703 (p->rsmrc_adapter == adapter) &&
1704 (((rsmseg_t *)p)->s_share != NULL)) {
1705 shdatap = ((rsmseg_t *)p)->s_share;
1706 break;
1707 }
1708 }
1709
1710 if (p == NULL) {
1711 /* we are the first importer, create the shared data struct */
1712 shdatap = kmem_zalloc(sizeof (rsm_import_share_t), KM_SLEEP);
1713 shdatap->rsmsi_state = RSMSI_STATE_NEW;
1714 shdatap->rsmsi_segid = key;
1715 shdatap->rsmsi_node = node;
1716 mutex_init(&shdatap->rsmsi_lock, NULL, MUTEX_DRIVER, NULL);
1717 cv_init(&shdatap->rsmsi_cv, NULL, CV_DRIVER, 0);
1718 }
1719
1720 rsmseglock_acquire(segp);
1721
1722 /* we grab the shared lock before returning from this function */
1723 mutex_enter(&shdatap->rsmsi_lock);
1724
1725 shdatap->rsmsi_refcnt++;
1726 segp->s_share = shdatap;
1727
1728 rsmseglock_release(segp);
1729
1730 rw_exit(&rsm_import_segs.rsmhash_rw);
1731
1732 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmshare_get done\n"));
1733
1734 return (shdatap);
1735 }
1736
1737 /*
1738 * the shared data structure should be locked before calling
1739 * rsmsharecv_signal().
1740 * Change the state and signal any waiting segments.
1741 */
1742 void
1743 rsmsharecv_signal(rsmseg_t *seg, int oldstate, int newstate)
1744 {
1745 ASSERT(rsmsharelock_held(seg));
1746
1747 if (seg->s_share->rsmsi_state == oldstate) {
1748 seg->s_share->rsmsi_state = newstate;
1749 cv_broadcast(&seg->s_share->rsmsi_cv);
1750 }
1751 }
1752
1753 /*
1754 * Add to the hash table
1755 */
1756 static void
1757 importer_list_add(rsm_node_id_t node, rsm_memseg_id_t key, rsm_addr_t hwaddr,
1758 void *cookie)
1759 {
1760
1761 importing_token_t *head;
1762 importing_token_t *new_token;
1763 int index;
1764
1765 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1766
1767 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_add enter\n"));
1768
1769 new_token = kmem_zalloc(sizeof (importing_token_t), KM_SLEEP);
1770 new_token->importing_node = node;
1771 new_token->key = key;
1772 new_token->import_segment_cookie = cookie;
1773 new_token->importing_adapter_hwaddr = hwaddr;
1774
1775 index = rsmhash(key);
1776
1777 mutex_enter(&importer_list.lock);
1778
1779 head = importer_list.bucket[index];
1780 importer_list.bucket[index] = new_token;
1781 new_token->next = head;
1782 mutex_exit(&importer_list.lock);
1783
1784 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_add done\n"));
1785 }
1786
1787 static void
1788 importer_list_rm(rsm_node_id_t node, rsm_memseg_id_t key, void *cookie)
1789 {
1790
1791 importing_token_t *prev, *token = NULL;
1792 int index;
1793 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1794
1795 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_rm enter\n"));
1796
1797 index = rsmhash(key);
1798
1799 mutex_enter(&importer_list.lock);
1800
1801 token = importer_list.bucket[index];
1802
1803 prev = token;
1804 while (token != NULL) {
1805 if (token->importing_node == node &&
1806 token->import_segment_cookie == cookie) {
1807 if (prev == token)
1808 importer_list.bucket[index] = token->next;
1809 else
1810 prev->next = token->next;
1811 kmem_free((void *)token, sizeof (*token));
1812 break;
1813 } else {
1814 prev = token;
1815 token = token->next;
1816 }
1817 }
1818
1819 mutex_exit(&importer_list.lock);
1820
1821 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_rm done\n"));
1822
1823
1824 }
1825
1826 /* **************************Segment Structure Management ************* */
1827
1828 /*
1829 * Free segment structure
1830 */
1831 static void
1832 rsmseg_free(rsmseg_t *seg)
1833 {
1834
1835 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1836
1837 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_free enter\n"));
1838
1839 /* need to take seglock here to avoid race with rsmmap_unmap() */
1840 rsmseglock_acquire(seg);
1841 if (seg->s_ckl != NULL) {
1842 /* Segment is still busy */
1843 seg->s_state = RSM_STATE_END;
1844 rsmseglock_release(seg);
1845 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
1846 "rsmseg_free done\n"));
1847 return;
1848 }
1849
1850 rsmseglock_release(seg);
1851
1852 ASSERT(seg->s_state == RSM_STATE_END || seg->s_state == RSM_STATE_NEW);
1853
1854 /*
1855 * If it's an importer decrement the refcount
1856 * and if its down to zero free the shared data structure.
1857 * This is where failures during rsm_connect() are unrefcounted
1858 */
1859 if (seg->s_share != NULL) {
1860
1861 ASSERT(seg->s_type == RSM_RESOURCE_IMPORT_SEGMENT);
1862
1863 rsmsharelock_acquire(seg);
1864
1865 ASSERT(seg->s_share->rsmsi_refcnt > 0);
1866
1867 seg->s_share->rsmsi_refcnt--;
1868
1869 if (seg->s_share->rsmsi_refcnt == 0) {
1870 rsmsharelock_release(seg);
1871 mutex_destroy(&seg->s_share->rsmsi_lock);
1872 cv_destroy(&seg->s_share->rsmsi_cv);
1873 kmem_free((void *)(seg->s_share),
1874 sizeof (rsm_import_share_t));
1875 } else {
1876 rsmsharelock_release(seg);
1877 }
1878 /*
1879 * The following needs to be done after any
1880 * rsmsharelock calls which use seg->s_share.
1881 */
1882 seg->s_share = NULL;
1883 }
1884
1885 cv_destroy(&seg->s_cv);
1886 mutex_destroy(&seg->s_lock);
1887 rsmacl_free(seg->s_acl, seg->s_acl_len);
1888 rsmpiacl_free(seg->s_acl_in, seg->s_acl_len);
1889 if (seg->s_adapter)
1890 rsmka_release_adapter(seg->s_adapter);
1891
1892 kmem_free((void *)seg, sizeof (*seg));
1893
1894 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_free done\n"));
1895
1896 }
1897
1898
1899 static rsmseg_t *
1900 rsmseg_alloc(minor_t num, struct cred *cred)
1901 {
1902 rsmseg_t *new;
1903 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1904
1905 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_alloc enter\n"));
1906 /*
1907 * allocate memory for new segment. This should be a segkmem cache.
1908 */
1909 new = (rsmseg_t *)kmem_zalloc(sizeof (*new), KM_SLEEP);
1910
1911 new->s_state = RSM_STATE_NEW;
1912 new->s_minor = num;
1913 new->s_acl_len = 0;
1914 new->s_cookie = NULL;
1915 new->s_adapter = NULL;
1916
1917 new->s_mode = 0777 & ~PTOU((ttoproc(curthread)))->u_cmask;
1918 /* we don't have a key yet, will set at export/connect */
1919 new->s_uid = crgetuid(cred);
1920 new->s_gid = crgetgid(cred);
1921
1922 mutex_init(&new->s_lock, NULL, MUTEX_DRIVER, (void *)NULL);
1923 cv_init(&new->s_cv, NULL, CV_DRIVER, 0);
1924
1925 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_alloc done\n"));
1926
1927 return (new);
1928 }
1929
1930 /* ******************************** Driver Open/Close/Poll *************** */
1931
1932 /*ARGSUSED1*/
1933 static int
1934 rsm_open(dev_t *devp, int flag, int otyp, struct cred *cred)
1935 {
1936 minor_t rnum;
1937 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL| RSM_DDI);
1938
1939 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_open enter\n"));
1940 /*
1941 * Char only
1942 */
1943 if (otyp != OTYP_CHR) {
1944 DBG_PRINTF((category, RSM_ERR, "rsm_open: bad otyp\n"));
1945 return (EINVAL);
1946 }
1947
1948 /*
1949 * Only zero can be opened, clones are used for resources.
1950 */
1951 if (getminor(*devp) != RSM_DRIVER_MINOR) {
1952 DBG_PRINTF((category, RSM_ERR,
1953 "rsm_open: bad minor %d\n", getminor(*devp)));
1954 return (ENODEV);
1955 }
1956
1957 if ((flag & FEXCL) != 0 && secpolicy_excl_open(cred) != 0) {
1958 DBG_PRINTF((category, RSM_ERR, "rsm_open: bad perm\n"));
1959 return (EPERM);
1960 }
1961
1962 if (!(flag & FWRITE)) {
1963 /*
1964 * The library function _rsm_librsm_init calls open for
1965 * /dev/rsm with flag set to O_RDONLY. We want a valid
1966 * file descriptor to be returned for minor device zero.
1967 */
1968
1969 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
1970 "rsm_open RDONLY done\n"));
1971 return (DDI_SUCCESS);
1972 }
1973
1974 /*
1975 * - allocate new minor number and segment.
1976 * - add segment to list of all segments.
1977 * - set minordev data to segment
1978 * - update devp argument to new device
1979 * - update s_cred to cred; make sure you do crhold(cred);
1980 */
1981
1982 /* allocate a new resource number */
1983 if (rsmresource_alloc(&rnum) == RSM_SUCCESS) {
1984 /*
1985 * We will bind this minor to a specific resource in first
1986 * ioctl
1987 */
1988 *devp = makedevice(getmajor(*devp), rnum);
1989 } else {
1990 return (EAGAIN);
1991 }
1992
1993 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_open done\n"));
1994 return (DDI_SUCCESS);
1995 }
1996
1997 static void
1998 rsmseg_close(rsmseg_t *seg, int force_flag)
1999 {
2000 int e = RSM_SUCCESS;
2001
2002 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL| RSM_DDI);
2003
2004 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_close enter\n"));
2005
2006 rsmseglock_acquire(seg);
2007 if (!force_flag && (seg->s_hdr.rsmrc_type ==
2008 RSM_RESOURCE_EXPORT_SEGMENT)) {
2009 /*
2010 * If we are processing rsm_close wait for force_destroy
2011 * processing to complete since force_destroy processing
2012 * needs to finish first before we can free the segment.
2013 * force_destroy is only for export segments
2014 */
2015 while (seg->s_flags & RSM_FORCE_DESTROY_WAIT) {
2016 cv_wait(&seg->s_cv, &seg->s_lock);
2017 }
2018 }
2019 rsmseglock_release(seg);
2020
2021 /* It's ok to read the state without a lock */
2022 switch (seg->s_state) {
2023 case RSM_STATE_EXPORT:
2024 case RSM_STATE_EXPORT_QUIESCING:
2025 case RSM_STATE_EXPORT_QUIESCED:
2026 e = rsm_unpublish(seg, 1);
2027 /* FALLTHRU */
2028 case RSM_STATE_BIND_QUIESCED:
2029 /* FALLTHRU */
2030 case RSM_STATE_BIND:
2031 e = rsm_unbind(seg);
2032 if (e != RSM_SUCCESS && force_flag == 1)
2033 return;
2034 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_EXPORT_SEGMENT);
2035 /* FALLTHRU */
2036 case RSM_STATE_NEW_QUIESCED:
2037 rsmseglock_acquire(seg);
2038 seg->s_state = RSM_STATE_NEW;
2039 cv_broadcast(&seg->s_cv);
2040 rsmseglock_release(seg);
2041 break;
2042 case RSM_STATE_NEW:
2043 break;
2044 case RSM_STATE_ZOMBIE:
2045 /*
2046 * Segments in this state have been removed off the
2047 * exported segments list and have been unpublished
2048 * and unbind. These segments have been removed during
2049 * a callback to the rsm_export_force_destroy, which
2050 * is called for the purpose of unlocking these
2051 * exported memory segments when a process exits but
2052 * leaves the segments locked down since rsm_close is
2053 * is not called for the segments. This can happen
2054 * when a process calls fork or exec and then exits.
2055 * Once the segments are in the ZOMBIE state, all that
2056 * remains is to destroy them when rsm_close is called.
2057 * This is done here. Thus, for such segments the
2058 * the state is changed to new so that later in this
2059 * function rsmseg_free is called.
2060 */
2061 rsmseglock_acquire(seg);
2062 seg->s_state = RSM_STATE_NEW;
2063 rsmseglock_release(seg);
2064 break;
2065 case RSM_STATE_MAP_QUIESCE:
2066 case RSM_STATE_ACTIVE:
2067 /* Disconnect will handle the unmap */
2068 case RSM_STATE_CONN_QUIESCE:
2069 case RSM_STATE_CONNECT:
2070 case RSM_STATE_DISCONNECT:
2071 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
2072 (void) rsm_disconnect(seg);
2073 break;
2074 case RSM_STATE_MAPPING:
2075 /*FALLTHRU*/
2076 case RSM_STATE_END:
2077 DBG_PRINTF((category, RSM_ERR,
2078 "Invalid segment state %d in rsm_close\n", seg->s_state));
2079 break;
2080 default:
2081 DBG_PRINTF((category, RSM_ERR,
2082 "Invalid segment state %d in rsm_close\n", seg->s_state));
2083 break;
2084 }
2085
2086 /*
2087 * check state.
2088 * - make sure you do crfree(s_cred);
2089 * release segment and minor number
2090 */
2091 ASSERT(seg->s_state == RSM_STATE_NEW);
2092
/*
 * The export_force_destroy callback exists to unlock the exported
 * segments of a process that does a fork or exec and then exits.
 * It calls this function with the force flag set to 1, which
 * indicates that the segment state must be converted to ZOMBIE.
 * This state means that the segments still exist and have been
 * unlocked and, most importantly, that the only operation allowed
 * on them is to destroy them on an rsm_close.
 */
2102 if (force_flag) {
2103 rsmseglock_acquire(seg);
2104 seg->s_state = RSM_STATE_ZOMBIE;
2105 rsmseglock_release(seg);
2106 } else {
2107 rsmseg_free(seg);
2108 }
2109
2110 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_close done\n"));
2111 }
2112
2113 static int
2114 rsm_close(dev_t dev, int flag, int otyp, cred_t *cred)
2115 {
2116 minor_t rnum = getminor(dev);
2117 rsmresource_t *res;
2118 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL| RSM_DDI);
2119
2120 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_close enter\n"));
2121
2122 flag = flag; cred = cred;
2123
2124 if (otyp != OTYP_CHR)
2125 return (EINVAL);
2126
2127 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rnum = %d\n", rnum));
2128
2129 /*
2130 * At this point we are the last reference to the resource.
2131 * Free resource number from resource table.
2132 * It's ok to remove number before we free the segment.
2133 * We need to lock the resource to protect against remote calls.
2134 */
2135 if (rnum == RSM_DRIVER_MINOR ||
2136 (res = rsmresource_free(rnum)) == NULL) {
2137 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_close done\n"));
2138 return (DDI_SUCCESS);
2139 }
2140
2141 switch (res->rsmrc_type) {
2142 case RSM_RESOURCE_EXPORT_SEGMENT:
2143 case RSM_RESOURCE_IMPORT_SEGMENT:
2144 rsmseg_close((rsmseg_t *)res, 0);
2145 break;
2146 case RSM_RESOURCE_BAR:
2147 DBG_PRINTF((category, RSM_ERR, "bad resource in rsm_close\n"));
2148 break;
2149 default:
2150 break;
2151 }
2152
2153 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_close done\n"));
2154
2155 return (DDI_SUCCESS);
2156 }
2157
2158 /*
2159 * rsm_inc_pgcnt
2160 *
2161 * Description: increment rsm page counter.
2162 *
2163 * Parameters: pgcnt_t pnum; number of pages to be used
2164 *
2165 * Returns: RSM_SUCCESS if memory limit not exceeded
2166 * ENOSPC if memory limit exceeded. In this case, the
2167 * page counter remains unchanged.
2168 *
2169 */
2170 static int
2171 rsm_inc_pgcnt(pgcnt_t pnum)
2172 {
2173 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2174 if (rsm_pgcnt_max == 0) { /* no upper limit has been set */
2175 return (RSM_SUCCESS);
2176 }
2177
2178 mutex_enter(&rsm_pgcnt_lock);
2179
2180 if (rsm_pgcnt + pnum > rsm_pgcnt_max) {
2181 /* ensure that limits have not been exceeded */
2182 mutex_exit(&rsm_pgcnt_lock);
2183 return (RSMERR_INSUFFICIENT_MEM);
2184 }
2185
2186 rsm_pgcnt += pnum;
2187 DBG_PRINTF((category, RSM_DEBUG, "rsm_pgcnt incr to %d.\n",
2188 rsm_pgcnt));
2189 mutex_exit(&rsm_pgcnt_lock);
2190
2191 return (RSM_SUCCESS);
2192 }
2193
2194 /*
2195 * rsm_dec_pgcnt
2196 *
2197 * Description: decrement rsm page counter.
2198 *
2199 * Parameters: pgcnt_t pnum; number of pages freed
2200 *
2201 */
2202 static void
2203 rsm_dec_pgcnt(pgcnt_t pnum)
2204 {
2205 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2206
2207 if (rsm_pgcnt_max == 0) { /* no upper limit has been set */
2208 return;
2209 }
2210
2211 mutex_enter(&rsm_pgcnt_lock);
2212 ASSERT(rsm_pgcnt >= pnum);
2213 rsm_pgcnt -= pnum;
2214 DBG_PRINTF((category, RSM_DEBUG, "rsm_pgcnt decr to %d.\n",
2215 rsm_pgcnt));
2216 mutex_exit(&rsm_pgcnt_lock);
2217 }
2218
/*
 * Callback vector passed to umem_lockmemory() by rsm_bind_pages().
 * It registers rsm_export_force_destroy as the callback for memory
 * locked down on behalf of export segments.
 */
static struct umem_callback_ops rsm_as_ops = {
	UMEM_CALLBACK_VERSION, /* version number */
	rsm_export_force_destroy,
};
2223
2224 static int
2225 rsm_bind_pages(ddi_umem_cookie_t *cookie, caddr_t vaddr, size_t len,
2226 proc_t *procp)
2227 {
2228 int error = RSM_SUCCESS;
2229 ulong_t pnum;
2230 struct umem_callback_ops *callbackops = &rsm_as_ops;
2231
2232 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2233
2234 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind_pages enter\n"));
2235
2236 /*
2237 * Make sure vaddr and len are aligned on a page boundary
2238 */
2239 if ((uintptr_t)vaddr & (PAGESIZE - 1)) {
2240 return (RSMERR_BAD_ADDR);
2241 }
2242
2243 if (len & (PAGESIZE - 1)) {
2244 return (RSMERR_BAD_LENGTH);
2245 }
2246
2247 /*
2248 * Find number of pages
2249 */
2250 pnum = btopr(len);
2251 error = rsm_inc_pgcnt(pnum);
2252 if (error != RSM_SUCCESS) {
2253 DBG_PRINTF((category, RSM_ERR,
2254 "rsm_bind_pages:mem limit exceeded\n"));
2255 return (RSMERR_INSUFFICIENT_MEM);
2256 }
2257
2258 error = umem_lockmemory(vaddr, len,
2259 DDI_UMEMLOCK_WRITE|DDI_UMEMLOCK_READ|DDI_UMEMLOCK_LONGTERM,
2260 cookie,
2261 callbackops, procp);
2262
2263 if (error) {
2264 rsm_dec_pgcnt(pnum);
2265 DBG_PRINTF((category, RSM_ERR,
2266 "rsm_bind_pages:ddi_umem_lock failed\n"));
2267 /*
2268 * ddi_umem_lock, in the case of failure, returns one of
2269 * the following three errors. These are translated into
2270 * the RSMERR namespace and returned.
2271 */
2272 if (error == EFAULT)
2273 return (RSMERR_BAD_ADDR);
2274 else if (error == EACCES)
2275 return (RSMERR_PERM_DENIED);
2276 else
2277 return (RSMERR_INSUFFICIENT_MEM);
2278 }
2279
2280 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind_pages done\n"));
2281
2282 return (error);
2283
2284 }
2285
2286 static int
2287 rsm_unbind_pages(rsmseg_t *seg)
2288 {
2289 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2290
2291 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind_pages enter\n"));
2292
2293 ASSERT(rsmseglock_held(seg));
2294
2295 if (seg->s_cookie != NULL) {
2296 /* unlock address range */
2297 ddi_umem_unlock(seg->s_cookie);
2298 rsm_dec_pgcnt(btopr(seg->s_len));
2299 seg->s_cookie = NULL;
2300 }
2301
2302 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind_pages done\n"));
2303
2304 return (RSM_SUCCESS);
2305 }
2306
2307
2308 static int
2309 rsm_bind(rsmseg_t *seg, rsm_ioctlmsg_t *msg, intptr_t dataptr, int mode)
2310 {
2311 int e;
2312 adapter_t *adapter;
2313 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2314
2315 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind enter\n"));
2316
2317 adapter = rsm_getadapter(msg, mode);
2318 if (adapter == NULL) {
2319 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2320 "rsm_bind done:no adapter\n"));
2321 return (RSMERR_CTLR_NOT_PRESENT);
2322 }
2323
2324 /* lock address range */
2325 if (msg->vaddr == NULL) {
2326 rsmka_release_adapter(adapter);
2327 DBG_PRINTF((category, RSM_ERR,
2328 "rsm: rsm_bind done: invalid vaddr\n"));
2329 return (RSMERR_BAD_ADDR);
2330 }
2331 if (msg->len <= 0) {
2332 rsmka_release_adapter(adapter);
2333 DBG_PRINTF((category, RSM_ERR,
2334 "rsm_bind: invalid length\n"));
2335 return (RSMERR_BAD_LENGTH);
2336 }
2337
2338 /* Lock segment */
2339 rsmseglock_acquire(seg);
2340
2341 while (seg->s_state == RSM_STATE_NEW_QUIESCED) {
2342 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
2343 DBG_PRINTF((category, RSM_DEBUG,
2344 "rsm_bind done: cv_wait INTERRUPTED"));
2345 rsmka_release_adapter(adapter);
2346 rsmseglock_release(seg);
2347 return (RSMERR_INTERRUPTED);
2348 }
2349 }
2350
2351 ASSERT(seg->s_state == RSM_STATE_NEW);
2352
2353 ASSERT(seg->s_cookie == NULL);
2354
2355 e = rsm_bind_pages(&seg->s_cookie, msg->vaddr, msg->len, curproc);
2356 if (e == RSM_SUCCESS) {
2357 seg->s_flags |= RSM_USER_MEMORY;
2358 if (msg->perm & RSM_ALLOW_REBIND) {
2359 seg->s_flags |= RSMKA_ALLOW_UNBIND_REBIND;
2360 }
2361 if (msg->perm & RSM_CREATE_SEG_DONTWAIT) {
2362 seg->s_flags |= RSMKA_SET_RESOURCE_DONTWAIT;
2363 }
2364 seg->s_region.r_vaddr = msg->vaddr;
2365 /*
2366 * Set the s_pid value in the segment structure. This is used
2367 * to identify exported segments belonging to a particular
2368 * process so that when the process exits, these segments can
2369 * be unlocked forcefully even if rsm_close is not called on
2370 * process exit since there maybe other processes referencing
2371 * them (for example on a fork or exec).
2372 * The s_pid value is also used to authenticate the process
2373 * doing a publish or unpublish on the export segment. Only
2374 * the creator of the export segment has a right to do a
2375 * publish or unpublish and unbind on the segment.
2376 */
2377 seg->s_pid = ddi_get_pid();
2378 seg->s_len = msg->len;
2379 seg->s_state = RSM_STATE_BIND;
2380 seg->s_adapter = adapter;
2381 seg->s_proc = curproc;
2382 } else {
2383 rsmka_release_adapter(adapter);
2384 DBG_PRINTF((category, RSM_WARNING,
2385 "unable to lock down pages\n"));
2386 }
2387
2388 msg->rnum = seg->s_minor;
2389 /* Unlock segment */
2390 rsmseglock_release(seg);
2391
2392 if (e == RSM_SUCCESS) {
2393 /* copyout the resource number */
2394 #ifdef _MULTI_DATAMODEL
2395 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
2396 rsm_ioctlmsg32_t msg32;
2397
2398 msg32.rnum = msg->rnum;
2399 if (ddi_copyout((caddr_t)&msg32.rnum,
2400 (caddr_t)&((rsm_ioctlmsg32_t *)dataptr)->rnum,
2401 sizeof (minor_t), mode)) {
2402 rsmka_release_adapter(adapter);
2403 e = RSMERR_BAD_ADDR;
2404 }
2405 }
2406 #endif
2407 if (ddi_copyout((caddr_t)&msg->rnum,
2408 (caddr_t)&((rsm_ioctlmsg_t *)dataptr)->rnum,
2409 sizeof (minor_t), mode)) {
2410 rsmka_release_adapter(adapter);
2411 e = RSMERR_BAD_ADDR;
2412 }
2413 }
2414
2415 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind done\n"));
2416
2417 return (e);
2418 }
2419
2420 static void
2421 rsm_remap_local_importers(rsm_node_id_t src_nodeid,
2422 rsm_memseg_id_t ex_segid,
2423 ddi_umem_cookie_t cookie)
2424
2425 {
2426 rsmresource_t *p = NULL;
2427 rsmhash_table_t *rhash = &rsm_import_segs;
2428 uint_t index;
2429
2430 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_FUNC_ALL, RSM_DEBUG_VERBOSE,
2431 "rsm_remap_local_importers enter\n"));
2432
2433 index = rsmhash(ex_segid);
2434
2435 rw_enter(&rhash->rsmhash_rw, RW_READER);
2436
2437 p = rsmhash_getbkt(rhash, index);
2438
2439 for (; p; p = p->rsmrc_next) {
2440 rsmseg_t *seg = (rsmseg_t *)p;
2441 rsmseglock_acquire(seg);
2442 /*
2443 * Change the s_cookie value of only the local importers
2444 * which have been mapped (in state RSM_STATE_ACTIVE).
2445 * Note that there is no need to change the s_cookie value
2446 * if the imported segment is in RSM_STATE_MAPPING since
2447 * eventually the s_cookie will be updated via the mapping
2448 * functionality.
2449 */
2450 if ((seg->s_segid == ex_segid) && (seg->s_node == src_nodeid) &&
2451 (seg->s_state == RSM_STATE_ACTIVE)) {
2452 seg->s_cookie = cookie;
2453 }
2454 rsmseglock_release(seg);
2455 }
2456 rw_exit(&rhash->rsmhash_rw);
2457
2458 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_FUNC_ALL, RSM_DEBUG_VERBOSE,
2459 "rsm_remap_local_importers done\n"));
2460 }
2461
2462 static int
2463 rsm_rebind(rsmseg_t *seg, rsm_ioctlmsg_t *msg)
2464 {
2465 int e;
2466 adapter_t *adapter;
2467 ddi_umem_cookie_t cookie;
2468 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2469
2470 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_rebind enter\n"));
2471
2472 /* Check for permissions to rebind */
2473 if (!(seg->s_flags & RSMKA_ALLOW_UNBIND_REBIND)) {
2474 return (RSMERR_REBIND_NOT_ALLOWED);
2475 }
2476
2477 if (seg->s_pid != ddi_get_pid() &&
2478 ddi_get_pid() != 0) {
2479 DBG_PRINTF((category, RSM_ERR, "rsm_rebind: Not owner\n"));
2480 return (RSMERR_NOT_CREATOR);
2481 }
2482
2483 /*
2484 * We will not be allowing partial rebind and hence length passed
2485 * in must be same as segment length
2486 */
2487 if (msg->vaddr == NULL) {
2488 DBG_PRINTF((category, RSM_ERR,
2489 "rsm_rebind done: null msg->vaddr\n"));
2490 return (RSMERR_BAD_ADDR);
2491 }
2492 if (msg->len != seg->s_len) {
2493 DBG_PRINTF((category, RSM_ERR,
2494 "rsm_rebind: invalid length\n"));
2495 return (RSMERR_BAD_LENGTH);
2496 }
2497
2498 /* Lock segment */
2499 rsmseglock_acquire(seg);
2500
2501 while ((seg->s_state == RSM_STATE_BIND_QUIESCED) ||
2502 (seg->s_state == RSM_STATE_EXPORT_QUIESCING) ||
2503 (seg->s_state == RSM_STATE_EXPORT_QUIESCED)) {
2504 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
2505 rsmseglock_release(seg);
2506 DBG_PRINTF((category, RSM_DEBUG,
2507 "rsm_rebind done: cv_wait INTERRUPTED"));
2508 return (RSMERR_INTERRUPTED);
2509 }
2510 }
2511
2512 /* verify segment state */
2513 if ((seg->s_state != RSM_STATE_BIND) &&
2514 (seg->s_state != RSM_STATE_EXPORT)) {
2515 /* Unlock segment */
2516 rsmseglock_release(seg);
2517 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2518 "rsm_rebind done: invalid state\n"));
2519 return (RSMERR_BAD_SEG_HNDL);
2520 }
2521
2522 ASSERT(seg->s_cookie != NULL);
2523
2524 if (msg->vaddr == seg->s_region.r_vaddr) {
2525 rsmseglock_release(seg);
2526 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_rebind done\n"));
2527 return (RSM_SUCCESS);
2528 }
2529
2530 e = rsm_bind_pages(&cookie, msg->vaddr, msg->len, curproc);
2531 if (e == RSM_SUCCESS) {
2532 struct buf *xbuf;
2533 dev_t sdev = 0;
2534 rsm_memory_local_t mem;
2535
2536 xbuf = ddi_umem_iosetup(cookie, 0, msg->len, B_WRITE,
2537 sdev, 0, NULL, DDI_UMEM_SLEEP);
2538 ASSERT(xbuf != NULL);
2539
2540 mem.ms_type = RSM_MEM_BUF;
2541 mem.ms_bp = xbuf;
2542
2543 adapter = seg->s_adapter;
2544 e = adapter->rsmpi_ops->rsm_rebind(
2545 seg->s_handle.out, 0, &mem,
2546 RSM_RESOURCE_DONTWAIT, NULL);
2547
2548 if (e == RSM_SUCCESS) {
2549 /*
2550 * unbind the older pages, and unload local importers;
2551 * but don't disconnect importers
2552 */
2553 (void) rsm_unbind_pages(seg);
2554 seg->s_cookie = cookie;
2555 seg->s_region.r_vaddr = msg->vaddr;
2556 rsm_remap_local_importers(my_nodeid, seg->s_segid,
2557 cookie);
2558 } else {
2559 /*
2560 * Unbind the pages associated with "cookie" by the
2561 * rsm_bind_pages calls prior to this. This is
2562 * similar to what is done in the rsm_unbind_pages
2563 * routine for the seg->s_cookie.
2564 */
2565 ddi_umem_unlock(cookie);
2566 rsm_dec_pgcnt(btopr(msg->len));
2567 DBG_PRINTF((category, RSM_ERR,
2568 "rsm_rebind failed with %d\n", e));
2569 }
2570 /*
2571 * At present there is no dependency on the existence of xbuf.
2572 * So we can free it here. If in the future this changes, it can
2573 * be freed sometime during the segment destroy.
2574 */
2575 freerbuf(xbuf);
2576 }
2577
2578 /* Unlock segment */
2579 rsmseglock_release(seg);
2580
2581 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_rebind done\n"));
2582
2583 return (e);
2584 }
2585
2586 static int
2587 rsm_unbind(rsmseg_t *seg)
2588 {
2589 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2590
2591 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind enter\n"));
2592
2593 rsmseglock_acquire(seg);
2594
2595 /* verify segment state */
2596 if ((seg->s_state != RSM_STATE_BIND) &&
2597 (seg->s_state != RSM_STATE_BIND_QUIESCED)) {
2598 rsmseglock_release(seg);
2599 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2600 "rsm_unbind: invalid state\n"));
2601 return (RSMERR_BAD_SEG_HNDL);
2602 }
2603
2604 /* unlock current range */
2605 (void) rsm_unbind_pages(seg);
2606
2607 if (seg->s_state == RSM_STATE_BIND) {
2608 seg->s_state = RSM_STATE_NEW;
2609 } else if (seg->s_state == RSM_STATE_BIND_QUIESCED) {
2610 seg->s_state = RSM_STATE_NEW_QUIESCED;
2611 }
2612
2613 rsmseglock_release(seg);
2614
2615 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind done\n"));
2616
2617 return (RSM_SUCCESS);
2618 }
2619
2620 /* **************************** Exporter Access List Management ******* */
2621 static void
2622 rsmacl_free(rsmapi_access_entry_t *acl, int acl_len)
2623 {
2624 int acl_sz;
2625 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2626
2627 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_free enter\n"));
2628
2629 /* acl could be NULL */
2630
2631 if (acl != NULL && acl_len > 0) {
2632 acl_sz = acl_len * sizeof (rsmapi_access_entry_t);
2633 kmem_free((void *)acl, acl_sz);
2634 }
2635
2636 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_free done\n"));
2637 }
2638
2639 static void
2640 rsmpiacl_free(rsm_access_entry_t *acl, int acl_len)
2641 {
2642 int acl_sz;
2643 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2644
2645 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_free enter\n"));
2646
2647 if (acl != NULL && acl_len > 0) {
2648 acl_sz = acl_len * sizeof (rsm_access_entry_t);
2649 kmem_free((void *)acl, acl_sz);
2650 }
2651
2652 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_free done\n"));
2653
2654 }
2655
2656 static int
2657 rsmacl_build(rsm_ioctlmsg_t *msg, int mode,
2658 rsmapi_access_entry_t **list, int *len, int loopback)
2659 {
2660 rsmapi_access_entry_t *acl;
2661 int acl_len;
2662 int i;
2663 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2664
2665 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_build enter\n"));
2666
2667 *len = 0;
2668 *list = NULL;
2669
2670 acl_len = msg->acl_len;
2671 if ((loopback && acl_len > 1) || (acl_len < 0) ||
2672 (acl_len > MAX_NODES)) {
2673 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2674 "rsmacl_build done: acl invalid\n"));
2675 return (RSMERR_BAD_ACL);
2676 }
2677
2678 if (acl_len > 0 && acl_len <= MAX_NODES) {
2679 size_t acl_size = acl_len * sizeof (rsmapi_access_entry_t);
2680
2681 acl = kmem_alloc(acl_size, KM_SLEEP);
2682
2683 if (ddi_copyin((caddr_t)msg->acl, (caddr_t)acl,
2684 acl_size, mode)) {
2685 kmem_free((void *) acl, acl_size);
2686 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2687 "rsmacl_build done: BAD_ADDR\n"));
2688 return (RSMERR_BAD_ADDR);
2689 }
2690
2691 /*
2692 * Verify access list
2693 */
2694 for (i = 0; i < acl_len; i++) {
2695 if (acl[i].ae_node > MAX_NODES ||
2696 (loopback && (acl[i].ae_node != my_nodeid)) ||
2697 acl[i].ae_permission > RSM_ACCESS_TRUSTED) {
2698 /* invalid entry */
2699 kmem_free((void *) acl, acl_size);
2700 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2701 "rsmacl_build done: EINVAL\n"));
2702 return (RSMERR_BAD_ACL);
2703 }
2704 }
2705
2706 *len = acl_len;
2707 *list = acl;
2708 }
2709
2710 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_build done\n"));
2711
2712 return (DDI_SUCCESS);
2713 }
2714
2715 static int
2716 rsmpiacl_create(rsmapi_access_entry_t *src, rsm_access_entry_t **dest,
2717 int acl_len, adapter_t *adapter)
2718 {
2719 rsm_access_entry_t *acl;
2720 rsm_addr_t hwaddr;
2721 int i;
2722 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2723
2724 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_create enter\n"));
2725
2726 if (src != NULL) {
2727 size_t acl_size = acl_len * sizeof (rsm_access_entry_t);
2728 acl = kmem_alloc(acl_size, KM_SLEEP);
2729
2730 /*
2731 * translate access list
2732 */
2733 for (i = 0; i < acl_len; i++) {
2734 if (src[i].ae_node == my_nodeid) {
2735 acl[i].ae_addr = adapter->hwaddr;
2736 } else {
2737 hwaddr = get_remote_hwaddr(adapter,
2738 src[i].ae_node);
2739 if ((int64_t)hwaddr < 0) {
2740 /* invalid hwaddr */
2741 kmem_free((void *) acl, acl_size);
2742 DBG_PRINTF((category,
2743 RSM_DEBUG_VERBOSE,
2744 "rsmpiacl_create done:"
2745 "EINVAL hwaddr\n"));
2746 return (RSMERR_INTERNAL_ERROR);
2747 }
2748 acl[i].ae_addr = hwaddr;
2749 }
2750 /* rsmpi understands only RSM_PERM_XXXX */
2751 acl[i].ae_permission =
2752 src[i].ae_permission & RSM_PERM_RDWR;
2753 }
2754 *dest = acl;
2755 } else {
2756 *dest = NULL;
2757 }
2758
2759 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_create done\n"));
2760
2761 return (RSM_SUCCESS);
2762 }
2763
2764 static int
2765 rsmsegacl_validate(rsmipc_request_t *req, rsm_node_id_t rnode,
2766 rsmipc_reply_t *reply)
2767 {
2768
2769 int i;
2770 rsmseg_t *seg;
2771 rsm_memseg_id_t key = req->rsmipc_key;
2772 rsm_permission_t perm = req->rsmipc_perm;
2773 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2774
2775 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2776 "rsmsegacl_validate enter\n"));
2777
2778 /*
2779 * Find segment and grab its lock. The reason why we grab the segment
2780 * lock in side the search is to avoid the race when the segment is
2781 * being deleted and we already have a pointer to it.
2782 */
2783 seg = rsmexport_lookup(key);
2784 if (!seg) {
2785 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2786 "rsmsegacl_validate done: %u ENXIO\n", key));
2787 return (RSMERR_SEG_NOT_PUBLISHED);
2788 }
2789
2790 ASSERT(rsmseglock_held(seg));
2791 ASSERT(seg->s_state == RSM_STATE_EXPORT);
2792
2793 /*
2794 * We implement a 2-level protection scheme.
2795 * First, we check if local/remote host has access rights.
2796 * Second, we check if the user has access rights.
2797 *
2798 * This routine only validates the rnode access_list
2799 */
2800 if (seg->s_acl_len > 0) {
2801 /*
2802 * Check host access list
2803 */
2804 ASSERT(seg->s_acl != NULL);
2805 for (i = 0; i < seg->s_acl_len; i++) {
2806 if (seg->s_acl[i].ae_node == rnode) {
2807 perm &= seg->s_acl[i].ae_permission;
2808 goto found;
2809 }
2810 }
2811 /* rnode is not found in the list */
2812 rsmseglock_release(seg);
2813 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2814 "rsmsegacl_validate done: EPERM\n"));
2815 return (RSMERR_SEG_NOT_PUBLISHED_TO_NODE);
2816 } else {
2817 /* use default owner creation umask */
2818 perm &= seg->s_mode;
2819 }
2820
2821 found:
2822 /* update perm for this node */
2823 reply->rsmipc_mode = perm;
2824 reply->rsmipc_uid = seg->s_uid;
2825 reply->rsmipc_gid = seg->s_gid;
2826 reply->rsmipc_segid = seg->s_segid;
2827 reply->rsmipc_seglen = seg->s_len;
2828
2829 /*
2830 * Perm of requesting node is valid; source will validate user
2831 */
2832 rsmseglock_release(seg);
2833
2834 /*
2835 * Add the importer to the list right away, if connect fails
2836 * the importer will ask the exporter to remove it.
2837 */
2838 importer_list_add(rnode, key, req->rsmipc_adapter_hwaddr,
2839 req->rsmipc_segment_cookie);
2840
2841 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmsegacl_validate done\n"));
2842
2843 return (RSM_SUCCESS);
2844 }
2845
2846
2847 /* ************************** Exporter Calls ************************* */
2848
2849 static int
2850 rsm_publish(rsmseg_t *seg, rsm_ioctlmsg_t *msg, intptr_t dataptr, int mode)
2851 {
2852 int e;
2853 int acl_len;
2854 rsmapi_access_entry_t *acl;
2855 rsm_access_entry_t *rsmpi_acl;
2856 rsm_memory_local_t mem;
2857 struct buf *xbuf;
2858 dev_t sdev = 0;
2859 adapter_t *adapter;
2860 rsm_memseg_id_t segment_id = 0;
2861 int loopback_flag = 0;
2862 int create_flags = 0;
2863 rsm_resource_callback_t callback_flag;
2864 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2865
2866 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_publish enter\n"));
2867
2868 if (seg->s_adapter == &loopback_adapter)
2869 loopback_flag = 1;
2870
2871 if (seg->s_pid != ddi_get_pid() &&
2872 ddi_get_pid() != 0) {
2873 DBG_PRINTF((category, RSM_ERR,
2874 "rsm_publish: Not creator\n"));
2875 return (RSMERR_NOT_CREATOR);
2876 }
2877
2878 /*
2879 * Get per node access list
2880 */
2881 e = rsmacl_build(msg, mode, &acl, &acl_len, loopback_flag);
2882 if (e != DDI_SUCCESS) {
2883 DBG_PRINTF((category, RSM_ERR,
2884 "rsm_publish done: rsmacl_build failed\n"));
2885 return (e);
2886 }
2887
2888 /*
2889 * The application provided msg->key is used for resolving a
2890 * segment id according to the following:
2891 * key = 0 Kernel Agent selects the segment id
2892 * key <= RSM_DLPI_ID_END Reserved for system usage except
2893 * RSMLIB range
2894 * key < RSM_USER_APP_ID_BASE segment id = key
2895 * key >= RSM_USER_APP_ID_BASE Reserved for KA selections
2896 *
2897 * rsm_nextavail_segmentid is initialized to 0x80000000 and
2898 * overflows to zero after 0x80000000 allocations.
2899 * An algorithm is needed which allows reinitialization and provides
2900 * for reallocation after overflow. For now, ENOMEM is returned
2901 * once the overflow condition has occurred.
2902 */
2903 if (msg->key == 0) {
2904 mutex_enter(&rsm_lock);
2905 segment_id = rsm_nextavail_segmentid;
2906 if (segment_id != 0) {
2907 rsm_nextavail_segmentid++;
2908 mutex_exit(&rsm_lock);
2909 } else {
2910 mutex_exit(&rsm_lock);
2911 DBG_PRINTF((category, RSM_ERR,
2912 "rsm_publish done: no more keys avlbl\n"));
2913 return (RSMERR_INSUFFICIENT_RESOURCES);
2914 }
2915 } else if BETWEEN(msg->key, RSM_RSMLIB_ID_BASE, RSM_RSMLIB_ID_END)
2916 /* range reserved for internal use by base/ndi libraries */
2917 segment_id = msg->key;
2918 else if (msg->key <= RSM_DLPI_ID_END)
2919 return (RSMERR_RESERVED_SEGID);
2920 else if (msg->key <= (uint_t)RSM_USER_APP_ID_BASE -1)
2921 segment_id = msg->key;
2922 else {
2923 DBG_PRINTF((category, RSM_ERR,
2924 "rsm_publish done: invalid key %u\n", msg->key));
2925 return (RSMERR_RESERVED_SEGID);
2926 }
2927
2928 /* Add key to exportlist; The segment lock is held on success */
2929 e = rsmexport_add(seg, segment_id);
2930 if (e) {
2931 rsmacl_free(acl, acl_len);
2932 DBG_PRINTF((category, RSM_ERR,
2933 "rsm_publish done: export_add failed: %d\n", e));
2934 return (e);
2935 }
2936
2937 seg->s_segid = segment_id;
2938
2939 if ((seg->s_state != RSM_STATE_BIND) &&
2940 (seg->s_state != RSM_STATE_BIND_QUIESCED)) {
2941 /* state changed since then, free acl and return */
2942 rsmseglock_release(seg);
2943 rsmexport_rm(seg);
2944 rsmacl_free(acl, acl_len);
2945 DBG_PRINTF((category, RSM_ERR,
2946 "rsm_publish done: segment in wrong state: %d\n",
2947 seg->s_state));
2948 return (RSMERR_BAD_SEG_HNDL);
2949 }
2950
2951 /*
2952 * If this is for a local memory handle and permissions are zero,
2953 * then the surrogate segment is very large and we want to skip
2954 * allocation of DVMA space.
2955 *
2956 * Careful! If the user didn't use an ACL list, acl will be a NULL
2957 * pointer. Check that before dereferencing it.
2958 */
2959 if (acl != (rsmapi_access_entry_t *)NULL) {
2960 if (acl[0].ae_node == my_nodeid && acl[0].ae_permission == 0)
2961 goto skipdriver;
2962 }
2963
2964 /* create segment */
2965 xbuf = ddi_umem_iosetup(seg->s_cookie, 0, seg->s_len, B_WRITE,
2966 sdev, 0, NULL, DDI_UMEM_SLEEP);
2967 ASSERT(xbuf != NULL);
2968
2969 mem.ms_type = RSM_MEM_BUF;
2970 mem.ms_bp = xbuf;
2971
2972 /* This call includes a bind operations */
2973
2974 adapter = seg->s_adapter;
2975 /*
2976 * create a acl list with hwaddr for RSMPI publish
2977 */
2978 e = rsmpiacl_create(acl, &rsmpi_acl, acl_len, adapter);
2979
2980 if (e != RSM_SUCCESS) {
2981 rsmseglock_release(seg);
2982 rsmexport_rm(seg);
2983 rsmacl_free(acl, acl_len);
2984 freerbuf(xbuf);
2985 DBG_PRINTF((category, RSM_ERR,
2986 "rsm_publish done: rsmpiacl_create failed: %d\n", e));
2987 return (e);
2988 }
2989
2990 if (seg->s_state == RSM_STATE_BIND) {
2991 /* create segment */
2992
2993 /* This call includes a bind operations */
2994
2995 if (seg->s_flags & RSMKA_ALLOW_UNBIND_REBIND) {
2996 create_flags = RSM_ALLOW_UNBIND_REBIND;
2997 }
2998
2999 if (seg->s_flags & RSMKA_SET_RESOURCE_DONTWAIT) {
3000 callback_flag = RSM_RESOURCE_DONTWAIT;
3001 } else {
3002 callback_flag = RSM_RESOURCE_SLEEP;
3003 }
3004
3005 e = adapter->rsmpi_ops->rsm_seg_create(
3006 adapter->rsmpi_handle,
3007 &seg->s_handle.out, seg->s_len,
3008 create_flags, &mem,
3009 callback_flag, NULL);
3010 /*
3011 * At present there is no dependency on the existence of xbuf.
3012 * So we can free it here. If in the future this changes, it can
3013 * be freed sometime during the segment destroy.
3014 */
3015 freerbuf(xbuf);
3016
3017 if (e != RSM_SUCCESS) {
3018 rsmseglock_release(seg);
3019 rsmexport_rm(seg);
3020 rsmacl_free(acl, acl_len);
3021 rsmpiacl_free(rsmpi_acl, acl_len);
3022 DBG_PRINTF((category, RSM_ERR,
3023 "rsm_publish done: export_create failed: %d\n", e));
3024 /*
3025 * The following assertion ensures that the two errors
3026 * related to the length and its alignment do not occur
3027 * since they have been checked during export_create
3028 */
3029 ASSERT(e != RSMERR_BAD_MEM_ALIGNMENT &&
3030 e != RSMERR_BAD_LENGTH);
3031 if (e == RSMERR_NOT_MEM)
3032 e = RSMERR_INSUFFICIENT_MEM;
3033
3034 return (e);
3035 }
3036 /* export segment, this should create an IMMU mapping */
3037 e = adapter->rsmpi_ops->rsm_publish(
3038 seg->s_handle.out,
3039 rsmpi_acl, acl_len,
3040 seg->s_segid,
3041 RSM_RESOURCE_DONTWAIT, NULL);
3042
3043 if (e != RSM_SUCCESS) {
3044 adapter->rsmpi_ops->rsm_seg_destroy(seg->s_handle.out);
3045 rsmseglock_release(seg);
3046 rsmexport_rm(seg);
3047 rsmacl_free(acl, acl_len);
3048 rsmpiacl_free(rsmpi_acl, acl_len);
3049 DBG_PRINTF((category, RSM_ERR,
3050 "rsm_publish done: export_publish failed: %d\n",
3051 e));
3052 return (e);
3053 }
3054 }
3055
3056 seg->s_acl_in = rsmpi_acl;
3057
3058 skipdriver:
3059 /* defer s_acl/s_acl_len -> avoid crash in rsmseg_free */
3060 seg->s_acl_len = acl_len;
3061 seg->s_acl = acl;
3062
3063 if (seg->s_state == RSM_STATE_BIND) {
3064 seg->s_state = RSM_STATE_EXPORT;
3065 } else if (seg->s_state == RSM_STATE_BIND_QUIESCED) {
3066 seg->s_state = RSM_STATE_EXPORT_QUIESCED;
3067 cv_broadcast(&seg->s_cv);
3068 }
3069
3070 rsmseglock_release(seg);
3071
3072 /*
3073 * If the segment id was solicited, then return it in
3074 * the original incoming message.
3075 */
3076 if (msg->key == 0) {
3077 msg->key = segment_id;
3078 #ifdef _MULTI_DATAMODEL
3079 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
3080 rsm_ioctlmsg32_t msg32;
3081
3082 msg32.key = msg->key;
3083 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3084 "rsm_publish done\n"));
3085 return (ddi_copyout((caddr_t)&msg32,
3086 (caddr_t)dataptr, sizeof (msg32), mode));
3087 }
3088 #endif
3089 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3090 "rsm_publish done\n"));
3091 return (ddi_copyout((caddr_t)msg,
3092 (caddr_t)dataptr, sizeof (*msg), mode));
3093 }
3094
3095 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_publish done\n"));
3096 return (DDI_SUCCESS);
3097 }
3098
3099 /*
3100 * This function modifies the access control list of an already published
3101 * segment. There is no effect on import segments which are already
3102 * connected.
3103 */
3104 static int
3105 rsm_republish(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int mode)
3106 {
3107 rsmapi_access_entry_t *new_acl, *old_acl, *tmp_acl;
3108 rsm_access_entry_t *rsmpi_new_acl, *rsmpi_old_acl;
3109 int new_acl_len, old_acl_len, tmp_acl_len;
3110 int e, i;
3111 adapter_t *adapter;
3112 int loopback_flag = 0;
3113 rsm_memseg_id_t key;
3114 rsm_permission_t permission;
3115 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
3116
3117 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_republish enter\n"));
3118
3119 if ((seg->s_state != RSM_STATE_EXPORT) &&
3120 (seg->s_state != RSM_STATE_EXPORT_QUIESCED) &&
3121 (seg->s_state != RSM_STATE_EXPORT_QUIESCING))
3122 return (RSMERR_SEG_NOT_PUBLISHED);
3123
3124 if (seg->s_pid != ddi_get_pid() &&
3125 ddi_get_pid() != 0) {
3126 DBG_PRINTF((category, RSM_ERR,
3127 "rsm_republish: Not owner\n"));
3128 return (RSMERR_NOT_CREATOR);
3129 }
3130
3131 if (seg->s_adapter == &loopback_adapter)
3132 loopback_flag = 1;
3133
3134 /*
3135 * Build new list first
3136 */
3137 e = rsmacl_build(msg, mode, &new_acl, &new_acl_len, loopback_flag);
3138 if (e) {
3139 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3140 "rsm_republish done: rsmacl_build failed %d", e));
3141 return (e);
3142 }
3143
3144 /* Lock segment */
3145 rsmseglock_acquire(seg);
3146 /*
3147 * a republish is in progress - REPUBLISH message is being
3148 * sent to the importers so wait for it to complete OR
3149 * wait till DR completes
3150 */
3151 while (((seg->s_state == RSM_STATE_EXPORT) &&
3152 (seg->s_flags & RSM_REPUBLISH_WAIT)) ||
3153 (seg->s_state == RSM_STATE_EXPORT_QUIESCED) ||
3154 (seg->s_state == RSM_STATE_EXPORT_QUIESCING)) {
3155 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
3156 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3157 "rsm_republish done: cv_wait INTERRUPTED"));
3158 rsmseglock_release(seg);
3159 rsmacl_free(new_acl, new_acl_len);
3160 return (RSMERR_INTERRUPTED);
3161 }
3162 }
3163
3164 /* recheck if state is valid */
3165 if (seg->s_state != RSM_STATE_EXPORT) {
3166 rsmseglock_release(seg);
3167 rsmacl_free(new_acl, new_acl_len);
3168 return (RSMERR_SEG_NOT_PUBLISHED);
3169 }
3170
3171 key = seg->s_key;
3172 old_acl = seg->s_acl;
3173 old_acl_len = seg->s_acl_len;
3174
3175 seg->s_acl = new_acl;
3176 seg->s_acl_len = new_acl_len;
3177
3178 /*
3179 * This call will only be meaningful if and when the interconnect
3180 * layer makes use of the access list
3181 */
3182 adapter = seg->s_adapter;
3183 /*
3184 * create a acl list with hwaddr for RSMPI publish
3185 */
3186 e = rsmpiacl_create(new_acl, &rsmpi_new_acl, new_acl_len, adapter);
3187
3188 if (e != RSM_SUCCESS) {
3189 seg->s_acl = old_acl;
3190 seg->s_acl_len = old_acl_len;
3191 rsmseglock_release(seg);
3192 rsmacl_free(new_acl, new_acl_len);
3193 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3194 "rsm_republish done: rsmpiacl_create failed %d", e));
3195 return (e);
3196 }
3197 rsmpi_old_acl = seg->s_acl_in;
3198 seg->s_acl_in = rsmpi_new_acl;
3199
3200 e = adapter->rsmpi_ops->rsm_republish(seg->s_handle.out,
3201 seg->s_acl_in, seg->s_acl_len,
3202 RSM_RESOURCE_DONTWAIT, NULL);
3203
3204 if (e != RSM_SUCCESS) {
3205 seg->s_acl = old_acl;
3206 seg->s_acl_in = rsmpi_old_acl;
3207 seg->s_acl_len = old_acl_len;
3208 rsmseglock_release(seg);
3209 rsmacl_free(new_acl, new_acl_len);
3210 rsmpiacl_free(rsmpi_new_acl, new_acl_len);
3211
3212 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3213 "rsm_republish done: rsmpi republish failed %d\n", e));
3214 return (e);
3215 }
3216
3217 /* create a tmp copy of the new acl */
3218 tmp_acl_len = new_acl_len;
3219 if (tmp_acl_len > 0) {
3220 tmp_acl = kmem_zalloc(new_acl_len*sizeof (*tmp_acl), KM_SLEEP);
3221 for (i = 0; i < tmp_acl_len; i++) {
3222 tmp_acl[i].ae_node = new_acl[i].ae_node;
3223 tmp_acl[i].ae_permission = new_acl[i].ae_permission;
3224 }
3225 /*
3226 * The default permission of a node which was in the old
3227 * ACL but not in the new ACL is 0 ie no access.
3228 */
3229 permission = 0;
3230 } else {
3231 /*
3232 * NULL acl means all importers can connect and
3233 * default permission will be owner creation umask
3234 */
3235 tmp_acl = NULL;
3236 permission = seg->s_mode;
3237 }
3238
3239 /* make other republishers to wait for republish to complete */
3240 seg->s_flags |= RSM_REPUBLISH_WAIT;
3241
3242 rsmseglock_release(seg);
3243
3244 /* send the new perms to the importing nodes */
3245 rsm_send_republish(key, tmp_acl, tmp_acl_len, permission);
3246
3247 rsmseglock_acquire(seg);
3248 seg->s_flags &= ~RSM_REPUBLISH_WAIT;
3249 /* wake up any one waiting for republish to complete */
3250 cv_broadcast(&seg->s_cv);
3251 rsmseglock_release(seg);
3252
3253 rsmacl_free(tmp_acl, tmp_acl_len);
3254 rsmacl_free(old_acl, old_acl_len);
3255 rsmpiacl_free(rsmpi_old_acl, old_acl_len);
3256
3257 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_republish done\n"));
3258 return (DDI_SUCCESS);
3259 }
3260
3261 static int
3262 rsm_unpublish(rsmseg_t *seg, int mode)
3263 {
3264 rsmapi_access_entry_t *acl;
3265 rsm_access_entry_t *rsmpi_acl;
3266 int acl_len;
3267 int e;
3268 adapter_t *adapter;
3269 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
3270
3271 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unpublish enter\n"));
3272
3273 if (seg->s_pid != ddi_get_pid() &&
3274 ddi_get_pid() != 0) {
3275 DBG_PRINTF((category, RSM_ERR,
3276 "rsm_unpublish: Not creator\n"));
3277 return (RSMERR_NOT_CREATOR);
3278 }
3279
3280 rsmseglock_acquire(seg);
3281 /*
3282 * wait for QUIESCING to complete here before rsmexport_rm
3283 * is called because the SUSPEND_COMPLETE mesg which changes
3284 * the seg state from EXPORT_QUIESCING to EXPORT_QUIESCED and
3285 * signals the cv_wait needs to find it in the hashtable.
3286 */
3287 while ((seg->s_state == RSM_STATE_EXPORT_QUIESCING) ||
3288 ((seg->s_state == RSM_STATE_EXPORT) && (seg->s_rdmacnt > 0))) {
3289 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
3290 rsmseglock_release(seg);
3291 DBG_PRINTF((category, RSM_ERR,
3292 "rsm_unpublish done: cv_wait INTR qscing"
3293 "getv/putv in progress"));
3294 return (RSMERR_INTERRUPTED);
3295 }
3296 }
3297
3298 /* verify segment state */
3299 if ((seg->s_state != RSM_STATE_EXPORT) &&
3300 (seg->s_state != RSM_STATE_EXPORT_QUIESCED)) {
3301 rsmseglock_release(seg);
3302 DBG_PRINTF((category, RSM_ERR,
3303 "rsm_unpublish done: bad state %x\n", seg->s_state));
3304 return (RSMERR_SEG_NOT_PUBLISHED);
3305 }
3306
3307 rsmseglock_release(seg);
3308
3309 rsmexport_rm(seg);
3310
3311 rsm_send_importer_disconnects(seg->s_segid, my_nodeid);
3312
3313 rsmseglock_acquire(seg);
3314 /*
3315 * wait for republish to complete
3316 */
3317 while ((seg->s_state == RSM_STATE_EXPORT) &&
3318 (seg->s_flags & RSM_REPUBLISH_WAIT)) {
3319 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
3320 DBG_PRINTF((category, RSM_ERR,
3321 "rsm_unpublish done: cv_wait INTR repubing"));
3322 rsmseglock_release(seg);
3323 return (RSMERR_INTERRUPTED);
3324 }
3325 }
3326
3327 if ((seg->s_state != RSM_STATE_EXPORT) &&
3328 (seg->s_state != RSM_STATE_EXPORT_QUIESCED)) {
3329 DBG_PRINTF((category, RSM_ERR,
3330 "rsm_unpublish done: invalid state"));
3331 rsmseglock_release(seg);
3332 return (RSMERR_SEG_NOT_PUBLISHED);
3333 }
3334
3335 /*
3336 * check for putv/get surrogate segment which was not published
3337 * to the driver.
3338 *
3339 * Be certain to see if there is an ACL first! If this segment was
3340 * not published with an ACL, acl will be a null pointer. Check
3341 * that before dereferencing it.
3342 */
3343 acl = seg->s_acl;
3344 if (acl != (rsmapi_access_entry_t *)NULL) {
3345 if (acl[0].ae_node == my_nodeid && acl[0].ae_permission == 0)
3346 goto bypass;
3347 }
3348
3349 /* The RSMPI unpublish/destroy has been done if seg is QUIESCED */
3350 if (seg->s_state == RSM_STATE_EXPORT_QUIESCED)
3351 goto bypass;
3352
3353 adapter = seg->s_adapter;
3354 for (;;) {
3355 if (seg->s_state != RSM_STATE_EXPORT) {
3356 rsmseglock_release(seg);
3357 DBG_PRINTF((category, RSM_ERR,
3358 "rsm_unpublish done: bad state %x\n",
3359 seg->s_state));
3360 return (RSMERR_SEG_NOT_PUBLISHED);
3361 }
3362
3363 /* unpublish from adapter */
3364 e = adapter->rsmpi_ops->rsm_unpublish(seg->s_handle.out);
3365
3366 if (e == RSM_SUCCESS) {
3367 break;
3368 }
3369
3370 if (e == RSMERR_SEG_IN_USE && mode == 1) {
3371 /*
3372 * wait for unpublish to succeed, it's busy.
3373 */
3374 seg->s_flags |= RSM_EXPORT_WAIT;
3375
3376 /* wait for a max of 1 ms - this is an empirical */
3377 /* value that was found by some minimal testing */
3378 /* can be fine tuned when we have better numbers */
3379 /* A long term fix would be to send cv_signal */
3380 /* from the intr callback routine */
3381 /* currently nobody signals this wait */
3382 (void) cv_reltimedwait(&seg->s_cv, &seg->s_lock,
3383 drv_usectohz(1000), TR_CLOCK_TICK);
3384
3385 DBG_PRINTF((category, RSM_ERR,
3386 "rsm_unpublish: SEG_IN_USE\n"));
3387
3388 seg->s_flags &= ~RSM_EXPORT_WAIT;
3389 } else {
3390 if (mode == 1) {
3391 DBG_PRINTF((category, RSM_ERR,
3392 "rsm:rsmpi unpublish err %x\n", e));
3393 seg->s_state = RSM_STATE_BIND;
3394 }
3395 rsmseglock_release(seg);
3396 return (e);
3397 }
3398 }
3399
3400 /* Free segment */
3401 e = adapter->rsmpi_ops->rsm_seg_destroy(seg->s_handle.out);
3402
3403 if (e != RSM_SUCCESS) {
3404 DBG_PRINTF((category, RSM_ERR,
3405 "rsm_unpublish: rsmpi destroy key=%x failed %x\n",
3406 seg->s_key, e));
3407 }
3408
3409 bypass:
3410 acl = seg->s_acl;
3411 rsmpi_acl = seg->s_acl_in;
3412 acl_len = seg->s_acl_len;
3413
3414 seg->s_acl = NULL;
3415 seg->s_acl_in = NULL;
3416 seg->s_acl_len = 0;
3417
3418 if (seg->s_state == RSM_STATE_EXPORT) {
3419 seg->s_state = RSM_STATE_BIND;
3420 } else if (seg->s_state == RSM_STATE_EXPORT_QUIESCED) {
3421 seg->s_state = RSM_STATE_BIND_QUIESCED;
3422 cv_broadcast(&seg->s_cv);
3423 }
3424
3425 rsmseglock_release(seg);
3426
3427 rsmacl_free(acl, acl_len);
3428 rsmpiacl_free(rsmpi_acl, acl_len);
3429
3430 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unpublish done\n"));
3431
3432 return (DDI_SUCCESS);
3433 }
3434
3435 /*
3436 * Called from rsm_unpublish to force an unload and disconnection of all
3437 * importers of the unpublished segment.
3438 *
3439 * First build the list of segments requiring a force disconnect, then
3440 * send a request for each.
3441 */
3442 static void
3443 rsm_send_importer_disconnects(rsm_memseg_id_t ex_segid,
3444 rsm_node_id_t ex_nodeid)
3445 {
3446 rsmipc_request_t request;
3447 importing_token_t *prev_token, *token, *tmp_token, *tokp;
3448 importing_token_t *force_disconnect_list = NULL;
3449 int index;
3450
3451 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
3452 "rsm_send_importer_disconnects enter\n"));
3453
3454 index = rsmhash(ex_segid);
3455
3456 mutex_enter(&importer_list.lock);
3457
3458 prev_token = NULL;
3459 token = importer_list.bucket[index];
3460
3461 while (token != NULL) {
3462 if (token->key == ex_segid) {
3463 /*
3464 * take it off the importer list and add it
3465 * to the force disconnect list.
3466 */
3467 if (prev_token == NULL)
3468 importer_list.bucket[index] = token->next;
3469 else
3470 prev_token->next = token->next;
3471 tmp_token = token;
3472 token = token->next;
3473 if (force_disconnect_list == NULL) {
3474 force_disconnect_list = tmp_token;
3475 tmp_token->next = NULL;
3476 } else {
3477 tokp = force_disconnect_list;
3478 /*
3479 * make sure that the tmp_token's node
3480 * is not already on the force disconnect
3481 * list.
3482 */
3483 while (tokp != NULL) {
3484 if (tokp->importing_node ==
3485 tmp_token->importing_node) {
3486 break;
3487 }
3488 tokp = tokp->next;
3489 }
3490 if (tokp == NULL) {
3491 tmp_token->next =
3492 force_disconnect_list;
3493 force_disconnect_list = tmp_token;
3494 } else {
3495 kmem_free((void *)tmp_token,
3496 sizeof (*token));
3497 }
3498 }
3499
3500 } else {
3501 prev_token = token;
3502 token = token->next;
3503 }
3504 }
3505 mutex_exit(&importer_list.lock);
3506
3507 token = force_disconnect_list;
3508 while (token != NULL) {
3509 if (token->importing_node == my_nodeid) {
3510 rsm_force_unload(ex_nodeid, ex_segid,
3511 DISCONNECT);
3512 } else {
3513 request.rsmipc_hdr.rsmipc_type =
3514 RSMIPC_MSG_DISCONNECT;
3515 request.rsmipc_key = token->key;
3516 for (;;) {
3517 if (rsmipc_send(token->importing_node,
3518 &request,
3519 RSM_NO_REPLY) == RSM_SUCCESS) {
3520 break;
3521 } else {
3522 delay(drv_usectohz(10000));
3523 }
3524 }
3525 }
3526 tmp_token = token;
3527 token = token->next;
3528 kmem_free((void *)tmp_token, sizeof (*token));
3529 }
3530
3531 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
3532 "rsm_send_importer_disconnects done\n"));
3533 }
3534
3535 /*
3536 * This function is used as a callback for unlocking the pages locked
3537 * down by a process which then does a fork or an exec.
3538 * It marks the export segments corresponding to umem cookie given by
3539 * the *arg to be in a ZOMBIE state(by calling rsmseg_close to be
3540 * destroyed later when an rsm_close occurs).
3541 */
3542 static void
3543 rsm_export_force_destroy(ddi_umem_cookie_t *ck)
3544 {
3545 rsmresource_blk_t *blk;
3546 rsmresource_t *p;
3547 rsmseg_t *eseg = NULL;
3548 int i, j;
3549 int found = 0;
3550
3551 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
3552 "rsm_export_force_destroy enter\n"));
3553
3554 /*
3555 * Walk the resource list and locate the export segment (either
3556 * in the BIND or the EXPORT state) which corresponds to the
3557 * ddi_umem_cookie_t being freed up, and call rsmseg_close.
3558 * Change the state to ZOMBIE by calling rsmseg_close with the
3559 * force_flag argument (the second argument) set to 1. Also,
3560 * unpublish and unbind the segment, but don't free it. Free it
3561 * only on a rsm_close call for the segment.
3562 */
3563 rw_enter(&rsm_resource.rsmrc_lock, RW_READER);
3564
3565 for (i = 0; i < rsm_resource.rsmrc_len; i++) {
3566 blk = rsm_resource.rsmrc_root[i];
3567 if (blk == NULL) {
3568 continue;
3569 }
3570
3571 for (j = 0; j < RSMRC_BLKSZ; j++) {
3572 p = blk->rsmrcblk_blks[j];
3573 if ((p != NULL) && (p != RSMRC_RESERVED) &&
3574 (p->rsmrc_type == RSM_RESOURCE_EXPORT_SEGMENT)) {
3575 eseg = (rsmseg_t *)p;
3576 if (eseg->s_cookie != ck)
3577 continue; /* continue searching */
3578 /*
3579 * Found the segment, set flag to indicate
3580 * force destroy processing is in progress
3581 */
3582 rsmseglock_acquire(eseg);
3583 eseg->s_flags |= RSM_FORCE_DESTROY_WAIT;
3584 rsmseglock_release(eseg);
3585 found = 1;
3586 break;
3587 }
3588 }
3589
3590 if (found)
3591 break;
3592 }
3593
3594 rw_exit(&rsm_resource.rsmrc_lock);
3595
3596 if (found) {
3597 ASSERT(eseg != NULL);
3598 /* call rsmseg_close with force flag set to 1 */
3599 rsmseg_close(eseg, 1);
3600 /*
3601 * force destroy processing done, clear flag and signal any
3602 * thread waiting in rsmseg_close.
3603 */
3604 rsmseglock_acquire(eseg);
3605 eseg->s_flags &= ~RSM_FORCE_DESTROY_WAIT;
3606 cv_broadcast(&eseg->s_cv);
3607 rsmseglock_release(eseg);
3608 }
3609
3610 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
3611 "rsm_export_force_destroy done\n"));
3612 }
3613
3614 /* ******************************* Remote Calls *********************** */
3615 static void
3616 rsm_intr_segconnect(rsm_node_id_t src, rsmipc_request_t *req)
3617 {
3618 rsmipc_reply_t reply;
3619 DBG_DEFINE(category,
3620 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3621
3622 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3623 "rsm_intr_segconnect enter\n"));
3624
3625 reply.rsmipc_status = (short)rsmsegacl_validate(req, src, &reply);
3626
3627 reply.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_REPLY;
3628 reply.rsmipc_hdr.rsmipc_cookie = req->rsmipc_hdr.rsmipc_cookie;
3629
3630 (void) rsmipc_send(src, NULL, &reply);
3631
3632 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3633 "rsm_intr_segconnect done\n"));
3634 }
3635
3636
3637 /*
3638 * When an exported segment is unpublished the exporter sends an ipc
3639 * message (RSMIPC_MSG_DISCONNECT) to all importers. The recv ipc dispatcher
3640 * calls this function. The import list is scanned; segments which match the
3641 * exported segment id are unloaded and disconnected.
3642 *
3643 * Will also be called from rsm_rebind with disconnect_flag FALSE.
3644 *
3645 */
3646 static void
3647 rsm_force_unload(rsm_node_id_t src_nodeid,
3648 rsm_memseg_id_t ex_segid,
3649 boolean_t disconnect_flag)
3650
3651 {
3652 rsmresource_t *p = NULL;
3653 rsmhash_table_t *rhash = &rsm_import_segs;
3654 uint_t index;
3655 DBG_DEFINE(category,
3656 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3657
3658 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_force_unload enter\n"));
3659
3660 index = rsmhash(ex_segid);
3661
3662 rw_enter(&rhash->rsmhash_rw, RW_READER);
3663
3664 p = rsmhash_getbkt(rhash, index);
3665
3666 for (; p; p = p->rsmrc_next) {
3667 rsmseg_t *seg = (rsmseg_t *)p;
3668 if ((seg->s_segid == ex_segid) && (seg->s_node == src_nodeid)) {
3669 /*
3670 * In order to make rsmseg_unload and rsm_force_unload
3671 * thread safe, acquire the segment lock here.
3672 * rsmseg_unload is responsible for releasing the lock.
3673 * rsmseg_unload releases the lock just before a call
3674 * to rsmipc_send or in case of an early exit which
3675 * occurs if the segment was in the state
3676 * RSM_STATE_CONNECTING or RSM_STATE_NEW.
3677 */
3678 rsmseglock_acquire(seg);
3679 if (disconnect_flag)
3680 seg->s_flags |= RSM_FORCE_DISCONNECT;
3681 rsmseg_unload(seg);
3682 }
3683 }
3684 rw_exit(&rhash->rsmhash_rw);
3685
3686 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_force_unload done\n"));
3687 }
3688
3689 static void
3690 rsm_intr_reply(rsmipc_msghdr_t *msg)
3691 {
3692 /*
3693 * Find slot for cookie in reply.
3694 * Match sequence with sequence in cookie
3695 * If no match; return
3696 * Try to grap lock of slot, if locked return
3697 * copy data into reply slot area
3698 * signal waiter
3699 */
3700 rsmipc_slot_t *slot;
3701 rsmipc_cookie_t *cookie;
3702 void *data = (void *) msg;
3703 size_t size = sizeof (rsmipc_reply_t);
3704 DBG_DEFINE(category,
3705 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3706
3707 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_reply enter\n"));
3708
3709 cookie = &msg->rsmipc_cookie;
3710 if (cookie->ic.index >= RSMIPC_SZ) {
3711 DBG_PRINTF((category, RSM_ERR,
3712 "rsm: rsm_intr_reply bad cookie %d\n", cookie->ic.index));
3713 return;
3714 }
3715
3716 ASSERT(cookie->ic.index < RSMIPC_SZ);
3717 slot = &rsm_ipc.slots[cookie->ic.index];
3718 mutex_enter(&slot->rsmipc_lock);
3719 if (slot->rsmipc_cookie.value == cookie->value) {
3720 /* found a match */
3721 if (RSMIPC_GET(slot, RSMIPC_PENDING)) {
3722 bcopy(data, slot->rsmipc_data, size);
3723 RSMIPC_CLEAR(slot, RSMIPC_PENDING);
3724 cv_signal(&slot->rsmipc_cv);
3725 }
3726 } else {
3727 DBG_PRINTF((category, RSM_DEBUG,
3728 "rsm: rsm_intr_reply mismatched reply %d\n",
3729 cookie->ic.index));
3730 }
3731 mutex_exit(&slot->rsmipc_lock);
3732 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_reply done\n"));
3733 }
3734
3735 /*
3736 * This function gets dispatched on the worker thread when we receive
3737 * the SQREADY message. This function sends the SQREADY_ACK message.
3738 */
3739 static void
3740 rsm_sqready_ack_deferred(void *arg)
3741 {
3742 path_t *path = (path_t *)arg;
3743 DBG_DEFINE(category,
3744 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3745
3746 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3747 "rsm_sqready_ack_deferred enter\n"));
3748
3749 mutex_enter(&path->mutex);
3750
3751 /*
3752 * If path is not active no point in sending the ACK
3753 * because the whole SQREADY protocol will again start
3754 * when the path becomes active.
3755 */
3756 if (path->state != RSMKA_PATH_ACTIVE) {
3757 /*
3758 * decrement the path refcnt incremented in rsm_proc_sqready
3759 */
3760 PATH_RELE_NOLOCK(path);
3761 mutex_exit(&path->mutex);
3762 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3763 "rsm_sqready_ack_deferred done:!ACTIVE\n"));
3764 return;
3765 }
3766
3767 /* send an SQREADY_ACK message */
3768 (void) rsmipc_send_controlmsg(path, RSMIPC_MSG_SQREADY_ACK);
3769
3770 /* initialize credits to the max level */
3771 path->sendq_token.msgbuf_avail = RSMIPC_MAX_MESSAGES;
3772
3773 /* wake up any send that is waiting for credits */
3774 cv_broadcast(&path->sendq_token.sendq_cv);
3775
3776 /*
3777 * decrement the path refcnt since we incremented it in
3778 * rsm_proc_sqready
3779 */
3780 PATH_RELE_NOLOCK(path);
3781
3782 mutex_exit(&path->mutex);
3783
3784 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3785 "rsm_sqready_ack_deferred done\n"));
3786 }
3787
3788 /*
3789 * Process the SQREADY message
3790 */
3791 static void
3792 rsm_proc_sqready(rsmipc_controlmsg_t *msg, rsm_addr_t src_hwaddr,
3793 rsm_intr_hand_arg_t arg)
3794 {
3795 rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)msg;
3796 srv_handler_arg_t *hdlr_argp = (srv_handler_arg_t *)arg;
3797 path_t *path;
3798 DBG_DEFINE(category,
3799 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3800
3801 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_proc_sqready enter\n"));
3802
3803 /* look up the path - incr the path refcnt */
3804 path = rsm_find_path(hdlr_argp->adapter_name,
3805 hdlr_argp->adapter_instance, src_hwaddr);
3806
3807 /*
3808 * No path exists or path is not active - drop the message
3809 */
3810 if (path == NULL) {
3811 DBG_PRINTF((category, RSM_DEBUG,
3812 "rsm_proc_sqready done: msg dropped no path\n"));
3813 return;
3814 }
3815
3816 mutex_exit(&path->mutex);
3817
3818 /* drain any tasks from the previous incarnation */
3819 taskq_wait(path->recv_taskq);
3820
3821 mutex_enter(&path->mutex);
3822 /*
3823 * If we'd sent an SQREADY message and were waiting for SQREADY_ACK
3824 * in the meanwhile we received an SQREADY message, blindly reset
3825 * the WAIT_FOR_SQACK flag because we'll just send SQREADY_ACK
3826 * and forget about the SQREADY that we sent.
3827 */
3828 path->flags &= ~RSMKA_WAIT_FOR_SQACK;
3829
3830 if (path->state != RSMKA_PATH_ACTIVE) {
3831 /* decr refcnt and drop the mutex */
3832 PATH_RELE_NOLOCK(path);
3833 mutex_exit(&path->mutex);
3834 DBG_PRINTF((category, RSM_DEBUG,
3835 "rsm_proc_sqready done: msg dropped path !ACTIVE\n"));
3836 return;
3837 }
3838
3839 DBG_PRINTF((category, RSM_DEBUG, "rsm_proc_sqready:path=%lx "
3840 " src=%lx:%llx\n", path, msghdr->rsmipc_src, src_hwaddr));
3841
3842 /*
3843 * The sender's local incarnation number is our remote incarnation
3844 * number save it in the path data structure
3845 */
3846 path->remote_incn = msg->rsmipc_local_incn;
3847 path->sendq_token.msgbuf_avail = 0;
3848 path->procmsg_cnt = 0;
3849
3850 /*
3851 * path is active - dispatch task to send SQREADY_ACK - remember
3852 * RSMPI calls can't be done in interrupt context
3853 *
3854 * We can use the recv_taskq to send because the remote endpoint
3855 * cannot start sending messages till it receives SQREADY_ACK hence
3856 * at this point there are no tasks on recv_taskq.
3857 *
3858 * The path refcnt will be decremented in rsm_sqready_ack_deferred.
3859 */
3860 (void) taskq_dispatch(path->recv_taskq,
3861 rsm_sqready_ack_deferred, path, KM_NOSLEEP);
3862
3863 mutex_exit(&path->mutex);
3864
3865
3866 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_proc_sqready done\n"));
3867 }
3868
3869 /*
3870 * Process the SQREADY_ACK message
3871 */
3872 static void
3873 rsm_proc_sqready_ack(rsmipc_controlmsg_t *msg, rsm_addr_t src_hwaddr,
3874 rsm_intr_hand_arg_t arg)
3875 {
3876 rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)msg;
3877 srv_handler_arg_t *hdlr_argp = (srv_handler_arg_t *)arg;
3878 path_t *path;
3879 DBG_DEFINE(category,
3880 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3881
3882 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3883 "rsm_proc_sqready_ack enter\n"));
3884
3885 /* look up the path - incr the path refcnt */
3886 path = rsm_find_path(hdlr_argp->adapter_name,
3887 hdlr_argp->adapter_instance, src_hwaddr);
3888
3889 /*
3890 * drop the message if - no path exists or path is not active
3891 * or if its not waiting for SQREADY_ACK message
3892 */
3893 if (path == NULL) {
3894 DBG_PRINTF((category, RSM_DEBUG,
3895 "rsm_proc_sqready_ack done: msg dropped no path\n"));
3896 return;
3897 }
3898
3899 if ((path->state != RSMKA_PATH_ACTIVE) ||
3900 !(path->flags & RSMKA_WAIT_FOR_SQACK)) {
3901 /* decrement the refcnt */
3902 PATH_RELE_NOLOCK(path);
3903 mutex_exit(&path->mutex);
3904 DBG_PRINTF((category, RSM_DEBUG,
3905 "rsm_proc_sqready_ack done: msg dropped\n"));
3906 return;
3907 }
3908
3909 /*
3910 * Check if this message is in response to the last RSMIPC_MSG_SQREADY
3911 * sent, if not drop it.
3912 */
3913 if (path->local_incn != msghdr->rsmipc_incn) {
3914 /* decrement the refcnt */
3915 PATH_RELE_NOLOCK(path);
3916 mutex_exit(&path->mutex);
3917 DBG_PRINTF((category, RSM_DEBUG,
3918 "rsm_proc_sqready_ack done: msg old incn %lld\n",
3919 msghdr->rsmipc_incn));
3920 return;
3921 }
3922
3923 DBG_PRINTF((category, RSM_DEBUG, "rsm_proc_sqready_ack:path=%lx "
3924 " src=%lx:%llx\n", path, msghdr->rsmipc_src, src_hwaddr));
3925
3926 /*
3927 * clear the WAIT_FOR_SQACK flag since we have recvd the ack
3928 */
3929 path->flags &= ~RSMKA_WAIT_FOR_SQACK;
3930
3931 /* save the remote sendq incn number */
3932 path->remote_incn = msg->rsmipc_local_incn;
3933
3934 /* initialize credits to the max level */
3935 path->sendq_token.msgbuf_avail = RSMIPC_MAX_MESSAGES;
3936
3937 /* wake up any send that is waiting for credits */
3938 cv_broadcast(&path->sendq_token.sendq_cv);
3939
3940 /* decrement the refcnt */
3941 PATH_RELE_NOLOCK(path);
3942
3943 mutex_exit(&path->mutex);
3944
3945 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3946 "rsm_proc_sqready_ack done\n"));
3947 }
3948
3949 /*
3950 * process the RSMIPC_MSG_CREDIT message
3951 */
3952 static void
3953 rsm_add_credits(rsmipc_controlmsg_t *msg, rsm_addr_t src_hwaddr,
3954 rsm_intr_hand_arg_t arg)
3955 {
3956 rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)msg;
3957 srv_handler_arg_t *hdlr_argp = (srv_handler_arg_t *)arg;
3958 path_t *path;
3959 DBG_DEFINE(category,
3960 RSM_KERNEL_AGENT | RSM_FUNC_ALL |
3961 RSM_INTR_CALLBACK | RSM_FLOWCONTROL);
3962
3963 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_add_credits enter\n"));
3964
3965 /* look up the path - incr the path refcnt */
3966 path = rsm_find_path(hdlr_argp->adapter_name,
3967 hdlr_argp->adapter_instance, src_hwaddr);
3968
3969 if (path == NULL) {
3970 DBG_PRINTF((category, RSM_DEBUG,
3971 "rsm_add_credits enter: path not found\n"));
3972 return;
3973 }
3974
3975 /* the path is not active - discard credits */
3976 if (path->state != RSMKA_PATH_ACTIVE) {
3977 PATH_RELE_NOLOCK(path);
3978 mutex_exit(&path->mutex);
3979 DBG_PRINTF((category, RSM_DEBUG,
3980 "rsm_add_credits enter:path=%lx !ACTIVE\n", path));
3981 return;
3982 }
3983
3984 /*
3985 * Check if these credits are for current incarnation of the path.
3986 */
3987 if (path->local_incn != msghdr->rsmipc_incn) {
3988 /* decrement the refcnt */
3989 PATH_RELE_NOLOCK(path);
3990 mutex_exit(&path->mutex);
3991 DBG_PRINTF((category, RSM_DEBUG,
3992 "rsm_add_credits enter: old incn %lld\n",
3993 msghdr->rsmipc_incn));
3994 return;
3995 }
3996
3997 DBG_PRINTF((category, RSM_DEBUG,
3998 "rsm_add_credits:path=%lx new-creds=%d "
3999 "curr credits=%d src=%lx:%llx\n", path, msg->rsmipc_credits,
4000 path->sendq_token.msgbuf_avail, msghdr->rsmipc_src,
4001 src_hwaddr));
4002
4003
4004 /* add credits to the path's sendq */
4005 path->sendq_token.msgbuf_avail += msg->rsmipc_credits;
4006
4007 ASSERT(path->sendq_token.msgbuf_avail <= RSMIPC_MAX_MESSAGES);
4008
4009 /* wake up any send that is waiting for credits */
4010 cv_broadcast(&path->sendq_token.sendq_cv);
4011
4012 /* decrement the refcnt */
4013 PATH_RELE_NOLOCK(path);
4014
4015 mutex_exit(&path->mutex);
4016
4017 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_add_credits done\n"));
4018 }
4019
4020 static void
4021 rsm_intr_event(rsmipc_request_t *msg)
4022 {
4023 rsmseg_t *seg;
4024 rsmresource_t *p;
4025 rsm_node_id_t src_node;
4026 DBG_DEFINE(category,
4027 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4028
4029 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_event enter\n"));
4030
4031 src_node = msg->rsmipc_hdr.rsmipc_src;
4032
4033 if ((seg = msg->rsmipc_segment_cookie) != NULL) {
4034 /* This is for an import segment */
4035 uint_t hashval = rsmhash(msg->rsmipc_key);
4036
4037 rw_enter(&rsm_import_segs.rsmhash_rw, RW_READER);
4038
4039 p = (rsmresource_t *)rsmhash_getbkt(&rsm_import_segs, hashval);
4040
4041 for (; p; p = p->rsmrc_next) {
4042 if ((p->rsmrc_key == msg->rsmipc_key) &&
4043 (p->rsmrc_node == src_node)) {
4044 seg = (rsmseg_t *)p;
4045 rsmseglock_acquire(seg);
4046
4047 atomic_inc_32(&seg->s_pollevent);
4048
4049 if (seg->s_pollflag & RSM_SEGMENT_POLL)
4050 pollwakeup(&seg->s_poll, POLLRDNORM);
4051
4052 rsmseglock_release(seg);
4053 }
4054 }
4055
4056 rw_exit(&rsm_import_segs.rsmhash_rw);
4057 } else {
4058 /* This is for an export segment */
4059 seg = rsmexport_lookup(msg->rsmipc_key);
4060 if (!seg) {
4061 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4062 "rsm_intr_event done: exp seg not found\n"));
4063 return;
4064 }
4065
4066 ASSERT(rsmseglock_held(seg));
4067
4068 atomic_inc_32(&seg->s_pollevent);
4069
4070 /*
4071 * We must hold the segment lock here, or else the segment
4072 * can be freed while pollwakeup is using it. This implies
4073 * that we MUST NOT grab the segment lock during rsm_chpoll,
4074 * as outlined in the chpoll(2) man page.
4075 */
4076 if (seg->s_pollflag & RSM_SEGMENT_POLL)
4077 pollwakeup(&seg->s_poll, POLLRDNORM);
4078
4079 rsmseglock_release(seg);
4080 }
4081
4082 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_event done\n"));
4083 }
4084
4085 /*
4086 * The exporter did a republish and changed the ACL - this change is only
4087 * visible to new importers.
4088 */
4089 static void
4090 importer_update(rsm_node_id_t src_node, rsm_memseg_id_t key,
4091 rsm_permission_t perm)
4092 {
4093
4094 rsmresource_t *p;
4095 rsmseg_t *seg;
4096 uint_t hashval = rsmhash(key);
4097 DBG_DEFINE(category,
4098 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4099
4100 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_update enter\n"));
4101
4102 rw_enter(&rsm_import_segs.rsmhash_rw, RW_READER);
4103
4104 p = (rsmresource_t *)rsmhash_getbkt(&rsm_import_segs, hashval);
4105
4106 for (; p; p = p->rsmrc_next) {
4107 /*
4108 * find the importer and update the permission in the shared
4109 * data structure. Any new importers will use the new perms
4110 */
4111 if ((p->rsmrc_key == key) && (p->rsmrc_node == src_node)) {
4112 seg = (rsmseg_t *)p;
4113
4114 rsmseglock_acquire(seg);
4115 rsmsharelock_acquire(seg);
4116 seg->s_share->rsmsi_mode = perm;
4117 rsmsharelock_release(seg);
4118 rsmseglock_release(seg);
4119
4120 break;
4121 }
4122 }
4123
4124 rw_exit(&rsm_import_segs.rsmhash_rw);
4125
4126 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_update done\n"));
4127 }
4128
4129 void
4130 rsm_suspend_complete(rsm_node_id_t src_node, int flag)
4131 {
4132 int done = 1; /* indicate all SUSPENDS have been acked */
4133 list_element_t *elem;
4134 DBG_DEFINE(category,
4135 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4136
4137 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4138 "rsm_suspend_complete enter\n"));
4139
4140 mutex_enter(&rsm_suspend_list.list_lock);
4141
4142 if (rsm_suspend_list.list_head == NULL) {
4143 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4144 "rsm_suspend_complete done: suspend_list is empty\n"));
4145 mutex_exit(&rsm_suspend_list.list_lock);
4146 return;
4147 }
4148
4149 elem = rsm_suspend_list.list_head;
4150 while (elem != NULL) {
4151 if (elem->nodeid == src_node) {
4152 /* clear the pending flag for the node */
4153 elem->flags &= ~RSM_SUSPEND_ACKPENDING;
4154 elem->flags |= flag;
4155 }
4156
4157 if (done && (elem->flags & RSM_SUSPEND_ACKPENDING))
4158 done = 0; /* still some nodes have not yet ACKED */
4159
4160 elem = elem->next;
4161 }
4162
4163 mutex_exit(&rsm_suspend_list.list_lock);
4164
4165 if (!done) {
4166 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4167 "rsm_suspend_complete done: acks pending\n"));
4168 return;
4169 }
4170 /*
4171 * Now that we are done with suspending all the remote importers
4172 * time to quiesce the local exporters
4173 */
4174 exporter_quiesce();
4175
4176 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4177 "rsm_suspend_complete done\n"));
4178 }
4179
4180 static void
4181 exporter_quiesce()
4182 {
4183 int i, e;
4184 rsmresource_t *current;
4185 rsmseg_t *seg;
4186 adapter_t *adapter;
4187 DBG_DEFINE(category,
4188 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4189
4190 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "exporter_quiesce enter\n"));
4191 /*
4192 * The importers send a SUSPEND_COMPLETE to the exporter node
4193 * Unpublish, unbind the export segment and
4194 * move the segments to the EXPORT_QUIESCED state
4195 */
4196
4197 rw_enter(&rsm_export_segs.rsmhash_rw, RW_READER);
4198
4199 for (i = 0; i < rsm_hash_size; i++) {
4200 current = rsm_export_segs.bucket[i];
4201 while (current != NULL) {
4202 seg = (rsmseg_t *)current;
4203 rsmseglock_acquire(seg);
4204 if (current->rsmrc_state ==
4205 RSM_STATE_EXPORT_QUIESCING) {
4206 adapter = seg->s_adapter;
4207 /*
4208 * some local memory handles are not published
4209 * check if it was published
4210 */
4211 if ((seg->s_acl == NULL) ||
4212 (seg->s_acl[0].ae_node != my_nodeid) ||
4213 (seg->s_acl[0].ae_permission != 0)) {
4214
4215 e = adapter->rsmpi_ops->rsm_unpublish(
4216 seg->s_handle.out);
4217 DBG_PRINTF((category, RSM_DEBUG,
4218 "exporter_quiesce:unpub %d\n", e));
4219
4220 e = adapter->rsmpi_ops->rsm_seg_destroy(
4221 seg->s_handle.out);
4222
4223 DBG_PRINTF((category, RSM_DEBUG,
4224 "exporter_quiesce:destroy %d\n",
4225 e));
4226 }
4227
4228 (void) rsm_unbind_pages(seg);
4229 seg->s_state = RSM_STATE_EXPORT_QUIESCED;
4230 cv_broadcast(&seg->s_cv);
4231 }
4232 rsmseglock_release(seg);
4233 current = current->rsmrc_next;
4234 }
4235 }
4236 rw_exit(&rsm_export_segs.rsmhash_rw);
4237
4238 /*
4239 * All the local segments we are done with the pre-del processing
4240 * - time to move to PREDEL_COMPLETED.
4241 */
4242
4243 mutex_enter(&rsm_drv_data.drv_lock);
4244
4245 ASSERT(rsm_drv_data.drv_state == RSM_DRV_PREDEL_STARTED);
4246
4247 rsm_drv_data.drv_state = RSM_DRV_PREDEL_COMPLETED;
4248
4249 cv_broadcast(&rsm_drv_data.drv_cv);
4250
4251 mutex_exit(&rsm_drv_data.drv_lock);
4252
4253 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "exporter_quiesce done\n"));
4254 }
4255
4256 static void
4257 importer_suspend(rsm_node_id_t src_node)
4258 {
4259 int i;
4260 int susp_flg; /* true means already suspended */
4261 int num_importers;
4262 rsmresource_t *p = NULL, *curp;
4263 rsmhash_table_t *rhash = &rsm_import_segs;
4264 rsmseg_t *seg;
4265 rsmipc_request_t request;
4266 DBG_DEFINE(category,
4267 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4268
4269 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_suspend enter\n"));
4270
4271 rw_enter(&rhash->rsmhash_rw, RW_READER);
4272 for (i = 0; i < rsm_hash_size; i++) {
4273 p = rhash->bucket[i];
4274
4275 /*
4276 * Suspend all importers with same <node, key> pair.
4277 * After the last one of the shared importers has been
4278 * suspended - suspend the shared mappings/connection.
4279 */
4280 for (; p; p = p->rsmrc_next) {
4281 rsmseg_t *first = (rsmseg_t *)p;
4282 if ((first->s_node != src_node) ||
4283 (first->s_state == RSM_STATE_DISCONNECT))
4284 continue; /* go to next entry */
4285 /*
4286 * search the rest of the bucket for
4287 * other siblings (imprtrs with the same key)
4288 * of "first" and suspend them.
4289 * All importers with same key fall in
4290 * the same bucket.
4291 */
4292 num_importers = 0;
4293 for (curp = p; curp; curp = curp->rsmrc_next) {
4294 seg = (rsmseg_t *)curp;
4295
4296 rsmseglock_acquire(seg);
4297
4298 if ((seg->s_node != first->s_node) ||
4299 (seg->s_key != first->s_key) ||
4300 (seg->s_state == RSM_STATE_DISCONNECT)) {
4301 /*
4302 * either not a peer segment or its a
4303 * disconnected segment - skip it
4304 */
4305 rsmseglock_release(seg);
4306 continue;
4307 }
4308
4309 rsmseg_suspend(seg, &susp_flg);
4310
4311 if (susp_flg) { /* seg already suspended */
4312 rsmseglock_release(seg);
4313 break; /* the inner for loop */
4314 }
4315
4316 num_importers++;
4317 rsmsharelock_acquire(seg);
4318 /*
4319 * we've processed all importers that are
4320 * siblings of "first"
4321 */
4322 if (num_importers ==
4323 seg->s_share->rsmsi_refcnt) {
4324 rsmsharelock_release(seg);
4325 rsmseglock_release(seg);
4326 break;
4327 }
4328 rsmsharelock_release(seg);
4329 rsmseglock_release(seg);
4330 }
4331
4332 /*
4333 * All the importers with the same key and
4334 * nodeid as "first" have been suspended.
4335 * Now suspend the shared connect/mapping.
4336 * This is done only once.
4337 */
4338 if (!susp_flg) {
4339 rsmsegshare_suspend(seg);
4340 }
4341 }
4342 }
4343
4344 rw_exit(&rhash->rsmhash_rw);
4345
4346 /* send an ACK for SUSPEND message */
4347 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_SUSPEND_DONE;
4348 (void) rsmipc_send(src_node, &request, RSM_NO_REPLY);
4349
4350
4351 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_suspend done\n"));
4352
4353 }
4354
4355 static void
4356 rsmseg_suspend(rsmseg_t *seg, int *susp_flg)
4357 {
4358 int recheck_state;
4359 rsmcookie_t *hdl;
4360 DBG_DEFINE(category,
4361 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4362
4363 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4364 "rsmseg_suspend enter: key=%u\n", seg->s_key));
4365
4366 *susp_flg = 0;
4367
4368 ASSERT(rsmseglock_held(seg));
4369 /* wait if putv/getv is in progress */
4370 while (seg->s_rdmacnt > 0)
4371 cv_wait(&seg->s_cv, &seg->s_lock);
4372
4373 do {
4374 recheck_state = 0;
4375
4376 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4377 "rsmseg_suspend:segment %x state=%d\n",
4378 seg->s_key, seg->s_state));
4379
4380 switch (seg->s_state) {
4381 case RSM_STATE_NEW:
4382 /* not a valid state */
4383 break;
4384 case RSM_STATE_CONNECTING:
4385 seg->s_state = RSM_STATE_ABORT_CONNECT;
4386 break;
4387 case RSM_STATE_ABORT_CONNECT:
4388 break;
4389 case RSM_STATE_CONNECT:
4390 seg->s_handle.in = NULL;
4391 seg->s_state = RSM_STATE_CONN_QUIESCE;
4392 break;
4393 case RSM_STATE_MAPPING:
4394 /* wait until segment leaves the mapping state */
4395 while (seg->s_state == RSM_STATE_MAPPING)
4396 cv_wait(&seg->s_cv, &seg->s_lock);
4397 recheck_state = 1;
4398 break;
4399 case RSM_STATE_ACTIVE:
4400 /* unload the mappings */
4401 if (seg->s_ckl != NULL) {
4402 hdl = seg->s_ckl;
4403 for (; hdl != NULL; hdl = hdl->c_next) {
4404 (void) devmap_unload(hdl->c_dhp,
4405 hdl->c_off, hdl->c_len);
4406 }
4407 }
4408 seg->s_mapinfo = NULL;
4409 seg->s_state = RSM_STATE_MAP_QUIESCE;
4410 break;
4411 case RSM_STATE_CONN_QUIESCE:
4412 /* FALLTHRU */
4413 case RSM_STATE_MAP_QUIESCE:
4414 /* rsmseg_suspend already done for seg */
4415 *susp_flg = 1;
4416 break;
4417 case RSM_STATE_DISCONNECT:
4418 break;
4419 default:
4420 ASSERT(0); /* invalid state */
4421 }
4422 } while (recheck_state);
4423
4424 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_suspend done\n"));
4425 }
4426
4427 static void
4428 rsmsegshare_suspend(rsmseg_t *seg)
4429 {
4430 int e;
4431 adapter_t *adapter;
4432 rsm_import_share_t *sharedp;
4433 DBG_DEFINE(category,
4434 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4435
4436 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4437 "rsmsegshare_suspend enter\n"));
4438
4439 rsmseglock_acquire(seg);
4440 rsmsharelock_acquire(seg);
4441
4442 sharedp = seg->s_share;
4443 adapter = seg->s_adapter;
4444 switch (sharedp->rsmsi_state) {
4445 case RSMSI_STATE_NEW:
4446 break;
4447 case RSMSI_STATE_CONNECTING:
4448 sharedp->rsmsi_state = RSMSI_STATE_ABORT_CONNECT;
4449 break;
4450 case RSMSI_STATE_ABORT_CONNECT:
4451 break;
4452 case RSMSI_STATE_CONNECTED:
4453 /* do the rsmpi disconnect */
4454 if (sharedp->rsmsi_node != my_nodeid) {
4455 e = adapter->rsmpi_ops->
4456 rsm_disconnect(sharedp->rsmsi_handle);
4457
4458 DBG_PRINTF((category, RSM_DEBUG,
4459 "rsm:rsmpi disconnect seg=%x:err=%d\n",
4460 sharedp->rsmsi_segid, e));
4461 }
4462
4463 sharedp->rsmsi_handle = NULL;
4464
4465 sharedp->rsmsi_state = RSMSI_STATE_CONN_QUIESCE;
4466 break;
4467 case RSMSI_STATE_CONN_QUIESCE:
4468 break;
4469 case RSMSI_STATE_MAPPED:
4470 /* do the rsmpi unmap and disconnect */
4471 if (sharedp->rsmsi_node != my_nodeid) {
4472 e = adapter->rsmpi_ops->rsm_unmap(seg->s_handle.in);
4473
4474 DBG_PRINTF((category, RSM_DEBUG,
4475 "rsmshare_suspend: rsmpi unmap %d\n", e));
4476
4477 e = adapter->rsmpi_ops->
4478 rsm_disconnect(sharedp->rsmsi_handle);
4479 DBG_PRINTF((category, RSM_DEBUG,
4480 "rsm:rsmpi disconnect seg=%x:err=%d\n",
4481 sharedp->rsmsi_segid, e));
4482 }
4483
4484 sharedp->rsmsi_handle = NULL;
4485
4486 sharedp->rsmsi_state = RSMSI_STATE_MAP_QUIESCE;
4487 break;
4488 case RSMSI_STATE_MAP_QUIESCE:
4489 break;
4490 case RSMSI_STATE_DISCONNECTED:
4491 break;
4492 default:
4493 ASSERT(0); /* invalid state */
4494 }
4495
4496 rsmsharelock_release(seg);
4497 rsmseglock_release(seg);
4498
4499 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4500 "rsmsegshare_suspend done\n"));
4501 }
4502
4503 /*
4504 * This should get called on receiving a RESUME message or from
4505 * the pathmanger if the node undergoing DR dies.
4506 */
4507 static void
4508 importer_resume(rsm_node_id_t src_node)
4509 {
4510 int i;
4511 rsmresource_t *p = NULL;
4512 rsmhash_table_t *rhash = &rsm_import_segs;
4513 void *cookie;
4514 DBG_DEFINE(category,
4515 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4516
4517 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_resume enter\n"));
4518
4519 rw_enter(&rhash->rsmhash_rw, RW_READER);
4520
4521 for (i = 0; i < rsm_hash_size; i++) {
4522 p = rhash->bucket[i];
4523
4524 for (; p; p = p->rsmrc_next) {
4525 rsmseg_t *seg = (rsmseg_t *)p;
4526
4527 rsmseglock_acquire(seg);
4528
4529 /* process only importers of node undergoing DR */
4530 if (seg->s_node != src_node) {
4531 rsmseglock_release(seg);
4532 continue;
4533 }
4534
4535 if (rsmseg_resume(seg, &cookie) != RSM_SUCCESS) {
4536 rsmipc_request_t request;
4537 /*
4538 * rsmpi map/connect failed
4539 * inform the exporter so that it can
4540 * remove the importer.
4541 */
4542 request.rsmipc_hdr.rsmipc_type =
4543 RSMIPC_MSG_NOTIMPORTING;
4544 request.rsmipc_key = seg->s_segid;
4545 request.rsmipc_segment_cookie = cookie;
4546 rsmseglock_release(seg);
4547 (void) rsmipc_send(seg->s_node, &request,
4548 RSM_NO_REPLY);
4549 } else {
4550 rsmseglock_release(seg);
4551 }
4552 }
4553 }
4554
4555 rw_exit(&rhash->rsmhash_rw);
4556
4557 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_resume done\n"));
4558 }
4559
4560 static int
4561 rsmseg_resume(rsmseg_t *seg, void **cookie)
4562 {
4563 int e;
4564 int retc;
4565 off_t dev_offset;
4566 size_t maplen;
4567 uint_t maxprot;
4568 rsm_mapinfo_t *p;
4569 rsmcookie_t *hdl;
4570 rsm_import_share_t *sharedp;
4571 DBG_DEFINE(category,
4572 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4573
4574 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4575 "rsmseg_resume enter: key=%u\n", seg->s_key));
4576
4577 *cookie = NULL;
4578
4579 ASSERT(rsmseglock_held(seg));
4580
4581 if ((seg->s_state != RSM_STATE_CONN_QUIESCE) &&
4582 (seg->s_state != RSM_STATE_MAP_QUIESCE)) {
4583 return (RSM_SUCCESS);
4584 }
4585
4586 sharedp = seg->s_share;
4587
4588 rsmsharelock_acquire(seg);
4589
4590 /* resume the shared connection and/or mapping */
4591 retc = rsmsegshare_resume(seg);
4592
4593 if (seg->s_state == RSM_STATE_CONN_QUIESCE) {
4594 /* shared state can either be connected or mapped */
4595 if ((sharedp->rsmsi_state == RSMSI_STATE_CONNECTED) ||
4596 (sharedp->rsmsi_state == RSMSI_STATE_MAPPED)) {
4597 ASSERT(retc == RSM_SUCCESS);
4598 seg->s_handle.in = sharedp->rsmsi_handle;
4599 rsmsharelock_release(seg);
4600 seg->s_state = RSM_STATE_CONNECT;
4601
4602 } else { /* error in rsmpi connect during resume */
4603 seg->s_handle.in = NULL;
4604 seg->s_state = RSM_STATE_DISCONNECT;
4605
4606 sharedp->rsmsi_refcnt--;
4607 cookie = (void *)sharedp->rsmsi_cookie;
4608
4609 if (sharedp->rsmsi_refcnt == 0) {
4610 ASSERT(sharedp->rsmsi_mapcnt == 0);
4611 rsmsharelock_release(seg);
4612
4613 /* clean up the shared data structure */
4614 mutex_destroy(&sharedp->rsmsi_lock);
4615 cv_destroy(&sharedp->rsmsi_cv);
4616 kmem_free((void *)(sharedp),
4617 sizeof (rsm_import_share_t));
4618
4619 } else {
4620 rsmsharelock_release(seg);
4621 }
4622 /*
4623 * The following needs to be done after any
4624 * rsmsharelock calls which use seg->s_share.
4625 */
4626 seg->s_share = NULL;
4627 }
4628
4629 /* signal any waiting segment */
4630 cv_broadcast(&seg->s_cv);
4631
4632 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4633 "rsmseg_resume done:state=%d\n", seg->s_state));
4634 return (retc);
4635 }
4636
4637 ASSERT(seg->s_state == RSM_STATE_MAP_QUIESCE);
4638
4639 /* Setup protections for remap */
4640 maxprot = PROT_USER;
4641 if (seg->s_mode & RSM_PERM_READ) {
4642 maxprot |= PROT_READ;
4643 }
4644 if (seg->s_mode & RSM_PERM_WRITE) {
4645 maxprot |= PROT_WRITE;
4646 }
4647
4648 if (sharedp->rsmsi_state != RSMSI_STATE_MAPPED) {
4649 /* error in rsmpi connect or map during resume */
4650
4651 /* remap to trash page */
4652 ASSERT(seg->s_ckl != NULL);
4653
4654 for (hdl = seg->s_ckl; hdl != NULL; hdl = hdl->c_next) {
4655 e = devmap_umem_remap(hdl->c_dhp, rsm_dip,
4656 remap_cookie, hdl->c_off, hdl->c_len,
4657 maxprot, 0, NULL);
4658
4659 DBG_PRINTF((category, RSM_ERR,
4660 "rsmseg_resume:remap=%d\n", e));
4661 }
4662
4663 seg->s_handle.in = NULL;
4664 seg->s_state = RSM_STATE_DISCONNECT;
4665
4666 sharedp->rsmsi_refcnt--;
4667
4668 sharedp->rsmsi_mapcnt--;
4669 seg->s_mapinfo = NULL;
4670
4671 if (sharedp->rsmsi_refcnt == 0) {
4672 ASSERT(sharedp->rsmsi_mapcnt == 0);
4673 rsmsharelock_release(seg);
4674
4675 /* clean up the shared data structure */
4676 mutex_destroy(&sharedp->rsmsi_lock);
4677 cv_destroy(&sharedp->rsmsi_cv);
4678 kmem_free((void *)(sharedp),
4679 sizeof (rsm_import_share_t));
4680
4681 } else {
4682 rsmsharelock_release(seg);
4683 }
4684 /*
4685 * The following needs to be done after any
4686 * rsmsharelock calls which use seg->s_share.
4687 */
4688 seg->s_share = NULL;
4689
4690 /* signal any waiting segment */
4691 cv_broadcast(&seg->s_cv);
4692
4693 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4694 "rsmseg_resume done:seg=%x,err=%d\n",
4695 seg->s_key, retc));
4696 return (retc);
4697
4698 }
4699
4700 seg->s_handle.in = sharedp->rsmsi_handle;
4701
4702 if (seg->s_node == my_nodeid) { /* loopback */
4703 ASSERT(seg->s_mapinfo == NULL);
4704
4705 for (hdl = seg->s_ckl; hdl != NULL; hdl = hdl->c_next) {
4706 e = devmap_umem_remap(hdl->c_dhp,
4707 rsm_dip, seg->s_cookie,
4708 hdl->c_off, hdl->c_len,
4709 maxprot, 0, NULL);
4710
4711 DBG_PRINTF((category, RSM_ERR,
4712 "rsmseg_resume:remap=%d\n", e));
4713 }
4714 } else { /* remote exporter */
4715 /* remap to the new rsmpi maps */
4716 seg->s_mapinfo = sharedp->rsmsi_mapinfo;
4717
4718 for (hdl = seg->s_ckl; hdl != NULL; hdl = hdl->c_next) {
4719 p = rsm_get_mapinfo(seg, hdl->c_off, hdl->c_len,
4720 &dev_offset, &maplen);
4721 e = devmap_devmem_remap(hdl->c_dhp,
4722 p->dip, p->dev_register, dev_offset,
4723 maplen, maxprot, 0, NULL);
4724
4725 DBG_PRINTF((category, RSM_ERR,
4726 "rsmseg_resume:remap=%d\n", e));
4727 }
4728 }
4729
4730 rsmsharelock_release(seg);
4731
4732 seg->s_state = RSM_STATE_ACTIVE;
4733 cv_broadcast(&seg->s_cv);
4734
4735 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_resume done\n"));
4736
4737 return (retc);
4738 }
4739
4740 static int
4741 rsmsegshare_resume(rsmseg_t *seg)
4742 {
4743 int e = RSM_SUCCESS;
4744 adapter_t *adapter;
4745 rsm_import_share_t *sharedp;
4746 DBG_DEFINE(category,
4747 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4748
4749 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmsegshare_resume enter\n"));
4750
4751 ASSERT(rsmseglock_held(seg));
4752 ASSERT(rsmsharelock_held(seg));
4753
4754 sharedp = seg->s_share;
4755
4756 /*
4757 * If we are not in a xxxx_QUIESCE state that means shared
4758 * connect/mapping processing has been already been done
4759 * so return success.
4760 */
4761 if ((sharedp->rsmsi_state != RSMSI_STATE_CONN_QUIESCE) &&
4762 (sharedp->rsmsi_state != RSMSI_STATE_MAP_QUIESCE)) {
4763 return (RSM_SUCCESS);
4764 }
4765
4766 adapter = seg->s_adapter;
4767
4768 if (sharedp->rsmsi_node != my_nodeid) {
4769 rsm_addr_t hwaddr;
4770 hwaddr = get_remote_hwaddr(adapter, sharedp->rsmsi_node);
4771
4772 e = adapter->rsmpi_ops->rsm_connect(
4773 adapter->rsmpi_handle, hwaddr,
4774 sharedp->rsmsi_segid, &sharedp->rsmsi_handle);
4775
4776 DBG_PRINTF((category, RSM_DEBUG,
4777 "rsmsegshare_resume:rsmpi connect seg=%x:err=%d\n",
4778 sharedp->rsmsi_segid, e));
4779
4780 if (e != RSM_SUCCESS) {
4781 /* when do we send the NOT_IMPORTING message */
4782 sharedp->rsmsi_handle = NULL;
4783 sharedp->rsmsi_state = RSMSI_STATE_DISCONNECTED;
4784 /* signal any waiting segment */
4785 cv_broadcast(&sharedp->rsmsi_cv);
4786 return (e);
4787 }
4788 }
4789
4790 if (sharedp->rsmsi_state == RSMSI_STATE_CONN_QUIESCE) {
4791 sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
4792 /* signal any waiting segment */
4793 cv_broadcast(&sharedp->rsmsi_cv);
4794 return (e);
4795 }
4796
4797 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAP_QUIESCE);
4798
4799 /* do the rsmpi map of the whole segment here */
4800 if (sharedp->rsmsi_node != my_nodeid) {
4801 size_t mapped_len;
4802 rsm_mapinfo_t *p;
4803
4804 /*
4805 * We need to do rsmpi maps with <off, lens> identical to
4806 * the old mapinfo list because the segment mapping handles
4807 * dhp and such need the fragmentation of rsmpi maps to be
4808 * identical to what it was during the mmap of the segment
4809 */
4810 p = sharedp->rsmsi_mapinfo;
4811
4812 while (p != NULL) {
4813 mapped_len = 0;
4814
4815 e = adapter->rsmpi_ops->rsm_map(
4816 sharedp->rsmsi_handle, p->start_offset,
4817 p->individual_len, &mapped_len,
4818 &p->dip, &p->dev_register, &p->dev_offset,
4819 NULL, NULL);
4820
4821 if (e != 0) {
4822 DBG_PRINTF((category, RSM_ERR,
4823 "rsmsegshare_resume: rsmpi map err=%d\n",
4824 e));
4825 break;
4826 }
4827
4828 if (mapped_len != p->individual_len) {
4829 DBG_PRINTF((category, RSM_ERR,
4830 "rsmsegshare_resume: rsmpi maplen"
4831 "< reqlen=%lx\n", mapped_len));
4832 e = RSMERR_BAD_LENGTH;
4833 break;
4834 }
4835
4836 p = p->next;
4837
4838 }
4839
4840
4841 if (e != RSM_SUCCESS) { /* rsmpi map failed */
4842 int err;
4843 /* Check if this is the first rsm_map */
4844 if (p != sharedp->rsmsi_mapinfo) {
4845 /*
4846 * A single rsm_unmap undoes multiple rsm_maps.
4847 */
4848 (void) seg->s_adapter->rsmpi_ops->
4849 rsm_unmap(sharedp->rsmsi_handle);
4850 }
4851
4852 rsm_free_mapinfo(sharedp->rsmsi_mapinfo);
4853 sharedp->rsmsi_mapinfo = NULL;
4854
4855 err = adapter->rsmpi_ops->
4856 rsm_disconnect(sharedp->rsmsi_handle);
4857
4858 DBG_PRINTF((category, RSM_DEBUG,
4859 "rsmsegshare_resume:disconn seg=%x:err=%d\n",
4860 sharedp->rsmsi_segid, err));
4861
4862 sharedp->rsmsi_handle = NULL;
4863 sharedp->rsmsi_state = RSMSI_STATE_DISCONNECTED;
4864
4865 /* signal the waiting segments */
4866 cv_broadcast(&sharedp->rsmsi_cv);
4867 DBG_PRINTF((category, RSM_DEBUG,
4868 "rsmsegshare_resume done: rsmpi map err\n"));
4869 return (e);
4870 }
4871 }
4872
4873 sharedp->rsmsi_state = RSMSI_STATE_MAPPED;
4874
4875 /* signal any waiting segment */
4876 cv_broadcast(&sharedp->rsmsi_cv);
4877
4878 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmsegshare_resume done\n"));
4879
4880 return (e);
4881 }
4882
4883 /*
4884 * this is the routine that gets called by recv_taskq which is the
4885 * thread that processes messages that are flow-controlled.
4886 */
4887 static void
4888 rsm_intr_proc_deferred(void *arg)
4889 {
4890 path_t *path = (path_t *)arg;
4891 rsmipc_request_t *msg;
4892 rsmipc_msghdr_t *msghdr;
4893 rsm_node_id_t src_node;
4894 msgbuf_elem_t *head;
4895 int e;
4896 DBG_DEFINE(category,
4897 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4898
4899 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4900 "rsm_intr_proc_deferred enter\n"));
4901
4902 mutex_enter(&path->mutex);
4903
4904 /* use the head of the msgbuf_queue */
4905 head = rsmka_gethead_msgbuf(path);
4906
4907 mutex_exit(&path->mutex);
4908
4909 msg = (rsmipc_request_t *)&(head->msg);
4910 msghdr = (rsmipc_msghdr_t *)msg;
4911
4912 src_node = msghdr->rsmipc_src;
4913
4914 /*
4915 * messages that need to send a reply should check the message version
4916 * before processing the message. And all messages that need to
4917 * send a reply should be processed here by the worker thread.
4918 */
4919 switch (msghdr->rsmipc_type) {
4920 case RSMIPC_MSG_SEGCONNECT:
4921 if (msghdr->rsmipc_version != RSM_VERSION) {
4922 rsmipc_reply_t reply;
4923 reply.rsmipc_status = RSMERR_BAD_DRIVER_VERSION;
4924 reply.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_REPLY;
4925 reply.rsmipc_hdr.rsmipc_cookie = msghdr->rsmipc_cookie;
4926 (void) rsmipc_send(msghdr->rsmipc_src, NULL, &reply);
4927 } else {
4928 rsm_intr_segconnect(src_node, msg);
4929 }
4930 break;
4931 case RSMIPC_MSG_DISCONNECT:
4932 rsm_force_unload(src_node, msg->rsmipc_key, DISCONNECT);
4933 break;
4934 case RSMIPC_MSG_SUSPEND:
4935 importer_suspend(src_node);
4936 break;
4937 case RSMIPC_MSG_SUSPEND_DONE:
4938 rsm_suspend_complete(src_node, 0);
4939 break;
4940 case RSMIPC_MSG_RESUME:
4941 importer_resume(src_node);
4942 break;
4943 default:
4944 ASSERT(0);
4945 }
4946
4947 mutex_enter(&path->mutex);
4948
4949 rsmka_dequeue_msgbuf(path);
4950
4951 /* incr procmsg_cnt can be at most RSMIPC_MAX_MESSAGES */
4952 if (path->procmsg_cnt < RSMIPC_MAX_MESSAGES)
4953 path->procmsg_cnt++;
4954
4955 ASSERT(path->procmsg_cnt <= RSMIPC_MAX_MESSAGES);
4956
4957 /* No need to send credits if path is going down */
4958 if ((path->state == RSMKA_PATH_ACTIVE) &&
4959 (path->procmsg_cnt >= RSMIPC_LOTSFREE_MSGBUFS)) {
4960 /*
4961 * send credits and reset procmsg_cnt if success otherwise
4962 * credits will be sent after processing the next message
4963 */
4964 e = rsmipc_send_controlmsg(path, RSMIPC_MSG_CREDIT);
4965 if (e == 0)
4966 path->procmsg_cnt = 0;
4967 else
4968 DBG_PRINTF((category, RSM_ERR,
4969 "rsm_intr_proc_deferred:send credits err=%d\n", e));
4970 }
4971
4972 /*
4973 * decrement the path refcnt since we incremented it in
4974 * rsm_intr_callback_dispatch
4975 */
4976 PATH_RELE_NOLOCK(path);
4977
4978 mutex_exit(&path->mutex);
4979
4980 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4981 "rsm_intr_proc_deferred done\n"));
4982 }
4983
4984 /*
4985 * Flow-controlled messages are enqueued and dispatched onto a taskq here
4986 */
4987 static void
4988 rsm_intr_callback_dispatch(void *data, rsm_addr_t src_hwaddr,
4989 rsm_intr_hand_arg_t arg)
4990 {
4991 srv_handler_arg_t *hdlr_argp = (srv_handler_arg_t *)arg;
4992 path_t *path;
4993 rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)data;
4994 DBG_DEFINE(category,
4995 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4996
4997 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4998 "rsm_intr_callback_dispatch enter\n"));
4999 ASSERT(data && hdlr_argp);
5000
5001 /* look up the path - incr the path refcnt */
5002 path = rsm_find_path(hdlr_argp->adapter_name,
5003 hdlr_argp->adapter_instance, src_hwaddr);
5004
5005 /* the path has been removed - drop this message */
5006 if (path == NULL) {
5007 DBG_PRINTF((category, RSM_DEBUG,
5008 "rsm_intr_callback_dispatch done: msg dropped\n"));
5009 return;
5010 }
5011 /* the path is not active - don't accept new messages */
5012 if (path->state != RSMKA_PATH_ACTIVE) {
5013 PATH_RELE_NOLOCK(path);
5014 mutex_exit(&path->mutex);
5015 DBG_PRINTF((category, RSM_DEBUG,
5016 "rsm_intr_callback_dispatch done: msg dropped"
5017 " path=%lx !ACTIVE\n", path));
5018 return;
5019 }
5020
5021 /*
5022 * Check if this message was sent to an older incarnation
5023 * of the path/sendq.
5024 */
5025 if (path->local_incn != msghdr->rsmipc_incn) {
5026 /* decrement the refcnt */
5027 PATH_RELE_NOLOCK(path);
5028 mutex_exit(&path->mutex);
5029 DBG_PRINTF((category, RSM_DEBUG,
5030 "rsm_intr_callback_dispatch done: old incn %lld\n",
5031 msghdr->rsmipc_incn));
5032 return;
5033 }
5034
5035 /* copy and enqueue msg on the path's msgbuf queue */
5036 rsmka_enqueue_msgbuf(path, data);
5037
5038 /*
5039 * schedule task to process messages - ignore retval from
5040 * task_dispatch because we sender cannot send more than
5041 * what receiver can handle.
5042 */
5043 (void) taskq_dispatch(path->recv_taskq,
5044 rsm_intr_proc_deferred, path, KM_NOSLEEP);
5045
5046 mutex_exit(&path->mutex);
5047
5048 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5049 "rsm_intr_callback_dispatch done\n"));
5050 }
5051
5052 /*
5053 * This procedure is called from rsm_srv_func when a remote node creates a
5054 * a send queue. This event is used as a hint that an earlier failed
5055 * attempt to create a send queue to that remote node may now succeed and
5056 * should be retried. Indication of an earlier failed attempt is provided
5057 * by the RSMKA_SQCREATE_PENDING flag.
5058 */
5059 static void
5060 rsm_sqcreateop_callback(rsm_addr_t src_hwaddr, rsm_intr_hand_arg_t arg)
5061 {
5062 srv_handler_arg_t *hdlr_argp = (srv_handler_arg_t *)arg;
5063 path_t *path;
5064 DBG_DEFINE(category,
5065 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
5066
5067 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5068 "rsm_sqcreateop_callback enter\n"));
5069
5070 /* look up the path - incr the path refcnt */
5071 path = rsm_find_path(hdlr_argp->adapter_name,
5072 hdlr_argp->adapter_instance, src_hwaddr);
5073
5074 if (path == NULL) {
5075 DBG_PRINTF((category, RSM_DEBUG,
5076 "rsm_sqcreateop_callback done: no path\n"));
5077 return;
5078 }
5079
5080 if ((path->state == RSMKA_PATH_UP) &&
5081 (path->flags & RSMKA_SQCREATE_PENDING)) {
5082 /*
5083 * previous attempt to create sendq had failed, retry
5084 * it and move to RSMKA_PATH_ACTIVE state if successful.
5085 * the refcnt will be decremented in the do_deferred_work
5086 */
5087 (void) rsmka_do_path_active(path, RSMKA_NO_SLEEP);
5088 } else {
5089 /* decrement the refcnt */
5090 PATH_RELE_NOLOCK(path);
5091 }
5092 mutex_exit(&path->mutex);
5093
5094 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5095 "rsm_sqcreateop_callback done\n"));
5096 }
5097
5098 static void
5099 rsm_intr_callback(void *data, rsm_addr_t src_hwaddr, rsm_intr_hand_arg_t arg)
5100 {
5101 rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)data;
5102 rsmipc_request_t *msg = (rsmipc_request_t *)data;
5103 rsmipc_controlmsg_t *ctrlmsg = (rsmipc_controlmsg_t *)data;
5104 rsm_node_id_t src_node;
5105 DBG_DEFINE(category,
5106 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
5107
5108 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_callback enter:"
5109 "src=%d, type=%d\n", msghdr->rsmipc_src,
5110 msghdr->rsmipc_type));
5111
5112 /*
5113 * Check for the version number in the msg header. If it is not
5114 * RSM_VERSION, drop the message. In the future, we need to manage
5115 * incompatible version numbers in some way
5116 */
5117 if (msghdr->rsmipc_version != RSM_VERSION) {
5118 DBG_PRINTF((category, RSM_ERR, "wrong KA version\n"));
5119 /*
5120 * Drop requests that don't have a reply right here
5121 * Request with reply will send a BAD_VERSION reply
5122 * when they get processed by the worker thread.
5123 */
5124 if (msghdr->rsmipc_type != RSMIPC_MSG_SEGCONNECT) {
5125 return;
5126 }
5127
5128 }
5129
5130 src_node = msghdr->rsmipc_src;
5131
5132 switch (msghdr->rsmipc_type) {
5133 case RSMIPC_MSG_SEGCONNECT:
5134 case RSMIPC_MSG_DISCONNECT:
5135 case RSMIPC_MSG_SUSPEND:
5136 case RSMIPC_MSG_SUSPEND_DONE:
5137 case RSMIPC_MSG_RESUME:
5138 /*
5139 * These message types are handled by a worker thread using
5140 * the flow-control algorithm.
5141 * Any message processing that does one or more of the
5142 * following should be handled in a worker thread.
5143 * - allocates resources and might sleep
5144 * - makes RSMPI calls down to the interconnect driver
5145 * this by defn include requests with reply.
5146 * - takes a long duration of time
5147 */
5148 rsm_intr_callback_dispatch(data, src_hwaddr, arg);
5149 break;
5150 case RSMIPC_MSG_NOTIMPORTING:
5151 importer_list_rm(src_node, msg->rsmipc_key,
5152 msg->rsmipc_segment_cookie);
5153 break;
5154 case RSMIPC_MSG_SQREADY:
5155 rsm_proc_sqready(data, src_hwaddr, arg);
5156 break;
5157 case RSMIPC_MSG_SQREADY_ACK:
5158 rsm_proc_sqready_ack(data, src_hwaddr, arg);
5159 break;
5160 case RSMIPC_MSG_CREDIT:
5161 rsm_add_credits(ctrlmsg, src_hwaddr, arg);
5162 break;
5163 case RSMIPC_MSG_REPLY:
5164 rsm_intr_reply(msghdr);
5165 break;
5166 case RSMIPC_MSG_BELL:
5167 rsm_intr_event(msg);
5168 break;
5169 case RSMIPC_MSG_IMPORTING:
5170 importer_list_add(src_node, msg->rsmipc_key,
5171 msg->rsmipc_adapter_hwaddr,
5172 msg->rsmipc_segment_cookie);
5173 break;
5174 case RSMIPC_MSG_REPUBLISH:
5175 importer_update(src_node, msg->rsmipc_key, msg->rsmipc_perm);
5176 break;
5177 default:
5178 DBG_PRINTF((category, RSM_DEBUG,
5179 "rsm_intr_callback: bad msg %lx type %d data %lx\n",
5180 (size_t)msg, (int)(msghdr->rsmipc_type), (size_t)data));
5181 }
5182
5183 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_callback done\n"));
5184
5185 }
5186
5187 rsm_intr_hand_ret_t rsm_srv_func(rsm_controller_object_t *chd,
5188 rsm_intr_q_op_t opcode, rsm_addr_t src,
5189 void *data, size_t size, rsm_intr_hand_arg_t arg)
5190 {
5191 DBG_DEFINE(category,
5192 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
5193
5194 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_srv_func enter\n"));
5195
5196 switch (opcode) {
5197 case RSM_INTR_Q_OP_CREATE:
5198 DBG_PRINTF((category, RSM_DEBUG, "rsm_srv_func:OP_CREATE\n"));
5199 rsm_sqcreateop_callback(src, arg);
5200 break;
5201 case RSM_INTR_Q_OP_DESTROY:
5202 DBG_PRINTF((category, RSM_DEBUG, "rsm_srv_func:OP_DESTROY\n"));
5203 break;
5204 case RSM_INTR_Q_OP_RECEIVE:
5205 rsm_intr_callback(data, src, arg);
5206 break;
5207 default:
5208 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5209 "rsm_srv_func: unknown opcode = %x\n", opcode));
5210 }
5211
5212 chd = chd;
5213 size = size;
5214
5215 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_srv_func done\n"));
5216
5217 return (RSM_INTR_HAND_CLAIMED);
5218 }
5219
5220 /* *************************** IPC slots ************************* */
5221 static rsmipc_slot_t *
5222 rsmipc_alloc()
5223 {
5224 int i;
5225 rsmipc_slot_t *slot;
5226 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
5227
5228 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_alloc enter\n"));
5229
5230 /* try to find a free slot, if not wait */
5231 mutex_enter(&rsm_ipc.lock);
5232
5233 while (rsm_ipc.count == 0) {
5234 rsm_ipc.wanted = 1;
5235 cv_wait(&rsm_ipc.cv, &rsm_ipc.lock);
5236 }
5237
5238 /* An empty slot is available, find it */
5239 slot = &rsm_ipc.slots[0];
5240 for (i = 0; i < RSMIPC_SZ; i++, slot++) {
5241 if (RSMIPC_GET(slot, RSMIPC_FREE)) {
5242 RSMIPC_CLEAR(slot, RSMIPC_FREE);
5243 break;
5244 }
5245 }
5246
5247 ASSERT(i < RSMIPC_SZ);
5248 rsm_ipc.count--; /* one less is available */
5249 rsm_ipc.sequence++; /* new sequence */
5250
5251 slot->rsmipc_cookie.ic.sequence = (uint_t)rsm_ipc.sequence;
5252 slot->rsmipc_cookie.ic.index = (uint_t)i;
5253
5254 mutex_exit(&rsm_ipc.lock);
5255
5256 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_alloc done\n"));
5257
5258 return (slot);
5259 }
5260
5261 static void
5262 rsmipc_free(rsmipc_slot_t *slot)
5263 {
5264 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
5265
5266 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_free enter\n"));
5267
5268 ASSERT(MUTEX_HELD(&slot->rsmipc_lock));
5269 ASSERT(&rsm_ipc.slots[slot->rsmipc_cookie.ic.index] == slot);
5270
5271 mutex_enter(&rsm_ipc.lock);
5272
5273 RSMIPC_SET(slot, RSMIPC_FREE);
5274
5275 slot->rsmipc_cookie.ic.sequence = 0;
5276
5277 mutex_exit(&slot->rsmipc_lock);
5278 rsm_ipc.count++;
5279 ASSERT(rsm_ipc.count <= RSMIPC_SZ);
5280 if (rsm_ipc.wanted) {
5281 rsm_ipc.wanted = 0;
5282 cv_broadcast(&rsm_ipc.cv);
5283 }
5284
5285 mutex_exit(&rsm_ipc.lock);
5286
5287 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_free done\n"));
5288 }
5289
/*
 * rsmipc_send - deliver a kernel agent IPC message to node `dest'.
 *
 * The (req, reply) pair selects what is sent:
 *
 *	req != NULL, reply == NULL	send the request and return as soon as
 *					rsm_send succeeds; no reply is awaited.
 *	req == NULL, reply != NULL	send a reply message; replies are never
 *					flow controlled.
 *	req != NULL, reply != NULL	send the request, then wait on an ipc
 *					slot for the reply (stored in *reply),
 *					a signal, or a 5 second timeout after
 *					which the request is sent again.
 *
 * When dest is the local node nothing is transmitted: the request is handed
 * straight to the matching local handler and 0 is returned.
 *
 * Requests of type SEGCONNECT, DISCONNECT, IMPORTING, SUSPEND, SUSPEND_DONE
 * and RESUME are flow controlled: a message buffer credit
 * (sendq_token->msgbuf_avail) is reserved under path->mutex before the send
 * and given back if the send fails.
 *
 * Return values visible in this function:
 *	0 / RSM_SUCCESS			message handled locally or sent
 *	RSMERR_REMOTE_NODE_UNREACHABLE	dest >= MAX_NODES, or no sendq token
 *					could be obtained for dest
 *	RSMERR_CONN_ABORTED		a retry was handed the same sendq
 *					token after CONN_ABORTED, TIMEOUT or
 *					COMM_ERR_MAYBE_DELIVERED
 *	RSMERR_INTERRUPTED		a signal arrived while waiting for a
 *					credit or for the reply
 * In the request-with-reply mode the function otherwise returns whatever
 * `e' holds when the wait loop ends.
 */
5290 static int
5291 rsmipc_send(rsm_node_id_t dest, rsmipc_request_t *req, rsmipc_reply_t *reply)
5292 {
5293 int e = 0;
5294 int credit_check = 0;
5295 int retry_cnt = 0;
5296 int min_retry_cnt = 10;
5297 rsm_send_t is;
5298 rsmipc_slot_t *rslot;
5299 adapter_t *adapter;
5300 path_t *path;
5301 sendq_token_t *sendq_token;
5302 sendq_token_t *used_sendq_token = NULL;
5303 rsm_send_q_handle_t ipc_handle;
5304 DBG_DEFINE(category,
5305 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
5306
5307 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_send enter:dest=%d",
5308 dest));
5309
5310 /*
5311 * Check if this is a local case
5312 */
/*
 * NOTE(review): req is dereferenced here without a NULL check, while the
 * remote path below accepts req == NULL (reply mode).  Presumably replies
 * are never addressed to the local node - confirm against the callers.
 */
5313 if (dest == my_nodeid) {
5314 switch (req->rsmipc_hdr.rsmipc_type) {
5315 case RSMIPC_MSG_SEGCONNECT:
5316 reply->rsmipc_status = (short)rsmsegacl_validate(
5317 req, dest, reply);
5318 break;
5319 case RSMIPC_MSG_BELL:
5320 req->rsmipc_hdr.rsmipc_src = dest;
5321 rsm_intr_event(req);
5322 break;
5323 case RSMIPC_MSG_IMPORTING:
5324 importer_list_add(dest, req->rsmipc_key,
5325 req->rsmipc_adapter_hwaddr,
5326 req->rsmipc_segment_cookie);
5327 break;
5328 case RSMIPC_MSG_NOTIMPORTING:
5329 importer_list_rm(dest, req->rsmipc_key,
5330 req->rsmipc_segment_cookie);
5331 break;
5332 case RSMIPC_MSG_REPUBLISH:
5333 importer_update(dest, req->rsmipc_key,
5334 req->rsmipc_perm);
5335 break;
5336 case RSMIPC_MSG_SUSPEND:
5337 importer_suspend(dest);
5338 break;
5339 case RSMIPC_MSG_SUSPEND_DONE:
5340 rsm_suspend_complete(dest, 0);
5341 break;
5342 case RSMIPC_MSG_RESUME:
5343 importer_resume(dest);
5344 break;
5345 default:
5346 ASSERT(0);
5347 }
5348 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5349 "rsmipc_send done\n"));
5350 return (0);
5351 }
5352
5353 if (dest >= MAX_NODES) {
5354 DBG_PRINTF((category, RSM_ERR,
5355 "rsm: rsmipc_send bad node number %x\n", dest));
5356 return (RSMERR_REMOTE_NODE_UNREACHABLE);
5357 }
5358
5359 /*
5360 * Oh boy! we are going remote.
5361 */
5362
5363 /*
5364 * identify if we need to have credits to send this message
5365 * - only selected requests are flow controlled
5366 */
5367 if (req != NULL) {
5368 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5369 "rsmipc_send:request type=%d\n",
5370 req->rsmipc_hdr.rsmipc_type));
5371
5372 switch (req->rsmipc_hdr.rsmipc_type) {
5373 case RSMIPC_MSG_SEGCONNECT:
5374 case RSMIPC_MSG_DISCONNECT:
5375 case RSMIPC_MSG_IMPORTING:
5376 case RSMIPC_MSG_SUSPEND:
5377 case RSMIPC_MSG_SUSPEND_DONE:
5378 case RSMIPC_MSG_RESUME:
5379 credit_check = 1;
5380 break;
5381 default:
5382 credit_check = 0;
5383 }
5384 }
5385
/*
 * Retry point.  Every failure path below jumps back here after releasing
 * the sendq token it held; `e' carries the reason for the last failure and
 * `used_sendq_token' remembers the token that was tried.  After every
 * min_retry_cnt passes through this label the thread sleeps for 10ms.
 */
5386 again:
5387 if (retry_cnt++ == min_retry_cnt) {
5388 /* backoff before further retries for 10ms */
5389 delay(drv_usectohz(10000));
5390 retry_cnt = 0; /* reset retry_cnt */
5391 }
5392 sendq_token = rsmka_get_sendq_token(dest, used_sendq_token);
5393 if (sendq_token == NULL) {
5394 DBG_PRINTF((category, RSM_ERR,
5395 "rsm: rsmipc_send no device to reach node %d\n", dest));
5396 return (RSMERR_REMOTE_NODE_UNREACHABLE);
5397 }
5398
/*
 * If rsmka_get_sendq_token handed back the very token used on the previous
 * attempt and that attempt ended with one of the errors below, stop
 * retrying and report RSMERR_CONN_ABORTED.  On the first pass
 * used_sendq_token is NULL, so this test cannot match.
 */
5399 if ((sendq_token == used_sendq_token) &&
5400 ((e == RSMERR_CONN_ABORTED) || (e == RSMERR_TIMEOUT) ||
5401 (e == RSMERR_COMM_ERR_MAYBE_DELIVERED))) {
5402 rele_sendq_token(sendq_token);
5403 DBG_PRINTF((category, RSM_DEBUG, "rsmipc_send done=%d\n", e));
5404 return (RSMERR_CONN_ABORTED);
5405 } else
5406 used_sendq_token = sendq_token;
5407
5408 /* lint -save -e413 */
5409 path = SQ_TOKEN_TO_PATH(sendq_token);
5410 adapter = path->local_adapter;
5411 /* lint -restore */
5412 ipc_handle = sendq_token->rsmpi_sendq_handle;
5413
5414 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5415 "rsmipc_send: path=%lx sendq_hdl=%lx\n", path, ipc_handle));
5416
5417 if (reply == NULL) {
5418 /* Send request without ack */
5419 /*
5420 * Set the rsmipc_version number in the msghdr for KA
5421 * communication versioning
5422 */
5423 req->rsmipc_hdr.rsmipc_version = RSM_VERSION;
5424 req->rsmipc_hdr.rsmipc_src = my_nodeid;
5425 /*
5426 * remote endpoints incn should match the value in our
5427 * path's remote_incn field. No need to grab any lock
5428 * since we have refcnted the path in rsmka_get_sendq_token
5429 */
5430 req->rsmipc_hdr.rsmipc_incn = path->remote_incn;
5431
5432 is.is_data = (void *)req;
5433 is.is_size = sizeof (*req);
5434 is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP;
5435 is.is_wait = 0;
5436
5437 if (credit_check) {
5438 mutex_enter(&path->mutex);
5439 /*
5440 * wait till we recv credits or path goes down. If path
5441 * goes down rsm_send will fail and we handle the error
5442 * then
5443 */
5444 while ((sendq_token->msgbuf_avail == 0) &&
5445 (path->state == RSMKA_PATH_ACTIVE)) {
5446 e = cv_wait_sig(&sendq_token->sendq_cv,
5447 &path->mutex);
5448 if (e == 0) {
5449 mutex_exit(&path->mutex);
5450 no_reply_cnt++;
5451 rele_sendq_token(sendq_token);
5452 DBG_PRINTF((category, RSM_DEBUG,
5453 "rsmipc_send done: "
5454 "cv_wait INTERRUPTED"));
5455 return (RSMERR_INTERRUPTED);
5456 }
5457 }
5458
5459 /*
5460 * path is not active retry on another path.
5461 */
5462 if (path->state != RSMKA_PATH_ACTIVE) {
5463 mutex_exit(&path->mutex);
5464 rele_sendq_token(sendq_token);
5465 e = RSMERR_CONN_ABORTED;
5466 DBG_PRINTF((category, RSM_ERR,
5467 "rsm: rsmipc_send: path !ACTIVE"));
5468 goto again;
5469 }
5470
5471 ASSERT(sendq_token->msgbuf_avail > 0);
5472
5473 /*
5474 * reserve a msgbuf
5475 */
5476 sendq_token->msgbuf_avail--;
5477
5478 mutex_exit(&path->mutex);
5479
5480 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is,
5481 NULL);
5482
5483 if (e != RSM_SUCCESS) {
5484 mutex_enter(&path->mutex);
5485 /*
5486 * release the reserved msgbuf since
5487 * the send failed
5488 */
5489 sendq_token->msgbuf_avail++;
5490 cv_broadcast(&sendq_token->sendq_cv);
5491 mutex_exit(&path->mutex);
5492 }
5493 } else
5494 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is,
5495 NULL);
5496
5497 no_reply_cnt++;
5498 rele_sendq_token(sendq_token);
5499 if (e != RSM_SUCCESS) {
5500 DBG_PRINTF((category, RSM_ERR,
5501 "rsm: rsmipc_send no reply send"
5502 " err = %d no reply count = %d\n",
5503 e, no_reply_cnt));
5504 ASSERT(e != RSMERR_QUEUE_FENCE_UP &&
5505 e != RSMERR_BAD_BARRIER_HNDL);
5506 atomic_inc_64(&rsm_ipcsend_errcnt);
5507 goto again;
5508 } else {
5509 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5510 "rsmipc_send done\n"));
5511 return (e);
5512 }
5513
5514 }
5515
5516 if (req == NULL) {
5517 /* Send reply - No flow control is done for reply */
5518 /*
5519 * Set the version in the msg header for KA communication
5520 * versioning
5521 */
5522 reply->rsmipc_hdr.rsmipc_version = RSM_VERSION;
5523 reply->rsmipc_hdr.rsmipc_src = my_nodeid;
5524 /* incn number is not used for reply msgs currently */
5525 reply->rsmipc_hdr.rsmipc_incn = path->remote_incn;
5526
5527 is.is_data = (void *)reply;
5528 is.is_size = sizeof (*reply);
5529 is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP;
5530 is.is_wait = 0;
5531 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is, NULL);
5532 rele_sendq_token(sendq_token);
5533 if (e != RSM_SUCCESS) {
5534 DBG_PRINTF((category, RSM_ERR,
5535 "rsm: rsmipc_send reply send"
5536 " err = %d\n", e));
5537 atomic_inc_64(&rsm_ipcsend_errcnt);
5538 goto again;
5539 } else {
5540 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5541 "rsmipc_send done\n"));
5542 return (e);
5543 }
5544 }
5545
5546 /* Reply needed */
/*
 * rslot->rsmipc_lock is taken here and is never dropped explicitly in this
 * function; every exit path from here on calls rsmipc_free(rslot).
 * Presumably rsmipc_free releases the slot lock - confirm.  A failure
 * that jumps to `again' frees the slot first and allocates a new one when
 * control reaches this point again.
 */
5547 rslot = rsmipc_alloc(); /* allocate a new ipc slot */
5548
5549 mutex_enter(&rslot->rsmipc_lock);
5550
5551 rslot->rsmipc_data = (void *)reply;
5552 RSMIPC_SET(rslot, RSMIPC_PENDING);
5553
/*
 * Each pass sends the request and then waits for the reply.  The loop runs
 * again - resending the request - for as long as the slot is still marked
 * RSMIPC_PENDING after the wait returns.
 * NOTE(review): RSMIPC_PENDING is presumably cleared by the reply handler
 * when the matching reply arrives - confirm.
 */
5554 while (RSMIPC_GET(rslot, RSMIPC_PENDING)) {
5555 /*
5556 * Set the rsmipc_version number in the msghdr for KA
5557 * communication versioning
5558 */
5559 req->rsmipc_hdr.rsmipc_version = RSM_VERSION;
5560 req->rsmipc_hdr.rsmipc_src = my_nodeid;
5561 req->rsmipc_hdr.rsmipc_cookie = rslot->rsmipc_cookie;
5562 /*
5563 * remote endpoints incn should match the value in our
5564 * path's remote_incn field. No need to grab any lock
5565 * since we have refcnted the path in rsmka_get_sendq_token
5566 */
5567 req->rsmipc_hdr.rsmipc_incn = path->remote_incn;
5568
5569 is.is_data = (void *)req;
5570 is.is_size = sizeof (*req);
5571 is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP;
5572 is.is_wait = 0;
5573 if (credit_check) {
5574
5575 mutex_enter(&path->mutex);
5576 /*
5577 * wait till we recv credits or path goes down. If path
5578 * goes down rsm_send will fail and we handle the error
5579 * then.
5580 */
5581 while ((sendq_token->msgbuf_avail == 0) &&
5582 (path->state == RSMKA_PATH_ACTIVE)) {
5583 e = cv_wait_sig(&sendq_token->sendq_cv,
5584 &path->mutex);
5585 if (e == 0) {
5586 mutex_exit(&path->mutex);
5587 RSMIPC_CLEAR(rslot, RSMIPC_PENDING);
5588 rsmipc_free(rslot);
5589 rele_sendq_token(sendq_token);
5590 DBG_PRINTF((category, RSM_DEBUG,
5591 "rsmipc_send done: "
5592 "cv_wait INTERRUPTED"));
5593 return (RSMERR_INTERRUPTED);
5594 }
5595 }
5596
5597 /*
5598 * path is not active retry on another path.
5599 */
5600 if (path->state != RSMKA_PATH_ACTIVE) {
5601 mutex_exit(&path->mutex);
5602 RSMIPC_CLEAR(rslot, RSMIPC_PENDING);
5603 rsmipc_free(rslot);
5604 rele_sendq_token(sendq_token);
5605 e = RSMERR_CONN_ABORTED;
5606 DBG_PRINTF((category, RSM_ERR,
5607 "rsm: rsmipc_send: path !ACTIVE"));
5608 goto again;
5609 }
5610
5611 ASSERT(sendq_token->msgbuf_avail > 0);
5612
5613 /*
5614 * reserve a msgbuf
5615 */
5616 sendq_token->msgbuf_avail--;
5617
5618 mutex_exit(&path->mutex);
5619
5620 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is,
5621 NULL);
5622
5623 if (e != RSM_SUCCESS) {
5624 mutex_enter(&path->mutex);
5625 /*
5626 * release the reserved msgbuf since
5627 * the send failed
5628 */
5629 sendq_token->msgbuf_avail++;
5630 cv_broadcast(&sendq_token->sendq_cv);
5631 mutex_exit(&path->mutex);
5632 }
5633 } else
5634 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is,
5635 NULL);
5636
5637 if (e != RSM_SUCCESS) {
5638 DBG_PRINTF((category, RSM_ERR,
5639 "rsm: rsmipc_send rsmpi send err = %d\n", e));
5640 RSMIPC_CLEAR(rslot, RSMIPC_PENDING);
5641 rsmipc_free(rslot);
5642 rele_sendq_token(sendq_token);
5643 atomic_inc_64(&rsm_ipcsend_errcnt);
5644 goto again;
5645 }
5646
5647 /* wait for a reply signal, a SIGINT, or 5 sec. timeout */
5648 e = cv_reltimedwait_sig(&rslot->rsmipc_cv, &rslot->rsmipc_lock,
5649 drv_usectohz(5000000), TR_CLOCK_TICK);
5650 if (e < 0) {
5651 /* timed out - retry */
5652 e = RSMERR_TIMEOUT;
5653 } else if (e == 0) {
5654 /* signalled - return error */
5655 e = RSMERR_INTERRUPTED;
5656 break;
5657 } else {
5658 e = RSM_SUCCESS;
5659 }
5660 }
5661
5662 RSMIPC_CLEAR(rslot, RSMIPC_PENDING);
5663 rsmipc_free(rslot);
5664 rele_sendq_token(sendq_token);
5665
5666 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_send done=%d\n", e));
5667 return (e);
5668 }
5669
5670 static int
5671 rsm_send_notimporting(rsm_node_id_t dest, rsm_memseg_id_t segid, void *cookie)
5672 {
5673 rsmipc_request_t request;
5674
5675 /*
5676 * inform the exporter to delete this importer
5677 */
5678 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_NOTIMPORTING;
5679 request.rsmipc_key = segid;
5680 request.rsmipc_segment_cookie = cookie;
5681 return (rsmipc_send(dest, &request, RSM_NO_REPLY));
5682 }
5683
5684 static void
5685 rsm_send_republish(rsm_memseg_id_t segid, rsmapi_access_entry_t *acl,
5686 int acl_len, rsm_permission_t default_permission)
5687 {
5688 int i;
5689 importing_token_t *token;
5690 rsmipc_request_t request;
5691 republish_token_t *republish_list = NULL;
5692 republish_token_t *rp;
5693 rsm_permission_t permission;
5694 int index;
5695
5696 /*
5697 * send the new access mode to all the nodes that have imported
5698 * this segment.
5699 * If the new acl does not have a node that was present in
5700 * the old acl a access permission of 0 is sent.
5701 */
5702
5703 index = rsmhash(segid);
5704
5705 /*
5706 * create a list of node/permissions to send the republish message
5707 */
5708 mutex_enter(&importer_list.lock);
5709
5710 token = importer_list.bucket[index];
5711 while (token != NULL) {
5712 if (segid == token->key) {
5713 permission = default_permission;
5714
5715 for (i = 0; i < acl_len; i++) {
5716 if (token->importing_node == acl[i].ae_node) {
5717 permission = acl[i].ae_permission;
5718 break;
5719 }
5720 }
5721 rp = kmem_zalloc(sizeof (republish_token_t), KM_SLEEP);
5722
5723 rp->key = segid;
5724 rp->importing_node = token->importing_node;
5725 rp->permission = permission;
5726 rp->next = republish_list;
5727 republish_list = rp;
5728 }
5729 token = token->next;
5730 }
5731
5732 mutex_exit(&importer_list.lock);
5733
5734 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_REPUBLISH;
5735 request.rsmipc_key = segid;
5736
5737 while (republish_list != NULL) {
5738 request.rsmipc_perm = republish_list->permission;
5739 (void) rsmipc_send(republish_list->importing_node,
5740 &request, RSM_NO_REPLY);
5741 rp = republish_list;
5742 republish_list = republish_list->next;
5743 kmem_free(rp, sizeof (republish_token_t));
5744 }
5745 }
5746
5747 static void
5748 rsm_send_suspend()
5749 {
5750 int i, e;
5751 rsmipc_request_t request;
5752 list_element_t *tokp;
5753 list_element_t *head = NULL;
5754 importing_token_t *token;
5755 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
5756 "rsm_send_suspend enter\n"));
5757
5758 /*
5759 * create a list of node to send the suspend message
5760 *
5761 * Currently the whole importer list is scanned and we obtain
5762 * all the nodes - this basically gets all nodes that at least
5763 * import one segment from the local node.
5764 *
5765 * no need to grab the rsm_suspend_list lock here since we are
5766 * single threaded when suspend is called.
5767 */
5768
5769 mutex_enter(&importer_list.lock);
5770 for (i = 0; i < rsm_hash_size; i++) {
5771
5772 token = importer_list.bucket[i];
5773
5774 while (token != NULL) {
5775
5776 tokp = head;
5777
5778 /*
5779 * make sure that the token's node
5780 * is not already on the suspend list
5781 */
5782 while (tokp != NULL) {
5783 if (tokp->nodeid == token->importing_node) {
5784 break;
5785 }
5786 tokp = tokp->next;
5787 }
5788
5789 if (tokp == NULL) { /* not in suspend list */
5790 tokp = kmem_zalloc(sizeof (list_element_t),
5791 KM_SLEEP);
5792 tokp->nodeid = token->importing_node;
5793 tokp->next = head;
5794 head = tokp;
5795 }
5796
5797 token = token->next;
5798 }
5799 }
5800 mutex_exit(&importer_list.lock);
5801
5802 if (head == NULL) { /* no importers so go ahead and quiesce segments */
5803 exporter_quiesce();
5804 return;
5805 }
5806
5807 mutex_enter(&rsm_suspend_list.list_lock);
5808 ASSERT(rsm_suspend_list.list_head == NULL);
5809 /*
5810 * update the suspend list righaway so that if a node dies the
5811 * pathmanager can set the NODE dead flag
5812 */
5813 rsm_suspend_list.list_head = head;
5814 mutex_exit(&rsm_suspend_list.list_lock);
5815
5816 tokp = head;
5817
5818 while (tokp != NULL) {
5819 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_SUSPEND;
5820 e = rsmipc_send(tokp->nodeid, &request, RSM_NO_REPLY);
5821 /*
5822 * Error in rsmipc_send currently happens due to inaccessibility
5823 * of the remote node.
5824 */
5825 if (e == RSM_SUCCESS) { /* send failed - don't wait for ack */
5826 tokp->flags |= RSM_SUSPEND_ACKPENDING;
5827 }
5828
5829 tokp = tokp->next;
5830 }
5831
5832 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
5833 "rsm_send_suspend done\n"));
5834
5835 }
5836
5837 static void
5838 rsm_send_resume()
5839 {
5840 rsmipc_request_t request;
5841 list_element_t *elem, *head;
5842
5843 /*
5844 * save the suspend list so that we know where to send
5845 * the resume messages and make the suspend list head
5846 * NULL.
5847 */
5848 mutex_enter(&rsm_suspend_list.list_lock);
5849 head = rsm_suspend_list.list_head;
5850 rsm_suspend_list.list_head = NULL;
5851 mutex_exit(&rsm_suspend_list.list_lock);
5852
5853 while (head != NULL) {
5854 elem = head;
5855 head = head->next;
5856
5857 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_RESUME;
5858
5859 (void) rsmipc_send(elem->nodeid, &request, RSM_NO_REPLY);
5860
5861 kmem_free((void *)elem, sizeof (list_element_t));
5862
5863 }
5864
5865 }
5866
5867 /*
5868 * This function takes path and sends a message using the sendq
5869 * corresponding to it. The RSMIPC_MSG_SQREADY, RSMIPC_MSG_SQREADY_ACK
5870 * and RSMIPC_MSG_CREDIT are sent using this function.
5871 */
5872 int
5873 rsmipc_send_controlmsg(path_t *path, int msgtype)
5874 {
5875 int e;
5876 int retry_cnt = 0;
5877 int min_retry_cnt = 10;
5878 adapter_t *adapter;
5879 rsm_send_t is;
5880 rsm_send_q_handle_t ipc_handle;
5881 rsmipc_controlmsg_t msg;
5882 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_FLOWCONTROL);
5883
5884 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5885 "rsmipc_send_controlmsg enter\n"));
5886
5887 ASSERT(MUTEX_HELD(&path->mutex));
5888
5889 adapter = path->local_adapter;
5890
5891 DBG_PRINTF((category, RSM_DEBUG, "rsmipc_send_controlmsg:path=%lx "
5892 "msgtype=%d %lx:%llx->%lx:%llx procmsg=%d\n", path, msgtype,
5893 my_nodeid, adapter->hwaddr, path->remote_node,
5894 path->remote_hwaddr, path->procmsg_cnt));
5895
5896 if (path->state != RSMKA_PATH_ACTIVE) {
5897 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5898 "rsmipc_send_controlmsg done: ! RSMKA_PATH_ACTIVE"));
5899 return (1);
5900 }
5901
5902 ipc_handle = path->sendq_token.rsmpi_sendq_handle;
5903
5904 msg.rsmipc_hdr.rsmipc_version = RSM_VERSION;
5905 msg.rsmipc_hdr.rsmipc_src = my_nodeid;
5906 msg.rsmipc_hdr.rsmipc_type = msgtype;
5907 msg.rsmipc_hdr.rsmipc_incn = path->remote_incn;
5908
5909 if (msgtype == RSMIPC_MSG_CREDIT)
5910 msg.rsmipc_credits = path->procmsg_cnt;
5911
5912 msg.rsmipc_local_incn = path->local_incn;
5913
5914 msg.rsmipc_adapter_hwaddr = adapter->hwaddr;
5915 /* incr the sendq, path refcnt */
5916 PATH_HOLD_NOLOCK(path);
5917 SENDQ_TOKEN_HOLD(path);
5918
5919 do {
5920 /* drop the path lock before doing the rsm_send */
5921 mutex_exit(&path->mutex);
5922
5923 is.is_data = (void *)&msg;
5924 is.is_size = sizeof (msg);
5925 is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP;
5926 is.is_wait = 0;
5927
5928 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is, NULL);
5929
5930 ASSERT(e != RSMERR_QUEUE_FENCE_UP &&
5931 e != RSMERR_BAD_BARRIER_HNDL);
5932
5933 mutex_enter(&path->mutex);
5934
5935 if (e == RSM_SUCCESS) {
5936 break;
5937 }
5938 /* error counter for statistics */
5939 atomic_inc_64(&rsm_ctrlmsg_errcnt);
5940
5941 DBG_PRINTF((category, RSM_ERR,
5942 "rsmipc_send_controlmsg:rsm_send error=%d", e));
5943
5944 if (++retry_cnt == min_retry_cnt) { /* backoff before retry */
5945 (void) cv_reltimedwait(&path->sendq_token.sendq_cv,
5946 &path->mutex, drv_usectohz(10000), TR_CLOCK_TICK);
5947 retry_cnt = 0;
5948 }
5949 } while (path->state == RSMKA_PATH_ACTIVE);
5950
5951 /* decrement the sendq,path refcnt that we incr before rsm_send */
5952 SENDQ_TOKEN_RELE(path);
5953 PATH_RELE_NOLOCK(path);
5954
5955 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5956 "rsmipc_send_controlmsg done=%d", e));
5957 return (e);
5958 }
5959
5960 /*
5961 * Called from rsm_force_unload and path_importer_disconnect. The memory
5962 * mapping for the imported segment is removed and the segment is
5963 * disconnected at the interconnect layer if disconnect_flag is TRUE.
5964 * rsm_force_unload will get disconnect_flag TRUE from rsm_intr_callback
5965 * and FALSE from rsm_rebind.
5966 *
5967 * When subsequent accesses cause page faulting, the dummy page is mapped
5968 * to resolve the fault, and the mapping generation number is incremented
5969 * so that the application can be notified on a close barrier operation.
5970 *
5971 * It is important to note that the caller of rsmseg_unload is responsible for
5972 * acquiring the segment lock before making a call to rsmseg_unload. This is
5973 * required to make the caller and rsmseg_unload thread safe. The segment lock
5974 * will be released by the rsmseg_unload function.
5975 */
5976 void
5977 rsmseg_unload(rsmseg_t *im_seg)
5978 {
5979 rsmcookie_t *hdl;
5980 void *shared_cookie;
5981 rsmipc_request_t request;
5982 uint_t maxprot;
5983
5984 DBG_DEFINE(category,
5985 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
5986
5987 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_unload enter\n"));
5988
5989 ASSERT(im_seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
5990
5991 /* wait until segment leaves the mapping state */
5992 while (im_seg->s_state == RSM_STATE_MAPPING)
5993 cv_wait(&im_seg->s_cv, &im_seg->s_lock);
5994 /*
5995 * An unload is only necessary if the segment is connected. However,
5996 * if the segment was on the import list in state RSM_STATE_CONNECTING
5997 * then a connection was in progress. Change to RSM_STATE_NEW
5998 * here to cause an early exit from the connection process.
5999 */
6000 if (im_seg->s_state == RSM_STATE_NEW) {
6001 rsmseglock_release(im_seg);
6002 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6003 "rsmseg_unload done: RSM_STATE_NEW\n"));
6004 return;
6005 } else if (im_seg->s_state == RSM_STATE_CONNECTING) {
6006 im_seg->s_state = RSM_STATE_ABORT_CONNECT;
6007 rsmsharelock_acquire(im_seg);
6008 im_seg->s_share->rsmsi_state = RSMSI_STATE_ABORT_CONNECT;
6009 rsmsharelock_release(im_seg);
6010 rsmseglock_release(im_seg);
6011 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6012 "rsmseg_unload done: RSM_STATE_CONNECTING\n"));
6013 return;
6014 }
6015
6016 if (im_seg->s_flags & RSM_FORCE_DISCONNECT) {
6017 if (im_seg->s_ckl != NULL) {
6018 int e;
6019 /* Setup protections for remap */
6020 maxprot = PROT_USER;
6021 if (im_seg->s_mode & RSM_PERM_READ) {
6022 maxprot |= PROT_READ;
6023 }
6024 if (im_seg->s_mode & RSM_PERM_WRITE) {
6025 maxprot |= PROT_WRITE;
6026 }
6027 hdl = im_seg->s_ckl;
6028 for (; hdl != NULL; hdl = hdl->c_next) {
6029 e = devmap_umem_remap(hdl->c_dhp, rsm_dip,
6030 remap_cookie,
6031 hdl->c_off, hdl->c_len,
6032 maxprot, 0, NULL);
6033
6034 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6035 "remap returns %d\n", e));
6036 }
6037 }
6038
6039 (void) rsm_closeconnection(im_seg, &shared_cookie);
6040
6041 if (shared_cookie != NULL) {
6042 /*
6043 * inform the exporting node so this import
6044 * can be deleted from the list of importers.
6045 */
6046 request.rsmipc_hdr.rsmipc_type =
6047 RSMIPC_MSG_NOTIMPORTING;
6048 request.rsmipc_key = im_seg->s_segid;
6049 request.rsmipc_segment_cookie = shared_cookie;
6050 rsmseglock_release(im_seg);
6051 (void) rsmipc_send(im_seg->s_node, &request,
6052 RSM_NO_REPLY);
6053 } else {
6054 rsmseglock_release(im_seg);
6055 }
6056 }
6057 else
6058 rsmseglock_release(im_seg);
6059
6060 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_unload done\n"));
6061
6062 }
6063
6064 /* ****************************** Importer Calls ************************ */
6065
6066 static int
6067 rsm_access(uid_t owner, gid_t group, int perm, int mode, const struct cred *cr)
6068 {
6069 int shifts = 0;
6070
6071 if (crgetuid(cr) != owner) {
6072 shifts += 3;
6073 if (!groupmember(group, cr))
6074 shifts += 3;
6075 }
6076
6077 mode &= ~(perm << shifts);
6078
6079 if (mode == 0)
6080 return (0);
6081
6082 return (secpolicy_rsm_access(cr, owner, mode));
6083 }
6084
6085
/*
 * rsm_connect - connect the import segment `seg' to the segment published
 * under msg->key on node msg->nodeid.
 *
 *	seg	import segment; asserted to be in RSM_STATE_NEW on entry
 *	msg	ioctl message; supplies nodeid, key and perm and receives
 *		off, gnum, len and rnum on success
 *	cred	credentials of the caller, used for the rsm_access() check
 *	dataptr	user address the result message is copied out to
 *	mode	ioctl mode flags; passed to rsm_getadapter() and
 *		ddi_copyout() and used to detect an ILP32 caller
 *
 * Outline:
 *   1. look up the adapter, validate msg->perm and put the segment on the
 *      import list in state RSM_STATE_CONNECTING;
 *   2. get the shared import structure and run its state machine: the
 *      importer that moves it to RSMSI_STATE_CONNECTING sends an
 *      RSMIPC_MSG_SEGCONNECT request to the exporter and stores the reply in
 *      the shared structure; importers arriving meanwhile wait on rsmsi_cv;
 *   3. retake the segment and share locks, handle a concurrent abort, check
 *      the caller's access and, for a remote segment whose shared state is
 *      still CONNECTING, connect at the RSMPI layer;
 *   4. mark the segment RSM_STATE_CONNECT, fill in msg and copy it out.
 *
 * Every failure after step 1 removes the segment from the import list,
 * clears s_adapter, releases the adapter and puts the segment back in
 * RSM_STATE_NEW.
 *
 * Returns RSM_SUCCESS, or one of RSMERR_CTLR_NOT_PRESENT, RSMERR_BAD_PERMS,
 * RSMERR_INTERRUPTED, RSMERR_INTERNAL_ERROR, RSMERR_PERM_DENIED,
 * RSMERR_SEG_NOT_PUBLISHED_TO_NODE, RSMERR_REMOTE_NODE_UNREACHABLE,
 * RSMERR_BAD_ADDR, or the error reported by rsmimport_add(), rsmipc_send(),
 * the exporter's reply status or the RSMPI rsm_connect operation.
 *
 * NOTE(review): when the final ddi_copyout() fails, RSMERR_BAD_ADDR is
 * returned but the segment is left in RSM_STATE_CONNECT with the adapter
 * still held - confirm that callers expect this.
 */
6086 static int
6087 rsm_connect(rsmseg_t *seg, rsm_ioctlmsg_t *msg, cred_t *cred,
6088 intptr_t dataptr, int mode)
6089 {
6090 int e;
6091 int recheck_state = 0;
6092 void *shared_cookie;
6093 rsmipc_request_t request;
6094 rsmipc_reply_t reply;
6095 rsm_permission_t access;
6096 adapter_t *adapter;
6097 rsm_addr_t addr = 0;
6098 rsm_import_share_t *sharedp;
6099 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT);
6100
6101 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_connect enter\n"));
6102
6103 adapter = rsm_getadapter(msg, mode);
6104 if (adapter == NULL) {
6105 DBG_PRINTF((category, RSM_ERR,
6106 "rsm_connect done:ENODEV adapter=NULL\n"));
6107 return (RSMERR_CTLR_NOT_PRESENT);
6108 }
6109
/* the loopback adapter can only be used to reach the local node */
6110 if ((adapter == &loopback_adapter) && (msg->nodeid != my_nodeid)) {
6111 rsmka_release_adapter(adapter);
6112 DBG_PRINTF((category, RSM_ERR,
6113 "rsm_connect done:ENODEV loopback\n"));
6114 return (RSMERR_CTLR_NOT_PRESENT);
6115 }
6116
6117
6118 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
6119 ASSERT(seg->s_state == RSM_STATE_NEW);
6120
6121 /*
6122 * Translate perm to access
6123 */
6124 if (msg->perm & ~RSM_PERM_RDWR) {
6125 rsmka_release_adapter(adapter);
6126 DBG_PRINTF((category, RSM_ERR,
6127 "rsm_connect done:EINVAL invalid perms\n"));
6128 return (RSMERR_BAD_PERMS);
6129 }
6130 access = 0;
6131 if (msg->perm & RSM_PERM_READ)
6132 access |= RSM_ACCESS_READ;
6133 if (msg->perm & RSM_PERM_WRITE)
6134 access |= RSM_ACCESS_WRITE;
6135
6136 seg->s_node = msg->nodeid;
6137
6138 /*
6139 * Adding to the import list locks the segment; release the segment
6140 * lock so we can get the reply for the send.
6141 */
6142 e = rsmimport_add(seg, msg->key);
6143 if (e) {
6144 rsmka_release_adapter(adapter);
6145 DBG_PRINTF((category, RSM_ERR,
6146 "rsm_connect done:rsmimport_add failed %d\n", e));
6147 return (e);
6148 }
6149 seg->s_state = RSM_STATE_CONNECTING;
6150
6151 /*
6152 * Set the s_adapter field here so as to have a valid comparison of
6153 * the adapter and the s_adapter value during rsmshare_get. For
6154 * any error, set s_adapter to NULL before doing a release_adapter
6155 */
6156 seg->s_adapter = adapter;
6157
6158 rsmseglock_release(seg);
6159
6160 /*
6161 * get the pointer to the shared data structure; the
6162 * shared data is locked and refcount has been incremented
6163 */
6164 sharedp = rsmshare_get(msg->key, msg->nodeid, adapter, seg);
6165
6166 ASSERT(rsmsharelock_held(seg));
6167
/*
 * Shared-state machine, run with the share lock held.  NEW and DISCONNECTED
 * are moved to CONNECTING, making this thread the one that contacts the
 * exporter.  CONNECTING and the two QUIESCE states are waited out, after
 * which the state is examined again.  All other states leave the loop
 * unchanged and are dealt with further down.
 */
6168 do {
6169 /* flag indicates whether we need to recheck the state */
6170 recheck_state = 0;
6171 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6172 "rsm_connect:RSMSI_STATE=%d\n", sharedp->rsmsi_state));
6173 switch (sharedp->rsmsi_state) {
6174 case RSMSI_STATE_NEW:
6175 sharedp->rsmsi_state = RSMSI_STATE_CONNECTING;
6176 break;
6177 case RSMSI_STATE_CONNECTING:
6178 /* FALLTHRU */
6179 case RSMSI_STATE_CONN_QUIESCE:
6180 /* FALLTHRU */
6181 case RSMSI_STATE_MAP_QUIESCE:
6182 /* wait for the state to change */
6183 while ((sharedp->rsmsi_state ==
6184 RSMSI_STATE_CONNECTING) ||
6185 (sharedp->rsmsi_state ==
6186 RSMSI_STATE_CONN_QUIESCE) ||
6187 (sharedp->rsmsi_state ==
6188 RSMSI_STATE_MAP_QUIESCE)) {
6189 if (cv_wait_sig(&sharedp->rsmsi_cv,
6190 &sharedp->rsmsi_lock) == 0) {
6191 /* signalled - clean up and return */
6192 rsmsharelock_release(seg);
6193 rsmimport_rm(seg);
6194 seg->s_adapter = NULL;
6195 rsmka_release_adapter(adapter);
6196 seg->s_state = RSM_STATE_NEW;
6197 DBG_PRINTF((category, RSM_ERR,
6198 "rsm_connect done: INTERRUPTED\n"));
6199 return (RSMERR_INTERRUPTED);
6200 }
6201 }
6202 /*
6203 * the state changed, loop back and check what it is
6204 */
6205 recheck_state = 1;
6206 break;
6207 case RSMSI_STATE_ABORT_CONNECT:
6208 /* exit the loop and clean up further down */
6209 break;
6210 case RSMSI_STATE_CONNECTED:
6211 /* already connected, good - fall through */
6212 case RSMSI_STATE_MAPPED:
6213 /* already mapped, wow - fall through */
6214 /* access validation etc is done further down */
6215 break;
6216 case RSMSI_STATE_DISCONNECTED:
6217 /* disconnected - so reconnect now */
6218 sharedp->rsmsi_state = RSMSI_STATE_CONNECTING;
6219 break;
6220 default:
6221 ASSERT(0); /* Invalid State */
6222 }
6223 } while (recheck_state);
6224
6225 if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTING) {
6226 /* we are the first to connect */
/* the share lock is not held across the message exchange below */
6227 rsmsharelock_release(seg);
6228
6229 if (msg->nodeid != my_nodeid) {
6230 addr = get_remote_hwaddr(adapter, msg->nodeid);
6231
6232 if ((int64_t)addr < 0) {
6233 rsmsharelock_acquire(seg);
6234 rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING,
6235 RSMSI_STATE_NEW);
6236 rsmsharelock_release(seg);
6237 rsmimport_rm(seg);
6238 seg->s_adapter = NULL;
6239 rsmka_release_adapter(adapter);
6240 seg->s_state = RSM_STATE_NEW;
6241 DBG_PRINTF((category, RSM_ERR,
6242 "rsm_connect done: hwaddr<0\n"));
6243 return (RSMERR_INTERNAL_ERROR);
6244 }
6245 } else {
6246 addr = adapter->hwaddr;
6247 }
6248
6249 /*
6250 * send request to node [src, dest, key, msgid] and get back
6251 * [status, msgid, cookie]
6252 */
6253 request.rsmipc_key = msg->key;
6254 /*
6255 * we need the s_mode of the exporter so pass
6256 * RSM_ACCESS_TRUSTED
6257 */
6258 request.rsmipc_perm = RSM_ACCESS_TRUSTED;
6259 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_SEGCONNECT;
6260 request.rsmipc_adapter_hwaddr = addr;
6261 request.rsmipc_segment_cookie = sharedp;
6262
6263 e = (int)rsmipc_send(msg->nodeid, &request, &reply);
6264 if (e) {
6265 rsmsharelock_acquire(seg);
6266 rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING,
6267 RSMSI_STATE_NEW);
6268 rsmsharelock_release(seg);
6269 rsmimport_rm(seg);
6270 seg->s_adapter = NULL;
6271 rsmka_release_adapter(adapter);
6272 seg->s_state = RSM_STATE_NEW;
6273 DBG_PRINTF((category, RSM_ERR,
6274 "rsm_connect done:rsmipc_send failed %d\n", e));
6275 return (e);
6276 }
6277
6278 if (reply.rsmipc_status != RSM_SUCCESS) {
6279 rsmsharelock_acquire(seg);
6280 rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING,
6281 RSMSI_STATE_NEW);
6282 rsmsharelock_release(seg);
6283 rsmimport_rm(seg);
6284 seg->s_adapter = NULL;
6285 rsmka_release_adapter(adapter);
6286 seg->s_state = RSM_STATE_NEW;
6287 DBG_PRINTF((category, RSM_ERR,
6288 "rsm_connect done:rsmipc_send reply err %d\n",
6289 reply.rsmipc_status));
6290 return (reply.rsmipc_status);
6291 }
6292
6293 rsmsharelock_acquire(seg);
6294 /* store the information recvd into the shared data struct */
6295 sharedp->rsmsi_mode = reply.rsmipc_mode;
6296 sharedp->rsmsi_uid = reply.rsmipc_uid;
6297 sharedp->rsmsi_gid = reply.rsmipc_gid;
6298 sharedp->rsmsi_seglen = reply.rsmipc_seglen;
/* same value that was sent as request.rsmipc_segment_cookie above */
6299 sharedp->rsmsi_cookie = sharedp;
6300 }
6301
6302 rsmsharelock_release(seg);
6303
6304 /*
6305 * Get the segment lock and check for a force disconnect
6306 * from the export side which would have changed the state
6307 * to RSM_STATE_ABORT_CONNECT. Once the segment lock is acquired a
6308 * force disconnect will be held off until the connection
6309 * has completed.
6310 */
6311 rsmseglock_acquire(seg);
6312 rsmsharelock_acquire(seg);
6313 ASSERT(seg->s_state == RSM_STATE_CONNECTING ||
6314 seg->s_state == RSM_STATE_ABORT_CONNECT);
6315
6316 shared_cookie = sharedp->rsmsi_cookie;
6317
6318 if ((seg->s_state == RSM_STATE_ABORT_CONNECT) ||
6319 (sharedp->rsmsi_state == RSMSI_STATE_ABORT_CONNECT)) {
6320 seg->s_state = RSM_STATE_NEW;
6321 seg->s_adapter = NULL;
6322 rsmsharelock_release(seg);
6323 rsmseglock_release(seg);
6324 rsmimport_rm(seg);
6325 rsmka_release_adapter(adapter);
6326
6327 rsmsharelock_acquire(seg);
6328 if (!(sharedp->rsmsi_flags & RSMSI_FLAGS_ABORTDONE)) {
6329 /*
6330 * set a flag indicating abort handling has been
6331 * done
6332 */
6333 sharedp->rsmsi_flags |= RSMSI_FLAGS_ABORTDONE;
6334 rsmsharelock_release(seg);
6335 /* send a message to exporter - only once */
6336 (void) rsm_send_notimporting(msg->nodeid,
6337 msg->key, shared_cookie);
6338 rsmsharelock_acquire(seg);
6339 /*
6340 * wake up any waiting importers and inform that
6341 * connection has been aborted
6342 */
6343 cv_broadcast(&sharedp->rsmsi_cv);
6344 }
6345 rsmsharelock_release(seg);
6346
6347 DBG_PRINTF((category, RSM_ERR,
6348 "rsm_connect done: RSM_STATE_ABORT_CONNECT\n"));
6349 return (RSMERR_INTERRUPTED);
6350 }
6351
6352
6353 /*
6354 * We need to verify that this process has access
6355 */
6356 e = rsm_access(sharedp->rsmsi_uid, sharedp->rsmsi_gid,
6357 access & sharedp->rsmsi_mode,
6358 (int)(msg->perm & RSM_PERM_RDWR), cred);
6359 if (e) {
6360 rsmsharelock_release(seg);
6361 seg->s_state = RSM_STATE_NEW;
6362 seg->s_adapter = NULL;
6363 rsmseglock_release(seg);
6364 rsmimport_rm(seg);
6365 rsmka_release_adapter(adapter);
6366 /*
6367 * No need to lock segment it has been removed
6368 * from the hash table
6369 */
6370 rsmsharelock_acquire(seg);
6371 if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTING) {
6372 rsmsharelock_release(seg);
6373 /* this is the first importer */
6374
6375 (void) rsm_send_notimporting(msg->nodeid, msg->key,
6376 shared_cookie);
6377 rsmsharelock_acquire(seg);
6378 sharedp->rsmsi_state = RSMSI_STATE_NEW;
6379 cv_broadcast(&sharedp->rsmsi_cv);
6380 }
6381 rsmsharelock_release(seg);
6382
6383 DBG_PRINTF((category, RSM_ERR,
6384 "rsm_connect done: ipcaccess failed\n"));
6385 return (RSMERR_PERM_DENIED);
6386 }
6387
6388 /* update state and cookie */
6389 seg->s_segid = sharedp->rsmsi_segid;
6390 seg->s_len = sharedp->rsmsi_seglen;
6391 seg->s_mode = access & sharedp->rsmsi_mode;
6392 seg->s_pid = ddi_get_pid();
6393 seg->s_mapinfo = NULL;
6394
/*
 * For a remote segment only the importer that finds the shared state still
 * CONNECTING performs the RSMPI connect; every importer then copies the
 * resulting handle from the shared structure.
 */
6395 if (seg->s_node != my_nodeid) {
6396 if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTING) {
6397 e = adapter->rsmpi_ops->rsm_connect(
6398 adapter->rsmpi_handle,
6399 addr, seg->s_segid, &sharedp->rsmsi_handle);
6400
6401 if (e != RSM_SUCCESS) {
6402 seg->s_state = RSM_STATE_NEW;
6403 seg->s_adapter = NULL;
6404 rsmsharelock_release(seg);
6405 rsmseglock_release(seg);
6406 rsmimport_rm(seg);
6407 rsmka_release_adapter(adapter);
6408 /*
6409 * inform the exporter to delete this importer
6410 */
6411 (void) rsm_send_notimporting(msg->nodeid,
6412 msg->key, shared_cookie);
6413
6414 /*
6415 * Now inform any waiting importers to
6416 * retry connect. This needs to be done
6417 * after sending notimporting so that
6418 * the notimporting is sent before a waiting
6419 * importer sends a segconnect while retrying
6420 *
6421 * No need to lock segment it has been removed
6422 * from the hash table
6423 */
6424
6425 rsmsharelock_acquire(seg);
6426 sharedp->rsmsi_state = RSMSI_STATE_NEW;
6427 cv_broadcast(&sharedp->rsmsi_cv);
6428 rsmsharelock_release(seg);
6429
6430 DBG_PRINTF((category, RSM_ERR,
6431 "rsm_connect error %d\n", e));
6432 if (e == RSMERR_SEG_NOT_PUBLISHED_TO_RSM_ADDR)
6433 return (
6434 RSMERR_SEG_NOT_PUBLISHED_TO_NODE);
6435 else if ((e == RSMERR_RSM_ADDR_UNREACHABLE) ||
6436 (e == RSMERR_UNKNOWN_RSM_ADDR))
6437 return (RSMERR_REMOTE_NODE_UNREACHABLE);
6438 else
6439 return (e);
6440 }
6441
6442 }
6443 seg->s_handle.in = sharedp->rsmsi_handle;
6444
6445 }
6446
6447 seg->s_state = RSM_STATE_CONNECT;
6448
6449
6450 seg->s_flags &= ~RSM_IMPORT_DUMMY; /* clear dummy flag */
6451 if (bar_va) {
6452 /* increment generation number on barrier page */
6453 atomic_inc_16(bar_va + seg->s_hdr.rsmrc_num);
6454 /* return user off into barrier page where status will be */
6455 msg->off = (int)seg->s_hdr.rsmrc_num;
6456 msg->gnum = bar_va[msg->off]; /* gnum race */
6457 } else {
6458 msg->off = 0;
6459 msg->gnum = 0; /* gnum race */
6460 }
6461
6462 msg->len = (int)sharedp->rsmsi_seglen;
6463 msg->rnum = seg->s_minor;
6464 rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING, RSMSI_STATE_CONNECTED);
6465 rsmsharelock_release(seg);
6466 rsmseglock_release(seg);
6467
6468 /* Return back to user the segment size & perm in case it's needed */
6469
6470 #ifdef _MULTI_DATAMODEL
/* an ILP32 caller is handed the 32-bit form of the message */
6471 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
6472 rsm_ioctlmsg32_t msg32;
6473
6474 if (msg->len > UINT_MAX)
6475 msg32.len = RSM_MAXSZ_PAGE_ALIGNED;
6476 else
6477 msg32.len = msg->len;
6478 msg32.off = msg->off;
6479 msg32.perm = msg->perm;
6480 msg32.gnum = msg->gnum;
6481 msg32.rnum = msg->rnum;
6482
6483 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6484 "rsm_connect done\n"));
6485
6486 if (ddi_copyout((caddr_t)&msg32, (caddr_t)dataptr,
6487 sizeof (msg32), mode))
6488 return (RSMERR_BAD_ADDR);
6489 else
6490 return (RSM_SUCCESS);
6491 }
6492 #endif
6493 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_connect done\n"));
6494
6495 if (ddi_copyout((caddr_t)msg, (caddr_t)dataptr, sizeof (*msg),
6496 mode))
6497 return (RSMERR_BAD_ADDR);
6498 else
6499 return (RSM_SUCCESS);
6500 }
6501
6502 static int
6503 rsm_unmap(rsmseg_t *seg)
6504 {
6505 int err;
6506 adapter_t *adapter;
6507 rsm_import_share_t *sharedp;
6508 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT);
6509
6510 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6511 "rsm_unmap enter %u\n", seg->s_segid));
6512
6513 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
6514
6515 /* assert seg is locked */
6516 ASSERT(rsmseglock_held(seg));
6517 ASSERT(seg->s_state != RSM_STATE_MAPPING);
6518
6519 if ((seg->s_state != RSM_STATE_ACTIVE) &&
6520 (seg->s_state != RSM_STATE_MAP_QUIESCE)) {
6521 /* segment unmap has already been done */
6522 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unmap done\n"));
6523 return (RSM_SUCCESS);
6524 }
6525
6526 sharedp = seg->s_share;
6527
6528 rsmsharelock_acquire(seg);
6529
6530 /*
6531 * - shared data struct is in MAPPED or MAP_QUIESCE state
6532 */
6533
6534 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAPPED ||
6535 sharedp->rsmsi_state == RSMSI_STATE_MAP_QUIESCE);
6536
6537 /*
6538 * Unmap pages - previously rsm_memseg_import_unmap was called only if
6539 * the segment cookie list was NULL; but it is always NULL when
6540 * called from rsmmap_unmap and won't be NULL when called for
6541 * a force disconnect - so the check for NULL cookie list was removed
6542 */
6543
6544 ASSERT(sharedp->rsmsi_mapcnt > 0);
6545
6546 sharedp->rsmsi_mapcnt--;
6547
6548 if (sharedp->rsmsi_mapcnt == 0) {
6549 if (sharedp->rsmsi_state == RSMSI_STATE_MAPPED) {
6550 /* unmap the shared RSMPI mapping */
6551 adapter = seg->s_adapter;
6552 if (seg->s_node != my_nodeid) {
6553 ASSERT(sharedp->rsmsi_handle != NULL);
6554 err = adapter->rsmpi_ops->
6555 rsm_unmap(sharedp->rsmsi_handle);
6556 DBG_PRINTF((category, RSM_DEBUG,
6557 "rsm_unmap: rsmpi unmap %d\n", err));
6558 rsm_free_mapinfo(sharedp->rsmsi_mapinfo);
6559 sharedp->rsmsi_mapinfo = NULL;
6560 }
6561 sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
6562 } else { /* MAP_QUIESCE --munmap()--> CONN_QUIESCE */
6563 sharedp->rsmsi_state = RSMSI_STATE_CONN_QUIESCE;
6564 }
6565 }
6566
6567 rsmsharelock_release(seg);
6568
6569 /*
6570 * The s_cookie field is used to store the cookie returned from the
6571 * ddi_umem_lock when binding the pages for an export segment. This
6572 * is the primary use of the s_cookie field and does not normally
6573 * pertain to any importing segment except in the loopback case.
6574 * For the loopback case, the import segment and export segment are
6575 * on the same node, the s_cookie field of the segment structure for
6576 * the importer is initialized to the s_cookie field in the exported
6577 * segment during the map operation and is used during the call to
6578 * devmap_umem_setup for the import mapping.
6579 * Thus, during unmap, we simply need to set s_cookie to NULL to
6580 * indicate that the mapping no longer exists.
6581 */
6582 seg->s_cookie = NULL;
6583
6584 seg->s_mapinfo = NULL;
6585
6586 if (seg->s_state == RSM_STATE_ACTIVE)
6587 seg->s_state = RSM_STATE_CONNECT;
6588 else
6589 seg->s_state = RSM_STATE_CONN_QUIESCE;
6590
6591 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unmap done\n"));
6592
6593 return (RSM_SUCCESS);
6594 }
6595
6596 /*
6597 * cookie returned here if not null indicates that it is
6598 * the last importer and it can be used in the RSMIPC_NOT_IMPORTING
6599 * message.
6600 */
6601 static int
6602 rsm_closeconnection(rsmseg_t *seg, void **cookie)
6603 {
6604 int e;
6605 adapter_t *adapter;
6606 rsm_import_share_t *sharedp;
6607 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT);
6608
6609 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6610 "rsm_closeconnection enter\n"));
6611
6612 *cookie = (void *)NULL;
6613
6614 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
6615
6616 /* assert seg is locked */
6617 ASSERT(rsmseglock_held(seg));
6618
6619 if (seg->s_state == RSM_STATE_DISCONNECT) {
6620 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6621 "rsm_closeconnection done: already disconnected\n"));
6622 return (RSM_SUCCESS);
6623 }
6624
6625 /* wait for all putv/getv ops to get done */
6626 while (seg->s_rdmacnt > 0) {
6627 cv_wait(&seg->s_cv, &seg->s_lock);
6628 }
6629
6630 (void) rsm_unmap(seg);
6631
6632 ASSERT(seg->s_state == RSM_STATE_CONNECT ||
6633 seg->s_state == RSM_STATE_CONN_QUIESCE);
6634
6635 adapter = seg->s_adapter;
6636 sharedp = seg->s_share;
6637
6638 ASSERT(sharedp != NULL);
6639
6640 rsmsharelock_acquire(seg);
6641
6642 /*
6643 * Disconnect on adapter
6644 *
6645 * The current algorithm is stateless, I don't have to contact
6646 * server when I go away. It only gives me permissions. Of course,
6647 * the adapters will talk to terminate the connect.
6648 *
6649 * disconnect is needed only if we are CONNECTED not in CONN_QUIESCE
6650 */
6651 if ((sharedp->rsmsi_state == RSMSI_STATE_CONNECTED) &&
6652 (sharedp->rsmsi_node != my_nodeid)) {
6653
6654 if (sharedp->rsmsi_refcnt == 1) {
6655 /* this is the last importer */
6656 ASSERT(sharedp->rsmsi_mapcnt == 0);
6657
6658 e = adapter->rsmpi_ops->
6659 rsm_disconnect(sharedp->rsmsi_handle);
6660 if (e != RSM_SUCCESS) {
6661 DBG_PRINTF((category, RSM_DEBUG,
6662 "rsm:disconnect failed seg=%x:err=%d\n",
6663 seg->s_key, e));
6664 }
6665 }
6666 }
6667
6668 seg->s_handle.in = NULL;
6669
6670 sharedp->rsmsi_refcnt--;
6671
6672 if (sharedp->rsmsi_refcnt == 0) {
6673 *cookie = (void *)sharedp->rsmsi_cookie;
6674 sharedp->rsmsi_state = RSMSI_STATE_DISCONNECTED;
6675 sharedp->rsmsi_handle = NULL;
6676 rsmsharelock_release(seg);
6677
6678 /* clean up the shared data structure */
6679 mutex_destroy(&sharedp->rsmsi_lock);
6680 cv_destroy(&sharedp->rsmsi_cv);
6681 kmem_free((void *)(sharedp), sizeof (rsm_import_share_t));
6682
6683 } else {
6684 rsmsharelock_release(seg);
6685 }
6686
6687 /* increment generation number on barrier page */
6688 if (bar_va) {
6689 atomic_inc_16(bar_va + seg->s_hdr.rsmrc_num);
6690 }
6691
6692 /*
6693 * The following needs to be done after any
6694 * rsmsharelock calls which use seg->s_share.
6695 */
6696 seg->s_share = NULL;
6697
6698 seg->s_state = RSM_STATE_DISCONNECT;
6699 /* signal anyone waiting in the CONN_QUIESCE state */
6700 cv_broadcast(&seg->s_cv);
6701
6702 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6703 "rsm_closeconnection done\n"));
6704
6705 return (RSM_SUCCESS);
6706 }
6707
6708 int
6709 rsm_disconnect(rsmseg_t *seg)
6710 {
6711 rsmipc_request_t request;
6712 void *shared_cookie;
6713 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT);
6714
6715 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_disconnect enter\n"));
6716
6717 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
6718
6719 /* assert seg isn't locked */
6720 ASSERT(!rsmseglock_held(seg));
6721
6722
6723 /* Remove segment from imported list */
6724 rsmimport_rm(seg);
6725
6726 /* acquire the segment */
6727 rsmseglock_acquire(seg);
6728
6729 /* wait until segment leaves the mapping state */
6730 while (seg->s_state == RSM_STATE_MAPPING)
6731 cv_wait(&seg->s_cv, &seg->s_lock);
6732
6733 if (seg->s_state == RSM_STATE_DISCONNECT) {
6734 seg->s_state = RSM_STATE_NEW;
6735 rsmseglock_release(seg);
6736 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6737 "rsm_disconnect done: already disconnected\n"));
6738 return (RSM_SUCCESS);
6739 }
6740
6741 (void) rsm_closeconnection(seg, &shared_cookie);
6742
6743 /* update state */
6744 seg->s_state = RSM_STATE_NEW;
6745
6746 if (shared_cookie != NULL) {
6747 /*
6748 * This is the last importer so inform the exporting node
6749 * so this import can be deleted from the list of importers.
6750 */
6751 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_NOTIMPORTING;
6752 request.rsmipc_key = seg->s_segid;
6753 request.rsmipc_segment_cookie = shared_cookie;
6754 rsmseglock_release(seg);
6755 (void) rsmipc_send(seg->s_node, &request, RSM_NO_REPLY);
6756 } else {
6757 rsmseglock_release(seg);
6758 }
6759
6760 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_disconnect done\n"));
6761
6762 return (DDI_SUCCESS);
6763 }
6764
6765 /*ARGSUSED*/
6766 static int
6767 rsm_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
6768 struct pollhead **phpp)
6769 {
6770 minor_t rnum;
6771 rsmresource_t *res;
6772 rsmseg_t *seg;
6773 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
6774
6775 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_chpoll enter\n"));
6776
6777 /* find minor, no lock */
6778 rnum = getminor(dev);
6779 res = rsmresource_lookup(rnum, RSM_NOLOCK);
6780
6781 /* poll is supported only for export/import segments */
6782 if ((res == NULL) || (res == RSMRC_RESERVED) ||
6783 (res->rsmrc_type == RSM_RESOURCE_BAR)) {
6784 return (ENXIO);
6785 }
6786
6787 *reventsp = 0;
6788
6789 /*
6790 * An exported segment must be in state RSM_STATE_EXPORT; an
6791 * imported segment must be in state RSM_STATE_ACTIVE.
6792 */
6793 seg = (rsmseg_t *)res;
6794
6795 if (seg->s_pollevent) {
6796 *reventsp = POLLRDNORM;
6797 } else if (!anyyet) {
6798 /* cannot take segment lock here */
6799 *phpp = &seg->s_poll;
6800 seg->s_pollflag |= RSM_SEGMENT_POLL;
6801 }
6802 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_chpoll done\n"));
6803 return (0);
6804 }
6805
6806
6807
6808 /* ************************* IOCTL Commands ********************* */
6809
6810 static rsmseg_t *
6811 rsmresource_seg(rsmresource_t *res, minor_t rnum, cred_t *credp,
6812 rsm_resource_type_t type)
6813 {
6814 /* get segment from resource handle */
6815 rsmseg_t *seg;
6816 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL);
6817
6818 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmresource_seg enter\n"));
6819
6820
6821 if (res != RSMRC_RESERVED) {
6822 seg = (rsmseg_t *)res;
6823 } else {
6824 /* Allocate segment now and bind it */
6825 seg = rsmseg_alloc(rnum, credp);
6826
6827 /*
6828 * if DR pre-processing is going on or DR is in progress
6829 * then the new export segments should be in the NEW_QSCD state
6830 */
6831 if (type == RSM_RESOURCE_EXPORT_SEGMENT) {
6832 mutex_enter(&rsm_drv_data.drv_lock);
6833 if ((rsm_drv_data.drv_state ==
6834 RSM_DRV_PREDEL_STARTED) ||
6835 (rsm_drv_data.drv_state ==
6836 RSM_DRV_PREDEL_COMPLETED) ||
6837 (rsm_drv_data.drv_state ==
6838 RSM_DRV_DR_IN_PROGRESS)) {
6839 seg->s_state = RSM_STATE_NEW_QUIESCED;
6840 }
6841 mutex_exit(&rsm_drv_data.drv_lock);
6842 }
6843
6844 rsmresource_insert(rnum, (rsmresource_t *)seg, type);
6845 }
6846
6847 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmresource_seg done\n"));
6848
6849 return (seg);
6850 }
6851
6852 static int
6853 rsmexport_ioctl(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int cmd, intptr_t arg,
6854 int mode, cred_t *credp)
6855 {
6856 int error;
6857 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT | RSM_IOCTL);
6858
6859 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmexport_ioctl enter\n"));
6860
6861 arg = arg;
6862 credp = credp;
6863
6864 ASSERT(seg != NULL);
6865
6866 switch (cmd) {
6867 case RSM_IOCTL_BIND:
6868 error = rsm_bind(seg, msg, arg, mode);
6869 break;
6870 case RSM_IOCTL_REBIND:
6871 error = rsm_rebind(seg, msg);
6872 break;
6873 case RSM_IOCTL_UNBIND:
6874 error = ENOTSUP;
6875 break;
6876 case RSM_IOCTL_PUBLISH:
6877 error = rsm_publish(seg, msg, arg, mode);
6878 break;
6879 case RSM_IOCTL_REPUBLISH:
6880 error = rsm_republish(seg, msg, mode);
6881 break;
6882 case RSM_IOCTL_UNPUBLISH:
6883 error = rsm_unpublish(seg, 1);
6884 break;
6885 default:
6886 error = EINVAL;
6887 break;
6888 }
6889
6890 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmexport_ioctl done: %d\n",
6891 error));
6892
6893 return (error);
6894 }
6895 static int
6896 rsmimport_ioctl(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int cmd, intptr_t arg,
6897 int mode, cred_t *credp)
6898 {
6899 int error;
6900 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
6901
6902 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmimport_ioctl enter\n"));
6903
6904 ASSERT(seg);
6905
6906 switch (cmd) {
6907 case RSM_IOCTL_CONNECT:
6908 error = rsm_connect(seg, msg, credp, arg, mode);
6909 break;
6910 default:
6911 error = EINVAL;
6912 break;
6913 }
6914
6915 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmimport_ioctl done: %d\n",
6916 error));
6917 return (error);
6918 }
6919
6920 static int
6921 rsmbar_ioctl(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int cmd, intptr_t arg,
6922 int mode)
6923 {
6924 int e;
6925 adapter_t *adapter;
6926 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
6927
6928 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmbar_ioctl enter\n"));
6929
6930
6931 if ((seg->s_flags & RSM_IMPORT_DUMMY) != 0) {
6932 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6933 "rsmbar_ioctl done: RSM_IMPORT_DUMMY\n"));
6934 return (RSMERR_CONN_ABORTED);
6935 } else if (seg->s_node == my_nodeid) {
6936 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6937 "rsmbar_ioctl done: loopback\n"));
6938 return (RSM_SUCCESS);
6939 }
6940
6941 adapter = seg->s_adapter;
6942
6943 switch (cmd) {
6944 case RSM_IOCTL_BAR_CHECK:
6945 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6946 "rsmbar_ioctl done: RSM_BAR_CHECK %d\n", bar_va));
6947 return (bar_va ? RSM_SUCCESS : EINVAL);
6948 case RSM_IOCTL_BAR_OPEN:
6949 e = adapter->rsmpi_ops->
6950 rsm_open_barrier_ctrl(adapter->rsmpi_handle, &msg->bar);
6951 break;
6952 case RSM_IOCTL_BAR_ORDER:
6953 e = adapter->rsmpi_ops->rsm_order_barrier(&msg->bar);
6954 break;
6955 case RSM_IOCTL_BAR_CLOSE:
6956 e = adapter->rsmpi_ops->rsm_close_barrier(&msg->bar);
6957 break;
6958 default:
6959 e = EINVAL;
6960 break;
6961 }
6962
6963 if (e == RSM_SUCCESS) {
6964 #ifdef _MULTI_DATAMODEL
6965 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
6966 rsm_ioctlmsg32_t msg32;
6967 int i;
6968
6969 for (i = 0; i < 4; i++) {
6970 msg32.bar.comp[i].u64 = msg->bar.comp[i].u64;
6971 }
6972
6973 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6974 "rsmbar_ioctl done\n"));
6975 if (ddi_copyout((caddr_t)&msg32, (caddr_t)arg,
6976 sizeof (msg32), mode))
6977 return (RSMERR_BAD_ADDR);
6978 else
6979 return (RSM_SUCCESS);
6980 }
6981 #endif
6982 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6983 "rsmbar_ioctl done\n"));
6984 if (ddi_copyout((caddr_t)&msg->bar, (caddr_t)arg,
6985 sizeof (*msg), mode))
6986 return (RSMERR_BAD_ADDR);
6987 else
6988 return (RSM_SUCCESS);
6989 }
6990
6991 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6992 "rsmbar_ioctl done: error=%d\n", e));
6993
6994 return (e);
6995 }
6996
6997 /*
6998 * Ring the doorbell of the export segment to which this segment is
6999 * connected.
7000 */
7001 static int
7002 exportbell_ioctl(rsmseg_t *seg, int cmd /*ARGSUSED*/)
7003 {
7004 int e = 0;
7005 rsmipc_request_t request;
7006
7007 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
7008
7009 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "exportbell_ioctl enter\n"));
7010
7011 request.rsmipc_key = seg->s_segid;
7012 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_BELL;
7013 request.rsmipc_segment_cookie = NULL;
7014 e = rsmipc_send(seg->s_node, &request, RSM_NO_REPLY);
7015
7016 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7017 "exportbell_ioctl done: %d\n", e));
7018
7019 return (e);
7020 }
7021
7022 /*
7023 * Ring the doorbells of all segments importing this segment
7024 */
7025 static int
7026 importbell_ioctl(rsmseg_t *seg, int cmd /*ARGSUSED*/)
7027 {
7028 importing_token_t *token = NULL;
7029 rsmipc_request_t request;
7030 int index;
7031
7032 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT | RSM_IOCTL);
7033
7034 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importbell_ioctl enter\n"));
7035
7036 ASSERT(seg->s_state != RSM_STATE_NEW &&
7037 seg->s_state != RSM_STATE_NEW_QUIESCED);
7038
7039 request.rsmipc_key = seg->s_segid;
7040 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_BELL;
7041
7042 index = rsmhash(seg->s_segid);
7043
7044 token = importer_list.bucket[index];
7045
7046 while (token != NULL) {
7047 if (seg->s_key == token->key) {
7048 request.rsmipc_segment_cookie =
7049 token->import_segment_cookie;
7050 (void) rsmipc_send(token->importing_node,
7051 &request, RSM_NO_REPLY);
7052 }
7053 token = token->next;
7054 }
7055
7056 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7057 "importbell_ioctl done\n"));
7058 return (RSM_SUCCESS);
7059 }
7060
7061 static int
7062 rsm_consumeevent_copyin(caddr_t arg, rsm_consume_event_msg_t *msgp,
7063 rsm_poll_event_t **eventspp, int mode)
7064 {
7065 rsm_poll_event_t *evlist = NULL;
7066 size_t evlistsz;
7067 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IOCTL);
7068
7069 #ifdef _MULTI_DATAMODEL
7070 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7071 int i;
7072 rsm_consume_event_msg32_t cemsg32 = {0};
7073 rsm_poll_event32_t event32[RSM_MAX_POLLFDS];
7074 rsm_poll_event32_t *evlist32;
7075 size_t evlistsz32;
7076
7077 /* copyin the ioctl message */
7078 if (ddi_copyin(arg, (caddr_t)&cemsg32,
7079 sizeof (rsm_consume_event_msg32_t), mode)) {
7080 DBG_PRINTF((category, RSM_ERR,
7081 "consumeevent_copyin msgp: RSMERR_BAD_ADDR\n"));
7082 return (RSMERR_BAD_ADDR);
7083 }
7084 msgp->seglist = (caddr_t)(uintptr_t)cemsg32.seglist;
7085 msgp->numents = (int)cemsg32.numents;
7086
7087 evlistsz32 = sizeof (rsm_poll_event32_t) * msgp->numents;
7088 /*
7089 * If numents is large alloc events list on heap otherwise
7090 * use the address of array that was passed in.
7091 */
7092 if (msgp->numents > RSM_MAX_POLLFDS) {
7093 if (msgp->numents > max_segs) { /* validate numents */
7094 DBG_PRINTF((category, RSM_ERR,
7095 "consumeevent_copyin: "
7096 "RSMERR_BAD_ARGS_ERRORS\n"));
7097 return (RSMERR_BAD_ARGS_ERRORS);
7098 }
7099 evlist32 = kmem_zalloc(evlistsz32, KM_SLEEP);
7100 } else {
7101 evlist32 = event32;
7102 }
7103
7104 /* copyin the seglist into the rsm_poll_event32_t array */
7105 if (ddi_copyin((caddr_t)msgp->seglist, (caddr_t)evlist32,
7106 evlistsz32, mode)) {
7107 if ((msgp->numents > RSM_MAX_POLLFDS) && evlist32) {
7108 kmem_free(evlist32, evlistsz32);
7109 }
7110 DBG_PRINTF((category, RSM_ERR,
7111 "consumeevent_copyin evlist: RSMERR_BAD_ADDR\n"));
7112 return (RSMERR_BAD_ADDR);
7113 }
7114
7115 /* evlist and evlistsz are based on rsm_poll_event_t type */
7116 evlistsz = sizeof (rsm_poll_event_t)* msgp->numents;
7117
7118 if (msgp->numents > RSM_MAX_POLLFDS) {
7119 evlist = kmem_zalloc(evlistsz, KM_SLEEP);
7120 *eventspp = evlist;
7121 } else {
7122 evlist = *eventspp;
7123 }
7124 /*
7125 * copy the rsm_poll_event32_t array to the rsm_poll_event_t
7126 * array
7127 */
7128 for (i = 0; i < msgp->numents; i++) {
7129 evlist[i].rnum = evlist32[i].rnum;
7130 evlist[i].fdsidx = evlist32[i].fdsidx;
7131 evlist[i].revent = evlist32[i].revent;
7132 }
7133 /* free the temp 32-bit event list */
7134 if ((msgp->numents > RSM_MAX_POLLFDS) && evlist32) {
7135 kmem_free(evlist32, evlistsz32);
7136 }
7137
7138 return (RSM_SUCCESS);
7139 }
7140 #endif
7141 /* copyin the ioctl message */
7142 if (ddi_copyin(arg, (caddr_t)msgp, sizeof (rsm_consume_event_msg_t),
7143 mode)) {
7144 DBG_PRINTF((category, RSM_ERR,
7145 "consumeevent_copyin msgp: RSMERR_BAD_ADDR\n"));
7146 return (RSMERR_BAD_ADDR);
7147 }
7148 /*
7149 * If numents is large alloc events list on heap otherwise
7150 * use the address of array that was passed in.
7151 */
7152 if (msgp->numents > RSM_MAX_POLLFDS) {
7153 if (msgp->numents > max_segs) { /* validate numents */
7154 DBG_PRINTF((category, RSM_ERR,
7155 "consumeevent_copyin: RSMERR_BAD_ARGS_ERRORS\n"));
7156 return (RSMERR_BAD_ARGS_ERRORS);
7157 }
7158 evlistsz = sizeof (rsm_poll_event_t)*msgp->numents;
7159 evlist = kmem_zalloc(evlistsz, KM_SLEEP);
7160 *eventspp = evlist;
7161 }
7162
7163 /* copyin the seglist */
7164 if (ddi_copyin((caddr_t)msgp->seglist, (caddr_t)(*eventspp),
7165 sizeof (rsm_poll_event_t)*msgp->numents, mode)) {
7166 if (evlist) {
7167 kmem_free(evlist, evlistsz);
7168 *eventspp = NULL;
7169 }
7170 DBG_PRINTF((category, RSM_ERR,
7171 "consumeevent_copyin evlist: RSMERR_BAD_ADDR\n"));
7172 return (RSMERR_BAD_ADDR);
7173 }
7174
7175 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7176 "consumeevent_copyin done\n"));
7177 return (RSM_SUCCESS);
7178 }
7179
7180 static int
7181 rsm_consumeevent_copyout(rsm_consume_event_msg_t *msgp,
7182 rsm_poll_event_t *eventsp, int mode)
7183 {
7184 size_t evlistsz;
7185 int err = RSM_SUCCESS;
7186 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IOCTL);
7187
7188 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7189 "consumeevent_copyout enter: numents(%d) eventsp(%p)\n",
7190 msgp->numents, eventsp));
7191
7192 #ifdef _MULTI_DATAMODEL
7193 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7194 int i;
7195 rsm_poll_event32_t event32[RSM_MAX_POLLFDS];
7196 rsm_poll_event32_t *evlist32;
7197 size_t evlistsz32;
7198
7199 evlistsz32 = sizeof (rsm_poll_event32_t)*msgp->numents;
7200 if (msgp->numents > RSM_MAX_POLLFDS) {
7201 evlist32 = kmem_zalloc(evlistsz32, KM_SLEEP);
7202 } else {
7203 evlist32 = event32;
7204 }
7205
7206 /*
7207 * copy the rsm_poll_event_t array to the rsm_poll_event32_t
7208 * array
7209 */
7210 for (i = 0; i < msgp->numents; i++) {
7211 evlist32[i].rnum = eventsp[i].rnum;
7212 evlist32[i].fdsidx = eventsp[i].fdsidx;
7213 evlist32[i].revent = eventsp[i].revent;
7214 }
7215
7216 if (ddi_copyout((caddr_t)evlist32, (caddr_t)msgp->seglist,
7217 evlistsz32, mode)) {
7218 err = RSMERR_BAD_ADDR;
7219 }
7220
7221 if (msgp->numents > RSM_MAX_POLLFDS) {
7222 if (evlist32) { /* free the temp 32-bit event list */
7223 kmem_free(evlist32, evlistsz32);
7224 }
7225 /*
7226 * eventsp and evlistsz are based on rsm_poll_event_t
7227 * type
7228 */
7229 evlistsz = sizeof (rsm_poll_event_t)*msgp->numents;
7230 /* event list on the heap and needs to be freed here */
7231 if (eventsp) {
7232 kmem_free(eventsp, evlistsz);
7233 }
7234 }
7235
7236 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7237 "consumeevent_copyout done: err=%d\n", err));
7238 return (err);
7239 }
7240 #endif
7241 evlistsz = sizeof (rsm_poll_event_t)*msgp->numents;
7242
7243 if (ddi_copyout((caddr_t)eventsp, (caddr_t)msgp->seglist, evlistsz,
7244 mode)) {
7245 err = RSMERR_BAD_ADDR;
7246 }
7247
7248 if ((msgp->numents > RSM_MAX_POLLFDS) && eventsp) {
7249 /* event list on the heap and needs to be freed here */
7250 kmem_free(eventsp, evlistsz);
7251 }
7252
7253 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7254 "consumeevent_copyout done: err=%d\n", err));
7255 return (err);
7256 }
7257
7258 static int
7259 rsm_consumeevent_ioctl(caddr_t arg, int mode)
7260 {
7261 int rc;
7262 int i;
7263 minor_t rnum;
7264 rsm_consume_event_msg_t msg = {0};
7265 rsmseg_t *seg;
7266 rsm_poll_event_t *event_list;
7267 rsm_poll_event_t events[RSM_MAX_POLLFDS];
7268 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IOCTL);
7269
7270 event_list = events;
7271
7272 if ((rc = rsm_consumeevent_copyin(arg, &msg, &event_list, mode)) !=
7273 RSM_SUCCESS) {
7274 return (rc);
7275 }
7276
7277 for (i = 0; i < msg.numents; i++) {
7278 rnum = event_list[i].rnum;
7279 event_list[i].revent = 0;
7280 /* get the segment structure */
7281 seg = (rsmseg_t *)rsmresource_lookup(rnum, RSM_LOCK);
7282 if (seg) {
7283 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7284 "consumeevent_ioctl: rnum(%d) seg(%p)\n", rnum,
7285 seg));
7286 if (seg->s_pollevent) {
7287 /* consume the event */
7288 atomic_dec_32(&seg->s_pollevent);
7289 event_list[i].revent = POLLRDNORM;
7290 }
7291 rsmseglock_release(seg);
7292 }
7293 }
7294
7295 if ((rc = rsm_consumeevent_copyout(&msg, event_list, mode)) !=
7296 RSM_SUCCESS) {
7297 return (rc);
7298 }
7299
7300 return (RSM_SUCCESS);
7301 }
7302
7303 static int
7304 iovec_copyin(caddr_t user_vec, rsmka_iovec_t *iovec, int count, int mode)
7305 {
7306 int size;
7307 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
7308
7309 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "iovec_copyin enter\n"));
7310
7311 #ifdef _MULTI_DATAMODEL
7312 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7313 rsmka_iovec32_t *iovec32, *iovec32_base;
7314 int i;
7315
7316 size = count * sizeof (rsmka_iovec32_t);
7317 iovec32_base = iovec32 = kmem_zalloc(size, KM_SLEEP);
7318 if (ddi_copyin((caddr_t)user_vec,
7319 (caddr_t)iovec32, size, mode)) {
7320 kmem_free(iovec32, size);
7321 DBG_PRINTF((category, RSM_DEBUG,
7322 "iovec_copyin: returning RSMERR_BAD_ADDR\n"));
7323 return (RSMERR_BAD_ADDR);
7324 }
7325
7326 for (i = 0; i < count; i++, iovec++, iovec32++) {
7327 iovec->io_type = (int)iovec32->io_type;
7328 if (iovec->io_type == RSM_HANDLE_TYPE)
7329 iovec->local.segid = (rsm_memseg_id_t)
7330 iovec32->local;
7331 else
7332 iovec->local.vaddr =
7333 (caddr_t)(uintptr_t)iovec32->local;
7334 iovec->local_offset = (size_t)iovec32->local_offset;
7335 iovec->remote_offset = (size_t)iovec32->remote_offset;
7336 iovec->transfer_len = (size_t)iovec32->transfer_len;
7337
7338 }
7339 kmem_free(iovec32_base, size);
7340 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7341 "iovec_copyin done\n"));
7342 return (DDI_SUCCESS);
7343 }
7344 #endif
7345
7346 size = count * sizeof (rsmka_iovec_t);
7347 if (ddi_copyin((caddr_t)user_vec, (caddr_t)iovec, size, mode)) {
7348 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7349 "iovec_copyin done: RSMERR_BAD_ADDR\n"));
7350 return (RSMERR_BAD_ADDR);
7351 }
7352
7353 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "iovec_copyin done\n"));
7354
7355 return (DDI_SUCCESS);
7356 }
7357
7358
7359 static int
7360 sgio_copyin(caddr_t arg, rsmka_scat_gath_t *sg_io, int mode)
7361 {
7362 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
7363
7364 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "sgio_copyin enter\n"));
7365
7366 #ifdef _MULTI_DATAMODEL
7367 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7368 rsmka_scat_gath32_t sg_io32;
7369
7370 if (ddi_copyin(arg, (caddr_t)&sg_io32, sizeof (sg_io32),
7371 mode)) {
7372 DBG_PRINTF((category, RSM_DEBUG,
7373 "sgio_copyin done: returning EFAULT\n"));
7374 return (RSMERR_BAD_ADDR);
7375 }
7376 sg_io->local_nodeid = (rsm_node_id_t)sg_io32.local_nodeid;
7377 sg_io->io_request_count = (size_t)sg_io32.io_request_count;
7378 sg_io->io_residual_count = (size_t)sg_io32.io_residual_count;
7379 sg_io->flags = (size_t)sg_io32.flags;
7380 sg_io->remote_handle = (rsm_memseg_import_handle_t)
7381 (uintptr_t)sg_io32.remote_handle;
7382 sg_io->iovec = (rsmka_iovec_t *)(uintptr_t)sg_io32.iovec;
7383 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7384 "sgio_copyin done\n"));
7385 return (DDI_SUCCESS);
7386 }
7387 #endif
7388 if (ddi_copyin(arg, (caddr_t)sg_io, sizeof (rsmka_scat_gath_t),
7389 mode)) {
7390 DBG_PRINTF((category, RSM_DEBUG,
7391 "sgio_copyin done: returning EFAULT\n"));
7392 return (RSMERR_BAD_ADDR);
7393 }
7394 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "sgio_copyin done\n"));
7395 return (DDI_SUCCESS);
7396 }
7397
7398 static int
7399 sgio_resid_copyout(caddr_t arg, rsmka_scat_gath_t *sg_io, int mode)
7400 {
7401 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
7402
7403 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7404 "sgio_resid_copyout enter\n"));
7405
7406 #ifdef _MULTI_DATAMODEL
7407 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7408 rsmka_scat_gath32_t sg_io32;
7409
7410 sg_io32.io_residual_count = sg_io->io_residual_count;
7411 sg_io32.flags = sg_io->flags;
7412
7413 if (ddi_copyout((caddr_t)&sg_io32.io_residual_count,
7414 (caddr_t)&((rsmka_scat_gath32_t *)arg)->io_residual_count,
7415 sizeof (uint32_t), mode)) {
7416
7417 DBG_PRINTF((category, RSM_ERR,
7418 "sgio_resid_copyout error: rescnt\n"));
7419 return (RSMERR_BAD_ADDR);
7420 }
7421
7422 if (ddi_copyout((caddr_t)&sg_io32.flags,
7423 (caddr_t)&((rsmka_scat_gath32_t *)arg)->flags,
7424 sizeof (uint32_t), mode)) {
7425
7426 DBG_PRINTF((category, RSM_ERR,
7427 "sgio_resid_copyout error: flags\n"));
7428 return (RSMERR_BAD_ADDR);
7429 }
7430 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7431 "sgio_resid_copyout done\n"));
7432 return (DDI_SUCCESS);
7433 }
7434 #endif
7435 if (ddi_copyout((caddr_t)&sg_io->io_residual_count,
7436 (caddr_t)&((rsmka_scat_gath_t *)arg)->io_residual_count,
7437 sizeof (ulong_t), mode)) {
7438
7439 DBG_PRINTF((category, RSM_ERR,
7440 "sgio_resid_copyout error:rescnt\n"));
7441 return (RSMERR_BAD_ADDR);
7442 }
7443
7444 if (ddi_copyout((caddr_t)&sg_io->flags,
7445 (caddr_t)&((rsmka_scat_gath_t *)arg)->flags,
7446 sizeof (uint_t), mode)) {
7447
7448 DBG_PRINTF((category, RSM_ERR,
7449 "sgio_resid_copyout error:flags\n"));
7450 return (RSMERR_BAD_ADDR);
7451 }
7452
7453 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "sgio_resid_copyout done\n"));
7454 return (DDI_SUCCESS);
7455 }
7456
7457
7458 static int
7459 rsm_iovec_ioctl(dev_t dev, caddr_t arg, int cmd, int mode, cred_t *credp)
7460 {
7461 rsmka_scat_gath_t sg_io;
7462 rsmka_iovec_t ka_iovec_arr[RSM_MAX_IOVLEN];
7463 rsmka_iovec_t *ka_iovec;
7464 rsmka_iovec_t *ka_iovec_start;
7465 rsmpi_scat_gath_t rsmpi_sg_io;
7466 rsmpi_iovec_t iovec_arr[RSM_MAX_IOVLEN];
7467 rsmpi_iovec_t *iovec;
7468 rsmpi_iovec_t *iovec_start = NULL;
7469 rsmapi_access_entry_t *acl;
7470 rsmresource_t *res;
7471 minor_t rnum;
7472 rsmseg_t *im_seg, *ex_seg;
7473 int e;
7474 int error = 0;
7475 uint_t i;
7476 uint_t iov_proc = 0; /* num of iovecs processed */
7477 size_t size = 0;
7478 size_t ka_size;
7479
7480 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
7481
7482 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_iovec_ioctl enter\n"));
7483
7484 credp = credp;
7485
7486 /*
7487 * Copyin the scatter/gather structure and build new structure
7488 * for rsmpi.
7489 */
7490 e = sgio_copyin(arg, &sg_io, mode);
7491 if (e != DDI_SUCCESS) {
7492 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7493 "rsm_iovec_ioctl done: sgio_copyin %d\n", e));
7494 return (e);
7495 }
7496
7497 if (sg_io.io_request_count > RSM_MAX_SGIOREQS) {
7498 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7499 "rsm_iovec_ioctl done: request_count(%d) too large\n",
7500 sg_io.io_request_count));
7501 return (RSMERR_BAD_SGIO);
7502 }
7503
7504 rsmpi_sg_io.io_request_count = sg_io.io_request_count;
7505 rsmpi_sg_io.io_residual_count = sg_io.io_request_count;
7506 rsmpi_sg_io.io_segflg = 0;
7507
7508 /* Allocate memory and copyin io vector array */
7509 if (sg_io.io_request_count > RSM_MAX_IOVLEN) {
7510 ka_size = sg_io.io_request_count * sizeof (rsmka_iovec_t);
7511 ka_iovec_start = ka_iovec = kmem_zalloc(ka_size, KM_SLEEP);
7512 } else {
7513 ka_iovec_start = ka_iovec = ka_iovec_arr;
7514 }
7515 e = iovec_copyin((caddr_t)sg_io.iovec, ka_iovec,
7516 sg_io.io_request_count, mode);
7517 if (e != DDI_SUCCESS) {
7518 if (sg_io.io_request_count > RSM_MAX_IOVLEN)
7519 kmem_free(ka_iovec, ka_size);
7520 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7521 "rsm_iovec_ioctl done: iovec_copyin %d\n", e));
7522 return (e);
7523 }
7524
7525 /* get the import segment descriptor */
7526 rnum = getminor(dev);
7527 res = rsmresource_lookup(rnum, RSM_LOCK);
7528
7529 /*
7530 * The following sequence of locking may (or MAY NOT) cause a
7531 * deadlock but this is currently not addressed here since the
7532 * implementation will be changed to incorporate the use of
7533 * reference counting for both the import and the export segments.
7534 */
7535
7536 /* rsmseglock_acquire(im_seg) done in rsmresource_lookup */
7537
7538 im_seg = (rsmseg_t *)res;
7539
7540 if (im_seg == NULL) {
7541 if (sg_io.io_request_count > RSM_MAX_IOVLEN)
7542 kmem_free(ka_iovec, ka_size);
7543 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7544 "rsm_iovec_ioctl done: rsmresource_lookup failed\n"));
7545 return (EINVAL);
7546 }
7547 /* putv/getv supported is supported only on import segments */
7548 if (im_seg->s_type != RSM_RESOURCE_IMPORT_SEGMENT) {
7549 rsmseglock_release(im_seg);
7550 if (sg_io.io_request_count > RSM_MAX_IOVLEN)
7551 kmem_free(ka_iovec, ka_size);
7552 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7553 "rsm_iovec_ioctl done: not an import segment\n"));
7554 return (EINVAL);
7555 }
7556
7557 /*
7558 * wait for a remote DR to complete ie. for segments to get UNQUIESCED
7559 * as well as wait for a local DR to complete.
7560 */
7561 while ((im_seg->s_state == RSM_STATE_CONN_QUIESCE) ||
7562 (im_seg->s_state == RSM_STATE_MAP_QUIESCE) ||
7563 (im_seg->s_flags & RSM_DR_INPROGRESS)) {
7564 if (cv_wait_sig(&im_seg->s_cv, &im_seg->s_lock) == 0) {
7565 DBG_PRINTF((category, RSM_DEBUG,
7566 "rsm_iovec_ioctl done: cv_wait INTR"));
7567 rsmseglock_release(im_seg);
7568 return (RSMERR_INTERRUPTED);
7569 }
7570 }
7571
7572 if ((im_seg->s_state != RSM_STATE_CONNECT) &&
7573 (im_seg->s_state != RSM_STATE_ACTIVE)) {
7574
7575 ASSERT(im_seg->s_state == RSM_STATE_DISCONNECT ||
7576 im_seg->s_state == RSM_STATE_NEW);
7577
7578 DBG_PRINTF((category, RSM_DEBUG,
7579 "rsm_iovec_ioctl done: im_seg not conn/map"));
7580 rsmseglock_release(im_seg);
7581 e = RSMERR_BAD_SGIO;
7582 goto out;
7583 }
7584
7585 im_seg->s_rdmacnt++;
7586 rsmseglock_release(im_seg);
7587
7588 /*
7589 * Allocate and set up the io vector for rsmpi
7590 */
7591 if (sg_io.io_request_count > RSM_MAX_IOVLEN) {
7592 size = sg_io.io_request_count * sizeof (rsmpi_iovec_t);
7593 iovec_start = iovec = kmem_zalloc(size, KM_SLEEP);
7594 } else {
7595 iovec_start = iovec = iovec_arr;
7596 }
7597
7598 rsmpi_sg_io.iovec = iovec;
7599 for (iov_proc = 0; iov_proc < sg_io.io_request_count; iov_proc++) {
7600 if (ka_iovec->io_type == RSM_HANDLE_TYPE) {
7601 ex_seg = rsmexport_lookup(ka_iovec->local.segid);
7602
7603 if (ex_seg == NULL) {
7604 e = RSMERR_BAD_SGIO;
7605 break;
7606 }
7607 ASSERT(ex_seg->s_state == RSM_STATE_EXPORT);
7608
7609 acl = ex_seg->s_acl;
7610 if (acl[0].ae_permission == 0) {
7611 struct buf *xbuf;
7612 dev_t sdev = 0;
7613
7614 xbuf = ddi_umem_iosetup(ex_seg->s_cookie,
7615 0, ex_seg->s_len, B_WRITE,
7616 sdev, 0, NULL, DDI_UMEM_SLEEP);
7617
7618 ASSERT(xbuf != NULL);
7619
7620 iovec->local_mem.ms_type = RSM_MEM_BUF;
7621 iovec->local_mem.ms_memory.bp = xbuf;
7622 } else {
7623 iovec->local_mem.ms_type = RSM_MEM_HANDLE;
7624 iovec->local_mem.ms_memory.handle =
7625 ex_seg->s_handle.out;
7626 }
7627 ex_seg->s_rdmacnt++; /* refcnt the handle */
7628 rsmseglock_release(ex_seg);
7629 } else {
7630 iovec->local_mem.ms_type = RSM_MEM_VADDR;
7631 iovec->local_mem.ms_memory.vr.vaddr =
7632 ka_iovec->local.vaddr;
7633 }
7634
7635 iovec->local_offset = ka_iovec->local_offset;
7636 iovec->remote_handle = im_seg->s_handle.in;
7637 iovec->remote_offset = ka_iovec->remote_offset;
7638 iovec->transfer_length = ka_iovec->transfer_len;
7639 iovec++;
7640 ka_iovec++;
7641 }
7642
7643 if (iov_proc < sg_io.io_request_count) {
7644 /* error while processing handle */
7645 rsmseglock_acquire(im_seg);
7646 im_seg->s_rdmacnt--; /* decrement the refcnt for importseg */
7647 if (im_seg->s_rdmacnt == 0) {
7648 cv_broadcast(&im_seg->s_cv);
7649 }
7650 rsmseglock_release(im_seg);
7651 goto out;
7652 }
7653
7654 /* call rsmpi */
7655 if (cmd == RSM_IOCTL_PUTV)
7656 e = im_seg->s_adapter->rsmpi_ops->rsm_memseg_import_putv(
7657 im_seg->s_adapter->rsmpi_handle,
7658 &rsmpi_sg_io);
7659 else if (cmd == RSM_IOCTL_GETV)
7660 e = im_seg->s_adapter->rsmpi_ops->rsm_memseg_import_getv(
7661 im_seg->s_adapter->rsmpi_handle,
7662 &rsmpi_sg_io);
7663 else {
7664 e = EINVAL;
7665 DBG_PRINTF((category, RSM_DEBUG,
7666 "iovec_ioctl: bad command = %x\n", cmd));
7667 }
7668
7669
7670 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7671 "rsm_iovec_ioctl RSMPI oper done %d\n", e));
7672
7673 sg_io.io_residual_count = rsmpi_sg_io.io_residual_count;
7674
7675 /*
7676 * Check for implicit signal post flag and do the signal
7677 * post if needed
7678 */
7679 if (sg_io.flags & RSM_IMPLICIT_SIGPOST &&
7680 e == RSM_SUCCESS) {
7681 rsmipc_request_t request;
7682
7683 request.rsmipc_key = im_seg->s_segid;
7684 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_BELL;
7685 request.rsmipc_segment_cookie = NULL;
7686 e = rsmipc_send(im_seg->s_node, &request, RSM_NO_REPLY);
7687 /*
7688 * Reset the implicit signal post flag to 0 to indicate
7689 * that the signal post has been done and need not be
7690 * done in the RSMAPI library
7691 */
7692 sg_io.flags &= ~RSM_IMPLICIT_SIGPOST;
7693 }
7694
7695 rsmseglock_acquire(im_seg);
7696 im_seg->s_rdmacnt--;
7697 if (im_seg->s_rdmacnt == 0) {
7698 cv_broadcast(&im_seg->s_cv);
7699 }
7700 rsmseglock_release(im_seg);
7701 error = sgio_resid_copyout(arg, &sg_io, mode);
7702 out:
7703 iovec = iovec_start;
7704 ka_iovec = ka_iovec_start;
7705 for (i = 0; i < iov_proc; i++) {
7706 if (ka_iovec->io_type == RSM_HANDLE_TYPE) {
7707 ex_seg = rsmexport_lookup(ka_iovec->local.segid);
7708
7709 ASSERT(ex_seg != NULL);
7710 ASSERT(ex_seg->s_state == RSM_STATE_EXPORT);
7711
7712 ex_seg->s_rdmacnt--; /* unrefcnt the handle */
7713 if (ex_seg->s_rdmacnt == 0) {
7714 cv_broadcast(&ex_seg->s_cv);
7715 }
7716 rsmseglock_release(ex_seg);
7717 }
7718
7719 ASSERT(iovec != NULL); /* true if iov_proc > 0 */
7720
7721 /*
7722 * At present there is no dependency on the existence of xbufs
7723 * created by ddi_umem_iosetup for each of the iovecs. So we
7724 * can these xbufs here.
7725 */
7726 if (iovec->local_mem.ms_type == RSM_MEM_BUF) {
7727 freerbuf(iovec->local_mem.ms_memory.bp);
7728 }
7729
7730 iovec++;
7731 ka_iovec++;
7732 }
7733
7734 if (sg_io.io_request_count > RSM_MAX_IOVLEN) {
7735 if (iovec_start)
7736 kmem_free(iovec_start, size);
7737 kmem_free(ka_iovec_start, ka_size);
7738 }
7739
7740 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7741 "rsm_iovec_ioctl done %d\n", e));
7742 /* if RSMPI call fails return that else return copyout's retval */
7743 return ((e != RSM_SUCCESS) ? e : error);
7744
7745 }
7746
7747
7748 static int
7749 rsmaddr_ioctl(int cmd, rsm_ioctlmsg_t *msg, int mode)
7750 {
7751 adapter_t *adapter;
7752 rsm_addr_t addr;
7753 rsm_node_id_t node;
7754 int rval = DDI_SUCCESS;
7755 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL);
7756
7757 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmaddr_ioctl enter\n"));
7758
7759 adapter = rsm_getadapter(msg, mode);
7760 if (adapter == NULL) {
7761 DBG_PRINTF((category, RSM_DEBUG,
7762 "rsmaddr_ioctl done: adapter not found\n"));
7763 return (RSMERR_CTLR_NOT_PRESENT);
7764 }
7765
7766 switch (cmd) {
7767 case RSM_IOCTL_MAP_TO_ADDR: /* nodeid to hwaddr mapping */
7768 /* returns the hwaddr in msg->hwaddr */
7769 if (msg->nodeid == my_nodeid) {
7770 msg->hwaddr = adapter->hwaddr;
7771 } else {
7772 addr = get_remote_hwaddr(adapter, msg->nodeid);
7773 if ((int64_t)addr < 0) {
7774 rval = RSMERR_INTERNAL_ERROR;
7775 } else {
7776 msg->hwaddr = addr;
7777 }
7778 }
7779 break;
7780 case RSM_IOCTL_MAP_TO_NODEID: /* hwaddr to nodeid mapping */
7781 /* returns the nodeid in msg->nodeid */
7782 if (msg->hwaddr == adapter->hwaddr) {
7783 msg->nodeid = my_nodeid;
7784 } else {
7785 node = get_remote_nodeid(adapter, msg->hwaddr);
7786 if ((int)node < 0) {
7787 rval = RSMERR_INTERNAL_ERROR;
7788 } else {
7789 msg->nodeid = (rsm_node_id_t)node;
7790 }
7791 }
7792 break;
7793 default:
7794 rval = EINVAL;
7795 break;
7796 }
7797
7798 rsmka_release_adapter(adapter);
7799 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7800 "rsmaddr_ioctl done: %d\n", rval));
7801 return (rval);
7802 }
7803
7804 static int
7805 rsm_ddi_copyin(caddr_t arg, rsm_ioctlmsg_t *msg, int mode)
7806 {
7807 DBG_DEFINE(category,
7808 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL | RSM_DDI);
7809
7810 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ddi_copyin enter\n"));
7811
7812 #ifdef _MULTI_DATAMODEL
7813
7814 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7815 rsm_ioctlmsg32_t msg32;
7816 int i;
7817
7818 if (ddi_copyin(arg, (caddr_t)&msg32, sizeof (msg32), mode)) {
7819 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7820 "rsm_ddi_copyin done: EFAULT\n"));
7821 return (RSMERR_BAD_ADDR);
7822 }
7823 msg->len = msg32.len;
7824 msg->vaddr = (caddr_t)(uintptr_t)msg32.vaddr;
7825 msg->arg = (caddr_t)(uintptr_t)msg32.arg;
7826 msg->key = msg32.key;
7827 msg->acl_len = msg32.acl_len;
7828 msg->acl = (rsmapi_access_entry_t *)(uintptr_t)msg32.acl;
7829 msg->cnum = msg32.cnum;
7830 msg->cname = (caddr_t)(uintptr_t)msg32.cname;
7831 msg->cname_len = msg32.cname_len;
7832 msg->nodeid = msg32.nodeid;
7833 msg->hwaddr = msg32.hwaddr;
7834 msg->perm = msg32.perm;
7835 for (i = 0; i < 4; i++) {
7836 msg->bar.comp[i].u64 = msg32.bar.comp[i].u64;
7837 }
7838 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7839 "rsm_ddi_copyin done\n"));
7840 return (RSM_SUCCESS);
7841 }
7842 #endif
7843 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ddi_copyin done\n"));
7844 if (ddi_copyin(arg, (caddr_t)msg, sizeof (*msg), mode))
7845 return (RSMERR_BAD_ADDR);
7846 else
7847 return (RSM_SUCCESS);
7848 }
7849
7850 static int
7851 rsmattr_ddi_copyout(adapter_t *adapter, caddr_t arg, int mode)
7852 {
7853 rsmka_int_controller_attr_t rsm_cattr;
7854 DBG_DEFINE(category,
7855 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL | RSM_DDI);
7856
7857 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7858 "rsmattr_ddi_copyout enter\n"));
7859 /*
7860 * need to copy appropriate data from rsm_controller_attr_t
7861 * to rsmka_int_controller_attr_t
7862 */
7863 #ifdef _MULTI_DATAMODEL
7864 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7865 rsmka_int_controller_attr32_t rsm_cattr32;
7866
7867 rsm_cattr32.attr_direct_access_sizes =
7868 adapter->rsm_attr.attr_direct_access_sizes;
7869 rsm_cattr32.attr_atomic_sizes =
7870 adapter->rsm_attr.attr_atomic_sizes;
7871 rsm_cattr32.attr_page_size =
7872 adapter->rsm_attr.attr_page_size;
7873 if (adapter->rsm_attr.attr_max_export_segment_size >
7874 UINT_MAX)
7875 rsm_cattr32.attr_max_export_segment_size =
7876 RSM_MAXSZ_PAGE_ALIGNED;
7877 else
7878 rsm_cattr32.attr_max_export_segment_size =
7879 adapter->rsm_attr.attr_max_export_segment_size;
7880 if (adapter->rsm_attr.attr_tot_export_segment_size >
7881 UINT_MAX)
7882 rsm_cattr32.attr_tot_export_segment_size =
7883 RSM_MAXSZ_PAGE_ALIGNED;
7884 else
7885 rsm_cattr32.attr_tot_export_segment_size =
7886 adapter->rsm_attr.attr_tot_export_segment_size;
7887 if (adapter->rsm_attr.attr_max_export_segments >
7888 UINT_MAX)
7889 rsm_cattr32.attr_max_export_segments =
7890 UINT_MAX;
7891 else
7892 rsm_cattr32.attr_max_export_segments =
7893 adapter->rsm_attr.attr_max_export_segments;
7894 if (adapter->rsm_attr.attr_max_import_map_size >
7895 UINT_MAX)
7896 rsm_cattr32.attr_max_import_map_size =
7897 RSM_MAXSZ_PAGE_ALIGNED;
7898 else
7899 rsm_cattr32.attr_max_import_map_size =
7900 adapter->rsm_attr.attr_max_import_map_size;
7901 if (adapter->rsm_attr.attr_tot_import_map_size >
7902 UINT_MAX)
7903 rsm_cattr32.attr_tot_import_map_size =
7904 RSM_MAXSZ_PAGE_ALIGNED;
7905 else
7906 rsm_cattr32.attr_tot_import_map_size =
7907 adapter->rsm_attr.attr_tot_import_map_size;
7908 if (adapter->rsm_attr.attr_max_import_segments >
7909 UINT_MAX)
7910 rsm_cattr32.attr_max_import_segments =
7911 UINT_MAX;
7912 else
7913 rsm_cattr32.attr_max_import_segments =
7914 adapter->rsm_attr.attr_max_import_segments;
7915 rsm_cattr32.attr_controller_addr =
7916 adapter->rsm_attr.attr_controller_addr;
7917
7918 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7919 "rsmattr_ddi_copyout done\n"));
7920 if (ddi_copyout((caddr_t)&rsm_cattr32, arg,
7921 sizeof (rsmka_int_controller_attr32_t), mode)) {
7922 return (RSMERR_BAD_ADDR);
7923 }
7924 else
7925 return (RSM_SUCCESS);
7926 }
7927 #endif
7928 rsm_cattr.attr_direct_access_sizes =
7929 adapter->rsm_attr.attr_direct_access_sizes;
7930 rsm_cattr.attr_atomic_sizes =
7931 adapter->rsm_attr.attr_atomic_sizes;
7932 rsm_cattr.attr_page_size =
7933 adapter->rsm_attr.attr_page_size;
7934 rsm_cattr.attr_max_export_segment_size =
7935 adapter->rsm_attr.attr_max_export_segment_size;
7936 rsm_cattr.attr_tot_export_segment_size =
7937 adapter->rsm_attr.attr_tot_export_segment_size;
7938 rsm_cattr.attr_max_export_segments =
7939 adapter->rsm_attr.attr_max_export_segments;
7940 rsm_cattr.attr_max_import_map_size =
7941 adapter->rsm_attr.attr_max_import_map_size;
7942 rsm_cattr.attr_tot_import_map_size =
7943 adapter->rsm_attr.attr_tot_import_map_size;
7944 rsm_cattr.attr_max_import_segments =
7945 adapter->rsm_attr.attr_max_import_segments;
7946 rsm_cattr.attr_controller_addr =
7947 adapter->rsm_attr.attr_controller_addr;
7948 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7949 "rsmattr_ddi_copyout done\n"));
7950 if (ddi_copyout((caddr_t)&rsm_cattr, arg,
7951 sizeof (rsmka_int_controller_attr_t), mode)) {
7952 return (RSMERR_BAD_ADDR);
7953 }
7954 else
7955 return (RSM_SUCCESS);
7956 }
7957
7958 /*ARGSUSED*/
7959 static int
7960 rsm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
7961 int *rvalp)
7962 {
7963 rsmseg_t *seg;
7964 rsmresource_t *res;
7965 minor_t rnum;
7966 rsm_ioctlmsg_t msg = {0};
7967 int error;
7968 adapter_t *adapter;
7969 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL);
7970
7971 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ioctl enter\n"));
7972
7973 if (cmd == RSM_IOCTL_CONSUMEEVENT) {
7974 error = rsm_consumeevent_ioctl((caddr_t)arg, mode);
7975 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7976 "rsm_ioctl RSM_IOCTL_CONSUMEEVENT done: %d\n", error));
7977 return (error);
7978 }
7979
7980 /* topology cmd does not use the arg common to other cmds */
7981 if (RSM_IOCTL_CMDGRP(cmd) == RSM_IOCTL_TOPOLOGY) {
7982 error = rsmka_topology_ioctl((caddr_t)arg, cmd, mode);
7983 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7984 "rsm_ioctl done: %d\n", error));
7985 return (error);
7986 }
7987
7988 if (RSM_IOCTL_CMDGRP(cmd) == RSM_IOCTL_IOVEC) {
7989 error = rsm_iovec_ioctl(dev, (caddr_t)arg, cmd, mode, credp);
7990 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7991 "rsm_ioctl done: %d\n", error));
7992 return (error);
7993 }
7994
7995 /*
7996 * try to load arguments
7997 */
7998 if (cmd != RSM_IOCTL_RING_BELL &&
7999 rsm_ddi_copyin((caddr_t)arg, &msg, mode)) {
8000 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8001 "rsm_ioctl done: EFAULT\n"));
8002 return (RSMERR_BAD_ADDR);
8003 }
8004
8005 if (cmd == RSM_IOCTL_ATTR) {
8006 adapter = rsm_getadapter(&msg, mode);
8007 if (adapter == NULL) {
8008 DBG_PRINTF((category, RSM_DEBUG,
8009 "rsm_ioctl done: ENODEV\n"));
8010 return (RSMERR_CTLR_NOT_PRESENT);
8011 }
8012 error = rsmattr_ddi_copyout(adapter, msg.arg, mode);
8013 rsmka_release_adapter(adapter);
8014 DBG_PRINTF((category, RSM_DEBUG,
8015 "rsm_ioctl:after copyout %d\n", error));
8016 return (error);
8017 }
8018
8019 if (cmd == RSM_IOCTL_BAR_INFO) {
8020 /* Return library off,len of barrier page */
8021 msg.off = barrier_offset;
8022 msg.len = (int)barrier_size;
8023 #ifdef _MULTI_DATAMODEL
8024 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
8025 rsm_ioctlmsg32_t msg32;
8026
8027 if (msg.len > UINT_MAX)
8028 msg.len = RSM_MAXSZ_PAGE_ALIGNED;
8029 else
8030 msg32.len = (int32_t)msg.len;
8031 msg32.off = (int32_t)msg.off;
8032 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8033 "rsm_ioctl done\n"));
8034 if (ddi_copyout((caddr_t)&msg32, (caddr_t)arg,
8035 sizeof (msg32), mode))
8036 return (RSMERR_BAD_ADDR);
8037 else
8038 return (RSM_SUCCESS);
8039 }
8040 #endif
8041 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8042 "rsm_ioctl done\n"));
8043 if (ddi_copyout((caddr_t)&msg, (caddr_t)arg,
8044 sizeof (msg), mode))
8045 return (RSMERR_BAD_ADDR);
8046 else
8047 return (RSM_SUCCESS);
8048 }
8049
8050 if (RSM_IOCTL_CMDGRP(cmd) == RSM_IOCTL_MAP_ADDR) {
8051 /* map the nodeid or hwaddr */
8052 error = rsmaddr_ioctl(cmd, &msg, mode);
8053 if (error == RSM_SUCCESS) {
8054 #ifdef _MULTI_DATAMODEL
8055 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
8056 rsm_ioctlmsg32_t msg32;
8057
8058 msg32.hwaddr = (uint64_t)msg.hwaddr;
8059 msg32.nodeid = (uint32_t)msg.nodeid;
8060
8061 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8062 "rsm_ioctl done\n"));
8063 if (ddi_copyout((caddr_t)&msg32, (caddr_t)arg,
8064 sizeof (msg32), mode))
8065 return (RSMERR_BAD_ADDR);
8066 else
8067 return (RSM_SUCCESS);
8068 }
8069 #endif
8070 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8071 "rsm_ioctl done\n"));
8072 if (ddi_copyout((caddr_t)&msg, (caddr_t)arg,
8073 sizeof (msg), mode))
8074 return (RSMERR_BAD_ADDR);
8075 else
8076 return (RSM_SUCCESS);
8077 }
8078 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8079 "rsm_ioctl done: %d\n", error));
8080 return (error);
8081 }
8082
8083 /* Find resource and look it in read mode */
8084 rnum = getminor(dev);
8085 res = rsmresource_lookup(rnum, RSM_NOLOCK);
8086 ASSERT(res != NULL);
8087
8088 /*
8089 * Find command group
8090 */
8091 switch (RSM_IOCTL_CMDGRP(cmd)) {
8092 case RSM_IOCTL_EXPORT_SEG:
8093 /*
8094 * Export list is searched during publish, loopback and
8095 * remote lookup call.
8096 */
8097 seg = rsmresource_seg(res, rnum, credp,
8098 RSM_RESOURCE_EXPORT_SEGMENT);
8099 if (seg->s_type == RSM_RESOURCE_EXPORT_SEGMENT) {
8100 error = rsmexport_ioctl(seg, &msg, cmd, arg, mode,
8101 credp);
8102 } else { /* export ioctl on an import/barrier resource */
8103 error = RSMERR_BAD_SEG_HNDL;
8104 }
8105 break;
8106 case RSM_IOCTL_IMPORT_SEG:
8107 /* Import list is searched during remote unmap call. */
8108 seg = rsmresource_seg(res, rnum, credp,
8109 RSM_RESOURCE_IMPORT_SEGMENT);
8110 if (seg->s_type == RSM_RESOURCE_IMPORT_SEGMENT) {
8111 error = rsmimport_ioctl(seg, &msg, cmd, arg, mode,
8112 credp);
8113 } else { /* import ioctl on an export/barrier resource */
8114 error = RSMERR_BAD_SEG_HNDL;
8115 }
8116 break;
8117 case RSM_IOCTL_BAR:
8118 if (res != RSMRC_RESERVED &&
8119 res->rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT) {
8120 error = rsmbar_ioctl((rsmseg_t *)res, &msg, cmd, arg,
8121 mode);
8122 } else { /* invalid res value */
8123 error = RSMERR_BAD_SEG_HNDL;
8124 }
8125 break;
8126 case RSM_IOCTL_BELL:
8127 if (res != RSMRC_RESERVED) {
8128 if (res->rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT)
8129 error = exportbell_ioctl((rsmseg_t *)res, cmd);
8130 else if (res->rsmrc_type == RSM_RESOURCE_EXPORT_SEGMENT)
8131 error = importbell_ioctl((rsmseg_t *)res, cmd);
8132 else /* RSM_RESOURCE_BAR */
8133 error = RSMERR_BAD_SEG_HNDL;
8134 } else { /* invalid res value */
8135 error = RSMERR_BAD_SEG_HNDL;
8136 }
8137 break;
8138 default:
8139 error = EINVAL;
8140 }
8141
8142 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ioctl done: %d\n",
8143 error));
8144 return (error);
8145 }
8146
8147
8148 /* **************************** Segment Mapping Operations ********* */
8149 static rsm_mapinfo_t *
8150 rsm_get_mapinfo(rsmseg_t *seg, off_t off, size_t len, off_t *dev_offset,
8151 size_t *map_len)
8152 {
8153 rsm_mapinfo_t *p;
8154 /*
8155 * Find the correct mapinfo structure to use during the mapping
8156 * from the seg->s_mapinfo list.
8157 * The seg->s_mapinfo list contains in reverse order the mappings
8158 * as returned by the RSMPI rsm_map. In rsm_devmap, we need to
8159 * access the correct entry within this list for the mapping
8160 * requested.
8161 *
8162 * The algorithm for selecting a list entry is as follows:
8163 *
8164 * When start_offset of an entry <= off we have found the entry
8165 * we were looking for. Adjust the dev_offset and map_len (needs
8166 * to be PAGESIZE aligned).
8167 */
8168 p = seg->s_mapinfo;
8169 for (; p; p = p->next) {
8170 if (p->start_offset <= off) {
8171 *dev_offset = p->dev_offset + off - p->start_offset;
8172 *map_len = (len > p->individual_len) ?
8173 p->individual_len : ptob(btopr(len));
8174 return (p);
8175 }
8176 p = p->next;
8177 }
8178
8179 return (NULL);
8180 }
8181
8182 static void
8183 rsm_free_mapinfo(rsm_mapinfo_t *mapinfo)
8184 {
8185 rsm_mapinfo_t *p;
8186
8187 while (mapinfo != NULL) {
8188 p = mapinfo;
8189 mapinfo = mapinfo->next;
8190 kmem_free(p, sizeof (*p));
8191 }
8192 }
8193
8194 static int
8195 rsmmap_map(devmap_cookie_t dhp, dev_t dev, uint_t flags, offset_t off,
8196 size_t len, void **pvtp)
8197 {
8198 rsmcookie_t *p;
8199 rsmresource_t *res;
8200 rsmseg_t *seg;
8201 minor_t rnum;
8202 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8203
8204 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_map enter\n"));
8205
8206 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8207 "rsmmap_map: dhp = %x\n", dhp));
8208
8209 flags = flags;
8210
8211 rnum = getminor(dev);
8212 res = (rsmresource_t *)rsmresource_lookup(rnum, RSM_NOLOCK);
8213 ASSERT(res != NULL);
8214
8215 seg = (rsmseg_t *)res;
8216
8217 rsmseglock_acquire(seg);
8218
8219 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
8220
8221 /*
8222 * Allocate structure and add cookie to segment list
8223 */
8224 p = kmem_alloc(sizeof (*p), KM_SLEEP);
8225
8226 p->c_dhp = dhp;
8227 p->c_off = off;
8228 p->c_len = len;
8229 p->c_next = seg->s_ckl;
8230 seg->s_ckl = p;
8231
8232 *pvtp = (void *)seg;
8233
8234 rsmseglock_release(seg);
8235
8236 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_map done\n"));
8237 return (DDI_SUCCESS);
8238 }
8239
8240 /*
8241 * Page fault handling is done here. The prerequisite mapping setup
8242 * has been done in rsm_devmap with calls to ddi_devmem_setup or
8243 * ddi_umem_setup
8244 */
8245 static int
8246 rsmmap_access(devmap_cookie_t dhp, void *pvt, offset_t offset, size_t len,
8247 uint_t type, uint_t rw)
8248 {
8249 int e;
8250 rsmseg_t *seg = (rsmseg_t *)pvt;
8251 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8252
8253 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_access enter\n"));
8254
8255 rsmseglock_acquire(seg);
8256
8257 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
8258
8259 while (seg->s_state == RSM_STATE_MAP_QUIESCE) {
8260 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
8261 DBG_PRINTF((category, RSM_DEBUG,
8262 "rsmmap_access done: cv_wait INTR"));
8263 rsmseglock_release(seg);
8264 return (RSMERR_INTERRUPTED);
8265 }
8266 }
8267
8268 ASSERT(seg->s_state == RSM_STATE_DISCONNECT ||
8269 seg->s_state == RSM_STATE_ACTIVE);
8270
8271 if (seg->s_state == RSM_STATE_DISCONNECT)
8272 seg->s_flags |= RSM_IMPORT_DUMMY;
8273
8274 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8275 "rsmmap_access: dhp = %x\n", dhp));
8276
8277 rsmseglock_release(seg);
8278
8279 if (e = devmap_load(dhp, offset, len, type, rw)) {
8280 DBG_PRINTF((category, RSM_ERR, "devmap_load failed\n"));
8281 }
8282
8283
8284 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_access done\n"));
8285
8286 return (e);
8287 }
8288
8289 static int
8290 rsmmap_dup(devmap_cookie_t dhp, void *oldpvt, devmap_cookie_t new_dhp,
8291 void **newpvt)
8292 {
8293 rsmseg_t *seg = (rsmseg_t *)oldpvt;
8294 rsmcookie_t *p, *old;
8295 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8296
8297 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_dup enter\n"));
8298
8299 /*
8300 * Same as map, create an entry to hold cookie and add it to
8301 * connect segment list. The oldpvt is a pointer to segment.
8302 * Return segment pointer in newpvt.
8303 */
8304 rsmseglock_acquire(seg);
8305
8306 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
8307
8308 /*
8309 * Find old cookie
8310 */
8311 for (old = seg->s_ckl; old != NULL; old = old->c_next) {
8312 if (old->c_dhp == dhp) {
8313 break;
8314 }
8315 }
8316 if (old == NULL) {
8317 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8318 "rsmmap_dup done: EINVAL\n"));
8319 rsmseglock_release(seg);
8320 return (EINVAL);
8321 }
8322
8323 p = kmem_alloc(sizeof (*p), KM_SLEEP);
8324
8325 p->c_dhp = new_dhp;
8326 p->c_off = old->c_off;
8327 p->c_len = old->c_len;
8328 p->c_next = seg->s_ckl;
8329 seg->s_ckl = p;
8330
8331 *newpvt = (void *)seg;
8332
8333 rsmseglock_release(seg);
8334
8335 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_dup done\n"));
8336
8337 return (DDI_SUCCESS);
8338 }
8339
8340 static void
8341 rsmmap_unmap(devmap_cookie_t dhp, void *pvtp, offset_t off, size_t len,
8342 devmap_cookie_t new_dhp1, void **pvtp1,
8343 devmap_cookie_t new_dhp2, void **pvtp2)
8344 {
8345 /*
8346 * Remove pvtp structure from segment list.
8347 */
8348 rsmseg_t *seg = (rsmseg_t *)pvtp;
8349 int freeflag;
8350
8351 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8352
8353 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_unmap enter\n"));
8354
8355 off = off; len = len;
8356 pvtp1 = pvtp1; pvtp2 = pvtp2;
8357
8358 rsmseglock_acquire(seg);
8359
8360 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
8361
8362 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8363 "rsmmap_unmap: dhp = %x\n", dhp));
8364 /*
8365 * We can go ahead and remove the dhps even if we are in
8366 * the MAPPING state because the dhps being removed here
8367 * belong to a different mmap and we are holding the segment
8368 * lock.
8369 */
8370 if (new_dhp1 == NULL && new_dhp2 == NULL) {
8371 /* find and remove dhp handle */
8372 rsmcookie_t *tmp, **back = &seg->s_ckl;
8373
8374 while (*back != NULL) {
8375 tmp = *back;
8376 if (tmp->c_dhp == dhp) {
8377 *back = tmp->c_next;
8378 kmem_free(tmp, sizeof (*tmp));
8379 break;
8380 }
8381 back = &tmp->c_next;
8382 }
8383 } else {
8384 DBG_PRINTF((category, RSM_DEBUG_LVL2,
8385 "rsmmap_unmap:parital unmap"
8386 "new_dhp1 %lx, new_dhp2 %lx\n",
8387 (size_t)new_dhp1, (size_t)new_dhp2));
8388 }
8389
8390 /*
8391 * rsmmap_unmap is called for each mapping cookie on the list.
8392 * When the list becomes empty and we are not in the MAPPING
8393 * state then unmap in the rsmpi driver.
8394 */
8395 if ((seg->s_ckl == NULL) && (seg->s_state != RSM_STATE_MAPPING))
8396 (void) rsm_unmap(seg);
8397
8398 if (seg->s_state == RSM_STATE_END && seg->s_ckl == NULL) {
8399 freeflag = 1;
8400 } else {
8401 freeflag = 0;
8402 }
8403
8404 rsmseglock_release(seg);
8405
8406 if (freeflag) {
8407 /* Free the segment structure */
8408 rsmseg_free(seg);
8409 }
8410 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_unmap done\n"));
8411
8412 }
8413
8414 static struct devmap_callback_ctl rsmmap_ops = {
8415 DEVMAP_OPS_REV, /* devmap_ops version number */
8416 rsmmap_map, /* devmap_ops map routine */
8417 rsmmap_access, /* devmap_ops access routine */
8418 rsmmap_dup, /* devmap_ops dup routine */
8419 rsmmap_unmap, /* devmap_ops unmap routine */
8420 };
8421
8422 static int
8423 rsm_devmap(dev_t dev, devmap_cookie_t dhc, offset_t off, size_t len,
8424 size_t *maplen, uint_t model /*ARGSUSED*/)
8425 {
8426 struct devmap_callback_ctl *callbackops = &rsmmap_ops;
8427 int err;
8428 uint_t maxprot;
8429 minor_t rnum;
8430 rsmseg_t *seg;
8431 off_t dev_offset;
8432 size_t cur_len;
8433 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8434
8435 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_devmap enter\n"));
8436
8437 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8438 "rsm_devmap: off = %lx, len = %lx\n", off, len));
8439 rnum = getminor(dev);
8440 seg = (rsmseg_t *)rsmresource_lookup(rnum, RSM_NOLOCK);
8441 ASSERT(seg != NULL);
8442
8443 if (seg->s_hdr.rsmrc_type == RSM_RESOURCE_BAR) {
8444 if ((off == barrier_offset) &&
8445 (len == barrier_size)) {
8446
8447 ASSERT(bar_va != NULL && bar_cookie != NULL);
8448
8449 /*
8450 * The offset argument in devmap_umem_setup represents
8451 * the offset within the kernel memory defined by the
8452 * cookie. We use this offset as barrier_offset.
8453 */
8454 err = devmap_umem_setup(dhc, rsm_dip, NULL, bar_cookie,
8455 barrier_offset, len, PROT_USER|PROT_READ,
8456 DEVMAP_DEFAULTS, 0);
8457
8458 if (err != 0) {
8459 DBG_PRINTF((category, RSM_ERR,
8460 "rsm_devmap done: %d\n", err));
8461 return (RSMERR_MAP_FAILED);
8462 }
8463 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8464 "rsm_devmap done: %d\n", err));
8465
8466 *maplen = barrier_size;
8467
8468 return (err);
8469 } else {
8470 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8471 "rsm_devmap done: %d\n", err));
8472 return (RSMERR_MAP_FAILED);
8473 }
8474 }
8475
8476 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
8477 ASSERT(seg->s_state == RSM_STATE_MAPPING);
8478
8479 /*
8480 * Make sure we still have permission for the map operation.
8481 */
8482 maxprot = PROT_USER;
8483 if (seg->s_mode & RSM_PERM_READ) {
8484 maxprot |= PROT_READ;
8485 }
8486
8487 if (seg->s_mode & RSM_PERM_WRITE) {
8488 maxprot |= PROT_WRITE;
8489 }
8490
8491 /*
8492 * For each devmap call, rsmmap_map is called. This maintains driver
8493 * private information for the mapping. Thus, if there are multiple
8494 * devmap calls there will be multiple rsmmap_map calls and for each
8495 * call, the mapping information will be stored.
8496 * In case of an error during the processing of the devmap call, error
8497 * will be returned. This error return causes the caller of rsm_devmap
8498 * to undo all the mappings by calling rsmmap_unmap for each one.
8499 * rsmmap_unmap will free up the private information for the requested
8500 * mapping.
8501 */
8502 if (seg->s_node != my_nodeid) {
8503 rsm_mapinfo_t *p;
8504
8505 p = rsm_get_mapinfo(seg, off, len, &dev_offset, &cur_len);
8506 if (p == NULL) {
8507 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8508 "rsm_devmap: incorrect mapping info\n"));
8509 return (RSMERR_MAP_FAILED);
8510 }
8511 err = devmap_devmem_setup(dhc, p->dip,
8512 callbackops, p->dev_register,
8513 dev_offset, cur_len, maxprot,
8514 DEVMAP_ALLOW_REMAP | DEVMAP_DEFAULTS, 0);
8515
8516 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8517 "rsm_devmap: dip=%lx,dreg=%lu,doff=%lx,"
8518 "off=%lx,len=%lx\n",
8519 p->dip, p->dev_register, dev_offset, off, cur_len));
8520
8521 if (err != 0) {
8522 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8523 "rsm_devmap: devmap_devmem_setup failed %d\n",
8524 err));
8525 return (RSMERR_MAP_FAILED);
8526 }
8527 /* cur_len is always an integral multiple pagesize */
8528 ASSERT((cur_len & (PAGESIZE-1)) == 0);
8529 *maplen = cur_len;
8530 return (err);
8531
8532 } else {
8533 err = devmap_umem_setup(dhc, rsm_dip, callbackops,
8534 seg->s_cookie, off, len, maxprot,
8535 DEVMAP_ALLOW_REMAP|DEVMAP_DEFAULTS, 0);
8536 if (err != 0) {
8537 DBG_PRINTF((category, RSM_DEBUG,
8538 "rsm_devmap: devmap_umem_setup failed %d\n",
8539 err));
8540 return (RSMERR_MAP_FAILED);
8541 }
8542 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8543 "rsm_devmap: loopback done\n"));
8544
8545 *maplen = ptob(btopr(len));
8546
8547 return (err);
8548 }
8549 }
8550
8551 /*
8552 * We can use the devmap framework for mapping device memory to user space by
8553 * specifying this routine in the rsm_cb_ops structure. The kernel mmap
8554 * processing calls this entry point and devmap_setup is called within this
8555 * function, which eventually calls rsm_devmap
8556 */
8557 static int
8558 rsm_segmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp, off_t len,
8559 uint_t prot, uint_t maxprot, uint_t flags, struct cred *cred)
8560 {
8561 int error = 0;
8562 int old_state;
8563 minor_t rnum;
8564 rsmseg_t *seg, *eseg;
8565 adapter_t *adapter;
8566 rsm_import_share_t *sharedp;
8567 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8568
8569 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_segmap enter\n"));
8570
8571 /*
8572 * find segment
8573 */
8574 rnum = getminor(dev);
8575 seg = (rsmseg_t *)rsmresource_lookup(rnum, RSM_LOCK);
8576
8577 if (seg == NULL) {
8578 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8579 "rsm_segmap done: invalid segment\n"));
8580 return (EINVAL);
8581 }
8582
8583 /*
8584 * the user is trying to map a resource that has not been
8585 * defined yet. The library uses this to map in the
8586 * barrier page.
8587 */
8588 if (seg->s_hdr.rsmrc_type == RSM_RESOURCE_BAR) {
8589 rsmseglock_release(seg);
8590
8591 /*
8592 * The mapping for the barrier page is identified
8593 * by the special offset barrier_offset
8594 */
8595
8596 if (off == (off_t)barrier_offset ||
8597 len == (off_t)barrier_size) {
8598 if (bar_cookie == NULL || bar_va == NULL) {
8599 DBG_PRINTF((category, RSM_DEBUG,
8600 "rsm_segmap: bar cookie/va is NULL\n"));
8601 return (EINVAL);
8602 }
8603
8604 error = devmap_setup(dev, (offset_t)off, as, addrp,
8605 (size_t)len, prot, maxprot, flags, cred);
8606
8607 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8608 "rsm_segmap done: %d\n", error));
8609 return (error);
8610 } else {
8611 DBG_PRINTF((category, RSM_DEBUG,
8612 "rsm_segmap: bad offset/length\n"));
8613 return (EINVAL);
8614 }
8615 }
8616
8617 /* Make sure you can only map imported segments */
8618 if (seg->s_hdr.rsmrc_type != RSM_RESOURCE_IMPORT_SEGMENT) {
8619 rsmseglock_release(seg);
8620 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8621 "rsm_segmap done: not an import segment\n"));
8622 return (EINVAL);
8623 }
8624 /* check means library is broken */
8625 ASSERT(seg->s_hdr.rsmrc_num == rnum);
8626
8627 /* wait for the segment to become unquiesced */
8628 while (seg->s_state == RSM_STATE_CONN_QUIESCE) {
8629 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
8630 rsmseglock_release(seg);
8631 DBG_PRINTF((category, RSM_DEBUG,
8632 "rsm_segmap done: cv_wait INTR"));
8633 return (ENODEV);
8634 }
8635 }
8636
8637 /* wait until segment leaves the mapping state */
8638 while (seg->s_state == RSM_STATE_MAPPING)
8639 cv_wait(&seg->s_cv, &seg->s_lock);
8640
8641 /*
8642 * we allow multiple maps of the same segment in the KA
8643 * and it works because we do an rsmpi map of the whole
8644 * segment during the first map and all the device mapping
8645 * information needed in rsm_devmap is in the mapinfo list.
8646 */
8647 if ((seg->s_state != RSM_STATE_CONNECT) &&
8648 (seg->s_state != RSM_STATE_ACTIVE)) {
8649 rsmseglock_release(seg);
8650 DBG_PRINTF((category, RSM_DEBUG,
8651 "rsm_segmap done: segment not connected\n"));
8652 return (ENODEV);
8653 }
8654
8655 /*
8656 * Make sure we are not mapping a larger segment than what's
8657 * exported
8658 */
8659 if ((size_t)off + ptob(btopr(len)) > seg->s_len) {
8660 rsmseglock_release(seg);
8661 DBG_PRINTF((category, RSM_DEBUG,
8662 "rsm_segmap done: off+len>seg size\n"));
8663 return (ENXIO);
8664 }
8665
8666 /*
8667 * Make sure we still have permission for the map operation.
8668 */
8669 maxprot = PROT_USER;
8670 if (seg->s_mode & RSM_PERM_READ) {
8671 maxprot |= PROT_READ;
8672 }
8673
8674 if (seg->s_mode & RSM_PERM_WRITE) {
8675 maxprot |= PROT_WRITE;
8676 }
8677
8678 if ((prot & maxprot) != prot) {
8679 /* No permission */
8680 rsmseglock_release(seg);
8681 DBG_PRINTF((category, RSM_DEBUG,
8682 "rsm_segmap done: no permission\n"));
8683 return (EACCES);
8684 }
8685
8686 old_state = seg->s_state;
8687
8688 ASSERT(seg->s_share != NULL);
8689
8690 rsmsharelock_acquire(seg);
8691
8692 sharedp = seg->s_share;
8693
8694 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8695 "rsm_segmap:RSMSI_STATE=%d\n", sharedp->rsmsi_state));
8696
8697 if ((sharedp->rsmsi_state != RSMSI_STATE_CONNECTED) &&
8698 (sharedp->rsmsi_state != RSMSI_STATE_MAPPED)) {
8699 rsmsharelock_release(seg);
8700 rsmseglock_release(seg);
8701 DBG_PRINTF((category, RSM_DEBUG,
8702 "rsm_segmap done:RSMSI_STATE %d invalid\n",
8703 sharedp->rsmsi_state));
8704 return (ENODEV);
8705 }
8706
8707 /*
8708 * Do the map - since we want importers to share mappings
8709 * we do the rsmpi map for the whole segment
8710 */
8711 if (seg->s_node != my_nodeid) {
8712 uint_t dev_register;
8713 off_t dev_offset;
8714 dev_info_t *dip;
8715 size_t tmp_len;
8716 size_t total_length_mapped = 0;
8717 size_t length_to_map = seg->s_len;
8718 off_t tmp_off = 0;
8719 rsm_mapinfo_t *p;
8720
8721 /*
8722 * length_to_map = seg->s_len is always an integral
8723 * multiple of PAGESIZE. Length mapped in each entry in mapinfo
8724 * list is a multiple of PAGESIZE - RSMPI map ensures this
8725 */
8726
8727 adapter = seg->s_adapter;
8728 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_CONNECTED ||
8729 sharedp->rsmsi_state == RSMSI_STATE_MAPPED);
8730
8731 if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTED) {
8732 error = 0;
8733 /* map the whole segment */
8734 while (total_length_mapped < seg->s_len) {
8735 tmp_len = 0;
8736
8737 error = adapter->rsmpi_ops->rsm_map(
8738 seg->s_handle.in, tmp_off,
8739 length_to_map, &tmp_len,
8740 &dip, &dev_register, &dev_offset,
8741 NULL, NULL);
8742
8743 if (error != 0)
8744 break;
8745
8746 /*
8747 * Store the mapping info obtained from rsm_map
8748 */
8749 p = kmem_alloc(sizeof (*p), KM_SLEEP);
8750 p->dev_register = dev_register;
8751 p->dev_offset = dev_offset;
8752 p->dip = dip;
8753 p->individual_len = tmp_len;
8754 p->start_offset = tmp_off;
8755 p->next = sharedp->rsmsi_mapinfo;
8756 sharedp->rsmsi_mapinfo = p;
8757
8758 total_length_mapped += tmp_len;
8759 length_to_map -= tmp_len;
8760 tmp_off += tmp_len;
8761 }
8762 seg->s_mapinfo = sharedp->rsmsi_mapinfo;
8763
8764 if (error != RSM_SUCCESS) {
8765 /* Check if this is the the first rsm_map */
8766 if (sharedp->rsmsi_mapinfo != NULL) {
8767 /*
8768 * A single rsm_unmap undoes
8769 * multiple rsm_maps.
8770 */
8771 (void) seg->s_adapter->rsmpi_ops->
8772 rsm_unmap(sharedp->rsmsi_handle);
8773 rsm_free_mapinfo(sharedp->
8774 rsmsi_mapinfo);
8775 }
8776 sharedp->rsmsi_mapinfo = NULL;
8777 sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
8778 rsmsharelock_release(seg);
8779 rsmseglock_release(seg);
8780 DBG_PRINTF((category, RSM_DEBUG,
8781 "rsm_segmap done: rsmpi map err %d\n",
8782 error));
8783 ASSERT(error != RSMERR_BAD_LENGTH &&
8784 error != RSMERR_BAD_MEM_ALIGNMENT &&
8785 error != RSMERR_BAD_SEG_HNDL);
8786 if (error == RSMERR_UNSUPPORTED_OPERATION)
8787 return (ENOTSUP);
8788 else if (error == RSMERR_INSUFFICIENT_RESOURCES)
8789 return (EAGAIN);
8790 else if (error == RSMERR_CONN_ABORTED)
8791 return (ENODEV);
8792 else
8793 return (error);
8794 } else {
8795 sharedp->rsmsi_state = RSMSI_STATE_MAPPED;
8796 }
8797 } else {
8798 seg->s_mapinfo = sharedp->rsmsi_mapinfo;
8799 }
8800
8801 sharedp->rsmsi_mapcnt++;
8802
8803 rsmsharelock_release(seg);
8804
8805 /* move to an intermediate mapping state */
8806 seg->s_state = RSM_STATE_MAPPING;
8807 rsmseglock_release(seg);
8808
8809 error = devmap_setup(dev, (offset_t)off, as, addrp,
8810 len, prot, maxprot, flags, cred);
8811
8812 rsmseglock_acquire(seg);
8813 ASSERT(seg->s_state == RSM_STATE_MAPPING);
8814
8815 if (error == DDI_SUCCESS) {
8816 seg->s_state = RSM_STATE_ACTIVE;
8817 } else {
8818 rsmsharelock_acquire(seg);
8819
8820 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAPPED);
8821
8822 sharedp->rsmsi_mapcnt--;
8823 if (sharedp->rsmsi_mapcnt == 0) {
8824 /* unmap the shared RSMPI mapping */
8825 ASSERT(sharedp->rsmsi_handle != NULL);
8826 (void) adapter->rsmpi_ops->
8827 rsm_unmap(sharedp->rsmsi_handle);
8828 rsm_free_mapinfo(sharedp->rsmsi_mapinfo);
8829 sharedp->rsmsi_mapinfo = NULL;
8830 sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
8831 }
8832
8833 rsmsharelock_release(seg);
8834 seg->s_state = old_state;
8835 DBG_PRINTF((category, RSM_ERR,
8836 "rsm: devmap_setup failed %d\n", error));
8837 }
8838 cv_broadcast(&seg->s_cv);
8839 rsmseglock_release(seg);
8840 DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsm_segmap done: %d\n",
8841 error));
8842 return (error);
8843 } else {
8844 /*
8845 * For loopback, the export segment mapping cookie (s_cookie)
8846 * is also used as the s_cookie value for its import segments
8847 * during mapping.
8848 * Note that reference counting for s_cookie of the export
8849 * segment is not required due to the following:
8850 * We never have a case of the export segment being destroyed,
8851 * leaving the import segments with a stale value for the
8852 * s_cookie field, since a force disconnect is done prior to a
8853 * destroy of an export segment. The force disconnect causes
8854 * the s_cookie value to be reset to NULL. Also for the
8855 * rsm_rebind operation, we change the s_cookie value of the
8856 * export segment as well as of all its local (loopback)
8857 * importers.
8858 */
8859 DBG_ADDCATEGORY(category, RSM_LOOPBACK);
8860
8861 rsmsharelock_release(seg);
8862 /*
8863 * In order to maintain the lock ordering between the export
8864 * and import segment locks, we need to acquire the export
8865 * segment lock first and only then acquire the import
8866 * segment lock.
8867 * The above is necessary to avoid any deadlock scenarios
8868 * with rsm_rebind which also acquires both the export
8869 * and import segment locks in the above mentioned order.
8870 * Based on code inspection, there seem to be no other
8871 * situations in which both the export and import segment
8872 * locks are acquired either in the same or opposite order
8873 * as mentioned above.
8874 * Thus in order to conform to the above lock order, we
8875 * need to change the state of the import segment to
8876 * RSM_STATE_MAPPING, release the lock. Once this is done we
8877 * can now safely acquire the export segment lock first
8878 * followed by the import segment lock which is as per
8879 * the lock order mentioned above.
8880 */
8881 /* move to an intermediate mapping state */
8882 seg->s_state = RSM_STATE_MAPPING;
8883 rsmseglock_release(seg);
8884
8885 eseg = rsmexport_lookup(seg->s_key);
8886
8887 if (eseg == NULL) {
8888 rsmseglock_acquire(seg);
8889 /*
8890 * Revert to old_state and signal any waiters
8891 * The shared state is not changed
8892 */
8893
8894 seg->s_state = old_state;
8895 cv_broadcast(&seg->s_cv);
8896 rsmseglock_release(seg);
8897 DBG_PRINTF((category, RSM_DEBUG,
8898 "rsm_segmap done: key %d not found\n", seg->s_key));
8899 return (ENODEV);
8900 }
8901
8902 rsmsharelock_acquire(seg);
8903 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_CONNECTED ||
8904 sharedp->rsmsi_state == RSMSI_STATE_MAPPED);
8905
8906 sharedp->rsmsi_mapcnt++;
8907 sharedp->rsmsi_state = RSMSI_STATE_MAPPED;
8908 rsmsharelock_release(seg);
8909
8910 ASSERT(eseg->s_cookie != NULL);
8911
8912 /*
8913 * It is not required or necessary to acquire the import
8914 * segment lock here to change the value of s_cookie since
8915 * no one will touch the import segment as long as it is
8916 * in the RSM_STATE_MAPPING state.
8917 */
8918 seg->s_cookie = eseg->s_cookie;
8919
8920 rsmseglock_release(eseg);
8921
8922 error = devmap_setup(dev, (offset_t)off, as, addrp, (size_t)len,
8923 prot, maxprot, flags, cred);
8924
8925 rsmseglock_acquire(seg);
8926 ASSERT(seg->s_state == RSM_STATE_MAPPING);
8927 if (error == 0) {
8928 seg->s_state = RSM_STATE_ACTIVE;
8929 } else {
8930 rsmsharelock_acquire(seg);
8931
8932 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAPPED);
8933
8934 sharedp->rsmsi_mapcnt--;
8935 if (sharedp->rsmsi_mapcnt == 0) {
8936 sharedp->rsmsi_mapinfo = NULL;
8937 sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
8938 }
8939 rsmsharelock_release(seg);
8940 seg->s_state = old_state;
8941 seg->s_cookie = NULL;
8942 }
8943 cv_broadcast(&seg->s_cv);
8944 rsmseglock_release(seg);
8945 DBG_PRINTF((category, RSM_DEBUG_LVL2,
8946 "rsm_segmap done: %d\n", error));
8947 return (error);
8948 }
8949 }
8950
8951 int
8952 rsmka_null_seg_create(
8953 rsm_controller_handle_t argcp,
8954 rsm_memseg_export_handle_t *handle,
8955 size_t size,
8956 uint_t flags,
8957 rsm_memory_local_t *memory,
8958 rsm_resource_callback_t callback,
8959 rsm_resource_callback_arg_t callback_arg /*ARGSUSED*/)
8960 {
8961 return (RSM_SUCCESS);
8962 }
8963
8964
8965 int
8966 rsmka_null_seg_destroy(
8967 rsm_memseg_export_handle_t argmemseg /*ARGSUSED*/)
8968 {
8969 return (RSM_SUCCESS);
8970 }
8971
8972
8973 int
8974 rsmka_null_bind(
8975 rsm_memseg_export_handle_t argmemseg,
8976 off_t offset,
8977 rsm_memory_local_t *argmemory,
8978 rsm_resource_callback_t callback,
8979 rsm_resource_callback_arg_t callback_arg /*ARGSUSED*/)
8980 {
8981 return (RSM_SUCCESS);
8982 }
8983
8984
8985 int
8986 rsmka_null_unbind(
8987 rsm_memseg_export_handle_t argmemseg,
8988 off_t offset,
8989 size_t length /*ARGSUSED*/)
8990 {
8991 return (DDI_SUCCESS);
8992 }
8993
8994 int
8995 rsmka_null_rebind(
8996 rsm_memseg_export_handle_t argmemseg,
8997 off_t offset,
8998 rsm_memory_local_t *memory,
8999 rsm_resource_callback_t callback,
9000 rsm_resource_callback_arg_t callback_arg /*ARGSUSED*/)
9001 {
9002 return (RSM_SUCCESS);
9003 }
9004
9005 int
9006 rsmka_null_publish(
9007 rsm_memseg_export_handle_t argmemseg,
9008 rsm_access_entry_t access_list[],
9009 uint_t access_list_length,
9010 rsm_memseg_id_t segment_id,
9011 rsm_resource_callback_t callback,
9012 rsm_resource_callback_arg_t callback_arg /*ARGSUSED*/)
9013 {
9014 return (RSM_SUCCESS);
9015 }
9016
9017
9018 int
9019 rsmka_null_republish(
9020 rsm_memseg_export_handle_t memseg,
9021 rsm_access_entry_t access_list[],
9022 uint_t access_list_length,
9023 rsm_resource_callback_t callback,
9024 rsm_resource_callback_arg_t callback_arg /*ARGSUSED*/)
9025 {
9026 return (RSM_SUCCESS);
9027 }
9028
9029 int
9030 rsmka_null_unpublish(
9031 rsm_memseg_export_handle_t argmemseg /*ARGSUSED*/)
9032 {
9033 return (RSM_SUCCESS);
9034 }
9035
9036
9037 void
9038 rsmka_init_loopback()
9039 {
9040 rsm_ops_t *ops = &null_rsmpi_ops;
9041 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_LOOPBACK);
9042
9043 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9044 "rsmka_init_loopback enter\n"));
9045
9046 /* initialize null ops vector */
9047 ops->rsm_seg_create = rsmka_null_seg_create;
9048 ops->rsm_seg_destroy = rsmka_null_seg_destroy;
9049 ops->rsm_bind = rsmka_null_bind;
9050 ops->rsm_unbind = rsmka_null_unbind;
9051 ops->rsm_rebind = rsmka_null_rebind;
9052 ops->rsm_publish = rsmka_null_publish;
9053 ops->rsm_unpublish = rsmka_null_unpublish;
9054 ops->rsm_republish = rsmka_null_republish;
9055
9056 /* initialize attributes for loopback adapter */
9057 loopback_attr.attr_name = loopback_str;
9058 loopback_attr.attr_page_size = 0x8; /* 8K */
9059
9060 /* initialize loopback adapter */
9061 loopback_adapter.rsm_attr = loopback_attr;
9062 loopback_adapter.rsmpi_ops = &null_rsmpi_ops;
9063 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9064 "rsmka_init_loopback done\n"));
9065 }
9066
9067 /* ************** DR functions ********************************** */
9068 static void
9069 rsm_quiesce_exp_seg(rsmresource_t *resp)
9070 {
9071 int recheck_state;
9072 rsmseg_t *segp = (rsmseg_t *)resp;
9073 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9074 DBG_DEFINE_STR(function, "rsm_unquiesce_exp_seg");
9075
9076 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9077 "%s enter: key=%u\n", function, segp->s_key));
9078
9079 rsmseglock_acquire(segp);
9080 do {
9081 recheck_state = 0;
9082 if ((segp->s_state == RSM_STATE_NEW_QUIESCED) ||
9083 (segp->s_state == RSM_STATE_BIND_QUIESCED) ||
9084 (segp->s_state == RSM_STATE_EXPORT_QUIESCING) ||
9085 (segp->s_state == RSM_STATE_EXPORT_QUIESCED)) {
9086 rsmseglock_release(segp);
9087 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9088 "%s done:state =%d\n", function,
9089 segp->s_state));
9090 return;
9091 }
9092
9093 if (segp->s_state == RSM_STATE_NEW) {
9094 segp->s_state = RSM_STATE_NEW_QUIESCED;
9095 rsmseglock_release(segp);
9096 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9097 "%s done:state =%d\n", function,
9098 segp->s_state));
9099 return;
9100 }
9101
9102 if (segp->s_state == RSM_STATE_BIND) {
9103 /* unbind */
9104 (void) rsm_unbind_pages(segp);
9105 segp->s_state = RSM_STATE_BIND_QUIESCED;
9106 rsmseglock_release(segp);
9107 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9108 "%s done:state =%d\n", function,
9109 segp->s_state));
9110 return;
9111 }
9112
9113 if (segp->s_state == RSM_STATE_EXPORT) {
9114 /*
9115 * wait for putv/getv to complete if the segp is
9116 * a local memory handle
9117 */
9118 while ((segp->s_state == RSM_STATE_EXPORT) &&
9119 (segp->s_rdmacnt != 0)) {
9120 cv_wait(&segp->s_cv, &segp->s_lock);
9121 }
9122
9123 if (segp->s_state != RSM_STATE_EXPORT) {
9124 /*
9125 * state changed need to see what it
9126 * should be changed to.
9127 */
9128 recheck_state = 1;
9129 continue;
9130 }
9131
9132 segp->s_state = RSM_STATE_EXPORT_QUIESCING;
9133 rsmseglock_release(segp);
9134 /*
9135 * send SUSPEND messages - currently it will be
9136 * done at the end
9137 */
9138 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9139 "%s done:state =%d\n", function,
9140 segp->s_state));
9141 return;
9142 }
9143 } while (recheck_state);
9144
9145 rsmseglock_release(segp);
9146 }
9147
/*
 * Unquiesce one export segment; called through rsm_process_exp_seg for any
 * event other than RSM_DR_QUIESCE.
 *
 * State handling:
 *	NEW, BIND, EXPORT	- not quiesced, nothing to do
 *	NEW_QUIESCED		-> NEW
 *	BIND_QUIESCED		-> BIND if the pages can be bound again,
 *				   otherwise NEW
 *	EXPORT_QUIESCING	- wait on s_cv until the state changes
 *	EXPORT_QUIESCED		-> EXPORT after the pages are bound again and,
 *				   unless the segment is a local memory
 *				   handle, the RSMPI segment has been created
 *				   and published.  If binding fails the
 *				   segment goes to NEW; if create or publish
 *				   fails it goes to BIND.  In all three
 *				   failure cases rsmexport_rm is called and
 *				   the access lists are freed.
 * Every state change is followed by a cv_broadcast on s_cv.
 */
static void
rsm_unquiesce_exp_seg(rsmresource_t *resp)
{
	int ret;
	rsmseg_t *segp = (rsmseg_t *)resp;
	rsmapi_access_entry_t *acl;
	rsm_access_entry_t *rsmpi_acl;
	int acl_len;
	int create_flags = 0;
	struct buf *xbuf;
	rsm_memory_local_t mem;
	adapter_t *adapter;
	dev_t sdev = 0;
	rsm_resource_callback_t callback_flag;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
	DBG_DEFINE_STR(function, "rsm_unquiesce_exp_seg");

	rsmseglock_acquire(segp);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "%s enter: key=%u, state=%d\n", function, segp->s_key,
	    segp->s_state));

	/* segment was never quiesced */
	if ((segp->s_state == RSM_STATE_NEW) ||
	    (segp->s_state == RSM_STATE_BIND) ||
	    (segp->s_state == RSM_STATE_EXPORT)) {
		rsmseglock_release(segp);
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done:state=%d\n",
		    function, segp->s_state));
		return;
	}

	if (segp->s_state == RSM_STATE_NEW_QUIESCED) {
		segp->s_state = RSM_STATE_NEW;
		cv_broadcast(&segp->s_cv);
		rsmseglock_release(segp);
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done:state=%d\n",
		    function, segp->s_state));
		return;
	}

	if (segp->s_state == RSM_STATE_BIND_QUIESCED) {
		/* bind the segment */
		ret = rsm_bind_pages(&segp->s_cookie, segp->s_region.r_vaddr,
		    segp->s_len, segp->s_proc);
		if (ret == RSM_SUCCESS) { /* bind successful */
			segp->s_state = RSM_STATE_BIND;
		} else { /* bind failed - resource unavailable */
			segp->s_state = RSM_STATE_NEW;
		}
		cv_broadcast(&segp->s_cv);
		rsmseglock_release(segp);
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "%s done: bind_qscd bind = %d\n", function, ret));
		return;
	}

	while (segp->s_state == RSM_STATE_EXPORT_QUIESCING) {
		/* wait for the segment to move to EXPORT_QUIESCED state */
		cv_wait(&segp->s_cv, &segp->s_lock);
	}

	if (segp->s_state == RSM_STATE_EXPORT_QUIESCED) {
		/* bind the segment */
		ret = rsm_bind_pages(&segp->s_cookie, segp->s_region.r_vaddr,
		    segp->s_len, segp->s_proc);

		if (ret != RSM_SUCCESS) {
			/* bind failed - resource unavailable */
			/*
			 * Detach the access lists from the segment while the
			 * lock is held; they are freed after it is dropped.
			 */
			acl_len = segp->s_acl_len;
			acl = segp->s_acl;
			rsmpi_acl = segp->s_acl_in;
			segp->s_acl_len = 0;
			segp->s_acl = NULL;
			segp->s_acl_in = NULL;
			rsmseglock_release(segp);

			rsmexport_rm(segp);
			rsmacl_free(acl, acl_len);
			rsmpiacl_free(rsmpi_acl, acl_len);

			rsmseglock_acquire(segp);
			segp->s_state = RSM_STATE_NEW;
			cv_broadcast(&segp->s_cv);
			rsmseglock_release(segp);
			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
			    "%s done: exp_qscd bind failed = %d\n",
			    function, ret));
			return;
		}
		/*
		 * publish the segment
		 * if  successful
		 *   segp->s_state = RSM_STATE_EXPORT;
		 * else failed
		 *   segp->s_state = RSM_STATE_BIND;
		 */

		/* check whether it is a local_memory_handle */
		if (segp->s_acl != (rsmapi_access_entry_t *)NULL) {
			/*
			 * A single entry for this node with no permissions
			 * is treated as a local memory handle: no RSMPI
			 * create/publish is done for it.
			 */
			if ((segp->s_acl[0].ae_node == my_nodeid) &&
			    (segp->s_acl[0].ae_permission == 0)) {
				segp->s_state = RSM_STATE_EXPORT;
				cv_broadcast(&segp->s_cv);
				rsmseglock_release(segp);
				DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
				    "%s done:exp_qscd\n", function));
				return;
			}
		}
		/*
		 * Describe the bound pages to RSMPI through a buf.
		 * NOTE(review): xbuf is not released anywhere in this
		 * function - confirm whether the RSMPI driver keeps it or
		 * whether it has to be freed here once rsm_seg_create
		 * has returned.
		 */
		xbuf = ddi_umem_iosetup(segp->s_cookie, 0, segp->s_len, B_WRITE,
		    sdev, 0, NULL, DDI_UMEM_SLEEP);
		ASSERT(xbuf != NULL);

		mem.ms_type = RSM_MEM_BUF;
		mem.ms_bp = xbuf;

		adapter = segp->s_adapter;

		/* translate the kernel agent flags into RSMPI arguments */
		if (segp->s_flags & RSMKA_ALLOW_UNBIND_REBIND) {
			create_flags = RSM_ALLOW_UNBIND_REBIND;
		}

		if (segp->s_flags & RSMKA_SET_RESOURCE_DONTWAIT) {
			callback_flag = RSM_RESOURCE_DONTWAIT;
		} else {
			callback_flag = RSM_RESOURCE_SLEEP;
		}

		ret = adapter->rsmpi_ops->rsm_seg_create(
		    adapter->rsmpi_handle, &segp->s_handle.out,
		    segp->s_len, create_flags, &mem,
		    callback_flag, NULL);

		if (ret != RSM_SUCCESS) {
			/* create failed - same teardown as above, end in BIND */
			acl_len = segp->s_acl_len;
			acl = segp->s_acl;
			rsmpi_acl = segp->s_acl_in;
			segp->s_acl_len = 0;
			segp->s_acl = NULL;
			segp->s_acl_in = NULL;
			rsmseglock_release(segp);

			rsmexport_rm(segp);
			rsmacl_free(acl, acl_len);
			rsmpiacl_free(rsmpi_acl, acl_len);

			rsmseglock_acquire(segp);
			segp->s_state = RSM_STATE_BIND;
			cv_broadcast(&segp->s_cv);
			rsmseglock_release(segp);
			DBG_PRINTF((category, RSM_ERR,
			    "%s done: exp_qscd create failed = %d\n",
			    function, ret));
			return;
		}

		ret = adapter->rsmpi_ops->rsm_publish(
		    segp->s_handle.out, segp->s_acl_in, segp->s_acl_len,
		    segp->s_segid, RSM_RESOURCE_DONTWAIT, NULL);

		if (ret != RSM_SUCCESS) {
			/*
			 * publish failed - destroy the RSMPI segment that
			 * was just created, then same teardown as above
			 */
			acl_len = segp->s_acl_len;
			acl = segp->s_acl;
			rsmpi_acl = segp->s_acl_in;
			segp->s_acl_len = 0;
			segp->s_acl = NULL;
			segp->s_acl_in = NULL;
			adapter->rsmpi_ops->rsm_seg_destroy(segp->s_handle.out);
			rsmseglock_release(segp);

			rsmexport_rm(segp);
			rsmacl_free(acl, acl_len);
			rsmpiacl_free(rsmpi_acl, acl_len);

			rsmseglock_acquire(segp);
			segp->s_state = RSM_STATE_BIND;
			cv_broadcast(&segp->s_cv);
			rsmseglock_release(segp);
			DBG_PRINTF((category, RSM_ERR,
			    "%s done: exp_qscd publish failed = %d\n",
			    function, ret));
			return;
		}

		segp->s_state = RSM_STATE_EXPORT;
		cv_broadcast(&segp->s_cv);
		rsmseglock_release(segp);
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done: exp_qscd\n",
		    function));
		return;
	}

	/* any other state is left untouched */
	rsmseglock_release(segp);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done\n", function));
}
9345
9346 static void
9347 rsm_quiesce_imp_seg(rsmresource_t *resp)
9348 {
9349 rsmseg_t *segp = (rsmseg_t *)resp;
9350 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9351 DBG_DEFINE_STR(function, "rsm_quiesce_imp_seg");
9352
9353 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9354 "%s enter: key=%u\n", function, segp->s_key));
9355
9356 rsmseglock_acquire(segp);
9357 segp->s_flags |= RSM_DR_INPROGRESS;
9358
9359 while (segp->s_rdmacnt != 0) {
9360 /* wait for the RDMA to complete */
9361 cv_wait(&segp->s_cv, &segp->s_lock);
9362 }
9363
9364 rsmseglock_release(segp);
9365
9366 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done\n", function));
9367
9368 }
9369
9370 static void
9371 rsm_unquiesce_imp_seg(rsmresource_t *resp)
9372 {
9373 rsmseg_t *segp = (rsmseg_t *)resp;
9374 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9375 DBG_DEFINE_STR(function, "rsm_unquiesce_imp_seg");
9376
9377 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9378 "%s enter: key=%u\n", function, segp->s_key));
9379
9380 rsmseglock_acquire(segp);
9381
9382 segp->s_flags &= ~RSM_DR_INPROGRESS;
9383 /* wake up any waiting putv/getv ops */
9384 cv_broadcast(&segp->s_cv);
9385
9386 rsmseglock_release(segp);
9387
9388 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done\n", function));
9389
9390
9391 }
9392
9393 static void
9394 rsm_process_exp_seg(rsmresource_t *resp, int event)
9395 {
9396 if (event == RSM_DR_QUIESCE)
9397 rsm_quiesce_exp_seg(resp);
9398 else /* UNQUIESCE */
9399 rsm_unquiesce_exp_seg(resp);
9400 }
9401
9402 static void
9403 rsm_process_imp_seg(rsmresource_t *resp, int event)
9404 {
9405 if (event == RSM_DR_QUIESCE)
9406 rsm_quiesce_imp_seg(resp);
9407 else /* UNQUIESCE */
9408 rsm_unquiesce_imp_seg(resp);
9409 }
9410
9411 static void
9412 rsm_dr_process_local_segments(int event)
9413 {
9414
9415 int i, j;
9416 rsmresource_blk_t *blk;
9417 rsmresource_t *p;
9418 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9419
9420 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9421 "rsm_dr_process_local_segments enter\n"));
9422
9423 /* iterate through the resource structure */
9424
9425 rw_enter(&rsm_resource.rsmrc_lock, RW_READER);
9426
9427 for (i = 0; i < rsm_resource.rsmrc_len; i++) {
9428 blk = rsm_resource.rsmrc_root[i];
9429 if (blk != NULL) {
9430 for (j = 0; j < RSMRC_BLKSZ; j++) {
9431 p = blk->rsmrcblk_blks[j];
9432 if ((p != NULL) && (p != RSMRC_RESERVED)) {
9433 /* valid resource */
9434 if (p->rsmrc_type ==
9435 RSM_RESOURCE_EXPORT_SEGMENT)
9436 rsm_process_exp_seg(p, event);
9437 else if (p->rsmrc_type ==
9438 RSM_RESOURCE_IMPORT_SEGMENT)
9439 rsm_process_imp_seg(p, event);
9440 }
9441 }
9442 }
9443 }
9444
9445 rw_exit(&rsm_resource.rsmrc_lock);
9446
9447 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9448 "rsm_dr_process_local_segments done\n"));
9449 }
9450
9451 /* *************** DR callback functions ************ */
9452 static void
9453 rsm_dr_callback_post_add(void *arg, pgcnt_t delta /* ARGSUSED */)
9454 {
9455 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9456 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9457 "rsm_dr_callback_post_add is a no-op\n"));
9458 /* Noop */
9459 }
9460
9461 static int
9462 rsm_dr_callback_pre_del(void *arg, pgcnt_t delta /* ARGSUSED */)
9463 {
9464 int recheck_state = 0;
9465 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9466
9467 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9468 "rsm_dr_callback_pre_del enter\n"));
9469
9470 mutex_enter(&rsm_drv_data.drv_lock);
9471
9472 do {
9473 recheck_state = 0;
9474 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9475 "rsm_dr_callback_pre_del:state=%d\n",
9476 rsm_drv_data.drv_state));
9477
9478 switch (rsm_drv_data.drv_state) {
9479 case RSM_DRV_NEW:
9480 /*
9481 * The state should usually never be RSM_DRV_NEW
9482 * since in this state the callbacks have not yet
9483 * been registered. So, ASSERT.
9484 */
9485 ASSERT(0);
9486 return (0);
9487 case RSM_DRV_REG_PROCESSING:
9488 /*
9489 * The driver is in the process of registering
9490 * with the DR framework. So, wait till the
9491 * registration process is complete.
9492 */
9493 recheck_state = 1;
9494 cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
9495 break;
9496 case RSM_DRV_UNREG_PROCESSING:
9497 /*
9498 * If the state is RSM_DRV_UNREG_PROCESSING, the
9499 * module is in the process of detaching and
9500 * unregistering the callbacks from the DR
9501 * framework. So, simply return.
9502 */
9503 mutex_exit(&rsm_drv_data.drv_lock);
9504 DBG_PRINTF((category, RSM_DEBUG,
9505 "rsm_dr_callback_pre_del:"
9506 "pre-del on NEW/UNREG\n"));
9507 return (0);
9508 case RSM_DRV_OK:
9509 rsm_drv_data.drv_state = RSM_DRV_PREDEL_STARTED;
9510 break;
9511 case RSM_DRV_PREDEL_STARTED:
9512 /* FALLTHRU */
9513 case RSM_DRV_PREDEL_COMPLETED:
9514 /* FALLTHRU */
9515 case RSM_DRV_POSTDEL_IN_PROGRESS:
9516 recheck_state = 1;
9517 cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
9518 break;
9519 case RSM_DRV_DR_IN_PROGRESS:
9520 rsm_drv_data.drv_memdel_cnt++;
9521 mutex_exit(&rsm_drv_data.drv_lock);
9522 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9523 "rsm_dr_callback_pre_del done\n"));
9524 return (0);
9525 /* break; */
9526 default:
9527 ASSERT(0);
9528 break;
9529 }
9530
9531 } while (recheck_state);
9532
9533 rsm_drv_data.drv_memdel_cnt++;
9534
9535 mutex_exit(&rsm_drv_data.drv_lock);
9536
9537 /* Do all the quiescing stuff here */
9538 DBG_PRINTF((category, RSM_DEBUG,
9539 "rsm_dr_callback_pre_del: quiesce things now\n"));
9540
9541 rsm_dr_process_local_segments(RSM_DR_QUIESCE);
9542
9543 /*
9544 * now that all local segments have been quiesced lets inform
9545 * the importers
9546 */
9547 rsm_send_suspend();
9548
9549 /*
9550 * In response to the suspend message the remote node(s) will process
9551 * the segments and send a suspend_complete message. Till all
9552 * the nodes send the suspend_complete message we wait in the
9553 * RSM_DRV_PREDEL_STARTED state. In the exporter_quiesce
9554 * function we transition to the RSM_DRV_PREDEL_COMPLETED state.
9555 */
9556 mutex_enter(&rsm_drv_data.drv_lock);
9557
9558 while (rsm_drv_data.drv_state == RSM_DRV_PREDEL_STARTED) {
9559 cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
9560 }
9561
9562 ASSERT(rsm_drv_data.drv_state == RSM_DRV_PREDEL_COMPLETED);
9563
9564 rsm_drv_data.drv_state = RSM_DRV_DR_IN_PROGRESS;
9565 cv_broadcast(&rsm_drv_data.drv_cv);
9566
9567 mutex_exit(&rsm_drv_data.drv_lock);
9568
9569 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9570 "rsm_dr_callback_pre_del done\n"));
9571
9572 return (0);
9573 }
9574
9575 static void
9576 rsm_dr_callback_post_del(void *arg, pgcnt_t delta, int cancelled /* ARGSUSED */)
9577 {
9578 int recheck_state = 0;
9579 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9580
9581 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9582 "rsm_dr_callback_post_del enter\n"));
9583
9584 mutex_enter(&rsm_drv_data.drv_lock);
9585
9586 do {
9587 recheck_state = 0;
9588 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9589 "rsm_dr_callback_post_del:state=%d\n",
9590 rsm_drv_data.drv_state));
9591
9592 switch (rsm_drv_data.drv_state) {
9593 case RSM_DRV_NEW:
9594 /*
9595 * The driver state cannot not be RSM_DRV_NEW
9596 * since in this state the callbacks have not
9597 * yet been registered.
9598 */
9599 ASSERT(0);
9600 return;
9601 case RSM_DRV_REG_PROCESSING:
9602 /*
9603 * The driver is in the process of registering with
9604 * the DR framework. Wait till the registration is
9605 * complete.
9606 */
9607 recheck_state = 1;
9608 cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
9609 break;
9610 case RSM_DRV_UNREG_PROCESSING:
9611 /*
9612 * RSM_DRV_UNREG_PROCESSING state means the module
9613 * is detaching and unregistering the callbacks
9614 * from the DR framework. So simply return.
9615 */
9616 /* FALLTHRU */
9617 case RSM_DRV_OK:
9618 /*
9619 * RSM_DRV_OK means we missed the pre-del
9620 * corresponding to this post-del coz we had not
9621 * registered yet, so simply return.
9622 */
9623 mutex_exit(&rsm_drv_data.drv_lock);
9624 DBG_PRINTF((category, RSM_DEBUG,
9625 "rsm_dr_callback_post_del:"
9626 "post-del on OK/UNREG\n"));
9627 return;
9628 /* break; */
9629 case RSM_DRV_PREDEL_STARTED:
9630 /* FALLTHRU */
9631 case RSM_DRV_PREDEL_COMPLETED:
9632 /* FALLTHRU */
9633 case RSM_DRV_POSTDEL_IN_PROGRESS:
9634 recheck_state = 1;
9635 cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
9636 break;
9637 case RSM_DRV_DR_IN_PROGRESS:
9638 rsm_drv_data.drv_memdel_cnt--;
9639 if (rsm_drv_data.drv_memdel_cnt > 0) {
9640 mutex_exit(&rsm_drv_data.drv_lock);
9641 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9642 "rsm_dr_callback_post_del done:\n"));
9643 return;
9644 }
9645 rsm_drv_data.drv_state = RSM_DRV_POSTDEL_IN_PROGRESS;
9646 break;
9647 default:
9648 ASSERT(0);
9649 return;
9650 /* break; */
9651 }
9652 } while (recheck_state);
9653
9654 mutex_exit(&rsm_drv_data.drv_lock);
9655
9656 /* Do all the unquiescing stuff here */
9657 DBG_PRINTF((category, RSM_DEBUG,
9658 "rsm_dr_callback_post_del: unquiesce things now\n"));
9659
9660 rsm_dr_process_local_segments(RSM_DR_UNQUIESCE);
9661
9662 /*
9663 * now that all local segments have been unquiesced lets inform
9664 * the importers
9665 */
9666 rsm_send_resume();
9667
9668 mutex_enter(&rsm_drv_data.drv_lock);
9669
9670 rsm_drv_data.drv_state = RSM_DRV_OK;
9671
9672 cv_broadcast(&rsm_drv_data.drv_cv);
9673
9674 mutex_exit(&rsm_drv_data.drv_lock);
9675
9676 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9677 "rsm_dr_callback_post_del done\n"));
9678
9679 return;
9680
9681 }