Print this page
8634 epoll fails to wake on certain edge-triggered conditions
8635 epoll should not emit POLLNVAL
8636 recursive epoll should emit EPOLLRDNORM
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Toomas Soome <tsoome@me.com>
Reviewed by: Igor Kozhukhov <igor@dilos.org>
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/io/rsm/rsm.c
+++ new/usr/src/uts/common/io/rsm/rsm.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
|
↓ open down ↓ |
15 lines elided |
↑ open up ↑ |
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 * Copyright 2012 Milan Jurik. All rights reserved.
25 25 * Copyright (c) 2016 by Delphix. All rights reserved.
26 + * Copyright 2017 Joyent, Inc.
26 27 */
27 28
28 29
29 30 /*
30 31 * Overview of the RSM Kernel Agent:
31 32 * ---------------------------------
32 33 *
33 34 * rsm.c constitutes the implementation of the RSM kernel agent. The RSM
34 35 * kernel agent is a pseudo device driver which makes use of the RSMPI
35 36 * interface on behalf of the RSMAPI user library.
36 37 *
37 38 * The kernel agent functionality can be categorized into the following
38 39 * components:
39 40 * 1. Driver Infrastructure
40 41 * 2. Export/Import Segment Management
41 42 * 3. Internal resource allocation/deallocation
42 43 *
43 44 * The driver infrastructure includes the basic module loading entry points
44 45 * like _init, _info, _fini to load, unload and report information about
45 46 * the driver module. The driver infrastructure also includes the
46 47 * autoconfiguration entry points namely, attach, detach and getinfo for
47 48 * the device autoconfiguration.
48 49 *
49 50 * The kernel agent is a pseudo character device driver and exports
50 51 * a cb_ops structure which defines the driver entry points for character
51 52 * device access. This includes the open and close entry points. The
52 53 * other entry points provided include ioctl, devmap, segmap and chpoll.
53 54 * read and write entry points are not used since the device is memory
54 55 * mapped. Also ddi_prop_op is used for the prop_op entry point.
55 56 *
56 57 * The ioctl entry point supports a number of commands, which are used by
57 58 * the RSMAPI library in order to export and import segments. These
58 59 * commands include commands for binding and rebinding the physical pages
59 60 * allocated to the virtual address range, publishing the export segment,
60 61 * unpublishing and republishing an export segment, creating an
61 62 * import segment and a virtual connection from this import segment to
62 63 * an export segment, performing scatter-gather data transfer, barrier
63 64 * operations.
64 65 *
65 66 *
66 67 * Export and Import segments:
67 68 * ---------------------------
68 69 *
69 70 * In order to create an RSM export segment a process allocates a range in its
70 71 * virtual address space for the segment using standard Solaris interfaces.
71 72 * The process then calls RSMAPI, which in turn makes an ioctl call to the
72 73 * RSM kernel agent for an allocation of physical memory pages and for
73 74 * creation of the export segment by binding these pages to the virtual
74 75 * address range. These pages are locked in memory so that remote accesses
75 76 * are always applied to the correct page. Then the RSM segment is published,
76 77 * again via RSMAPI making an ioctl to the RSM kernel agent, and a segment id
77 78 * is assigned to it.
78 79 *
79 80 * In order to import a published RSM segment, RSMAPI creates an import
80 81 * segment and forms a virtual connection across the interconnect to the
81 82 * export segment, via an ioctl into the kernel agent with the connect
82 83 * command. The import segment setup is completed by mapping the
83 84 * local device memory into the importer's virtual address space. The
84 85 * mapping of the import segment is handled by the segmap/devmap
85 86 * infrastructure described as follows.
86 87 *
87 88 * Segmap and Devmap interfaces:
88 89 *
89 90 * The RSM kernel agent allows device memory to be directly accessed by user
90 91 * threads via memory mapping. In order to do so, the RSM kernel agent
91 92 * supports the devmap and segmap entry points.
92 93 *
93 94 * The segmap entry point(rsm_segmap) is responsible for setting up a memory
94 95 * mapping as requested by mmap. The devmap entry point(rsm_devmap) is
95 96 * responsible for exporting the device memory to the user applications.
96 97 * rsm_segmap calls RSMPI rsm_map to allocate device memory. Then the
97 98 * control is transferred to the devmap_setup call which calls rsm_devmap.
98 99 *
99 100 * rsm_devmap validates the user mapping to the device or kernel memory
100 101 * and passes the information to the system for setting up the mapping. The
101 102 * actual setting up of the mapping is done by devmap_devmem_setup(for
102 103 * device memory) or devmap_umem_setup(for kernel memory). Callbacks are
103 104 * registered for device context management via the devmap_devmem_setup
104 105 * or devmap_umem_setup calls. The callbacks are rsmmap_map, rsmmap_unmap,
105 106 * rsmmap_access, rsmmap_dup. The callbacks are called when a new mapping
106 107 * is created, a mapping is freed, a mapping is accessed or an existing
107 108 * mapping is duplicated respectively. These callbacks allow the RSM kernel
108 109 * agent to maintain state information associated with the mappings.
109 110 * The state information is mainly in the form of a cookie list for the import
110 111 * segment for which mapping has been done.
111 112 *
112 113 * Forced disconnect of import segments:
113 114 *
114 115 * When an exported segment is unpublished, the exporter sends a forced
115 116 * disconnect message to all its importers. The importer segments are
116 117 * unloaded and disconnected. This involves unloading the original
117 118 * mappings and remapping to a preallocated kernel trash page. This is
118 119 * done by devmap_umem_remap. The trash/dummy page is a kernel page,
119 120 * preallocated by the kernel agent during attach using ddi_umem_alloc with
120 121 * the DDI_UMEM_TRASH flag set. This avoids a core dump in the application
121 122 * due to unloading of the original mappings.
122 123 *
123 124 * Additionally every segment has a mapping generation number associated
124 125 * with it. This is an entry in the barrier generation page, created
125 126 * during attach time. This mapping generation number for the import
126 127 * segments is incremented on a force disconnect to notify the application
127 128 * of the force disconnect. On this notification, the application needs
128 129 * to reconnect the segment to establish a new legitimate mapping.
129 130 *
130 131 *
131 132 * Locks used in the kernel agent:
132 133 * -------------------------------
133 134 *
134 135 * The kernel agent uses a variety of mutexes and condition variables for
135 136 * mutual exclusion of the shared data structures and for synchronization
136 137 * between the various threads. Some of the locks are described as follows.
137 138 *
138 139 * Each resource structure, which represents either an export or import segment,
139 140 * has a lock associated with it. The lock is the resource mutex, rsmrc_lock.
140 141 * This is used directly by RSMRC_LOCK and RSMRC_UNLOCK macros and in the
141 142 * rsmseglock_acquire and rsmseglock_release macros. An additional
142 143 * lock called the rsmsi_lock is used for the shared import data structure
143 144 * that is relevant for resources representing import segments. There is
144 145 * also a condition variable associated with the resource called s_cv. This
145 146 * is used to wait for events like the segment state change etc.
146 147 *
147 148 * The resource structures are allocated from a pool of resource structures,
148 149 * called rsm_resource. This pool is protected via a reader-writer lock,
149 150 * called rsmrc_lock.
150 151 *
151 152 * There are two separate hash tables, one for the export segments and
152 153 * one for the import segments. The export segments are inserted into the
153 154 * export segment hash table only after they have been published and the
154 155 * import segments are inserted in the import segments list only after they
155 156 * have successfully connected to an exported segment. These tables are
156 157 * protected via reader-writer locks.
157 158 *
158 159 * Debug Support in the kernel agent:
159 160 * ----------------------------------
160 161 *
161 162 * Debugging support in the kernel agent is provided by the following
162 163 * macros.
163 164 *
164 165 * DBG_PRINTF((category, level, message)) is a macro which logs a debug
165 166 * message to the kernel agent's debug buffer, rsmka_dbg. This debug buffer
166 167 * can be viewed in kmdb as *rsmka_dbg/s. The message is logged based
167 168 * on the definition of the category and level. All messages that belong to
168 169 * the specified category(rsmdbg_category) and are of an equal or greater
169 170 * severity than the specified level(rsmdbg_level) are logged. The message
170 171 * is a string which uses the same formatting rules as the strings used in
171 172 * printf.
172 173 *
173 174 * The category defines which component of the kernel agent has logged this
174 175 * message. There are a number of categories that have been defined such as
175 176 * RSM_KERNEL_AGENT, RSM_OPS, RSM_IMPORT, RSM_EXPORT etc. A macro,
176 177 * DBG_ADDCATEGORY is used to add in another category to the currently
177 178 * specified category value so that the component using this new category
178 179 * can also effectively log debug messages. Thus, the category of a specific
179 180 * message is some combination of the available categories and we can define
180 181 * sub-categories if we want a finer level of granularity.
181 182 *
182 183 * The level defines the severity of the message. Different level values are
183 184 * defined, with RSM_ERR being the most severe and RSM_DEBUG_VERBOSE being
184 185 * the least severe(debug level is 0).
185 186 *
186 187 * DBG_DEFINE and DBG_DEFINE_STR are macros provided to declare a debug
187 188 * variable or a string respectively.
188 189 *
189 190 *
190 191 * NOTES:
191 192 *
192 193 * Special Fork and Exec Handling:
193 194 * -------------------------------
194 195 *
195 196 * The backing physical pages of an exported segment are always locked down.
196 197 * Thus, there are two cases in which a process having exported segments
197 198 * will cause a cpu to hang: (1) the process invokes exec; (2) a process
198 199 * forks and invokes exit before the duped file descriptors for the export
199 200 * segments are closed in the child process. The hang is caused because the
200 201 * address space release algorithm in Solaris VM subsystem is based on a
201 202 * non-blocking loop which does not terminate while segments are locked
202 203 * down. In addition to this, Solaris VM subsystem lacks a callback
203 204 * mechanism to the rsm kernel agent to allow unlocking these export
204 205 * segment pages.
205 206 *
206 207 * In order to circumvent this problem, the kernel agent does the following.
207 208 * The Solaris VM subsystem keeps memory segments in increasing order of
208 209 * virtual addresses. Thus a special page(special_exit_offset) is allocated
209 210 * by the kernel agent and is mmapped into the heap area of the process address
210 211 * space(the mmap is done by the RSMAPI library). During the mmap processing
211 212 * of this special page by the devmap infrastructure, a callback(the same
212 213 * devmap context management callbacks discussed above) is registered for an
213 214 * unmap.
214 215 *
215 216 * As discussed above, this page is processed by the Solaris address space
216 217 * release code before any of the exported segments pages(which are allocated
217 218 * from high memory). It is during this processing that the unmap callback gets
218 219 * called and this callback is responsible for force destroying the exported
219 220 * segments and thus eliminating the problem of locked pages.
220 221 *
221 222 * Flow-control:
222 223 * ------------
223 224 *
224 225 * A credit based flow control algorithm is used for messages whose
225 226 * processing cannot be done in the interrupt context because it might
226 227 * involve invoking rsmpi calls, or might take a long time to complete
227 228 * or might need to allocate resources. The algorithm operates on a per
228 229 * path basis. To send a message the pathend needs to have a credit and
229 230 * it consumes one for every message that is flow controlled. On the
230 231 * receiving pathend the message is put on a msgbuf_queue and a task is
231 232 * dispatched on the worker thread - recv_taskq where it is processed.
232 233 * After processing the message, the receiving pathend dequeues the message,
233 234 * and if it has processed > RSMIPC_LOTSFREE_MSGBUFS messages sends
234 235 * credits to the sender pathend.
235 236 *
236 237 * RSM_DRTEST:
237 238 * -----------
238 239 *
239 240 * This is used to enable the DR testing using a test driver on test
240 241 * platforms which do not support DR.
241 242 *
242 243 */
243 244
244 245 #include <sys/types.h>
245 246 #include <sys/param.h>
246 247 #include <sys/user.h>
247 248 #include <sys/buf.h>
248 249 #include <sys/systm.h>
249 250 #include <sys/cred.h>
250 251 #include <sys/vm.h>
251 252 #include <sys/uio.h>
252 253 #include <vm/seg.h>
253 254 #include <vm/page.h>
254 255 #include <sys/stat.h>
255 256
256 257 #include <sys/time.h>
257 258 #include <sys/errno.h>
258 259
259 260 #include <sys/file.h>
260 261 #include <sys/uio.h>
261 262 #include <sys/proc.h>
262 263 #include <sys/mman.h>
263 264 #include <sys/open.h>
264 265 #include <sys/atomic.h>
265 266 #include <sys/mem_config.h>
266 267
267 268
268 269 #include <sys/ddi.h>
269 270 #include <sys/devops.h>
270 271 #include <sys/ddidevmap.h>
271 272 #include <sys/sunddi.h>
272 273 #include <sys/esunddi.h>
273 274 #include <sys/ddi_impldefs.h>
274 275
275 276 #include <sys/kmem.h>
276 277 #include <sys/conf.h>
277 278 #include <sys/devops.h>
278 279 #include <sys/ddi_impldefs.h>
279 280
280 281 #include <sys/modctl.h>
281 282
282 283 #include <sys/policy.h>
283 284 #include <sys/types.h>
284 285 #include <sys/conf.h>
285 286 #include <sys/param.h>
286 287
287 288 #include <sys/taskq.h>
288 289
289 290 #include <sys/rsm/rsm_common.h>
290 291 #include <sys/rsm/rsmapi_common.h>
291 292 #include <sys/rsm/rsm.h>
292 293 #include <rsm_in.h>
293 294 #include <sys/rsm/rsmka_path_int.h>
294 295 #include <sys/rsm/rsmpi.h>
295 296
296 297 #include <sys/modctl.h>
297 298 #include <sys/debug.h>
298 299
299 300 #include <sys/tuneable.h>
300 301
301 302 #ifdef RSM_DRTEST
302 303 extern int rsm_kphysm_setup_func_register(kphysm_setup_vector_t *vec,
303 304 void *arg);
304 305 extern void rsm_kphysm_setup_func_unregister(kphysm_setup_vector_t *vec,
305 306 void *arg);
306 307 #endif
307 308
308 309 extern void dbg_printf(int category, int level, char *fmt, ...);
309 310 extern void rsmka_pathmanager_init();
310 311 extern void rsmka_pathmanager_cleanup();
311 312 extern void rele_sendq_token(sendq_token_t *);
312 313 extern rsm_addr_t get_remote_hwaddr(adapter_t *, rsm_node_id_t);
313 314 extern rsm_node_id_t get_remote_nodeid(adapter_t *, rsm_addr_t);
314 315 extern int rsmka_topology_ioctl(caddr_t, int, int);
315 316
316 317 extern pri_t maxclsyspri;
317 318 extern work_queue_t work_queue;
318 319 extern kmutex_t ipc_info_lock;
319 320 extern kmutex_t ipc_info_cvlock;
320 321 extern kcondvar_t ipc_info_cv;
321 322 extern kmutex_t path_hold_cvlock;
322 323 extern kcondvar_t path_hold_cv;
323 324
324 325 extern kmutex_t rsmka_buf_lock;
325 326
326 327 extern path_t *rsm_find_path(char *, int, rsm_addr_t);
327 328 extern adapter_t *rsmka_lookup_adapter(char *, int);
328 329 extern sendq_token_t *rsmka_get_sendq_token(rsm_node_id_t, sendq_token_t *);
329 330 extern boolean_t rsmka_do_path_active(path_t *, int);
330 331 extern boolean_t rsmka_check_node_alive(rsm_node_id_t);
331 332 extern void rsmka_release_adapter(adapter_t *);
332 333 extern void rsmka_enqueue_msgbuf(path_t *path, void *data);
333 334 extern void rsmka_dequeue_msgbuf(path_t *path);
334 335 extern msgbuf_elem_t *rsmka_gethead_msgbuf(path_t *path);
335 336 /* lint -w2 */
336 337
337 338 static int rsm_open(dev_t *, int, int, cred_t *);
338 339 static int rsm_close(dev_t, int, int, cred_t *);
339 340 static int rsm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
340 341 cred_t *credp, int *rvalp);
341 342 static int rsm_devmap(dev_t, devmap_cookie_t, offset_t, size_t, size_t *,
342 343 uint_t);
343 344 static int rsm_segmap(dev_t, off_t, struct as *, caddr_t *, off_t, uint_t,
344 345 uint_t, uint_t, cred_t *);
345 346 static int rsm_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
346 347 struct pollhead **phpp);
347 348
348 349 static int rsm_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
349 350 static int rsm_attach(dev_info_t *, ddi_attach_cmd_t);
350 351 static int rsm_detach(dev_info_t *, ddi_detach_cmd_t);
351 352
352 353 static int rsmipc_send(rsm_node_id_t, rsmipc_request_t *, rsmipc_reply_t *);
353 354 static void rsm_force_unload(rsm_node_id_t, rsm_memseg_id_t, boolean_t);
354 355 static void rsm_send_importer_disconnects(rsm_memseg_id_t, rsm_node_id_t);
355 356 static void rsm_send_republish(rsm_memseg_id_t, rsmapi_access_entry_t *, int,
356 357 rsm_permission_t);
357 358 static void rsm_export_force_destroy(ddi_umem_cookie_t *);
358 359 static void rsmacl_free(rsmapi_access_entry_t *, int);
359 360 static void rsmpiacl_free(rsm_access_entry_t *, int);
360 361
361 362 static int rsm_inc_pgcnt(pgcnt_t);
362 363 static void rsm_dec_pgcnt(pgcnt_t);
363 364 static void rsm_free_mapinfo(rsm_mapinfo_t *mapinfop);
364 365 static rsm_mapinfo_t *rsm_get_mapinfo(rsmseg_t *, off_t, size_t, off_t *,
365 366 size_t *);
366 367 static void exporter_quiesce();
367 368 static void rsmseg_suspend(rsmseg_t *, int *);
368 369 static void rsmsegshare_suspend(rsmseg_t *);
369 370 static int rsmseg_resume(rsmseg_t *, void **);
370 371 static int rsmsegshare_resume(rsmseg_t *);
371 372
372 373 static struct cb_ops rsm_cb_ops = {
373 374 rsm_open, /* open */
374 375 rsm_close, /* close */
375 376 nodev, /* strategy */
376 377 nodev, /* print */
377 378 nodev, /* dump */
378 379 nodev, /* read */
379 380 nodev, /* write */
380 381 rsm_ioctl, /* ioctl */
381 382 rsm_devmap, /* devmap */
382 383 NULL, /* mmap */
383 384 rsm_segmap, /* segmap */
384 385 rsm_chpoll, /* poll */
385 386 ddi_prop_op, /* cb_prop_op */
386 387 0, /* streamtab */
387 388 D_NEW|D_MP|D_DEVMAP, /* Driver compatibility flag */
388 389 0,
389 390 0,
390 391 0
391 392 };
392 393
393 394 static struct dev_ops rsm_ops = {
394 395 DEVO_REV, /* devo_rev, */
395 396 0, /* refcnt */
396 397 rsm_info, /* get_dev_info */
397 398 nulldev, /* identify */
398 399 nulldev, /* probe */
399 400 rsm_attach, /* attach */
400 401 rsm_detach, /* detach */
401 402 nodev, /* reset */
402 403 &rsm_cb_ops, /* driver operations */
403 404 (struct bus_ops *)0, /* bus operations */
404 405 0,
405 406 ddi_quiesce_not_needed, /* quiesce */
406 407 };
407 408
408 409 /*
409 410 * Module linkage information for the kernel.
410 411 */
411 412
412 413 static struct modldrv modldrv = {
413 414 &mod_driverops, /* Type of module. This one is a pseudo driver */
414 415 "Remote Shared Memory Driver",
415 416 &rsm_ops, /* driver ops */
416 417 };
417 418
418 419 static struct modlinkage modlinkage = {
419 420 MODREV_1,
420 421 (void *)&modldrv,
421 422 0,
422 423 0,
423 424 0
424 425 };
425 426
426 427 static void rsm_dr_callback_post_add(void *arg, pgcnt_t delta);
427 428 static int rsm_dr_callback_pre_del(void *arg, pgcnt_t delta);
428 429 static void rsm_dr_callback_post_del(void *arg, pgcnt_t delta, int cancelled);
429 430
430 431 static kphysm_setup_vector_t rsm_dr_callback_vec = {
431 432 KPHYSM_SETUP_VECTOR_VERSION,
432 433 rsm_dr_callback_post_add,
433 434 rsm_dr_callback_pre_del,
434 435 rsm_dr_callback_post_del
435 436 };
436 437
437 438 /* This flag can be changed to 0 to help with PIT testing */
438 439 int rsmka_modunloadok = 1;
439 440 int no_reply_cnt = 0;
440 441
441 442 uint64_t rsm_ctrlmsg_errcnt = 0;
442 443 uint64_t rsm_ipcsend_errcnt = 0;
443 444
444 445 #define MAX_NODES 64
445 446
446 447 static struct rsm_driver_data rsm_drv_data;
447 448 static struct rsmresource_table rsm_resource;
448 449
449 450 static void rsmresource_insert(minor_t, rsmresource_t *, rsm_resource_type_t);
450 451 static void rsmresource_destroy(void);
451 452 static int rsmresource_alloc(minor_t *);
452 453 static rsmresource_t *rsmresource_free(minor_t rnum);
453 454 static int rsm_closeconnection(rsmseg_t *seg, void **cookie);
454 455 static int rsm_unpublish(rsmseg_t *seg, int mode);
455 456 static int rsm_unbind(rsmseg_t *seg);
456 457 static uint_t rsmhash(rsm_memseg_id_t key);
457 458 static void rsmhash_alloc(rsmhash_table_t *rhash, int size);
458 459 static void rsmhash_free(rsmhash_table_t *rhash, int size);
459 460 static void *rsmhash_getbkt(rsmhash_table_t *rhash, uint_t hashval);
460 461 static void **rsmhash_bktaddr(rsmhash_table_t *rhash, uint_t hashval);
461 462 static int rsm_send_notimporting(rsm_node_id_t dest, rsm_memseg_id_t segid,
462 463 void *cookie);
463 464 int rsm_disconnect(rsmseg_t *seg);
464 465 void rsmseg_unload(rsmseg_t *);
465 466 void rsm_suspend_complete(rsm_node_id_t src_node, int flag);
466 467
467 468 rsm_intr_hand_ret_t rsm_srv_func(rsm_controller_object_t *chd,
468 469 rsm_intr_q_op_t opcode, rsm_addr_t src,
469 470 void *data, size_t size, rsm_intr_hand_arg_t arg);
470 471
471 472 static void rsm_intr_callback(void *, rsm_addr_t, rsm_intr_hand_arg_t);
472 473
473 474 rsm_node_id_t my_nodeid;
474 475
475 476 /* cookie, va, offsets and length for the barrier */
476 477 static rsm_gnum_t *bar_va;
477 478 static ddi_umem_cookie_t bar_cookie;
478 479 static off_t barrier_offset;
479 480 static size_t barrier_size;
480 481 static int max_segs;
481 482
482 483 /* cookie for the trash memory */
483 484 static ddi_umem_cookie_t remap_cookie;
484 485
485 486 static rsm_memseg_id_t rsm_nextavail_segmentid;
486 487
487 488 extern taskq_t *work_taskq;
488 489 extern char *taskq_name;
489 490
490 491 static dev_info_t *rsm_dip; /* private copy of devinfo pointer */
491 492
492 493 static rsmhash_table_t rsm_export_segs; /* list of exported segs */
493 494 rsmhash_table_t rsm_import_segs; /* list of imported segs */
494 495 static rsmhash_table_t rsm_event_queues; /* list of event queues */
495 496
496 497 static rsm_ipc_t rsm_ipc; /* ipc info */
497 498
498 499 /* list of nodes to which RSMIPC_MSG_SUSPEND has been sent */
499 500 static list_head_t rsm_suspend_list;
500 501
501 502 /* list of descriptors for remote importers */
502 503 static importers_table_t importer_list;
503 504
504 505 kmutex_t rsm_suspend_cvlock;
505 506 kcondvar_t rsm_suspend_cv;
506 507
507 508 static kmutex_t rsm_lock;
508 509
509 510 adapter_t loopback_adapter;
510 511 rsm_controller_attr_t loopback_attr;
511 512
512 513 int rsmipc_send_controlmsg(path_t *path, int msgtype);
513 514
514 515 void rsmka_init_loopback();
515 516
516 517 int rsmka_null_seg_create(
517 518 rsm_controller_handle_t,
518 519 rsm_memseg_export_handle_t *,
519 520 size_t,
520 521 uint_t,
521 522 rsm_memory_local_t *,
522 523 rsm_resource_callback_t,
523 524 rsm_resource_callback_arg_t);
524 525
525 526 int rsmka_null_seg_destroy(
526 527 rsm_memseg_export_handle_t);
527 528
528 529 int rsmka_null_bind(
529 530 rsm_memseg_export_handle_t,
530 531 off_t,
531 532 rsm_memory_local_t *,
532 533 rsm_resource_callback_t,
533 534 rsm_resource_callback_arg_t);
534 535
535 536 int rsmka_null_unbind(
536 537 rsm_memseg_export_handle_t,
537 538 off_t,
538 539 size_t);
539 540
540 541 int rsmka_null_rebind(
541 542 rsm_memseg_export_handle_t,
542 543 off_t,
543 544 rsm_memory_local_t *,
544 545 rsm_resource_callback_t,
545 546 rsm_resource_callback_arg_t);
546 547
547 548 int rsmka_null_publish(
548 549 rsm_memseg_export_handle_t,
549 550 rsm_access_entry_t [],
550 551 uint_t,
551 552 rsm_memseg_id_t,
552 553 rsm_resource_callback_t,
553 554 rsm_resource_callback_arg_t);
554 555
555 556
556 557 int rsmka_null_republish(
557 558 rsm_memseg_export_handle_t,
558 559 rsm_access_entry_t [],
559 560 uint_t,
560 561 rsm_resource_callback_t,
561 562 rsm_resource_callback_arg_t);
562 563
563 564 int rsmka_null_unpublish(
564 565 rsm_memseg_export_handle_t);
565 566
566 567 rsm_ops_t null_rsmpi_ops;
567 568
568 569 /*
569 570 * data and locks to keep track of total amount of exported memory
570 571 */
571 572 static pgcnt_t rsm_pgcnt;
572 573 static pgcnt_t rsm_pgcnt_max; /* max allowed */
573 574 static kmutex_t rsm_pgcnt_lock;
574 575
575 576 static int rsm_enable_dr;
576 577
577 578 static char loopback_str[] = "loopback";
578 579
579 580 int rsm_hash_size;
580 581
581 582 /*
582 583 * The locking model is as follows:
583 584 *
584 585 * Local operations:
585 586 * find resource - grab reader lock on resource list
586 587 * insert rc - grab writer lock
587 588 * delete rc - grab writer lock and resource mutex
588 589 * read/write - no lock
589 590 *
590 591 * Remote invocations:
591 592 * find resource - grab read lock and resource mutex
592 593 *
593 594 * State:
594 595 * resource state - grab resource mutex
595 596 */
596 597
597 598 int
598 599 _init(void)
599 600 {
600 601 int e;
601 602
602 603 e = mod_install(&modlinkage);
603 604 if (e != 0) {
604 605 return (e);
605 606 }
606 607
607 608 mutex_init(&rsm_lock, NULL, MUTEX_DRIVER, NULL);
608 609
609 610 mutex_init(&rsmka_buf_lock, NULL, MUTEX_DEFAULT, NULL);
610 611
611 612
612 613 rw_init(&rsm_resource.rsmrc_lock, NULL, RW_DRIVER, NULL);
613 614
614 615 rsm_hash_size = RSM_HASHSZ;
615 616
616 617 rw_init(&rsm_export_segs.rsmhash_rw, NULL, RW_DRIVER, NULL);
617 618
618 619 rw_init(&rsm_import_segs.rsmhash_rw, NULL, RW_DRIVER, NULL);
619 620
620 621 mutex_init(&importer_list.lock, NULL, MUTEX_DRIVER, NULL);
621 622
622 623 mutex_init(&rsm_ipc.lock, NULL, MUTEX_DRIVER, NULL);
623 624 cv_init(&rsm_ipc.cv, NULL, CV_DRIVER, 0);
624 625
625 626 mutex_init(&rsm_suspend_cvlock, NULL, MUTEX_DRIVER, NULL);
626 627 cv_init(&rsm_suspend_cv, NULL, CV_DRIVER, 0);
627 628
628 629 mutex_init(&rsm_drv_data.drv_lock, NULL, MUTEX_DRIVER, NULL);
629 630 cv_init(&rsm_drv_data.drv_cv, NULL, CV_DRIVER, 0);
630 631
631 632 rsm_ipc.count = RSMIPC_SZ;
632 633 rsm_ipc.wanted = 0;
633 634 rsm_ipc.sequence = 0;
634 635
635 636 (void) mutex_init(&rsm_pgcnt_lock, NULL, MUTEX_DRIVER, NULL);
636 637
637 638 for (e = 0; e < RSMIPC_SZ; e++) {
638 639 rsmipc_slot_t *slot = &rsm_ipc.slots[e];
639 640
640 641 RSMIPC_SET(slot, RSMIPC_FREE);
641 642 mutex_init(&slot->rsmipc_lock, NULL, MUTEX_DRIVER, NULL);
642 643 cv_init(&slot->rsmipc_cv, NULL, CV_DRIVER, 0);
643 644 }
644 645
645 646 /*
646 647 * Initialize the suspend message list
647 648 */
648 649 rsm_suspend_list.list_head = NULL;
649 650 mutex_init(&rsm_suspend_list.list_lock, NULL, MUTEX_DRIVER, NULL);
650 651
651 652 /*
652 653 * It is assumed here that configuration data is available
653 654 * during system boot since _init may be called at that time.
654 655 */
655 656
656 657 rsmka_pathmanager_init();
657 658
658 659 DBG_PRINTF((RSM_KERNEL_AGENT, RSM_DEBUG_VERBOSE,
659 660 "rsm: _init done\n"));
660 661
661 662 return (DDI_SUCCESS);
662 663
663 664 }
664 665
665 666 int
666 667 _info(struct modinfo *modinfop)
667 668 {
668 669
669 670 return (mod_info(&modlinkage, modinfop));
670 671 }
671 672
672 673 int
673 674 _fini(void)
674 675 {
675 676 int e;
676 677
677 678 DBG_PRINTF((RSM_KERNEL_AGENT, RSM_DEBUG_VERBOSE,
678 679 "rsm: _fini enter\n"));
679 680
680 681 /*
681 682 * The rsmka_modunloadok flag is simply used to help with
682 683 * the PIT testing. Make this flag 0 to disallow modunload.
683 684 */
684 685 if (rsmka_modunloadok == 0)
685 686 return (EBUSY);
686 687
687 688 /* rsm_detach will be called as a result of mod_remove */
688 689 e = mod_remove(&modlinkage);
689 690 if (e) {
690 691 DBG_PRINTF((RSM_KERNEL_AGENT, RSM_ERR,
691 692 "Unable to fini RSM %x\n", e));
692 693 return (e);
693 694 }
694 695
695 696 rsmka_pathmanager_cleanup();
696 697
697 698 rw_destroy(&rsm_resource.rsmrc_lock);
698 699
699 700 rw_destroy(&rsm_export_segs.rsmhash_rw);
700 701 rw_destroy(&rsm_import_segs.rsmhash_rw);
701 702 rw_destroy(&rsm_event_queues.rsmhash_rw);
702 703
703 704 mutex_destroy(&importer_list.lock);
704 705
705 706 mutex_destroy(&rsm_ipc.lock);
706 707 cv_destroy(&rsm_ipc.cv);
707 708
708 709 (void) mutex_destroy(&rsm_suspend_list.list_lock);
709 710
710 711 (void) mutex_destroy(&rsm_pgcnt_lock);
711 712
712 713 DBG_PRINTF((RSM_KERNEL_AGENT, RSM_DEBUG_VERBOSE, "_fini done\n"));
713 714
714 715 return (DDI_SUCCESS);
715 716
716 717 }
717 718
718 719 /*ARGSUSED1*/
719 720 static int
720 721 rsm_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
721 722 {
722 723 minor_t rnum;
723 724 int percent;
724 725 int ret;
725 726 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_DDI);
726 727
727 728 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_attach enter\n"));
728 729
729 730 switch (cmd) {
730 731 case DDI_ATTACH:
731 732 break;
732 733 case DDI_RESUME:
733 734 default:
734 735 DBG_PRINTF((category, RSM_ERR,
735 736 "rsm:rsm_attach - cmd not supported\n"));
736 737 return (DDI_FAILURE);
737 738 }
738 739
739 740 if (rsm_dip != NULL) {
740 741 DBG_PRINTF((category, RSM_ERR,
741 742 "rsm:rsm_attach - supports only "
742 743 "one instance\n"));
743 744 return (DDI_FAILURE);
744 745 }
745 746
746 747 rsm_enable_dr = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
747 748 DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
748 749 "enable-dynamic-reconfiguration", 1);
749 750
750 751 mutex_enter(&rsm_drv_data.drv_lock);
751 752 rsm_drv_data.drv_state = RSM_DRV_REG_PROCESSING;
752 753 mutex_exit(&rsm_drv_data.drv_lock);
753 754
754 755 if (rsm_enable_dr) {
755 756 #ifdef RSM_DRTEST
756 757 ret = rsm_kphysm_setup_func_register(&rsm_dr_callback_vec,
757 758 (void *)NULL);
758 759 #else
759 760 ret = kphysm_setup_func_register(&rsm_dr_callback_vec,
760 761 (void *)NULL);
761 762 #endif
762 763 if (ret != 0) {
763 764 mutex_exit(&rsm_drv_data.drv_lock);
764 765 cmn_err(CE_CONT, "rsm:rsm_attach - Dynamic "
765 766 "reconfiguration setup failed\n");
766 767 return (DDI_FAILURE);
767 768 }
768 769 }
769 770
770 771 mutex_enter(&rsm_drv_data.drv_lock);
771 772 ASSERT(rsm_drv_data.drv_state == RSM_DRV_REG_PROCESSING);
772 773 rsm_drv_data.drv_state = RSM_DRV_OK;
773 774 cv_broadcast(&rsm_drv_data.drv_cv);
774 775 mutex_exit(&rsm_drv_data.drv_lock);
775 776
776 777 /*
777 778 * page_list_read_lock();
778 779 * xx_setup();
779 780 * page_list_read_unlock();
780 781 */
781 782
782 783 rsm_hash_size = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
783 784 DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
784 785 "segment-hashtable-size", RSM_HASHSZ);
785 786 if (rsm_hash_size == 0) {
786 787 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
787 788 "rsm: segment-hashtable-size in rsm.conf "
788 789 "must be greater than 0, defaulting to 128\n"));
789 790 rsm_hash_size = RSM_HASHSZ;
790 791 }
791 792
792 793 DBG_PRINTF((category, RSM_DEBUG, "rsm_attach rsm_hash_size: %d\n",
793 794 rsm_hash_size));
794 795
795 796 rsm_pgcnt = 0;
796 797
797 798 percent = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
798 799 DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
799 800 "max-exported-memory", 0);
800 801 if (percent < 0) {
801 802 DBG_PRINTF((category, RSM_ERR,
802 803 "rsm:rsm_attach not enough memory available to "
803 804 "export, or max-exported-memory set incorrectly.\n"));
804 805 return (DDI_FAILURE);
805 806 }
806 807 /* 0 indicates no fixed upper limit. maxmem is the max */
807 808 /* available pageable physical mem */
808 809 rsm_pgcnt_max = (percent*maxmem)/100;
809 810
810 811 if (rsm_pgcnt_max > 0) {
811 812 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
812 813 "rsm: Available physical memory = %lu pages, "
813 814 "Max exportable memory = %lu pages",
814 815 maxmem, rsm_pgcnt_max));
815 816 }
816 817
817 818 /*
818 819 * Create minor number
819 820 */
820 821 if (rsmresource_alloc(&rnum) != RSM_SUCCESS) {
821 822 DBG_PRINTF((category, RSM_ERR,
822 823 "rsm: rsm_attach - Unable to get "
823 824 "minor number\n"));
824 825 return (DDI_FAILURE);
825 826 }
826 827
827 828 ASSERT(rnum == RSM_DRIVER_MINOR);
828 829
829 830 if (ddi_create_minor_node(devi, DRIVER_NAME, S_IFCHR,
830 831 rnum, DDI_PSEUDO, NULL) == DDI_FAILURE) {
831 832 DBG_PRINTF((category, RSM_ERR,
832 833 "rsm: rsm_attach - unable to allocate "
833 834 "minor #\n"));
834 835 return (DDI_FAILURE);
835 836 }
836 837
837 838 rsm_dip = devi;
838 839 /*
839 840 * Allocate the hashtables
840 841 */
841 842 rsmhash_alloc(&rsm_export_segs, rsm_hash_size);
842 843 rsmhash_alloc(&rsm_import_segs, rsm_hash_size);
843 844
844 845 importer_list.bucket = (importing_token_t **)
845 846 kmem_zalloc(rsm_hash_size * sizeof (importing_token_t *), KM_SLEEP);
846 847
847 848 /*
848 849 * Allocate a resource struct
849 850 */
850 851 {
851 852 rsmresource_t *p;
852 853
853 854 p = (rsmresource_t *)kmem_zalloc(sizeof (*p), KM_SLEEP);
854 855
855 856 mutex_init(&p->rsmrc_lock, NULL, MUTEX_DRIVER, (void *) NULL);
856 857
857 858 rsmresource_insert(rnum, p, RSM_RESOURCE_BAR);
858 859 }
859 860
860 861 /*
861 862 * Based on the rsm.conf property max-segments, determine the maximum
862 863 * number of segments that can be exported/imported. This is then used
863 864 * to determine the size for barrier failure pages.
864 865 */
865 866
866 867 /* First get the max number of segments from the rsm.conf file */
867 868 max_segs = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
868 869 DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
869 870 "max-segments", 0);
870 871 if (max_segs == 0) {
871 872 /* Use default number of segments */
872 873 max_segs = RSM_MAX_NUM_SEG;
873 874 }
874 875
875 876 /*
876 877 * Based on the max number of segments allowed, determine the barrier
877 878 * page size. add 1 to max_segs since the barrier page itself uses
878 879 * a slot
879 880 */
880 881 barrier_size = roundup((max_segs + 1) * sizeof (rsm_gnum_t),
881 882 PAGESIZE);
882 883
883 884 /*
884 885 * allocation of the barrier failure page
885 886 */
886 887 bar_va = (rsm_gnum_t *)ddi_umem_alloc(barrier_size,
887 888 DDI_UMEM_SLEEP, &bar_cookie);
888 889
889 890 /*
890 891 * Set the barrier_offset
891 892 */
892 893 barrier_offset = 0;
893 894
894 895 /*
895 896 * Allocate a trash memory and get a cookie for it. This will be used
896 897 * when remapping segments during force disconnects. Allocate the
897 898 * trash memory with a large size which is page aligned.
898 899 */
899 900 (void) ddi_umem_alloc((size_t)TRASHSIZE,
900 901 DDI_UMEM_TRASH, &remap_cookie);
901 902
902 903 /* initialize user segment id allocation variable */
903 904 rsm_nextavail_segmentid = (rsm_memseg_id_t)RSM_USER_APP_ID_BASE;
904 905
905 906 /*
906 907 * initialize the null_rsmpi_ops vector and the loopback adapter
907 908 */
908 909 rsmka_init_loopback();
909 910
910 911
911 912 ddi_report_dev(devi);
912 913
913 914 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_attach done\n"));
914 915
915 916 return (DDI_SUCCESS);
916 917 }
917 918
918 919 /*
919 920 * The call to mod_remove in the _fine routine will cause the system
920 921 * to call rsm_detach
921 922 */
922 923 /*ARGSUSED*/
923 924 static int
924 925 rsm_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
925 926 {
926 927 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_DDI);
927 928
928 929 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_detach enter\n"));
929 930
930 931 switch (cmd) {
931 932 case DDI_DETACH:
932 933 break;
933 934 default:
934 935 DBG_PRINTF((category, RSM_ERR,
935 936 "rsm:rsm_detach - cmd %x not supported\n",
936 937 cmd));
937 938 return (DDI_FAILURE);
938 939 }
939 940
940 941 mutex_enter(&rsm_drv_data.drv_lock);
941 942 while (rsm_drv_data.drv_state != RSM_DRV_OK)
942 943 cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
943 944 rsm_drv_data.drv_state = RSM_DRV_UNREG_PROCESSING;
944 945 mutex_exit(&rsm_drv_data.drv_lock);
945 946
946 947 /*
947 948 * Unregister the DR callback functions
948 949 */
949 950 if (rsm_enable_dr) {
950 951 #ifdef RSM_DRTEST
951 952 rsm_kphysm_setup_func_unregister(&rsm_dr_callback_vec,
952 953 (void *)NULL);
953 954 #else
954 955 kphysm_setup_func_unregister(&rsm_dr_callback_vec,
955 956 (void *)NULL);
956 957 #endif
957 958 }
958 959
959 960 mutex_enter(&rsm_drv_data.drv_lock);
960 961 ASSERT(rsm_drv_data.drv_state == RSM_DRV_UNREG_PROCESSING);
961 962 rsm_drv_data.drv_state = RSM_DRV_NEW;
962 963 mutex_exit(&rsm_drv_data.drv_lock);
963 964
964 965 ASSERT(rsm_suspend_list.list_head == NULL);
965 966
966 967 /*
967 968 * Release all resources, seglist, controller, ...
968 969 */
969 970
970 971 /* remove intersend queues */
971 972 /* remove registered services */
972 973
973 974
974 975 ddi_remove_minor_node(dip, DRIVER_NAME);
975 976 rsm_dip = NULL;
976 977
977 978 /*
978 979 * Free minor zero resource
979 980 */
980 981 {
981 982 rsmresource_t *p;
982 983
983 984 p = rsmresource_free(RSM_DRIVER_MINOR);
984 985 if (p) {
985 986 mutex_destroy(&p->rsmrc_lock);
986 987 kmem_free((void *)p, sizeof (*p));
987 988 }
988 989 }
989 990
990 991 /*
991 992 * Free resource table
992 993 */
993 994
994 995 rsmresource_destroy();
995 996
996 997 /*
997 998 * Free the hash tables
998 999 */
999 1000 rsmhash_free(&rsm_export_segs, rsm_hash_size);
1000 1001 rsmhash_free(&rsm_import_segs, rsm_hash_size);
1001 1002
1002 1003 kmem_free((void *)importer_list.bucket,
1003 1004 rsm_hash_size * sizeof (importing_token_t *));
1004 1005 importer_list.bucket = NULL;
1005 1006
1006 1007
1007 1008 /* free barrier page */
1008 1009 if (bar_cookie != NULL) {
1009 1010 ddi_umem_free(bar_cookie);
1010 1011 }
1011 1012 bar_va = NULL;
1012 1013 bar_cookie = NULL;
1013 1014
1014 1015 /*
1015 1016 * Free the memory allocated for the trash
1016 1017 */
1017 1018 if (remap_cookie != NULL) {
1018 1019 ddi_umem_free(remap_cookie);
1019 1020 }
1020 1021 remap_cookie = NULL;
1021 1022
1022 1023 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_detach done\n"));
1023 1024
1024 1025 return (DDI_SUCCESS);
1025 1026 }
1026 1027
1027 1028 /*ARGSUSED*/
1028 1029 static int
1029 1030 rsm_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
1030 1031 {
1031 1032 register int error;
1032 1033 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_DDI);
1033 1034
1034 1035 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_info enter\n"));
1035 1036
1036 1037 switch (infocmd) {
1037 1038 case DDI_INFO_DEVT2DEVINFO:
1038 1039 if (rsm_dip == NULL)
1039 1040 error = DDI_FAILURE;
1040 1041 else {
1041 1042 *result = (void *)rsm_dip;
1042 1043 error = DDI_SUCCESS;
1043 1044 }
1044 1045 break;
1045 1046 case DDI_INFO_DEVT2INSTANCE:
1046 1047 *result = (void *)0;
1047 1048 error = DDI_SUCCESS;
1048 1049 break;
1049 1050 default:
1050 1051 error = DDI_FAILURE;
1051 1052 }
1052 1053
1053 1054 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_info done\n"));
1054 1055 return (error);
1055 1056 }
1056 1057
1057 1058 adapter_t *
1058 1059 rsm_getadapter(rsm_ioctlmsg_t *msg, int mode)
1059 1060 {
1060 1061 adapter_t *adapter;
1061 1062 char adapter_devname[MAXNAMELEN];
1062 1063 int instance;
1063 1064 DBG_DEFINE(category,
1064 1065 RSM_KERNEL_AGENT | RSM_IMPORT | RSM_EXPORT | RSM_IOCTL);
1065 1066
1066 1067 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_getadapter enter\n"));
1067 1068
1068 1069 instance = msg->cnum;
1069 1070
1070 1071 if ((msg->cname_len <= 0) || (msg->cname_len > MAXNAMELEN)) {
1071 1072 return (NULL);
1072 1073 }
1073 1074
1074 1075 if (ddi_copyin(msg->cname, adapter_devname, msg->cname_len, mode))
1075 1076 return (NULL);
1076 1077
1077 1078 if (strcmp(adapter_devname, "loopback") == 0)
1078 1079 return (&loopback_adapter);
1079 1080
1080 1081 adapter = rsmka_lookup_adapter(adapter_devname, instance);
1081 1082
1082 1083 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_getadapter done\n"));
1083 1084
1084 1085 return (adapter);
1085 1086 }
1086 1087
1087 1088
1088 1089 /*
1089 1090 * *********************** Resource Number Management ********************
1090 1091 * All resources are stored in a simple hash table. The table is an array
1091 1092 * of pointers to resource blks. Each blk contains:
1092 1093 * base - base number of this blk
1093 1094 * used - number of used slots in this blk.
1094 1095 * blks - array of pointers to resource items.
1095 1096 * An entry in a resource blk is empty if it's NULL.
1096 1097 *
1097 1098 * We start with no resource array. Each time we run out of slots, we
1098 1099 * reallocate a new larger array and copy the pointer to the new array and
1099 1100 * a new resource blk is allocated and added to the hash table.
1100 1101 *
1101 1102 * The resource control block contains:
1102 1103 * root - array of pointer of resource blks
1103 1104 * sz - current size of array.
1104 1105 * len - last valid entry in array.
1105 1106 *
1106 1107 * A search operation based on a resource number is as follows:
1107 1108 * index = rnum / RESOURCE_BLKSZ;
1108 1109 * ASSERT(index < resource_block.len);
1109 1110 * ASSERT(index < resource_block.sz);
1110 1111 * offset = rnum % RESOURCE_BLKSZ;
1111 1112 * ASSERT(offset >= resource_block.root[index]->base);
1112 1113 * ASSERT(offset < resource_block.root[index]->base + RESOURCE_BLKSZ);
1113 1114 * return resource_block.root[index]->blks[offset];
1114 1115 *
1115 1116 * A resource blk is freed when its used count reaches zero.
1116 1117 */
1117 1118 static int
1118 1119 rsmresource_alloc(minor_t *rnum)
1119 1120 {
1120 1121
1121 1122 /* search for available resource slot */
1122 1123 int i, j, empty = -1;
1123 1124 rsmresource_blk_t *blk;
1124 1125
1125 1126 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1126 1127 "rsmresource_alloc enter\n"));
1127 1128
1128 1129 rw_enter(&rsm_resource.rsmrc_lock, RW_WRITER);
1129 1130
1130 1131 /* Try to find an empty slot */
1131 1132 for (i = 0; i < rsm_resource.rsmrc_len; i++) {
1132 1133 blk = rsm_resource.rsmrc_root[i];
1133 1134 if (blk != NULL && blk->rsmrcblk_avail > 0) {
1134 1135 /* found an empty slot in this blk */
1135 1136 for (j = 0; j < RSMRC_BLKSZ; j++) {
1136 1137 if (blk->rsmrcblk_blks[j] == NULL) {
1137 1138 *rnum = (minor_t)
1138 1139 (j + (i * RSMRC_BLKSZ));
1139 1140 /*
1140 1141 * obey gen page limits
1141 1142 */
1142 1143 if (*rnum >= max_segs + 1) {
1143 1144 if (empty < 0) {
1144 1145 rw_exit(&rsm_resource.
1145 1146 rsmrc_lock);
1146 1147 DBG_PRINTF((
1147 1148 RSM_KERNEL_ALL,
1148 1149 RSM_ERR,
1149 1150 "rsmresource"
1150 1151 "_alloc failed:"
1151 1152 "not enough res"
1152 1153 "%d\n", *rnum));
1153 1154 return (RSMERR_INSUFFICIENT_RESOURCES);
1154 1155 } else {
1155 1156 /* use empty slot */
1156 1157 break;
1157 1158 }
1158 1159
1159 1160 }
1160 1161
1161 1162 blk->rsmrcblk_blks[j] = RSMRC_RESERVED;
1162 1163 blk->rsmrcblk_avail--;
1163 1164 rw_exit(&rsm_resource.rsmrc_lock);
1164 1165 DBG_PRINTF((RSM_KERNEL_ALL,
1165 1166 RSM_DEBUG_VERBOSE,
1166 1167 "rsmresource_alloc done\n"));
1167 1168 return (RSM_SUCCESS);
1168 1169 }
1169 1170 }
1170 1171 } else if (blk == NULL && empty < 0) {
1171 1172 /* remember first empty slot */
1172 1173 empty = i;
1173 1174 }
1174 1175 }
1175 1176
1176 1177 /* Couldn't find anything, allocate a new blk */
1177 1178 /*
1178 1179 * Do we need to reallocate the root array
1179 1180 */
1180 1181 if (empty < 0) {
1181 1182 if (rsm_resource.rsmrc_len == rsm_resource.rsmrc_sz) {
1182 1183 /*
1183 1184 * Allocate new array and copy current stuff into it
1184 1185 */
1185 1186 rsmresource_blk_t **p;
1186 1187 uint_t newsz = (uint_t)rsm_resource.rsmrc_sz +
1187 1188 RSMRC_BLKSZ;
1188 1189 /*
1189 1190 * Don't allocate more that max valid rnum
1190 1191 */
1191 1192 if (rsm_resource.rsmrc_len*RSMRC_BLKSZ >=
1192 1193 max_segs + 1) {
1193 1194 rw_exit(&rsm_resource.rsmrc_lock);
1194 1195 return (RSMERR_INSUFFICIENT_RESOURCES);
1195 1196 }
1196 1197
1197 1198 p = (rsmresource_blk_t **)kmem_zalloc(
1198 1199 newsz * sizeof (*p),
1199 1200 KM_SLEEP);
1200 1201
1201 1202 if (rsm_resource.rsmrc_root) {
1202 1203 uint_t oldsz;
1203 1204
1204 1205 oldsz = (uint_t)(rsm_resource.rsmrc_sz *
1205 1206 (int)sizeof (*p));
1206 1207
1207 1208 /*
1208 1209 * Copy old data into new space and
1209 1210 * free old stuff
1210 1211 */
1211 1212 bcopy(rsm_resource.rsmrc_root, p, oldsz);
1212 1213 kmem_free(rsm_resource.rsmrc_root, oldsz);
1213 1214 }
1214 1215
1215 1216 rsm_resource.rsmrc_root = p;
1216 1217 rsm_resource.rsmrc_sz = (int)newsz;
1217 1218 }
1218 1219
1219 1220 empty = rsm_resource.rsmrc_len;
1220 1221 rsm_resource.rsmrc_len++;
1221 1222 }
1222 1223
1223 1224 /*
1224 1225 * Allocate a new blk
1225 1226 */
1226 1227 blk = (rsmresource_blk_t *)kmem_zalloc(sizeof (*blk), KM_SLEEP);
1227 1228 ASSERT(rsm_resource.rsmrc_root[empty] == NULL);
1228 1229 rsm_resource.rsmrc_root[empty] = blk;
1229 1230 blk->rsmrcblk_avail = RSMRC_BLKSZ - 1;
1230 1231
1231 1232 /*
1232 1233 * Allocate slot
1233 1234 */
1234 1235
1235 1236 *rnum = (minor_t)(empty * RSMRC_BLKSZ);
1236 1237
1237 1238 /*
1238 1239 * watch out not to exceed bounds of barrier page
1239 1240 */
1240 1241 if (*rnum >= max_segs + 1) {
1241 1242 rw_exit(&rsm_resource.rsmrc_lock);
1242 1243 DBG_PRINTF((RSM_KERNEL_ALL, RSM_ERR,
1243 1244 "rsmresource_alloc failed %d\n", *rnum));
1244 1245
1245 1246 return (RSMERR_INSUFFICIENT_RESOURCES);
1246 1247 }
1247 1248 blk->rsmrcblk_blks[0] = RSMRC_RESERVED;
1248 1249
1249 1250
1250 1251 rw_exit(&rsm_resource.rsmrc_lock);
1251 1252
1252 1253 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1253 1254 "rsmresource_alloc done\n"));
1254 1255
1255 1256 return (RSM_SUCCESS);
1256 1257 }
1257 1258
1258 1259 static rsmresource_t *
1259 1260 rsmresource_free(minor_t rnum)
1260 1261 {
1261 1262
1262 1263 /* search for available resource slot */
1263 1264 int i, j;
1264 1265 rsmresource_blk_t *blk;
1265 1266 rsmresource_t *p;
1266 1267
1267 1268 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1268 1269 "rsmresource_free enter\n"));
1269 1270
1270 1271 i = (int)(rnum / RSMRC_BLKSZ);
1271 1272 j = (int)(rnum % RSMRC_BLKSZ);
1272 1273
1273 1274 if (i >= rsm_resource.rsmrc_len) {
1274 1275 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1275 1276 "rsmresource_free done\n"));
1276 1277 return (NULL);
1277 1278 }
1278 1279
1279 1280 rw_enter(&rsm_resource.rsmrc_lock, RW_WRITER);
1280 1281
1281 1282 ASSERT(rsm_resource.rsmrc_root);
1282 1283 ASSERT(i < rsm_resource.rsmrc_len);
1283 1284 ASSERT(i < rsm_resource.rsmrc_sz);
1284 1285 blk = rsm_resource.rsmrc_root[i];
1285 1286 if (blk == NULL) {
1286 1287 rw_exit(&rsm_resource.rsmrc_lock);
1287 1288 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1288 1289 "rsmresource_free done\n"));
1289 1290 return (NULL);
1290 1291 }
1291 1292
1292 1293 ASSERT(blk->rsmrcblk_blks[j]); /* reserved or full */
1293 1294
1294 1295 p = blk->rsmrcblk_blks[j];
1295 1296 if (p == RSMRC_RESERVED) {
1296 1297 p = NULL;
1297 1298 }
1298 1299
1299 1300 blk->rsmrcblk_blks[j] = NULL;
1300 1301 blk->rsmrcblk_avail++;
1301 1302 if (blk->rsmrcblk_avail == RSMRC_BLKSZ) {
1302 1303 /* free this blk */
1303 1304 kmem_free(blk, sizeof (*blk));
1304 1305 rsm_resource.rsmrc_root[i] = NULL;
1305 1306 }
1306 1307
1307 1308 rw_exit(&rsm_resource.rsmrc_lock);
1308 1309
1309 1310 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1310 1311 "rsmresource_free done\n"));
1311 1312
1312 1313 return (p);
1313 1314 }
1314 1315
1315 1316 static rsmresource_t *
1316 1317 rsmresource_lookup(minor_t rnum, int lock)
1317 1318 {
1318 1319 int i, j;
1319 1320 rsmresource_blk_t *blk;
1320 1321 rsmresource_t *p;
1321 1322
1322 1323 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1323 1324 "rsmresource_lookup enter\n"));
1324 1325
1325 1326 /* Find resource and lock it in READER mode */
1326 1327 /* search for available resource slot */
1327 1328
1328 1329 i = (int)(rnum / RSMRC_BLKSZ);
1329 1330 j = (int)(rnum % RSMRC_BLKSZ);
1330 1331
1331 1332 if (i >= rsm_resource.rsmrc_len) {
1332 1333 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1333 1334 "rsmresource_lookup done\n"));
1334 1335 return (NULL);
1335 1336 }
1336 1337
1337 1338 rw_enter(&rsm_resource.rsmrc_lock, RW_READER);
1338 1339
1339 1340 blk = rsm_resource.rsmrc_root[i];
1340 1341 if (blk != NULL) {
1341 1342 ASSERT(i < rsm_resource.rsmrc_len);
1342 1343 ASSERT(i < rsm_resource.rsmrc_sz);
1343 1344
1344 1345 p = blk->rsmrcblk_blks[j];
1345 1346 if (lock == RSM_LOCK) {
1346 1347 if (p != RSMRC_RESERVED) {
1347 1348 mutex_enter(&p->rsmrc_lock);
1348 1349 } else {
1349 1350 p = NULL;
1350 1351 }
1351 1352 }
1352 1353 } else {
1353 1354 p = NULL;
1354 1355 }
1355 1356 rw_exit(&rsm_resource.rsmrc_lock);
1356 1357
1357 1358 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1358 1359 "rsmresource_lookup done\n"));
1359 1360
1360 1361 return (p);
1361 1362 }
1362 1363
1363 1364 static void
1364 1365 rsmresource_insert(minor_t rnum, rsmresource_t *p, rsm_resource_type_t type)
1365 1366 {
1366 1367 /* Find resource and lock it in READER mode */
1367 1368 /* Caller can upgrade if need be */
1368 1369 /* search for available resource slot */
1369 1370 int i, j;
1370 1371 rsmresource_blk_t *blk;
1371 1372
1372 1373 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1373 1374 "rsmresource_insert enter\n"));
1374 1375
1375 1376 i = (int)(rnum / RSMRC_BLKSZ);
1376 1377 j = (int)(rnum % RSMRC_BLKSZ);
1377 1378
1378 1379 p->rsmrc_type = type;
1379 1380 p->rsmrc_num = rnum;
1380 1381
1381 1382 rw_enter(&rsm_resource.rsmrc_lock, RW_READER);
1382 1383
1383 1384 ASSERT(rsm_resource.rsmrc_root);
1384 1385 ASSERT(i < rsm_resource.rsmrc_len);
1385 1386 ASSERT(i < rsm_resource.rsmrc_sz);
1386 1387
1387 1388 blk = rsm_resource.rsmrc_root[i];
1388 1389 ASSERT(blk);
1389 1390
1390 1391 ASSERT(blk->rsmrcblk_blks[j] == RSMRC_RESERVED);
1391 1392
1392 1393 blk->rsmrcblk_blks[j] = p;
1393 1394
1394 1395 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1395 1396 "rsmresource_insert done\n"));
1396 1397
1397 1398 rw_exit(&rsm_resource.rsmrc_lock);
1398 1399 }
1399 1400
1400 1401 static void
1401 1402 rsmresource_destroy()
1402 1403 {
1403 1404 int i, j;
1404 1405
1405 1406 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1406 1407 "rsmresource_destroy enter\n"));
1407 1408
1408 1409 rw_enter(&rsm_resource.rsmrc_lock, RW_WRITER);
1409 1410
1410 1411 for (i = 0; i < rsm_resource.rsmrc_len; i++) {
1411 1412 rsmresource_blk_t *blk;
1412 1413
1413 1414 blk = rsm_resource.rsmrc_root[i];
1414 1415 if (blk == NULL) {
1415 1416 continue;
1416 1417 }
1417 1418 for (j = 0; j < RSMRC_BLKSZ; j++) {
1418 1419 if (blk->rsmrcblk_blks[j] != NULL) {
1419 1420 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1420 1421 "Not null slot %d, %lx\n", j,
1421 1422 (size_t)blk->rsmrcblk_blks[j]));
1422 1423 }
1423 1424 }
1424 1425 kmem_free(blk, sizeof (*blk));
1425 1426 rsm_resource.rsmrc_root[i] = NULL;
1426 1427 }
1427 1428 if (rsm_resource.rsmrc_root) {
1428 1429 i = rsm_resource.rsmrc_sz * (int)sizeof (rsmresource_blk_t *);
1429 1430 kmem_free(rsm_resource.rsmrc_root, (uint_t)i);
1430 1431 rsm_resource.rsmrc_root = NULL;
1431 1432 rsm_resource.rsmrc_len = 0;
1432 1433 rsm_resource.rsmrc_sz = 0;
1433 1434 }
1434 1435
1435 1436 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1436 1437 "rsmresource_destroy done\n"));
1437 1438
1438 1439 rw_exit(&rsm_resource.rsmrc_lock);
1439 1440 }
1440 1441
1441 1442
1442 1443 /* ******************** Generic Key Hash Table Management ********* */
1443 1444 static rsmresource_t *
1444 1445 rsmhash_lookup(rsmhash_table_t *rhash, rsm_memseg_id_t key,
1445 1446 rsm_resource_state_t state)
1446 1447 {
1447 1448 rsmresource_t *p;
1448 1449 uint_t hashval;
1449 1450 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1450 1451
1451 1452 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_lookup enter\n"));
1452 1453
1453 1454 hashval = rsmhash(key);
1454 1455
1455 1456 DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmhash_lookup %u=%d\n",
1456 1457 key, hashval));
1457 1458
1458 1459 rw_enter(&rhash->rsmhash_rw, RW_READER);
1459 1460
1460 1461 p = (rsmresource_t *)rsmhash_getbkt(rhash, hashval);
1461 1462
1462 1463 for (; p; p = p->rsmrc_next) {
1463 1464 if (p->rsmrc_key == key) {
1464 1465 /* acquire resource lock */
1465 1466 RSMRC_LOCK(p);
1466 1467 break;
1467 1468 }
1468 1469 }
1469 1470
1470 1471 rw_exit(&rhash->rsmhash_rw);
1471 1472
1472 1473 if (p != NULL && p->rsmrc_state != state) {
1473 1474 /* state changed, release lock and return null */
1474 1475 RSMRC_UNLOCK(p);
1475 1476 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
1476 1477 "rsmhash_lookup done: state changed\n"));
1477 1478 return (NULL);
1478 1479 }
1479 1480
1480 1481 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_lookup done\n"));
1481 1482
1482 1483 return (p);
1483 1484 }
1484 1485
1485 1486 static void
1486 1487 rsmhash_rm(rsmhash_table_t *rhash, rsmresource_t *rcelm)
1487 1488 {
1488 1489 rsmresource_t *p, **back;
1489 1490 uint_t hashval;
1490 1491 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1491 1492
1492 1493 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_rm enter\n"));
1493 1494
1494 1495 hashval = rsmhash(rcelm->rsmrc_key);
1495 1496
1496 1497 DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmhash_rm %u=%d\n",
1497 1498 rcelm->rsmrc_key, hashval));
1498 1499
1499 1500 /*
1500 1501 * It's ok not to find the segment.
1501 1502 */
1502 1503 rw_enter(&rhash->rsmhash_rw, RW_WRITER);
1503 1504
1504 1505 back = (rsmresource_t **)rsmhash_bktaddr(rhash, hashval);
1505 1506
1506 1507 for (; (p = *back) != NULL; back = &p->rsmrc_next) {
1507 1508 if (p == rcelm) {
1508 1509 *back = rcelm->rsmrc_next;
1509 1510 break;
1510 1511 }
1511 1512 }
1512 1513
1513 1514 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_rm done\n"));
1514 1515
1515 1516 rw_exit(&rhash->rsmhash_rw);
1516 1517 }
1517 1518
1518 1519 static int
1519 1520 rsmhash_add(rsmhash_table_t *rhash, rsmresource_t *new, rsm_memseg_id_t key,
1520 1521 int dup_check, rsm_resource_state_t state)
1521 1522 {
1522 1523 rsmresource_t *p = NULL, **bktp;
1523 1524 uint_t hashval;
1524 1525 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1525 1526
1526 1527 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_add enter\n"));
1527 1528
1528 1529 /* lock table */
1529 1530 rw_enter(&rhash->rsmhash_rw, RW_WRITER);
1530 1531
1531 1532 /*
1532 1533 * If the current resource state is other than the state passed in
1533 1534 * then the resource is (probably) already on the list. eg. for an
1534 1535 * import segment if the state is not RSM_STATE_NEW then it's on the
1535 1536 * list already.
1536 1537 */
1537 1538 RSMRC_LOCK(new);
1538 1539 if (new->rsmrc_state != state) {
1539 1540 RSMRC_UNLOCK(new);
1540 1541 rw_exit(&rhash->rsmhash_rw);
1541 1542 return (RSMERR_BAD_SEG_HNDL);
1542 1543 }
1543 1544
1544 1545 hashval = rsmhash(key);
1545 1546 DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmhash_add %d\n", hashval));
1546 1547
1547 1548 if (dup_check) {
1548 1549 /*
1549 1550 * Used for checking export segments; don't want to have
1550 1551 * the same key used for multiple segments.
1551 1552 */
1552 1553
1553 1554 p = (rsmresource_t *)rsmhash_getbkt(rhash, hashval);
1554 1555
1555 1556 for (; p; p = p->rsmrc_next) {
1556 1557 if (p->rsmrc_key == key) {
1557 1558 RSMRC_UNLOCK(new);
1558 1559 break;
1559 1560 }
1560 1561 }
1561 1562 }
1562 1563
1563 1564 if (p == NULL) {
1564 1565 /* Key doesn't exist, add it */
1565 1566
1566 1567 bktp = (rsmresource_t **)rsmhash_bktaddr(rhash, hashval);
1567 1568
1568 1569 new->rsmrc_key = key;
1569 1570 new->rsmrc_next = *bktp;
1570 1571 *bktp = new;
1571 1572 }
1572 1573
1573 1574 rw_exit(&rhash->rsmhash_rw);
1574 1575
1575 1576 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_add done\n"));
1576 1577
1577 1578 return (p == NULL ? RSM_SUCCESS : RSMERR_SEGID_IN_USE);
1578 1579 }
1579 1580
1580 1581 /*
1581 1582 * XOR each byte of the key.
1582 1583 */
1583 1584 static uint_t
1584 1585 rsmhash(rsm_memseg_id_t key)
1585 1586 {
1586 1587 uint_t hash = key;
1587 1588
1588 1589 hash ^= (key >> 8);
1589 1590 hash ^= (key >> 16);
1590 1591 hash ^= (key >> 24);
1591 1592
1592 1593 return (hash % rsm_hash_size);
1593 1594
1594 1595 }
1595 1596
1596 1597 /*
1597 1598 * generic function to get a specific bucket
1598 1599 */
1599 1600 static void *
1600 1601 rsmhash_getbkt(rsmhash_table_t *rhash, uint_t hashval)
1601 1602 {
1602 1603
1603 1604 if (rhash->bucket == NULL)
1604 1605 return (NULL);
1605 1606 else
1606 1607 return ((void *)rhash->bucket[hashval]);
1607 1608 }
1608 1609
1609 1610 /*
1610 1611 * generic function to get a specific bucket's address
1611 1612 */
1612 1613 static void **
1613 1614 rsmhash_bktaddr(rsmhash_table_t *rhash, uint_t hashval)
1614 1615 {
1615 1616 if (rhash->bucket == NULL)
1616 1617 return (NULL);
1617 1618 else
1618 1619 return ((void **)&(rhash->bucket[hashval]));
1619 1620 }
1620 1621
1621 1622 /*
1622 1623 * generic function to alloc a hash table
1623 1624 */
1624 1625 static void
1625 1626 rsmhash_alloc(rsmhash_table_t *rhash, int size)
1626 1627 {
1627 1628 rhash->bucket = (rsmresource_t **)
1628 1629 kmem_zalloc(size * sizeof (rsmresource_t *), KM_SLEEP);
1629 1630 }
1630 1631
1631 1632 /*
1632 1633 * generic function to free a hash table
1633 1634 */
1634 1635 static void
1635 1636 rsmhash_free(rsmhash_table_t *rhash, int size)
1636 1637 {
1637 1638
1638 1639 kmem_free((void *)rhash->bucket, size * sizeof (caddr_t));
1639 1640 rhash->bucket = NULL;
1640 1641
1641 1642 }
/* *********************** Exported Segment Key Management ************ */

/*
 * Wrappers around the generic hash routines for the export segment table.
 * rsmexport_add() requests a duplicate-key check and expects the segment
 * to be in RSM_STATE_BIND; rsmexport_lookup() only returns segments in
 * RSM_STATE_EXPORT.  Macro arguments are parenthesized so that arbitrary
 * expressions can be passed safely.
 */
#define	rsmexport_add(new, key)		\
	rsmhash_add(&rsm_export_segs, (rsmresource_t *)(new), (key), 1, \
	    RSM_STATE_BIND)

#define	rsmexport_rm(arg)	\
	rsmhash_rm(&rsm_export_segs, (rsmresource_t *)(arg))

#define	rsmexport_lookup(key)	\
	((rsmseg_t *)rsmhash_lookup(&rsm_export_segs, (key), \
	    RSM_STATE_EXPORT))

/* ************************** Import Segment List Management ********** */

/*
 * Add segment to import list. This will be useful for paging and loopback
 * segment unloading.  No duplicate-key check is requested and the segment
 * is expected to be in RSM_STATE_NEW.
 */
#define	rsmimport_add(arg, key)	\
	rsmhash_add(&rsm_import_segs, (rsmresource_t *)(arg), (key), 0, \
	    RSM_STATE_NEW)

#define	rsmimport_rm(arg)	\
	rsmhash_rm(&rsm_import_segs, (rsmresource_t *)(arg))

/*
 *	#define rsmimport_lookup(key)	\
 *	(rsmseg_t *)rsmhash_lookup(&rsm_import_segs, (key), RSM_STATE_CONNECT)
 */
1671 1672
1672 1673 /*
1673 1674 * increase the ref count and make the import segment point to the
1674 1675 * shared data structure. Return a pointer to the share data struct
1675 1676 * and the shared data struct is locked upon return
1676 1677 */
1677 1678 static rsm_import_share_t *
1678 1679 rsmshare_get(rsm_memseg_id_t key, rsm_node_id_t node, adapter_t *adapter,
1679 1680 rsmseg_t *segp)
1680 1681 {
1681 1682 uint_t hash;
1682 1683 rsmresource_t *p;
1683 1684 rsm_import_share_t *shdatap;
1684 1685 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1685 1686
1686 1687 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmshare_get enter\n"));
1687 1688
1688 1689 hash = rsmhash(key);
1689 1690 /* lock table */
1690 1691 rw_enter(&rsm_import_segs.rsmhash_rw, RW_WRITER);
1691 1692 DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmshare_get:key=%u, hash=%d\n",
1692 1693 key, hash));
1693 1694
1694 1695 p = (rsmresource_t *)rsmhash_getbkt(&rsm_import_segs, hash);
1695 1696
1696 1697 for (; p; p = p->rsmrc_next) {
1697 1698 /*
1698 1699 * Look for an entry that is importing the same exporter
1699 1700 * with the share data structure allocated.
1700 1701 */
1701 1702 if ((p->rsmrc_key == key) &&
1702 1703 (p->rsmrc_node == node) &&
1703 1704 (p->rsmrc_adapter == adapter) &&
1704 1705 (((rsmseg_t *)p)->s_share != NULL)) {
1705 1706 shdatap = ((rsmseg_t *)p)->s_share;
1706 1707 break;
1707 1708 }
1708 1709 }
1709 1710
1710 1711 if (p == NULL) {
1711 1712 /* we are the first importer, create the shared data struct */
1712 1713 shdatap = kmem_zalloc(sizeof (rsm_import_share_t), KM_SLEEP);
1713 1714 shdatap->rsmsi_state = RSMSI_STATE_NEW;
1714 1715 shdatap->rsmsi_segid = key;
1715 1716 shdatap->rsmsi_node = node;
1716 1717 mutex_init(&shdatap->rsmsi_lock, NULL, MUTEX_DRIVER, NULL);
1717 1718 cv_init(&shdatap->rsmsi_cv, NULL, CV_DRIVER, 0);
1718 1719 }
1719 1720
1720 1721 rsmseglock_acquire(segp);
1721 1722
1722 1723 /* we grab the shared lock before returning from this function */
1723 1724 mutex_enter(&shdatap->rsmsi_lock);
1724 1725
1725 1726 shdatap->rsmsi_refcnt++;
1726 1727 segp->s_share = shdatap;
1727 1728
1728 1729 rsmseglock_release(segp);
1729 1730
1730 1731 rw_exit(&rsm_import_segs.rsmhash_rw);
1731 1732
1732 1733 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmshare_get done\n"));
1733 1734
1734 1735 return (shdatap);
1735 1736 }
1736 1737
1737 1738 /*
1738 1739 * the shared data structure should be locked before calling
1739 1740 * rsmsharecv_signal().
1740 1741 * Change the state and signal any waiting segments.
1741 1742 */
1742 1743 void
1743 1744 rsmsharecv_signal(rsmseg_t *seg, int oldstate, int newstate)
1744 1745 {
1745 1746 ASSERT(rsmsharelock_held(seg));
1746 1747
1747 1748 if (seg->s_share->rsmsi_state == oldstate) {
1748 1749 seg->s_share->rsmsi_state = newstate;
1749 1750 cv_broadcast(&seg->s_share->rsmsi_cv);
1750 1751 }
1751 1752 }
1752 1753
1753 1754 /*
1754 1755 * Add to the hash table
1755 1756 */
1756 1757 static void
1757 1758 importer_list_add(rsm_node_id_t node, rsm_memseg_id_t key, rsm_addr_t hwaddr,
1758 1759 void *cookie)
1759 1760 {
1760 1761
1761 1762 importing_token_t *head;
1762 1763 importing_token_t *new_token;
1763 1764 int index;
1764 1765
1765 1766 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1766 1767
1767 1768 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_add enter\n"));
1768 1769
1769 1770 new_token = kmem_zalloc(sizeof (importing_token_t), KM_SLEEP);
1770 1771 new_token->importing_node = node;
1771 1772 new_token->key = key;
1772 1773 new_token->import_segment_cookie = cookie;
1773 1774 new_token->importing_adapter_hwaddr = hwaddr;
1774 1775
1775 1776 index = rsmhash(key);
1776 1777
1777 1778 mutex_enter(&importer_list.lock);
1778 1779
1779 1780 head = importer_list.bucket[index];
1780 1781 importer_list.bucket[index] = new_token;
1781 1782 new_token->next = head;
1782 1783 mutex_exit(&importer_list.lock);
1783 1784
1784 1785 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_add done\n"));
1785 1786 }
1786 1787
1787 1788 static void
1788 1789 importer_list_rm(rsm_node_id_t node, rsm_memseg_id_t key, void *cookie)
1789 1790 {
1790 1791
1791 1792 importing_token_t *prev, *token = NULL;
1792 1793 int index;
1793 1794 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1794 1795
1795 1796 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_rm enter\n"));
1796 1797
1797 1798 index = rsmhash(key);
1798 1799
1799 1800 mutex_enter(&importer_list.lock);
1800 1801
1801 1802 token = importer_list.bucket[index];
1802 1803
1803 1804 prev = token;
1804 1805 while (token != NULL) {
1805 1806 if (token->importing_node == node &&
1806 1807 token->import_segment_cookie == cookie) {
1807 1808 if (prev == token)
1808 1809 importer_list.bucket[index] = token->next;
1809 1810 else
1810 1811 prev->next = token->next;
1811 1812 kmem_free((void *)token, sizeof (*token));
1812 1813 break;
1813 1814 } else {
1814 1815 prev = token;
1815 1816 token = token->next;
1816 1817 }
1817 1818 }
1818 1819
1819 1820 mutex_exit(&importer_list.lock);
1820 1821
1821 1822 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_rm done\n"));
1822 1823
1823 1824
1824 1825 }
1825 1826
1826 1827 /* **************************Segment Structure Management ************* */
1827 1828
1828 1829 /*
1829 1830 * Free segment structure
1830 1831 */
1831 1832 static void
1832 1833 rsmseg_free(rsmseg_t *seg)
1833 1834 {
1834 1835
1835 1836 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1836 1837
1837 1838 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_free enter\n"));
1838 1839
1839 1840 /* need to take seglock here to avoid race with rsmmap_unmap() */
1840 1841 rsmseglock_acquire(seg);
1841 1842 if (seg->s_ckl != NULL) {
1842 1843 /* Segment is still busy */
1843 1844 seg->s_state = RSM_STATE_END;
1844 1845 rsmseglock_release(seg);
1845 1846 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
1846 1847 "rsmseg_free done\n"));
1847 1848 return;
1848 1849 }
1849 1850
1850 1851 rsmseglock_release(seg);
1851 1852
1852 1853 ASSERT(seg->s_state == RSM_STATE_END || seg->s_state == RSM_STATE_NEW);
1853 1854
1854 1855 /*
1855 1856 * If it's an importer decrement the refcount
1856 1857 * and if its down to zero free the shared data structure.
1857 1858 * This is where failures during rsm_connect() are unrefcounted
1858 1859 */
1859 1860 if (seg->s_share != NULL) {
1860 1861
1861 1862 ASSERT(seg->s_type == RSM_RESOURCE_IMPORT_SEGMENT);
1862 1863
1863 1864 rsmsharelock_acquire(seg);
1864 1865
1865 1866 ASSERT(seg->s_share->rsmsi_refcnt > 0);
1866 1867
1867 1868 seg->s_share->rsmsi_refcnt--;
1868 1869
1869 1870 if (seg->s_share->rsmsi_refcnt == 0) {
1870 1871 rsmsharelock_release(seg);
1871 1872 mutex_destroy(&seg->s_share->rsmsi_lock);
1872 1873 cv_destroy(&seg->s_share->rsmsi_cv);
1873 1874 kmem_free((void *)(seg->s_share),
1874 1875 sizeof (rsm_import_share_t));
1875 1876 } else {
1876 1877 rsmsharelock_release(seg);
1877 1878 }
1878 1879 /*
1879 1880 * The following needs to be done after any
1880 1881 * rsmsharelock calls which use seg->s_share.
1881 1882 */
1882 1883 seg->s_share = NULL;
1883 1884 }
1884 1885
1885 1886 cv_destroy(&seg->s_cv);
1886 1887 mutex_destroy(&seg->s_lock);
1887 1888 rsmacl_free(seg->s_acl, seg->s_acl_len);
1888 1889 rsmpiacl_free(seg->s_acl_in, seg->s_acl_len);
1889 1890 if (seg->s_adapter)
1890 1891 rsmka_release_adapter(seg->s_adapter);
1891 1892
1892 1893 kmem_free((void *)seg, sizeof (*seg));
1893 1894
1894 1895 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_free done\n"));
1895 1896
1896 1897 }
1897 1898
1898 1899
1899 1900 static rsmseg_t *
1900 1901 rsmseg_alloc(minor_t num, struct cred *cred)
1901 1902 {
1902 1903 rsmseg_t *new;
1903 1904 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1904 1905
1905 1906 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_alloc enter\n"));
1906 1907 /*
1907 1908 * allocate memory for new segment. This should be a segkmem cache.
1908 1909 */
1909 1910 new = (rsmseg_t *)kmem_zalloc(sizeof (*new), KM_SLEEP);
1910 1911
1911 1912 new->s_state = RSM_STATE_NEW;
1912 1913 new->s_minor = num;
1913 1914 new->s_acl_len = 0;
1914 1915 new->s_cookie = NULL;
1915 1916 new->s_adapter = NULL;
1916 1917
1917 1918 new->s_mode = 0777 & ~PTOU((ttoproc(curthread)))->u_cmask;
1918 1919 /* we don't have a key yet, will set at export/connect */
1919 1920 new->s_uid = crgetuid(cred);
1920 1921 new->s_gid = crgetgid(cred);
1921 1922
1922 1923 mutex_init(&new->s_lock, NULL, MUTEX_DRIVER, (void *)NULL);
1923 1924 cv_init(&new->s_cv, NULL, CV_DRIVER, 0);
1924 1925
1925 1926 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_alloc done\n"));
1926 1927
1927 1928 return (new);
1928 1929 }
1929 1930
1930 1931 /* ******************************** Driver Open/Close/Poll *************** */
1931 1932
1932 1933 /*ARGSUSED1*/
1933 1934 static int
1934 1935 rsm_open(dev_t *devp, int flag, int otyp, struct cred *cred)
1935 1936 {
1936 1937 minor_t rnum;
1937 1938 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL| RSM_DDI);
1938 1939
1939 1940 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_open enter\n"));
1940 1941 /*
1941 1942 * Char only
1942 1943 */
1943 1944 if (otyp != OTYP_CHR) {
1944 1945 DBG_PRINTF((category, RSM_ERR, "rsm_open: bad otyp\n"));
1945 1946 return (EINVAL);
1946 1947 }
1947 1948
1948 1949 /*
1949 1950 * Only zero can be opened, clones are used for resources.
1950 1951 */
1951 1952 if (getminor(*devp) != RSM_DRIVER_MINOR) {
1952 1953 DBG_PRINTF((category, RSM_ERR,
1953 1954 "rsm_open: bad minor %d\n", getminor(*devp)));
1954 1955 return (ENODEV);
1955 1956 }
1956 1957
1957 1958 if ((flag & FEXCL) != 0 && secpolicy_excl_open(cred) != 0) {
1958 1959 DBG_PRINTF((category, RSM_ERR, "rsm_open: bad perm\n"));
1959 1960 return (EPERM);
1960 1961 }
1961 1962
1962 1963 if (!(flag & FWRITE)) {
1963 1964 /*
1964 1965 * The library function _rsm_librsm_init calls open for
1965 1966 * /dev/rsm with flag set to O_RDONLY. We want a valid
1966 1967 * file descriptor to be returned for minor device zero.
1967 1968 */
1968 1969
1969 1970 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
1970 1971 "rsm_open RDONLY done\n"));
1971 1972 return (DDI_SUCCESS);
1972 1973 }
1973 1974
1974 1975 /*
1975 1976 * - allocate new minor number and segment.
1976 1977 * - add segment to list of all segments.
1977 1978 * - set minordev data to segment
1978 1979 * - update devp argument to new device
1979 1980 * - update s_cred to cred; make sure you do crhold(cred);
1980 1981 */
1981 1982
1982 1983 /* allocate a new resource number */
1983 1984 if (rsmresource_alloc(&rnum) == RSM_SUCCESS) {
1984 1985 /*
1985 1986 * We will bind this minor to a specific resource in first
1986 1987 * ioctl
1987 1988 */
1988 1989 *devp = makedevice(getmajor(*devp), rnum);
1989 1990 } else {
1990 1991 return (EAGAIN);
1991 1992 }
1992 1993
1993 1994 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_open done\n"));
1994 1995 return (DDI_SUCCESS);
1995 1996 }
1996 1997
1997 1998 static void
1998 1999 rsmseg_close(rsmseg_t *seg, int force_flag)
1999 2000 {
2000 2001 int e = RSM_SUCCESS;
2001 2002
2002 2003 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL| RSM_DDI);
2003 2004
2004 2005 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_close enter\n"));
2005 2006
2006 2007 rsmseglock_acquire(seg);
2007 2008 if (!force_flag && (seg->s_hdr.rsmrc_type ==
2008 2009 RSM_RESOURCE_EXPORT_SEGMENT)) {
2009 2010 /*
2010 2011 * If we are processing rsm_close wait for force_destroy
2011 2012 * processing to complete since force_destroy processing
2012 2013 * needs to finish first before we can free the segment.
2013 2014 * force_destroy is only for export segments
2014 2015 */
2015 2016 while (seg->s_flags & RSM_FORCE_DESTROY_WAIT) {
2016 2017 cv_wait(&seg->s_cv, &seg->s_lock);
2017 2018 }
2018 2019 }
2019 2020 rsmseglock_release(seg);
2020 2021
2021 2022 /* It's ok to read the state without a lock */
2022 2023 switch (seg->s_state) {
2023 2024 case RSM_STATE_EXPORT:
2024 2025 case RSM_STATE_EXPORT_QUIESCING:
2025 2026 case RSM_STATE_EXPORT_QUIESCED:
2026 2027 e = rsm_unpublish(seg, 1);
2027 2028 /* FALLTHRU */
2028 2029 case RSM_STATE_BIND_QUIESCED:
2029 2030 /* FALLTHRU */
2030 2031 case RSM_STATE_BIND:
2031 2032 e = rsm_unbind(seg);
2032 2033 if (e != RSM_SUCCESS && force_flag == 1)
2033 2034 return;
2034 2035 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_EXPORT_SEGMENT);
2035 2036 /* FALLTHRU */
2036 2037 case RSM_STATE_NEW_QUIESCED:
2037 2038 rsmseglock_acquire(seg);
2038 2039 seg->s_state = RSM_STATE_NEW;
2039 2040 cv_broadcast(&seg->s_cv);
2040 2041 rsmseglock_release(seg);
2041 2042 break;
2042 2043 case RSM_STATE_NEW:
2043 2044 break;
2044 2045 case RSM_STATE_ZOMBIE:
2045 2046 /*
2046 2047 * Segments in this state have been removed off the
2047 2048 * exported segments list and have been unpublished
2048 2049 * and unbind. These segments have been removed during
2049 2050 * a callback to the rsm_export_force_destroy, which
2050 2051 * is called for the purpose of unlocking these
2051 2052 * exported memory segments when a process exits but
2052 2053 * leaves the segments locked down since rsm_close is
2053 2054 * is not called for the segments. This can happen
2054 2055 * when a process calls fork or exec and then exits.
2055 2056 * Once the segments are in the ZOMBIE state, all that
2056 2057 * remains is to destroy them when rsm_close is called.
2057 2058 * This is done here. Thus, for such segments the
2058 2059 * the state is changed to new so that later in this
2059 2060 * function rsmseg_free is called.
2060 2061 */
2061 2062 rsmseglock_acquire(seg);
2062 2063 seg->s_state = RSM_STATE_NEW;
2063 2064 rsmseglock_release(seg);
2064 2065 break;
2065 2066 case RSM_STATE_MAP_QUIESCE:
2066 2067 case RSM_STATE_ACTIVE:
2067 2068 /* Disconnect will handle the unmap */
2068 2069 case RSM_STATE_CONN_QUIESCE:
2069 2070 case RSM_STATE_CONNECT:
2070 2071 case RSM_STATE_DISCONNECT:
2071 2072 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
2072 2073 (void) rsm_disconnect(seg);
2073 2074 break;
2074 2075 case RSM_STATE_MAPPING:
2075 2076 /*FALLTHRU*/
2076 2077 case RSM_STATE_END:
2077 2078 DBG_PRINTF((category, RSM_ERR,
2078 2079 "Invalid segment state %d in rsm_close\n", seg->s_state));
2079 2080 break;
2080 2081 default:
2081 2082 DBG_PRINTF((category, RSM_ERR,
2082 2083 "Invalid segment state %d in rsm_close\n", seg->s_state));
2083 2084 break;
2084 2085 }
2085 2086
2086 2087 /*
2087 2088 * check state.
2088 2089 * - make sure you do crfree(s_cred);
2089 2090 * release segment and minor number
2090 2091 */
2091 2092 ASSERT(seg->s_state == RSM_STATE_NEW);
2092 2093
2093 2094 /*
2094 2095 * The export_force_destroy callback is created to unlock
2095 2096 * the exported segments of a process
2096 2097 * when the process does a fork or exec and then exits calls this
2097 2098 * function with the force flag set to 1 which indicates that the
2098 2099 * segment state must be converted to ZOMBIE. This state means that the
2099 2100 * segments still exist and have been unlocked and most importantly the
2100 2101 * only operation allowed is to destroy them on an rsm_close.
2101 2102 */
2102 2103 if (force_flag) {
2103 2104 rsmseglock_acquire(seg);
2104 2105 seg->s_state = RSM_STATE_ZOMBIE;
2105 2106 rsmseglock_release(seg);
2106 2107 } else {
2107 2108 rsmseg_free(seg);
2108 2109 }
2109 2110
2110 2111 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_close done\n"));
2111 2112 }
2112 2113
2113 2114 static int
2114 2115 rsm_close(dev_t dev, int flag, int otyp, cred_t *cred)
2115 2116 {
2116 2117 minor_t rnum = getminor(dev);
2117 2118 rsmresource_t *res;
2118 2119 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL| RSM_DDI);
2119 2120
2120 2121 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_close enter\n"));
2121 2122
2122 2123 flag = flag; cred = cred;
2123 2124
2124 2125 if (otyp != OTYP_CHR)
2125 2126 return (EINVAL);
2126 2127
2127 2128 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rnum = %d\n", rnum));
2128 2129
2129 2130 /*
2130 2131 * At this point we are the last reference to the resource.
2131 2132 * Free resource number from resource table.
2132 2133 * It's ok to remove number before we free the segment.
2133 2134 * We need to lock the resource to protect against remote calls.
2134 2135 */
2135 2136 if (rnum == RSM_DRIVER_MINOR ||
2136 2137 (res = rsmresource_free(rnum)) == NULL) {
2137 2138 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_close done\n"));
2138 2139 return (DDI_SUCCESS);
2139 2140 }
2140 2141
2141 2142 switch (res->rsmrc_type) {
2142 2143 case RSM_RESOURCE_EXPORT_SEGMENT:
2143 2144 case RSM_RESOURCE_IMPORT_SEGMENT:
2144 2145 rsmseg_close((rsmseg_t *)res, 0);
2145 2146 break;
2146 2147 case RSM_RESOURCE_BAR:
2147 2148 DBG_PRINTF((category, RSM_ERR, "bad resource in rsm_close\n"));
2148 2149 break;
2149 2150 default:
2150 2151 break;
2151 2152 }
2152 2153
2153 2154 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_close done\n"));
2154 2155
2155 2156 return (DDI_SUCCESS);
2156 2157 }
2157 2158
2158 2159 /*
2159 2160 * rsm_inc_pgcnt
2160 2161 *
2161 2162 * Description: increment rsm page counter.
2162 2163 *
2163 2164 * Parameters: pgcnt_t pnum; number of pages to be used
2164 2165 *
2165 2166 * Returns: RSM_SUCCESS if memory limit not exceeded
2166 2167 * ENOSPC if memory limit exceeded. In this case, the
2167 2168 * page counter remains unchanged.
2168 2169 *
2169 2170 */
2170 2171 static int
2171 2172 rsm_inc_pgcnt(pgcnt_t pnum)
2172 2173 {
2173 2174 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2174 2175 if (rsm_pgcnt_max == 0) { /* no upper limit has been set */
2175 2176 return (RSM_SUCCESS);
2176 2177 }
2177 2178
2178 2179 mutex_enter(&rsm_pgcnt_lock);
2179 2180
2180 2181 if (rsm_pgcnt + pnum > rsm_pgcnt_max) {
2181 2182 /* ensure that limits have not been exceeded */
2182 2183 mutex_exit(&rsm_pgcnt_lock);
2183 2184 return (RSMERR_INSUFFICIENT_MEM);
2184 2185 }
2185 2186
2186 2187 rsm_pgcnt += pnum;
2187 2188 DBG_PRINTF((category, RSM_DEBUG, "rsm_pgcnt incr to %d.\n",
2188 2189 rsm_pgcnt));
2189 2190 mutex_exit(&rsm_pgcnt_lock);
2190 2191
2191 2192 return (RSM_SUCCESS);
2192 2193 }
2193 2194
2194 2195 /*
2195 2196 * rsm_dec_pgcnt
2196 2197 *
2197 2198 * Description: decrement rsm page counter.
2198 2199 *
2199 2200 * Parameters: pgcnt_t pnum; number of pages freed
2200 2201 *
2201 2202 */
2202 2203 static void
2203 2204 rsm_dec_pgcnt(pgcnt_t pnum)
2204 2205 {
2205 2206 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2206 2207
2207 2208 if (rsm_pgcnt_max == 0) { /* no upper limit has been set */
2208 2209 return;
2209 2210 }
2210 2211
2211 2212 mutex_enter(&rsm_pgcnt_lock);
2212 2213 ASSERT(rsm_pgcnt >= pnum);
2213 2214 rsm_pgcnt -= pnum;
2214 2215 DBG_PRINTF((category, RSM_DEBUG, "rsm_pgcnt decr to %d.\n",
2215 2216 rsm_pgcnt));
2216 2217 mutex_exit(&rsm_pgcnt_lock);
2217 2218 }
2218 2219
2219 2220 static struct umem_callback_ops rsm_as_ops = {
2220 2221 UMEM_CALLBACK_VERSION, /* version number */
2221 2222 rsm_export_force_destroy,
2222 2223 };
2223 2224
2224 2225 static int
2225 2226 rsm_bind_pages(ddi_umem_cookie_t *cookie, caddr_t vaddr, size_t len,
2226 2227 proc_t *procp)
2227 2228 {
2228 2229 int error = RSM_SUCCESS;
2229 2230 ulong_t pnum;
2230 2231 struct umem_callback_ops *callbackops = &rsm_as_ops;
2231 2232
2232 2233 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2233 2234
2234 2235 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind_pages enter\n"));
2235 2236
2236 2237 /*
2237 2238 * Make sure vaddr and len are aligned on a page boundary
2238 2239 */
2239 2240 if ((uintptr_t)vaddr & (PAGESIZE - 1)) {
2240 2241 return (RSMERR_BAD_ADDR);
2241 2242 }
2242 2243
2243 2244 if (len & (PAGESIZE - 1)) {
2244 2245 return (RSMERR_BAD_LENGTH);
2245 2246 }
2246 2247
2247 2248 /*
2248 2249 * Find number of pages
2249 2250 */
2250 2251 pnum = btopr(len);
2251 2252 error = rsm_inc_pgcnt(pnum);
2252 2253 if (error != RSM_SUCCESS) {
2253 2254 DBG_PRINTF((category, RSM_ERR,
2254 2255 "rsm_bind_pages:mem limit exceeded\n"));
2255 2256 return (RSMERR_INSUFFICIENT_MEM);
2256 2257 }
2257 2258
2258 2259 error = umem_lockmemory(vaddr, len,
2259 2260 DDI_UMEMLOCK_WRITE|DDI_UMEMLOCK_READ|DDI_UMEMLOCK_LONGTERM,
2260 2261 cookie,
2261 2262 callbackops, procp);
2262 2263
2263 2264 if (error) {
2264 2265 rsm_dec_pgcnt(pnum);
2265 2266 DBG_PRINTF((category, RSM_ERR,
2266 2267 "rsm_bind_pages:ddi_umem_lock failed\n"));
2267 2268 /*
2268 2269 * ddi_umem_lock, in the case of failure, returns one of
2269 2270 * the following three errors. These are translated into
2270 2271 * the RSMERR namespace and returned.
2271 2272 */
2272 2273 if (error == EFAULT)
2273 2274 return (RSMERR_BAD_ADDR);
2274 2275 else if (error == EACCES)
2275 2276 return (RSMERR_PERM_DENIED);
2276 2277 else
2277 2278 return (RSMERR_INSUFFICIENT_MEM);
2278 2279 }
2279 2280
2280 2281 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind_pages done\n"));
2281 2282
2282 2283 return (error);
2283 2284
2284 2285 }
2285 2286
2286 2287 static int
2287 2288 rsm_unbind_pages(rsmseg_t *seg)
2288 2289 {
2289 2290 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2290 2291
2291 2292 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind_pages enter\n"));
2292 2293
2293 2294 ASSERT(rsmseglock_held(seg));
2294 2295
2295 2296 if (seg->s_cookie != NULL) {
2296 2297 /* unlock address range */
2297 2298 ddi_umem_unlock(seg->s_cookie);
2298 2299 rsm_dec_pgcnt(btopr(seg->s_len));
2299 2300 seg->s_cookie = NULL;
2300 2301 }
2301 2302
2302 2303 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind_pages done\n"));
2303 2304
2304 2305 return (RSM_SUCCESS);
2305 2306 }
2306 2307
2307 2308
2308 2309 static int
2309 2310 rsm_bind(rsmseg_t *seg, rsm_ioctlmsg_t *msg, intptr_t dataptr, int mode)
2310 2311 {
2311 2312 int e;
2312 2313 adapter_t *adapter;
2313 2314 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2314 2315
2315 2316 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind enter\n"));
2316 2317
2317 2318 adapter = rsm_getadapter(msg, mode);
2318 2319 if (adapter == NULL) {
2319 2320 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2320 2321 "rsm_bind done:no adapter\n"));
2321 2322 return (RSMERR_CTLR_NOT_PRESENT);
2322 2323 }
2323 2324
2324 2325 /* lock address range */
2325 2326 if (msg->vaddr == NULL) {
2326 2327 rsmka_release_adapter(adapter);
2327 2328 DBG_PRINTF((category, RSM_ERR,
2328 2329 "rsm: rsm_bind done: invalid vaddr\n"));
2329 2330 return (RSMERR_BAD_ADDR);
2330 2331 }
2331 2332 if (msg->len <= 0) {
2332 2333 rsmka_release_adapter(adapter);
2333 2334 DBG_PRINTF((category, RSM_ERR,
2334 2335 "rsm_bind: invalid length\n"));
2335 2336 return (RSMERR_BAD_LENGTH);
2336 2337 }
2337 2338
2338 2339 /* Lock segment */
2339 2340 rsmseglock_acquire(seg);
2340 2341
2341 2342 while (seg->s_state == RSM_STATE_NEW_QUIESCED) {
2342 2343 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
2343 2344 DBG_PRINTF((category, RSM_DEBUG,
2344 2345 "rsm_bind done: cv_wait INTERRUPTED"));
2345 2346 rsmka_release_adapter(adapter);
2346 2347 rsmseglock_release(seg);
2347 2348 return (RSMERR_INTERRUPTED);
2348 2349 }
2349 2350 }
2350 2351
2351 2352 ASSERT(seg->s_state == RSM_STATE_NEW);
2352 2353
2353 2354 ASSERT(seg->s_cookie == NULL);
2354 2355
2355 2356 e = rsm_bind_pages(&seg->s_cookie, msg->vaddr, msg->len, curproc);
2356 2357 if (e == RSM_SUCCESS) {
2357 2358 seg->s_flags |= RSM_USER_MEMORY;
2358 2359 if (msg->perm & RSM_ALLOW_REBIND) {
2359 2360 seg->s_flags |= RSMKA_ALLOW_UNBIND_REBIND;
2360 2361 }
2361 2362 if (msg->perm & RSM_CREATE_SEG_DONTWAIT) {
2362 2363 seg->s_flags |= RSMKA_SET_RESOURCE_DONTWAIT;
2363 2364 }
2364 2365 seg->s_region.r_vaddr = msg->vaddr;
2365 2366 /*
2366 2367 * Set the s_pid value in the segment structure. This is used
2367 2368 * to identify exported segments belonging to a particular
2368 2369 * process so that when the process exits, these segments can
2369 2370 * be unlocked forcefully even if rsm_close is not called on
2370 2371 * process exit since there maybe other processes referencing
2371 2372 * them (for example on a fork or exec).
2372 2373 * The s_pid value is also used to authenticate the process
2373 2374 * doing a publish or unpublish on the export segment. Only
2374 2375 * the creator of the export segment has a right to do a
2375 2376 * publish or unpublish and unbind on the segment.
2376 2377 */
2377 2378 seg->s_pid = ddi_get_pid();
2378 2379 seg->s_len = msg->len;
2379 2380 seg->s_state = RSM_STATE_BIND;
2380 2381 seg->s_adapter = adapter;
2381 2382 seg->s_proc = curproc;
2382 2383 } else {
2383 2384 rsmka_release_adapter(adapter);
2384 2385 DBG_PRINTF((category, RSM_WARNING,
2385 2386 "unable to lock down pages\n"));
2386 2387 }
2387 2388
2388 2389 msg->rnum = seg->s_minor;
2389 2390 /* Unlock segment */
2390 2391 rsmseglock_release(seg);
2391 2392
2392 2393 if (e == RSM_SUCCESS) {
2393 2394 /* copyout the resource number */
2394 2395 #ifdef _MULTI_DATAMODEL
2395 2396 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
2396 2397 rsm_ioctlmsg32_t msg32;
2397 2398
2398 2399 msg32.rnum = msg->rnum;
2399 2400 if (ddi_copyout((caddr_t)&msg32.rnum,
2400 2401 (caddr_t)&((rsm_ioctlmsg32_t *)dataptr)->rnum,
2401 2402 sizeof (minor_t), mode)) {
2402 2403 rsmka_release_adapter(adapter);
2403 2404 e = RSMERR_BAD_ADDR;
2404 2405 }
2405 2406 }
2406 2407 #endif
2407 2408 if (ddi_copyout((caddr_t)&msg->rnum,
2408 2409 (caddr_t)&((rsm_ioctlmsg_t *)dataptr)->rnum,
2409 2410 sizeof (minor_t), mode)) {
2410 2411 rsmka_release_adapter(adapter);
2411 2412 e = RSMERR_BAD_ADDR;
|
↓ open down ↓ |
2376 lines elided |
↑ open up ↑ |
2412 2413 }
2413 2414 }
2414 2415
2415 2416 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind done\n"));
2416 2417
2417 2418 return (e);
2418 2419 }
2419 2420
2420 2421 static void
2421 2422 rsm_remap_local_importers(rsm_node_id_t src_nodeid,
2422 - rsm_memseg_id_t ex_segid,
2423 - ddi_umem_cookie_t cookie)
2424 -
2423 + rsm_memseg_id_t ex_segid, ddi_umem_cookie_t cookie)
2425 2424 {
2426 2425 rsmresource_t *p = NULL;
2427 2426 rsmhash_table_t *rhash = &rsm_import_segs;
2428 2427 uint_t index;
2429 2428
2430 2429 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_FUNC_ALL, RSM_DEBUG_VERBOSE,
2431 2430 "rsm_remap_local_importers enter\n"));
2432 2431
2433 2432 index = rsmhash(ex_segid);
2434 2433
2435 2434 rw_enter(&rhash->rsmhash_rw, RW_READER);
2436 2435
2437 2436 p = rsmhash_getbkt(rhash, index);
2438 2437
2439 2438 for (; p; p = p->rsmrc_next) {
2440 2439 rsmseg_t *seg = (rsmseg_t *)p;
2441 2440 rsmseglock_acquire(seg);
2442 2441 /*
2443 2442 * Change the s_cookie value of only the local importers
2444 2443 * which have been mapped (in state RSM_STATE_ACTIVE).
2445 2444 * Note that there is no need to change the s_cookie value
2446 2445 * if the imported segment is in RSM_STATE_MAPPING since
2447 2446 * eventually the s_cookie will be updated via the mapping
2448 2447 * functionality.
2449 2448 */
2450 2449 if ((seg->s_segid == ex_segid) && (seg->s_node == src_nodeid) &&
2451 2450 (seg->s_state == RSM_STATE_ACTIVE)) {
2452 2451 seg->s_cookie = cookie;
2453 2452 }
2454 2453 rsmseglock_release(seg);
2455 2454 }
2456 2455 rw_exit(&rhash->rsmhash_rw);
2457 2456
2458 2457 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_FUNC_ALL, RSM_DEBUG_VERBOSE,
2459 2458 "rsm_remap_local_importers done\n"));
2460 2459 }
2461 2460
2462 2461 static int
2463 2462 rsm_rebind(rsmseg_t *seg, rsm_ioctlmsg_t *msg)
2464 2463 {
2465 2464 int e;
2466 2465 adapter_t *adapter;
2467 2466 ddi_umem_cookie_t cookie;
2468 2467 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2469 2468
2470 2469 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_rebind enter\n"));
2471 2470
2472 2471 /* Check for permissions to rebind */
2473 2472 if (!(seg->s_flags & RSMKA_ALLOW_UNBIND_REBIND)) {
2474 2473 return (RSMERR_REBIND_NOT_ALLOWED);
2475 2474 }
2476 2475
2477 2476 if (seg->s_pid != ddi_get_pid() &&
2478 2477 ddi_get_pid() != 0) {
2479 2478 DBG_PRINTF((category, RSM_ERR, "rsm_rebind: Not owner\n"));
2480 2479 return (RSMERR_NOT_CREATOR);
2481 2480 }
2482 2481
2483 2482 /*
2484 2483 * We will not be allowing partial rebind and hence length passed
2485 2484 * in must be same as segment length
2486 2485 */
2487 2486 if (msg->vaddr == NULL) {
2488 2487 DBG_PRINTF((category, RSM_ERR,
2489 2488 "rsm_rebind done: null msg->vaddr\n"));
2490 2489 return (RSMERR_BAD_ADDR);
2491 2490 }
2492 2491 if (msg->len != seg->s_len) {
2493 2492 DBG_PRINTF((category, RSM_ERR,
2494 2493 "rsm_rebind: invalid length\n"));
2495 2494 return (RSMERR_BAD_LENGTH);
2496 2495 }
2497 2496
2498 2497 /* Lock segment */
2499 2498 rsmseglock_acquire(seg);
2500 2499
2501 2500 while ((seg->s_state == RSM_STATE_BIND_QUIESCED) ||
2502 2501 (seg->s_state == RSM_STATE_EXPORT_QUIESCING) ||
2503 2502 (seg->s_state == RSM_STATE_EXPORT_QUIESCED)) {
2504 2503 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
2505 2504 rsmseglock_release(seg);
2506 2505 DBG_PRINTF((category, RSM_DEBUG,
2507 2506 "rsm_rebind done: cv_wait INTERRUPTED"));
2508 2507 return (RSMERR_INTERRUPTED);
2509 2508 }
2510 2509 }
2511 2510
2512 2511 /* verify segment state */
2513 2512 if ((seg->s_state != RSM_STATE_BIND) &&
2514 2513 (seg->s_state != RSM_STATE_EXPORT)) {
2515 2514 /* Unlock segment */
2516 2515 rsmseglock_release(seg);
2517 2516 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2518 2517 "rsm_rebind done: invalid state\n"));
2519 2518 return (RSMERR_BAD_SEG_HNDL);
2520 2519 }
2521 2520
2522 2521 ASSERT(seg->s_cookie != NULL);
2523 2522
2524 2523 if (msg->vaddr == seg->s_region.r_vaddr) {
2525 2524 rsmseglock_release(seg);
2526 2525 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_rebind done\n"));
2527 2526 return (RSM_SUCCESS);
2528 2527 }
2529 2528
2530 2529 e = rsm_bind_pages(&cookie, msg->vaddr, msg->len, curproc);
2531 2530 if (e == RSM_SUCCESS) {
2532 2531 struct buf *xbuf;
2533 2532 dev_t sdev = 0;
2534 2533 rsm_memory_local_t mem;
2535 2534
2536 2535 xbuf = ddi_umem_iosetup(cookie, 0, msg->len, B_WRITE,
2537 2536 sdev, 0, NULL, DDI_UMEM_SLEEP);
2538 2537 ASSERT(xbuf != NULL);
2539 2538
2540 2539 mem.ms_type = RSM_MEM_BUF;
2541 2540 mem.ms_bp = xbuf;
2542 2541
2543 2542 adapter = seg->s_adapter;
2544 2543 e = adapter->rsmpi_ops->rsm_rebind(
2545 2544 seg->s_handle.out, 0, &mem,
2546 2545 RSM_RESOURCE_DONTWAIT, NULL);
2547 2546
2548 2547 if (e == RSM_SUCCESS) {
2549 2548 /*
2550 2549 * unbind the older pages, and unload local importers;
2551 2550 * but don't disconnect importers
2552 2551 */
2553 2552 (void) rsm_unbind_pages(seg);
2554 2553 seg->s_cookie = cookie;
2555 2554 seg->s_region.r_vaddr = msg->vaddr;
2556 2555 rsm_remap_local_importers(my_nodeid, seg->s_segid,
2557 2556 cookie);
2558 2557 } else {
2559 2558 /*
2560 2559 * Unbind the pages associated with "cookie" by the
2561 2560 * rsm_bind_pages calls prior to this. This is
2562 2561 * similar to what is done in the rsm_unbind_pages
2563 2562 * routine for the seg->s_cookie.
2564 2563 */
2565 2564 ddi_umem_unlock(cookie);
2566 2565 rsm_dec_pgcnt(btopr(msg->len));
2567 2566 DBG_PRINTF((category, RSM_ERR,
2568 2567 "rsm_rebind failed with %d\n", e));
2569 2568 }
2570 2569 /*
2571 2570 * At present there is no dependency on the existence of xbuf.
2572 2571 * So we can free it here. If in the future this changes, it can
2573 2572 * be freed sometime during the segment destroy.
2574 2573 */
2575 2574 freerbuf(xbuf);
2576 2575 }
2577 2576
2578 2577 /* Unlock segment */
2579 2578 rsmseglock_release(seg);
2580 2579
2581 2580 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_rebind done\n"));
2582 2581
2583 2582 return (e);
2584 2583 }
2585 2584
2586 2585 static int
2587 2586 rsm_unbind(rsmseg_t *seg)
2588 2587 {
2589 2588 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2590 2589
2591 2590 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind enter\n"));
2592 2591
2593 2592 rsmseglock_acquire(seg);
2594 2593
2595 2594 /* verify segment state */
2596 2595 if ((seg->s_state != RSM_STATE_BIND) &&
2597 2596 (seg->s_state != RSM_STATE_BIND_QUIESCED)) {
2598 2597 rsmseglock_release(seg);
2599 2598 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2600 2599 "rsm_unbind: invalid state\n"));
2601 2600 return (RSMERR_BAD_SEG_HNDL);
2602 2601 }
2603 2602
2604 2603 /* unlock current range */
2605 2604 (void) rsm_unbind_pages(seg);
2606 2605
2607 2606 if (seg->s_state == RSM_STATE_BIND) {
2608 2607 seg->s_state = RSM_STATE_NEW;
2609 2608 } else if (seg->s_state == RSM_STATE_BIND_QUIESCED) {
2610 2609 seg->s_state = RSM_STATE_NEW_QUIESCED;
2611 2610 }
2612 2611
2613 2612 rsmseglock_release(seg);
2614 2613
2615 2614 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind done\n"));
2616 2615
2617 2616 return (RSM_SUCCESS);
2618 2617 }
2619 2618
2620 2619 /* **************************** Exporter Access List Management ******* */
2621 2620 static void
2622 2621 rsmacl_free(rsmapi_access_entry_t *acl, int acl_len)
2623 2622 {
2624 2623 int acl_sz;
2625 2624 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2626 2625
2627 2626 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_free enter\n"));
2628 2627
2629 2628 /* acl could be NULL */
2630 2629
2631 2630 if (acl != NULL && acl_len > 0) {
2632 2631 acl_sz = acl_len * sizeof (rsmapi_access_entry_t);
2633 2632 kmem_free((void *)acl, acl_sz);
2634 2633 }
2635 2634
2636 2635 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_free done\n"));
2637 2636 }
2638 2637
2639 2638 static void
2640 2639 rsmpiacl_free(rsm_access_entry_t *acl, int acl_len)
2641 2640 {
2642 2641 int acl_sz;
2643 2642 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2644 2643
2645 2644 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_free enter\n"));
2646 2645
2647 2646 if (acl != NULL && acl_len > 0) {
2648 2647 acl_sz = acl_len * sizeof (rsm_access_entry_t);
2649 2648 kmem_free((void *)acl, acl_sz);
2650 2649 }
2651 2650
2652 2651 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_free done\n"));
2653 2652
2654 2653 }
2655 2654
2656 2655 static int
2657 2656 rsmacl_build(rsm_ioctlmsg_t *msg, int mode,
2658 2657 rsmapi_access_entry_t **list, int *len, int loopback)
2659 2658 {
2660 2659 rsmapi_access_entry_t *acl;
2661 2660 int acl_len;
2662 2661 int i;
2663 2662 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2664 2663
2665 2664 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_build enter\n"));
2666 2665
2667 2666 *len = 0;
2668 2667 *list = NULL;
2669 2668
2670 2669 acl_len = msg->acl_len;
2671 2670 if ((loopback && acl_len > 1) || (acl_len < 0) ||
2672 2671 (acl_len > MAX_NODES)) {
2673 2672 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2674 2673 "rsmacl_build done: acl invalid\n"));
2675 2674 return (RSMERR_BAD_ACL);
2676 2675 }
2677 2676
2678 2677 if (acl_len > 0 && acl_len <= MAX_NODES) {
2679 2678 size_t acl_size = acl_len * sizeof (rsmapi_access_entry_t);
2680 2679
2681 2680 acl = kmem_alloc(acl_size, KM_SLEEP);
2682 2681
2683 2682 if (ddi_copyin((caddr_t)msg->acl, (caddr_t)acl,
2684 2683 acl_size, mode)) {
2685 2684 kmem_free((void *) acl, acl_size);
2686 2685 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2687 2686 "rsmacl_build done: BAD_ADDR\n"));
2688 2687 return (RSMERR_BAD_ADDR);
2689 2688 }
2690 2689
2691 2690 /*
2692 2691 * Verify access list
2693 2692 */
2694 2693 for (i = 0; i < acl_len; i++) {
2695 2694 if (acl[i].ae_node > MAX_NODES ||
2696 2695 (loopback && (acl[i].ae_node != my_nodeid)) ||
2697 2696 acl[i].ae_permission > RSM_ACCESS_TRUSTED) {
2698 2697 /* invalid entry */
2699 2698 kmem_free((void *) acl, acl_size);
2700 2699 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2701 2700 "rsmacl_build done: EINVAL\n"));
2702 2701 return (RSMERR_BAD_ACL);
2703 2702 }
2704 2703 }
2705 2704
2706 2705 *len = acl_len;
2707 2706 *list = acl;
2708 2707 }
2709 2708
2710 2709 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_build done\n"));
2711 2710
2712 2711 return (DDI_SUCCESS);
2713 2712 }
2714 2713
2715 2714 static int
2716 2715 rsmpiacl_create(rsmapi_access_entry_t *src, rsm_access_entry_t **dest,
2717 2716 int acl_len, adapter_t *adapter)
2718 2717 {
2719 2718 rsm_access_entry_t *acl;
2720 2719 rsm_addr_t hwaddr;
2721 2720 int i;
2722 2721 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2723 2722
2724 2723 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_create enter\n"));
2725 2724
2726 2725 if (src != NULL) {
2727 2726 size_t acl_size = acl_len * sizeof (rsm_access_entry_t);
2728 2727 acl = kmem_alloc(acl_size, KM_SLEEP);
2729 2728
2730 2729 /*
2731 2730 * translate access list
2732 2731 */
2733 2732 for (i = 0; i < acl_len; i++) {
2734 2733 if (src[i].ae_node == my_nodeid) {
2735 2734 acl[i].ae_addr = adapter->hwaddr;
2736 2735 } else {
2737 2736 hwaddr = get_remote_hwaddr(adapter,
2738 2737 src[i].ae_node);
2739 2738 if ((int64_t)hwaddr < 0) {
2740 2739 /* invalid hwaddr */
2741 2740 kmem_free((void *) acl, acl_size);
2742 2741 DBG_PRINTF((category,
2743 2742 RSM_DEBUG_VERBOSE,
2744 2743 "rsmpiacl_create done:"
2745 2744 "EINVAL hwaddr\n"));
2746 2745 return (RSMERR_INTERNAL_ERROR);
2747 2746 }
2748 2747 acl[i].ae_addr = hwaddr;
2749 2748 }
2750 2749 /* rsmpi understands only RSM_PERM_XXXX */
2751 2750 acl[i].ae_permission =
2752 2751 src[i].ae_permission & RSM_PERM_RDWR;
2753 2752 }
2754 2753 *dest = acl;
2755 2754 } else {
2756 2755 *dest = NULL;
2757 2756 }
2758 2757
2759 2758 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_create done\n"));
2760 2759
2761 2760 return (RSM_SUCCESS);
2762 2761 }
2763 2762
2764 2763 static int
2765 2764 rsmsegacl_validate(rsmipc_request_t *req, rsm_node_id_t rnode,
2766 2765 rsmipc_reply_t *reply)
2767 2766 {
2768 2767
2769 2768 int i;
2770 2769 rsmseg_t *seg;
2771 2770 rsm_memseg_id_t key = req->rsmipc_key;
2772 2771 rsm_permission_t perm = req->rsmipc_perm;
2773 2772 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2774 2773
2775 2774 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2776 2775 "rsmsegacl_validate enter\n"));
2777 2776
2778 2777 /*
2779 2778 * Find segment and grab its lock. The reason why we grab the segment
2780 2779 * lock in side the search is to avoid the race when the segment is
2781 2780 * being deleted and we already have a pointer to it.
2782 2781 */
2783 2782 seg = rsmexport_lookup(key);
2784 2783 if (!seg) {
2785 2784 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2786 2785 "rsmsegacl_validate done: %u ENXIO\n", key));
2787 2786 return (RSMERR_SEG_NOT_PUBLISHED);
2788 2787 }
2789 2788
2790 2789 ASSERT(rsmseglock_held(seg));
2791 2790 ASSERT(seg->s_state == RSM_STATE_EXPORT);
2792 2791
2793 2792 /*
2794 2793 * We implement a 2-level protection scheme.
2795 2794 * First, we check if local/remote host has access rights.
2796 2795 * Second, we check if the user has access rights.
2797 2796 *
2798 2797 * This routine only validates the rnode access_list
2799 2798 */
2800 2799 if (seg->s_acl_len > 0) {
2801 2800 /*
2802 2801 * Check host access list
2803 2802 */
2804 2803 ASSERT(seg->s_acl != NULL);
2805 2804 for (i = 0; i < seg->s_acl_len; i++) {
2806 2805 if (seg->s_acl[i].ae_node == rnode) {
2807 2806 perm &= seg->s_acl[i].ae_permission;
2808 2807 goto found;
2809 2808 }
2810 2809 }
2811 2810 /* rnode is not found in the list */
2812 2811 rsmseglock_release(seg);
2813 2812 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2814 2813 "rsmsegacl_validate done: EPERM\n"));
2815 2814 return (RSMERR_SEG_NOT_PUBLISHED_TO_NODE);
2816 2815 } else {
2817 2816 /* use default owner creation umask */
2818 2817 perm &= seg->s_mode;
2819 2818 }
2820 2819
2821 2820 found:
2822 2821 /* update perm for this node */
2823 2822 reply->rsmipc_mode = perm;
2824 2823 reply->rsmipc_uid = seg->s_uid;
2825 2824 reply->rsmipc_gid = seg->s_gid;
2826 2825 reply->rsmipc_segid = seg->s_segid;
2827 2826 reply->rsmipc_seglen = seg->s_len;
2828 2827
2829 2828 /*
2830 2829 * Perm of requesting node is valid; source will validate user
2831 2830 */
2832 2831 rsmseglock_release(seg);
2833 2832
2834 2833 /*
2835 2834 * Add the importer to the list right away, if connect fails
2836 2835 * the importer will ask the exporter to remove it.
2837 2836 */
2838 2837 importer_list_add(rnode, key, req->rsmipc_adapter_hwaddr,
2839 2838 req->rsmipc_segment_cookie);
2840 2839
2841 2840 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmsegacl_validate done\n"));
2842 2841
2843 2842 return (RSM_SUCCESS);
2844 2843 }
2845 2844
2846 2845
2847 2846 /* ************************** Exporter Calls ************************* */
2848 2847
2849 2848 static int
2850 2849 rsm_publish(rsmseg_t *seg, rsm_ioctlmsg_t *msg, intptr_t dataptr, int mode)
2851 2850 {
2852 2851 int e;
2853 2852 int acl_len;
2854 2853 rsmapi_access_entry_t *acl;
2855 2854 rsm_access_entry_t *rsmpi_acl;
2856 2855 rsm_memory_local_t mem;
2857 2856 struct buf *xbuf;
2858 2857 dev_t sdev = 0;
2859 2858 adapter_t *adapter;
2860 2859 rsm_memseg_id_t segment_id = 0;
2861 2860 int loopback_flag = 0;
2862 2861 int create_flags = 0;
2863 2862 rsm_resource_callback_t callback_flag;
2864 2863 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2865 2864
2866 2865 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_publish enter\n"));
2867 2866
2868 2867 if (seg->s_adapter == &loopback_adapter)
2869 2868 loopback_flag = 1;
2870 2869
2871 2870 if (seg->s_pid != ddi_get_pid() &&
2872 2871 ddi_get_pid() != 0) {
2873 2872 DBG_PRINTF((category, RSM_ERR,
2874 2873 "rsm_publish: Not creator\n"));
2875 2874 return (RSMERR_NOT_CREATOR);
2876 2875 }
2877 2876
2878 2877 /*
2879 2878 * Get per node access list
2880 2879 */
2881 2880 e = rsmacl_build(msg, mode, &acl, &acl_len, loopback_flag);
2882 2881 if (e != DDI_SUCCESS) {
2883 2882 DBG_PRINTF((category, RSM_ERR,
2884 2883 "rsm_publish done: rsmacl_build failed\n"));
2885 2884 return (e);
2886 2885 }
2887 2886
2888 2887 /*
2889 2888 * The application provided msg->key is used for resolving a
2890 2889 * segment id according to the following:
2891 2890 * key = 0 Kernel Agent selects the segment id
2892 2891 * key <= RSM_DLPI_ID_END Reserved for system usage except
2893 2892 * RSMLIB range
2894 2893 * key < RSM_USER_APP_ID_BASE segment id = key
2895 2894 * key >= RSM_USER_APP_ID_BASE Reserved for KA selections
2896 2895 *
2897 2896 * rsm_nextavail_segmentid is initialized to 0x80000000 and
2898 2897 * overflows to zero after 0x80000000 allocations.
2899 2898 * An algorithm is needed which allows reinitialization and provides
2900 2899 * for reallocation after overflow. For now, ENOMEM is returned
2901 2900 * once the overflow condition has occurred.
2902 2901 */
2903 2902 if (msg->key == 0) {
2904 2903 mutex_enter(&rsm_lock);
2905 2904 segment_id = rsm_nextavail_segmentid;
2906 2905 if (segment_id != 0) {
2907 2906 rsm_nextavail_segmentid++;
2908 2907 mutex_exit(&rsm_lock);
2909 2908 } else {
2910 2909 mutex_exit(&rsm_lock);
2911 2910 DBG_PRINTF((category, RSM_ERR,
2912 2911 "rsm_publish done: no more keys avlbl\n"));
2913 2912 return (RSMERR_INSUFFICIENT_RESOURCES);
2914 2913 }
2915 2914 } else if BETWEEN(msg->key, RSM_RSMLIB_ID_BASE, RSM_RSMLIB_ID_END)
2916 2915 /* range reserved for internal use by base/ndi libraries */
2917 2916 segment_id = msg->key;
2918 2917 else if (msg->key <= RSM_DLPI_ID_END)
2919 2918 return (RSMERR_RESERVED_SEGID);
2920 2919 else if (msg->key <= (uint_t)RSM_USER_APP_ID_BASE -1)
2921 2920 segment_id = msg->key;
2922 2921 else {
2923 2922 DBG_PRINTF((category, RSM_ERR,
2924 2923 "rsm_publish done: invalid key %u\n", msg->key));
2925 2924 return (RSMERR_RESERVED_SEGID);
2926 2925 }
2927 2926
2928 2927 /* Add key to exportlist; The segment lock is held on success */
2929 2928 e = rsmexport_add(seg, segment_id);
2930 2929 if (e) {
2931 2930 rsmacl_free(acl, acl_len);
2932 2931 DBG_PRINTF((category, RSM_ERR,
2933 2932 "rsm_publish done: export_add failed: %d\n", e));
2934 2933 return (e);
2935 2934 }
2936 2935
2937 2936 seg->s_segid = segment_id;
2938 2937
2939 2938 if ((seg->s_state != RSM_STATE_BIND) &&
2940 2939 (seg->s_state != RSM_STATE_BIND_QUIESCED)) {
2941 2940 /* state changed since then, free acl and return */
2942 2941 rsmseglock_release(seg);
2943 2942 rsmexport_rm(seg);
2944 2943 rsmacl_free(acl, acl_len);
2945 2944 DBG_PRINTF((category, RSM_ERR,
2946 2945 "rsm_publish done: segment in wrong state: %d\n",
2947 2946 seg->s_state));
2948 2947 return (RSMERR_BAD_SEG_HNDL);
2949 2948 }
2950 2949
2951 2950 /*
2952 2951 * If this is for a local memory handle and permissions are zero,
2953 2952 * then the surrogate segment is very large and we want to skip
2954 2953 * allocation of DVMA space.
2955 2954 *
2956 2955 * Careful! If the user didn't use an ACL list, acl will be a NULL
2957 2956 * pointer. Check that before dereferencing it.
2958 2957 */
2959 2958 if (acl != (rsmapi_access_entry_t *)NULL) {
2960 2959 if (acl[0].ae_node == my_nodeid && acl[0].ae_permission == 0)
2961 2960 goto skipdriver;
2962 2961 }
2963 2962
2964 2963 /* create segment */
2965 2964 xbuf = ddi_umem_iosetup(seg->s_cookie, 0, seg->s_len, B_WRITE,
2966 2965 sdev, 0, NULL, DDI_UMEM_SLEEP);
2967 2966 ASSERT(xbuf != NULL);
2968 2967
2969 2968 mem.ms_type = RSM_MEM_BUF;
2970 2969 mem.ms_bp = xbuf;
2971 2970
2972 2971 /* This call includes a bind operations */
2973 2972
2974 2973 adapter = seg->s_adapter;
2975 2974 /*
2976 2975 * create a acl list with hwaddr for RSMPI publish
2977 2976 */
2978 2977 e = rsmpiacl_create(acl, &rsmpi_acl, acl_len, adapter);
2979 2978
2980 2979 if (e != RSM_SUCCESS) {
2981 2980 rsmseglock_release(seg);
2982 2981 rsmexport_rm(seg);
2983 2982 rsmacl_free(acl, acl_len);
2984 2983 freerbuf(xbuf);
2985 2984 DBG_PRINTF((category, RSM_ERR,
2986 2985 "rsm_publish done: rsmpiacl_create failed: %d\n", e));
2987 2986 return (e);
2988 2987 }
2989 2988
2990 2989 if (seg->s_state == RSM_STATE_BIND) {
2991 2990 /* create segment */
2992 2991
2993 2992 /* This call includes a bind operations */
2994 2993
2995 2994 if (seg->s_flags & RSMKA_ALLOW_UNBIND_REBIND) {
2996 2995 create_flags = RSM_ALLOW_UNBIND_REBIND;
2997 2996 }
2998 2997
2999 2998 if (seg->s_flags & RSMKA_SET_RESOURCE_DONTWAIT) {
3000 2999 callback_flag = RSM_RESOURCE_DONTWAIT;
3001 3000 } else {
3002 3001 callback_flag = RSM_RESOURCE_SLEEP;
3003 3002 }
3004 3003
3005 3004 e = adapter->rsmpi_ops->rsm_seg_create(
3006 3005 adapter->rsmpi_handle,
3007 3006 &seg->s_handle.out, seg->s_len,
3008 3007 create_flags, &mem,
3009 3008 callback_flag, NULL);
3010 3009 /*
3011 3010 * At present there is no dependency on the existence of xbuf.
3012 3011 * So we can free it here. If in the future this changes, it can
3013 3012 * be freed sometime during the segment destroy.
3014 3013 */
3015 3014 freerbuf(xbuf);
3016 3015
3017 3016 if (e != RSM_SUCCESS) {
3018 3017 rsmseglock_release(seg);
3019 3018 rsmexport_rm(seg);
3020 3019 rsmacl_free(acl, acl_len);
3021 3020 rsmpiacl_free(rsmpi_acl, acl_len);
3022 3021 DBG_PRINTF((category, RSM_ERR,
3023 3022 "rsm_publish done: export_create failed: %d\n", e));
3024 3023 /*
3025 3024 * The following assertion ensures that the two errors
3026 3025 * related to the length and its alignment do not occur
3027 3026 * since they have been checked during export_create
3028 3027 */
3029 3028 ASSERT(e != RSMERR_BAD_MEM_ALIGNMENT &&
3030 3029 e != RSMERR_BAD_LENGTH);
3031 3030 if (e == RSMERR_NOT_MEM)
3032 3031 e = RSMERR_INSUFFICIENT_MEM;
3033 3032
3034 3033 return (e);
3035 3034 }
3036 3035 /* export segment, this should create an IMMU mapping */
3037 3036 e = adapter->rsmpi_ops->rsm_publish(
3038 3037 seg->s_handle.out,
3039 3038 rsmpi_acl, acl_len,
3040 3039 seg->s_segid,
3041 3040 RSM_RESOURCE_DONTWAIT, NULL);
3042 3041
3043 3042 if (e != RSM_SUCCESS) {
3044 3043 adapter->rsmpi_ops->rsm_seg_destroy(seg->s_handle.out);
3045 3044 rsmseglock_release(seg);
3046 3045 rsmexport_rm(seg);
3047 3046 rsmacl_free(acl, acl_len);
3048 3047 rsmpiacl_free(rsmpi_acl, acl_len);
3049 3048 DBG_PRINTF((category, RSM_ERR,
3050 3049 "rsm_publish done: export_publish failed: %d\n",
3051 3050 e));
3052 3051 return (e);
3053 3052 }
3054 3053 }
3055 3054
3056 3055 seg->s_acl_in = rsmpi_acl;
3057 3056
3058 3057 skipdriver:
3059 3058 /* defer s_acl/s_acl_len -> avoid crash in rsmseg_free */
3060 3059 seg->s_acl_len = acl_len;
3061 3060 seg->s_acl = acl;
3062 3061
3063 3062 if (seg->s_state == RSM_STATE_BIND) {
3064 3063 seg->s_state = RSM_STATE_EXPORT;
3065 3064 } else if (seg->s_state == RSM_STATE_BIND_QUIESCED) {
3066 3065 seg->s_state = RSM_STATE_EXPORT_QUIESCED;
3067 3066 cv_broadcast(&seg->s_cv);
3068 3067 }
3069 3068
3070 3069 rsmseglock_release(seg);
3071 3070
3072 3071 /*
3073 3072 * If the segment id was solicited, then return it in
3074 3073 * the original incoming message.
3075 3074 */
3076 3075 if (msg->key == 0) {
3077 3076 msg->key = segment_id;
3078 3077 #ifdef _MULTI_DATAMODEL
3079 3078 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
3080 3079 rsm_ioctlmsg32_t msg32;
3081 3080
3082 3081 msg32.key = msg->key;
3083 3082 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3084 3083 "rsm_publish done\n"));
3085 3084 return (ddi_copyout((caddr_t)&msg32,
3086 3085 (caddr_t)dataptr, sizeof (msg32), mode));
3087 3086 }
3088 3087 #endif
3089 3088 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3090 3089 "rsm_publish done\n"));
3091 3090 return (ddi_copyout((caddr_t)msg,
3092 3091 (caddr_t)dataptr, sizeof (*msg), mode));
3093 3092 }
3094 3093
3095 3094 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_publish done\n"));
3096 3095 return (DDI_SUCCESS);
3097 3096 }
3098 3097
3099 3098 /*
3100 3099 * This function modifies the access control list of an already published
3101 3100 * segment. There is no effect on import segments which are already
3102 3101 * connected.
3103 3102 */
3104 3103 static int
3105 3104 rsm_republish(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int mode)
3106 3105 {
3107 3106 rsmapi_access_entry_t *new_acl, *old_acl, *tmp_acl;
3108 3107 rsm_access_entry_t *rsmpi_new_acl, *rsmpi_old_acl;
3109 3108 int new_acl_len, old_acl_len, tmp_acl_len;
3110 3109 int e, i;
3111 3110 adapter_t *adapter;
3112 3111 int loopback_flag = 0;
3113 3112 rsm_memseg_id_t key;
3114 3113 rsm_permission_t permission;
3115 3114 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
3116 3115
3117 3116 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_republish enter\n"));
3118 3117
3119 3118 if ((seg->s_state != RSM_STATE_EXPORT) &&
3120 3119 (seg->s_state != RSM_STATE_EXPORT_QUIESCED) &&
3121 3120 (seg->s_state != RSM_STATE_EXPORT_QUIESCING))
3122 3121 return (RSMERR_SEG_NOT_PUBLISHED);
3123 3122
3124 3123 if (seg->s_pid != ddi_get_pid() &&
3125 3124 ddi_get_pid() != 0) {
3126 3125 DBG_PRINTF((category, RSM_ERR,
3127 3126 "rsm_republish: Not owner\n"));
3128 3127 return (RSMERR_NOT_CREATOR);
3129 3128 }
3130 3129
3131 3130 if (seg->s_adapter == &loopback_adapter)
3132 3131 loopback_flag = 1;
3133 3132
3134 3133 /*
3135 3134 * Build new list first
3136 3135 */
3137 3136 e = rsmacl_build(msg, mode, &new_acl, &new_acl_len, loopback_flag);
3138 3137 if (e) {
3139 3138 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3140 3139 "rsm_republish done: rsmacl_build failed %d", e));
3141 3140 return (e);
3142 3141 }
3143 3142
3144 3143 /* Lock segment */
3145 3144 rsmseglock_acquire(seg);
3146 3145 /*
3147 3146 * a republish is in progress - REPUBLISH message is being
3148 3147 * sent to the importers so wait for it to complete OR
3149 3148 * wait till DR completes
3150 3149 */
3151 3150 while (((seg->s_state == RSM_STATE_EXPORT) &&
3152 3151 (seg->s_flags & RSM_REPUBLISH_WAIT)) ||
3153 3152 (seg->s_state == RSM_STATE_EXPORT_QUIESCED) ||
3154 3153 (seg->s_state == RSM_STATE_EXPORT_QUIESCING)) {
3155 3154 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
3156 3155 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3157 3156 "rsm_republish done: cv_wait INTERRUPTED"));
3158 3157 rsmseglock_release(seg);
3159 3158 rsmacl_free(new_acl, new_acl_len);
3160 3159 return (RSMERR_INTERRUPTED);
3161 3160 }
3162 3161 }
3163 3162
3164 3163 /* recheck if state is valid */
3165 3164 if (seg->s_state != RSM_STATE_EXPORT) {
3166 3165 rsmseglock_release(seg);
3167 3166 rsmacl_free(new_acl, new_acl_len);
3168 3167 return (RSMERR_SEG_NOT_PUBLISHED);
3169 3168 }
3170 3169
3171 3170 key = seg->s_key;
3172 3171 old_acl = seg->s_acl;
3173 3172 old_acl_len = seg->s_acl_len;
3174 3173
3175 3174 seg->s_acl = new_acl;
3176 3175 seg->s_acl_len = new_acl_len;
3177 3176
3178 3177 /*
3179 3178 * This call will only be meaningful if and when the interconnect
3180 3179 * layer makes use of the access list
3181 3180 */
3182 3181 adapter = seg->s_adapter;
3183 3182 /*
3184 3183 * create a acl list with hwaddr for RSMPI publish
3185 3184 */
3186 3185 e = rsmpiacl_create(new_acl, &rsmpi_new_acl, new_acl_len, adapter);
3187 3186
3188 3187 if (e != RSM_SUCCESS) {
3189 3188 seg->s_acl = old_acl;
3190 3189 seg->s_acl_len = old_acl_len;
3191 3190 rsmseglock_release(seg);
3192 3191 rsmacl_free(new_acl, new_acl_len);
3193 3192 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3194 3193 "rsm_republish done: rsmpiacl_create failed %d", e));
3195 3194 return (e);
3196 3195 }
3197 3196 rsmpi_old_acl = seg->s_acl_in;
3198 3197 seg->s_acl_in = rsmpi_new_acl;
3199 3198
3200 3199 e = adapter->rsmpi_ops->rsm_republish(seg->s_handle.out,
3201 3200 seg->s_acl_in, seg->s_acl_len,
3202 3201 RSM_RESOURCE_DONTWAIT, NULL);
3203 3202
3204 3203 if (e != RSM_SUCCESS) {
3205 3204 seg->s_acl = old_acl;
3206 3205 seg->s_acl_in = rsmpi_old_acl;
3207 3206 seg->s_acl_len = old_acl_len;
3208 3207 rsmseglock_release(seg);
3209 3208 rsmacl_free(new_acl, new_acl_len);
3210 3209 rsmpiacl_free(rsmpi_new_acl, new_acl_len);
3211 3210
3212 3211 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3213 3212 "rsm_republish done: rsmpi republish failed %d\n", e));
3214 3213 return (e);
3215 3214 }
3216 3215
3217 3216 /* create a tmp copy of the new acl */
3218 3217 tmp_acl_len = new_acl_len;
3219 3218 if (tmp_acl_len > 0) {
3220 3219 tmp_acl = kmem_zalloc(new_acl_len*sizeof (*tmp_acl), KM_SLEEP);
3221 3220 for (i = 0; i < tmp_acl_len; i++) {
3222 3221 tmp_acl[i].ae_node = new_acl[i].ae_node;
3223 3222 tmp_acl[i].ae_permission = new_acl[i].ae_permission;
3224 3223 }
3225 3224 /*
3226 3225 * The default permission of a node which was in the old
3227 3226 * ACL but not in the new ACL is 0 ie no access.
3228 3227 */
3229 3228 permission = 0;
3230 3229 } else {
3231 3230 /*
3232 3231 * NULL acl means all importers can connect and
3233 3232 * default permission will be owner creation umask
3234 3233 */
3235 3234 tmp_acl = NULL;
3236 3235 permission = seg->s_mode;
3237 3236 }
3238 3237
3239 3238 /* make other republishers to wait for republish to complete */
3240 3239 seg->s_flags |= RSM_REPUBLISH_WAIT;
3241 3240
3242 3241 rsmseglock_release(seg);
3243 3242
3244 3243 /* send the new perms to the importing nodes */
3245 3244 rsm_send_republish(key, tmp_acl, tmp_acl_len, permission);
3246 3245
3247 3246 rsmseglock_acquire(seg);
3248 3247 seg->s_flags &= ~RSM_REPUBLISH_WAIT;
3249 3248 /* wake up any one waiting for republish to complete */
3250 3249 cv_broadcast(&seg->s_cv);
3251 3250 rsmseglock_release(seg);
3252 3251
3253 3252 rsmacl_free(tmp_acl, tmp_acl_len);
3254 3253 rsmacl_free(old_acl, old_acl_len);
3255 3254 rsmpiacl_free(rsmpi_old_acl, old_acl_len);
3256 3255
3257 3256 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_republish done\n"));
3258 3257 return (DDI_SUCCESS);
3259 3258 }
3260 3259
3261 3260 static int
3262 3261 rsm_unpublish(rsmseg_t *seg, int mode)
3263 3262 {
3264 3263 rsmapi_access_entry_t *acl;
3265 3264 rsm_access_entry_t *rsmpi_acl;
3266 3265 int acl_len;
3267 3266 int e;
3268 3267 adapter_t *adapter;
3269 3268 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
3270 3269
3271 3270 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unpublish enter\n"));
3272 3271
3273 3272 if (seg->s_pid != ddi_get_pid() &&
3274 3273 ddi_get_pid() != 0) {
3275 3274 DBG_PRINTF((category, RSM_ERR,
3276 3275 "rsm_unpublish: Not creator\n"));
3277 3276 return (RSMERR_NOT_CREATOR);
3278 3277 }
3279 3278
3280 3279 rsmseglock_acquire(seg);
3281 3280 /*
3282 3281 * wait for QUIESCING to complete here before rsmexport_rm
3283 3282 * is called because the SUSPEND_COMPLETE mesg which changes
3284 3283 * the seg state from EXPORT_QUIESCING to EXPORT_QUIESCED and
3285 3284 * signals the cv_wait needs to find it in the hashtable.
3286 3285 */
3287 3286 while ((seg->s_state == RSM_STATE_EXPORT_QUIESCING) ||
3288 3287 ((seg->s_state == RSM_STATE_EXPORT) && (seg->s_rdmacnt > 0))) {
3289 3288 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
3290 3289 rsmseglock_release(seg);
3291 3290 DBG_PRINTF((category, RSM_ERR,
3292 3291 "rsm_unpublish done: cv_wait INTR qscing"
3293 3292 "getv/putv in progress"));
3294 3293 return (RSMERR_INTERRUPTED);
3295 3294 }
3296 3295 }
3297 3296
3298 3297 /* verify segment state */
3299 3298 if ((seg->s_state != RSM_STATE_EXPORT) &&
3300 3299 (seg->s_state != RSM_STATE_EXPORT_QUIESCED)) {
3301 3300 rsmseglock_release(seg);
3302 3301 DBG_PRINTF((category, RSM_ERR,
3303 3302 "rsm_unpublish done: bad state %x\n", seg->s_state));
3304 3303 return (RSMERR_SEG_NOT_PUBLISHED);
3305 3304 }
3306 3305
3307 3306 rsmseglock_release(seg);
3308 3307
3309 3308 rsmexport_rm(seg);
3310 3309
3311 3310 rsm_send_importer_disconnects(seg->s_segid, my_nodeid);
3312 3311
3313 3312 rsmseglock_acquire(seg);
3314 3313 /*
3315 3314 * wait for republish to complete
3316 3315 */
3317 3316 while ((seg->s_state == RSM_STATE_EXPORT) &&
3318 3317 (seg->s_flags & RSM_REPUBLISH_WAIT)) {
3319 3318 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
3320 3319 DBG_PRINTF((category, RSM_ERR,
3321 3320 "rsm_unpublish done: cv_wait INTR repubing"));
3322 3321 rsmseglock_release(seg);
3323 3322 return (RSMERR_INTERRUPTED);
3324 3323 }
3325 3324 }
3326 3325
3327 3326 if ((seg->s_state != RSM_STATE_EXPORT) &&
3328 3327 (seg->s_state != RSM_STATE_EXPORT_QUIESCED)) {
3329 3328 DBG_PRINTF((category, RSM_ERR,
3330 3329 "rsm_unpublish done: invalid state"));
3331 3330 rsmseglock_release(seg);
3332 3331 return (RSMERR_SEG_NOT_PUBLISHED);
3333 3332 }
3334 3333
3335 3334 /*
3336 3335 * check for putv/get surrogate segment which was not published
3337 3336 * to the driver.
3338 3337 *
3339 3338 * Be certain to see if there is an ACL first! If this segment was
3340 3339 * not published with an ACL, acl will be a null pointer. Check
3341 3340 * that before dereferencing it.
3342 3341 */
3343 3342 acl = seg->s_acl;
3344 3343 if (acl != (rsmapi_access_entry_t *)NULL) {
3345 3344 if (acl[0].ae_node == my_nodeid && acl[0].ae_permission == 0)
3346 3345 goto bypass;
3347 3346 }
3348 3347
3349 3348 /* The RSMPI unpublish/destroy has been done if seg is QUIESCED */
3350 3349 if (seg->s_state == RSM_STATE_EXPORT_QUIESCED)
3351 3350 goto bypass;
3352 3351
3353 3352 adapter = seg->s_adapter;
3354 3353 for (;;) {
3355 3354 if (seg->s_state != RSM_STATE_EXPORT) {
3356 3355 rsmseglock_release(seg);
3357 3356 DBG_PRINTF((category, RSM_ERR,
3358 3357 "rsm_unpublish done: bad state %x\n",
3359 3358 seg->s_state));
3360 3359 return (RSMERR_SEG_NOT_PUBLISHED);
3361 3360 }
3362 3361
3363 3362 /* unpublish from adapter */
3364 3363 e = adapter->rsmpi_ops->rsm_unpublish(seg->s_handle.out);
3365 3364
3366 3365 if (e == RSM_SUCCESS) {
3367 3366 break;
3368 3367 }
3369 3368
3370 3369 if (e == RSMERR_SEG_IN_USE && mode == 1) {
3371 3370 /*
3372 3371 * wait for unpublish to succeed, it's busy.
3373 3372 */
3374 3373 seg->s_flags |= RSM_EXPORT_WAIT;
3375 3374
3376 3375 /* wait for a max of 1 ms - this is an empirical */
3377 3376 /* value that was found by some minimal testing */
3378 3377 /* can be fine tuned when we have better numbers */
3379 3378 /* A long term fix would be to send cv_signal */
3380 3379 /* from the intr callback routine */
3381 3380 /* currently nobody signals this wait */
3382 3381 (void) cv_reltimedwait(&seg->s_cv, &seg->s_lock,
3383 3382 drv_usectohz(1000), TR_CLOCK_TICK);
3384 3383
3385 3384 DBG_PRINTF((category, RSM_ERR,
3386 3385 "rsm_unpublish: SEG_IN_USE\n"));
3387 3386
3388 3387 seg->s_flags &= ~RSM_EXPORT_WAIT;
3389 3388 } else {
3390 3389 if (mode == 1) {
3391 3390 DBG_PRINTF((category, RSM_ERR,
3392 3391 "rsm:rsmpi unpublish err %x\n", e));
3393 3392 seg->s_state = RSM_STATE_BIND;
3394 3393 }
3395 3394 rsmseglock_release(seg);
3396 3395 return (e);
3397 3396 }
3398 3397 }
3399 3398
3400 3399 /* Free segment */
3401 3400 e = adapter->rsmpi_ops->rsm_seg_destroy(seg->s_handle.out);
3402 3401
3403 3402 if (e != RSM_SUCCESS) {
3404 3403 DBG_PRINTF((category, RSM_ERR,
3405 3404 "rsm_unpublish: rsmpi destroy key=%x failed %x\n",
3406 3405 seg->s_key, e));
3407 3406 }
3408 3407
3409 3408 bypass:
3410 3409 acl = seg->s_acl;
3411 3410 rsmpi_acl = seg->s_acl_in;
3412 3411 acl_len = seg->s_acl_len;
3413 3412
3414 3413 seg->s_acl = NULL;
3415 3414 seg->s_acl_in = NULL;
3416 3415 seg->s_acl_len = 0;
3417 3416
3418 3417 if (seg->s_state == RSM_STATE_EXPORT) {
3419 3418 seg->s_state = RSM_STATE_BIND;
3420 3419 } else if (seg->s_state == RSM_STATE_EXPORT_QUIESCED) {
3421 3420 seg->s_state = RSM_STATE_BIND_QUIESCED;
3422 3421 cv_broadcast(&seg->s_cv);
3423 3422 }
3424 3423
3425 3424 rsmseglock_release(seg);
3426 3425
3427 3426 rsmacl_free(acl, acl_len);
3428 3427 rsmpiacl_free(rsmpi_acl, acl_len);
3429 3428
3430 3429 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unpublish done\n"));
3431 3430
3432 3431 return (DDI_SUCCESS);
3433 3432 }
3434 3433
3435 3434 /*
3436 3435 * Called from rsm_unpublish to force an unload and disconnection of all
3437 3436 * importers of the unpublished segment.
3438 3437 *
3439 3438 * First build the list of segments requiring a force disconnect, then
3440 3439 * send a request for each.
3441 3440 */
3442 3441 static void
3443 3442 rsm_send_importer_disconnects(rsm_memseg_id_t ex_segid,
3444 3443 rsm_node_id_t ex_nodeid)
3445 3444 {
3446 3445 rsmipc_request_t request;
3447 3446 importing_token_t *prev_token, *token, *tmp_token, *tokp;
3448 3447 importing_token_t *force_disconnect_list = NULL;
3449 3448 int index;
3450 3449
3451 3450 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
3452 3451 "rsm_send_importer_disconnects enter\n"));
3453 3452
3454 3453 index = rsmhash(ex_segid);
3455 3454
3456 3455 mutex_enter(&importer_list.lock);
3457 3456
3458 3457 prev_token = NULL;
3459 3458 token = importer_list.bucket[index];
3460 3459
3461 3460 while (token != NULL) {
3462 3461 if (token->key == ex_segid) {
3463 3462 /*
3464 3463 * take it off the importer list and add it
3465 3464 * to the force disconnect list.
3466 3465 */
3467 3466 if (prev_token == NULL)
3468 3467 importer_list.bucket[index] = token->next;
3469 3468 else
3470 3469 prev_token->next = token->next;
3471 3470 tmp_token = token;
3472 3471 token = token->next;
3473 3472 if (force_disconnect_list == NULL) {
3474 3473 force_disconnect_list = tmp_token;
3475 3474 tmp_token->next = NULL;
3476 3475 } else {
3477 3476 tokp = force_disconnect_list;
3478 3477 /*
3479 3478 * make sure that the tmp_token's node
3480 3479 * is not already on the force disconnect
3481 3480 * list.
3482 3481 */
3483 3482 while (tokp != NULL) {
3484 3483 if (tokp->importing_node ==
3485 3484 tmp_token->importing_node) {
3486 3485 break;
3487 3486 }
3488 3487 tokp = tokp->next;
3489 3488 }
3490 3489 if (tokp == NULL) {
3491 3490 tmp_token->next =
3492 3491 force_disconnect_list;
3493 3492 force_disconnect_list = tmp_token;
3494 3493 } else {
3495 3494 kmem_free((void *)tmp_token,
3496 3495 sizeof (*token));
3497 3496 }
3498 3497 }
3499 3498
3500 3499 } else {
3501 3500 prev_token = token;
3502 3501 token = token->next;
3503 3502 }
3504 3503 }
3505 3504 mutex_exit(&importer_list.lock);
3506 3505
3507 3506 token = force_disconnect_list;
3508 3507 while (token != NULL) {
3509 3508 if (token->importing_node == my_nodeid) {
3510 3509 rsm_force_unload(ex_nodeid, ex_segid,
3511 3510 DISCONNECT);
3512 3511 } else {
3513 3512 request.rsmipc_hdr.rsmipc_type =
3514 3513 RSMIPC_MSG_DISCONNECT;
3515 3514 request.rsmipc_key = token->key;
3516 3515 for (;;) {
3517 3516 if (rsmipc_send(token->importing_node,
3518 3517 &request,
3519 3518 RSM_NO_REPLY) == RSM_SUCCESS) {
3520 3519 break;
3521 3520 } else {
3522 3521 delay(drv_usectohz(10000));
3523 3522 }
3524 3523 }
3525 3524 }
3526 3525 tmp_token = token;
3527 3526 token = token->next;
3528 3527 kmem_free((void *)tmp_token, sizeof (*token));
3529 3528 }
3530 3529
3531 3530 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
3532 3531 "rsm_send_importer_disconnects done\n"));
3533 3532 }
3534 3533
3535 3534 /*
3536 3535 * This function is used as a callback for unlocking the pages locked
3537 3536 * down by a process which then does a fork or an exec.
3538 3537 * It marks the export segments corresponding to umem cookie given by
3539 3538 * the *arg to be in a ZOMBIE state(by calling rsmseg_close to be
3540 3539 * destroyed later when an rsm_close occurs).
3541 3540 */
3542 3541 static void
3543 3542 rsm_export_force_destroy(ddi_umem_cookie_t *ck)
3544 3543 {
3545 3544 rsmresource_blk_t *blk;
3546 3545 rsmresource_t *p;
3547 3546 rsmseg_t *eseg = NULL;
3548 3547 int i, j;
3549 3548 int found = 0;
3550 3549
3551 3550 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
3552 3551 "rsm_export_force_destroy enter\n"));
3553 3552
3554 3553 /*
3555 3554 * Walk the resource list and locate the export segment (either
3556 3555 * in the BIND or the EXPORT state) which corresponds to the
3557 3556 * ddi_umem_cookie_t being freed up, and call rsmseg_close.
3558 3557 * Change the state to ZOMBIE by calling rsmseg_close with the
3559 3558 * force_flag argument (the second argument) set to 1. Also,
3560 3559 * unpublish and unbind the segment, but don't free it. Free it
3561 3560 * only on a rsm_close call for the segment.
3562 3561 */
3563 3562 rw_enter(&rsm_resource.rsmrc_lock, RW_READER);
3564 3563
3565 3564 for (i = 0; i < rsm_resource.rsmrc_len; i++) {
3566 3565 blk = rsm_resource.rsmrc_root[i];
3567 3566 if (blk == NULL) {
3568 3567 continue;
3569 3568 }
3570 3569
3571 3570 for (j = 0; j < RSMRC_BLKSZ; j++) {
3572 3571 p = blk->rsmrcblk_blks[j];
3573 3572 if ((p != NULL) && (p != RSMRC_RESERVED) &&
3574 3573 (p->rsmrc_type == RSM_RESOURCE_EXPORT_SEGMENT)) {
3575 3574 eseg = (rsmseg_t *)p;
3576 3575 if (eseg->s_cookie != ck)
3577 3576 continue; /* continue searching */
3578 3577 /*
3579 3578 * Found the segment, set flag to indicate
3580 3579 * force destroy processing is in progress
3581 3580 */
3582 3581 rsmseglock_acquire(eseg);
3583 3582 eseg->s_flags |= RSM_FORCE_DESTROY_WAIT;
3584 3583 rsmseglock_release(eseg);
3585 3584 found = 1;
3586 3585 break;
3587 3586 }
3588 3587 }
3589 3588
3590 3589 if (found)
3591 3590 break;
3592 3591 }
3593 3592
3594 3593 rw_exit(&rsm_resource.rsmrc_lock);
3595 3594
3596 3595 if (found) {
3597 3596 ASSERT(eseg != NULL);
3598 3597 /* call rsmseg_close with force flag set to 1 */
3599 3598 rsmseg_close(eseg, 1);
3600 3599 /*
3601 3600 * force destroy processing done, clear flag and signal any
3602 3601 * thread waiting in rsmseg_close.
3603 3602 */
3604 3603 rsmseglock_acquire(eseg);
3605 3604 eseg->s_flags &= ~RSM_FORCE_DESTROY_WAIT;
3606 3605 cv_broadcast(&eseg->s_cv);
3607 3606 rsmseglock_release(eseg);
3608 3607 }
3609 3608
3610 3609 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
3611 3610 "rsm_export_force_destroy done\n"));
3612 3611 }
3613 3612
3614 3613 /* ******************************* Remote Calls *********************** */
3615 3614 static void
3616 3615 rsm_intr_segconnect(rsm_node_id_t src, rsmipc_request_t *req)
3617 3616 {
3618 3617 rsmipc_reply_t reply;
3619 3618 DBG_DEFINE(category,
3620 3619 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3621 3620
3622 3621 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3623 3622 "rsm_intr_segconnect enter\n"));
3624 3623
3625 3624 reply.rsmipc_status = (short)rsmsegacl_validate(req, src, &reply);
3626 3625
3627 3626 reply.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_REPLY;
3628 3627 reply.rsmipc_hdr.rsmipc_cookie = req->rsmipc_hdr.rsmipc_cookie;
3629 3628
3630 3629 (void) rsmipc_send(src, NULL, &reply);
3631 3630
3632 3631 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3633 3632 "rsm_intr_segconnect done\n"));
3634 3633 }
3635 3634
3636 3635
|
↓ open down ↓ |
1202 lines elided |
↑ open up ↑ |
3637 3636 /*
3638 3637 * When an exported segment is unpublished the exporter sends an ipc
3639 3638 * message (RSMIPC_MSG_DISCONNECT) to all importers. The recv ipc dispatcher
3640 3639 * calls this function. The import list is scanned; segments which match the
3641 3640 * exported segment id are unloaded and disconnected.
3642 3641 *
3643 3642 * Will also be called from rsm_rebind with disconnect_flag FALSE.
3644 3643 *
3645 3644 */
3646 3645 static void
3647 -rsm_force_unload(rsm_node_id_t src_nodeid,
3648 - rsm_memseg_id_t ex_segid,
3646 +rsm_force_unload(rsm_node_id_t src_nodeid, rsm_memseg_id_t ex_segid,
3649 3647 boolean_t disconnect_flag)
3650 -
3651 3648 {
3652 3649 rsmresource_t *p = NULL;
3653 3650 rsmhash_table_t *rhash = &rsm_import_segs;
3654 3651 uint_t index;
3655 3652 DBG_DEFINE(category,
3656 3653 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3657 3654
3658 3655 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_force_unload enter\n"));
3659 3656
3660 3657 index = rsmhash(ex_segid);
3661 3658
3662 3659 rw_enter(&rhash->rsmhash_rw, RW_READER);
3663 3660
3664 3661 p = rsmhash_getbkt(rhash, index);
3665 3662
3666 3663 for (; p; p = p->rsmrc_next) {
3667 3664 rsmseg_t *seg = (rsmseg_t *)p;
3668 3665 if ((seg->s_segid == ex_segid) && (seg->s_node == src_nodeid)) {
3669 3666 /*
3670 3667 * In order to make rsmseg_unload and rsm_force_unload
3671 3668 * thread safe, acquire the segment lock here.
3672 3669 * rsmseg_unload is responsible for releasing the lock.
3673 3670 * rsmseg_unload releases the lock just before a call
3674 3671 * to rsmipc_send or in case of an early exit which
3675 3672 * occurs if the segment was in the state
3676 3673 * RSM_STATE_CONNECTING or RSM_STATE_NEW.
3677 3674 */
3678 3675 rsmseglock_acquire(seg);
3679 3676 if (disconnect_flag)
3680 3677 seg->s_flags |= RSM_FORCE_DISCONNECT;
3681 3678 rsmseg_unload(seg);
3682 3679 }
3683 3680 }
3684 3681 rw_exit(&rhash->rsmhash_rw);
3685 3682
3686 3683 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_force_unload done\n"));
3687 3684 }
3688 3685
3689 3686 static void
3690 3687 rsm_intr_reply(rsmipc_msghdr_t *msg)
3691 3688 {
3692 3689 /*
3693 3690 * Find slot for cookie in reply.
3694 3691 * Match sequence with sequence in cookie
3695 3692 * If no match; return
3696 3693 * Try to grap lock of slot, if locked return
3697 3694 * copy data into reply slot area
3698 3695 * signal waiter
3699 3696 */
3700 3697 rsmipc_slot_t *slot;
3701 3698 rsmipc_cookie_t *cookie;
3702 3699 void *data = (void *) msg;
3703 3700 size_t size = sizeof (rsmipc_reply_t);
3704 3701 DBG_DEFINE(category,
3705 3702 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3706 3703
3707 3704 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_reply enter\n"));
3708 3705
3709 3706 cookie = &msg->rsmipc_cookie;
3710 3707 if (cookie->ic.index >= RSMIPC_SZ) {
3711 3708 DBG_PRINTF((category, RSM_ERR,
3712 3709 "rsm: rsm_intr_reply bad cookie %d\n", cookie->ic.index));
3713 3710 return;
3714 3711 }
3715 3712
3716 3713 ASSERT(cookie->ic.index < RSMIPC_SZ);
3717 3714 slot = &rsm_ipc.slots[cookie->ic.index];
3718 3715 mutex_enter(&slot->rsmipc_lock);
3719 3716 if (slot->rsmipc_cookie.value == cookie->value) {
3720 3717 /* found a match */
3721 3718 if (RSMIPC_GET(slot, RSMIPC_PENDING)) {
3722 3719 bcopy(data, slot->rsmipc_data, size);
3723 3720 RSMIPC_CLEAR(slot, RSMIPC_PENDING);
3724 3721 cv_signal(&slot->rsmipc_cv);
3725 3722 }
3726 3723 } else {
3727 3724 DBG_PRINTF((category, RSM_DEBUG,
3728 3725 "rsm: rsm_intr_reply mismatched reply %d\n",
3729 3726 cookie->ic.index));
3730 3727 }
3731 3728 mutex_exit(&slot->rsmipc_lock);
3732 3729 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_reply done\n"));
3733 3730 }
3734 3731
3735 3732 /*
3736 3733 * This function gets dispatched on the worker thread when we receive
3737 3734 * the SQREADY message. This function sends the SQREADY_ACK message.
3738 3735 */
3739 3736 static void
3740 3737 rsm_sqready_ack_deferred(void *arg)
3741 3738 {
3742 3739 path_t *path = (path_t *)arg;
3743 3740 DBG_DEFINE(category,
3744 3741 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3745 3742
3746 3743 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3747 3744 "rsm_sqready_ack_deferred enter\n"));
3748 3745
3749 3746 mutex_enter(&path->mutex);
3750 3747
3751 3748 /*
3752 3749 * If path is not active no point in sending the ACK
3753 3750 * because the whole SQREADY protocol will again start
3754 3751 * when the path becomes active.
3755 3752 */
3756 3753 if (path->state != RSMKA_PATH_ACTIVE) {
3757 3754 /*
3758 3755 * decrement the path refcnt incremented in rsm_proc_sqready
3759 3756 */
3760 3757 PATH_RELE_NOLOCK(path);
3761 3758 mutex_exit(&path->mutex);
3762 3759 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3763 3760 "rsm_sqready_ack_deferred done:!ACTIVE\n"));
3764 3761 return;
3765 3762 }
3766 3763
3767 3764 /* send an SQREADY_ACK message */
3768 3765 (void) rsmipc_send_controlmsg(path, RSMIPC_MSG_SQREADY_ACK);
3769 3766
3770 3767 /* initialize credits to the max level */
3771 3768 path->sendq_token.msgbuf_avail = RSMIPC_MAX_MESSAGES;
3772 3769
3773 3770 /* wake up any send that is waiting for credits */
3774 3771 cv_broadcast(&path->sendq_token.sendq_cv);
3775 3772
3776 3773 /*
3777 3774 * decrement the path refcnt since we incremented it in
3778 3775 * rsm_proc_sqready
3779 3776 */
3780 3777 PATH_RELE_NOLOCK(path);
3781 3778
3782 3779 mutex_exit(&path->mutex);
3783 3780
3784 3781 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3785 3782 "rsm_sqready_ack_deferred done\n"));
3786 3783 }
3787 3784
3788 3785 /*
3789 3786 * Process the SQREADY message
3790 3787 */
3791 3788 static void
3792 3789 rsm_proc_sqready(rsmipc_controlmsg_t *msg, rsm_addr_t src_hwaddr,
3793 3790 rsm_intr_hand_arg_t arg)
3794 3791 {
3795 3792 rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)msg;
3796 3793 srv_handler_arg_t *hdlr_argp = (srv_handler_arg_t *)arg;
3797 3794 path_t *path;
3798 3795 DBG_DEFINE(category,
3799 3796 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3800 3797
3801 3798 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_proc_sqready enter\n"));
3802 3799
3803 3800 /* look up the path - incr the path refcnt */
3804 3801 path = rsm_find_path(hdlr_argp->adapter_name,
3805 3802 hdlr_argp->adapter_instance, src_hwaddr);
3806 3803
3807 3804 /*
3808 3805 * No path exists or path is not active - drop the message
3809 3806 */
3810 3807 if (path == NULL) {
3811 3808 DBG_PRINTF((category, RSM_DEBUG,
3812 3809 "rsm_proc_sqready done: msg dropped no path\n"));
3813 3810 return;
3814 3811 }
3815 3812
3816 3813 mutex_exit(&path->mutex);
3817 3814
3818 3815 /* drain any tasks from the previous incarnation */
3819 3816 taskq_wait(path->recv_taskq);
3820 3817
3821 3818 mutex_enter(&path->mutex);
3822 3819 /*
3823 3820 * If we'd sent an SQREADY message and were waiting for SQREADY_ACK
3824 3821 * in the meanwhile we received an SQREADY message, blindly reset
3825 3822 * the WAIT_FOR_SQACK flag because we'll just send SQREADY_ACK
3826 3823 * and forget about the SQREADY that we sent.
3827 3824 */
3828 3825 path->flags &= ~RSMKA_WAIT_FOR_SQACK;
3829 3826
3830 3827 if (path->state != RSMKA_PATH_ACTIVE) {
3831 3828 /* decr refcnt and drop the mutex */
3832 3829 PATH_RELE_NOLOCK(path);
3833 3830 mutex_exit(&path->mutex);
3834 3831 DBG_PRINTF((category, RSM_DEBUG,
3835 3832 "rsm_proc_sqready done: msg dropped path !ACTIVE\n"));
3836 3833 return;
3837 3834 }
3838 3835
3839 3836 DBG_PRINTF((category, RSM_DEBUG, "rsm_proc_sqready:path=%lx "
3840 3837 " src=%lx:%llx\n", path, msghdr->rsmipc_src, src_hwaddr));
3841 3838
3842 3839 /*
3843 3840 * The sender's local incarnation number is our remote incarnation
3844 3841 * number save it in the path data structure
3845 3842 */
3846 3843 path->remote_incn = msg->rsmipc_local_incn;
3847 3844 path->sendq_token.msgbuf_avail = 0;
3848 3845 path->procmsg_cnt = 0;
3849 3846
3850 3847 /*
3851 3848 * path is active - dispatch task to send SQREADY_ACK - remember
3852 3849 * RSMPI calls can't be done in interrupt context
3853 3850 *
3854 3851 * We can use the recv_taskq to send because the remote endpoint
3855 3852 * cannot start sending messages till it receives SQREADY_ACK hence
3856 3853 * at this point there are no tasks on recv_taskq.
3857 3854 *
3858 3855 * The path refcnt will be decremented in rsm_sqready_ack_deferred.
3859 3856 */
3860 3857 (void) taskq_dispatch(path->recv_taskq,
3861 3858 rsm_sqready_ack_deferred, path, KM_NOSLEEP);
3862 3859
3863 3860 mutex_exit(&path->mutex);
3864 3861
3865 3862
3866 3863 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_proc_sqready done\n"));
3867 3864 }
3868 3865
3869 3866 /*
3870 3867 * Process the SQREADY_ACK message
3871 3868 */
3872 3869 static void
3873 3870 rsm_proc_sqready_ack(rsmipc_controlmsg_t *msg, rsm_addr_t src_hwaddr,
3874 3871 rsm_intr_hand_arg_t arg)
3875 3872 {
3876 3873 rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)msg;
3877 3874 srv_handler_arg_t *hdlr_argp = (srv_handler_arg_t *)arg;
3878 3875 path_t *path;
3879 3876 DBG_DEFINE(category,
3880 3877 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3881 3878
3882 3879 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3883 3880 "rsm_proc_sqready_ack enter\n"));
3884 3881
3885 3882 /* look up the path - incr the path refcnt */
3886 3883 path = rsm_find_path(hdlr_argp->adapter_name,
3887 3884 hdlr_argp->adapter_instance, src_hwaddr);
3888 3885
3889 3886 /*
3890 3887 * drop the message if - no path exists or path is not active
3891 3888 * or if its not waiting for SQREADY_ACK message
3892 3889 */
3893 3890 if (path == NULL) {
3894 3891 DBG_PRINTF((category, RSM_DEBUG,
3895 3892 "rsm_proc_sqready_ack done: msg dropped no path\n"));
3896 3893 return;
3897 3894 }
3898 3895
3899 3896 if ((path->state != RSMKA_PATH_ACTIVE) ||
3900 3897 !(path->flags & RSMKA_WAIT_FOR_SQACK)) {
3901 3898 /* decrement the refcnt */
3902 3899 PATH_RELE_NOLOCK(path);
3903 3900 mutex_exit(&path->mutex);
3904 3901 DBG_PRINTF((category, RSM_DEBUG,
3905 3902 "rsm_proc_sqready_ack done: msg dropped\n"));
3906 3903 return;
3907 3904 }
3908 3905
3909 3906 /*
3910 3907 * Check if this message is in response to the last RSMIPC_MSG_SQREADY
3911 3908 * sent, if not drop it.
3912 3909 */
3913 3910 if (path->local_incn != msghdr->rsmipc_incn) {
3914 3911 /* decrement the refcnt */
3915 3912 PATH_RELE_NOLOCK(path);
3916 3913 mutex_exit(&path->mutex);
3917 3914 DBG_PRINTF((category, RSM_DEBUG,
3918 3915 "rsm_proc_sqready_ack done: msg old incn %lld\n",
3919 3916 msghdr->rsmipc_incn));
3920 3917 return;
3921 3918 }
3922 3919
3923 3920 DBG_PRINTF((category, RSM_DEBUG, "rsm_proc_sqready_ack:path=%lx "
3924 3921 " src=%lx:%llx\n", path, msghdr->rsmipc_src, src_hwaddr));
3925 3922
3926 3923 /*
3927 3924 * clear the WAIT_FOR_SQACK flag since we have recvd the ack
3928 3925 */
3929 3926 path->flags &= ~RSMKA_WAIT_FOR_SQACK;
3930 3927
3931 3928 /* save the remote sendq incn number */
3932 3929 path->remote_incn = msg->rsmipc_local_incn;
3933 3930
3934 3931 /* initialize credits to the max level */
3935 3932 path->sendq_token.msgbuf_avail = RSMIPC_MAX_MESSAGES;
3936 3933
3937 3934 /* wake up any send that is waiting for credits */
3938 3935 cv_broadcast(&path->sendq_token.sendq_cv);
3939 3936
3940 3937 /* decrement the refcnt */
3941 3938 PATH_RELE_NOLOCK(path);
3942 3939
3943 3940 mutex_exit(&path->mutex);
3944 3941
3945 3942 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3946 3943 "rsm_proc_sqready_ack done\n"));
3947 3944 }
3948 3945
3949 3946 /*
3950 3947 * process the RSMIPC_MSG_CREDIT message
3951 3948 */
3952 3949 static void
3953 3950 rsm_add_credits(rsmipc_controlmsg_t *msg, rsm_addr_t src_hwaddr,
3954 3951 rsm_intr_hand_arg_t arg)
3955 3952 {
3956 3953 rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)msg;
3957 3954 srv_handler_arg_t *hdlr_argp = (srv_handler_arg_t *)arg;
3958 3955 path_t *path;
3959 3956 DBG_DEFINE(category,
3960 3957 RSM_KERNEL_AGENT | RSM_FUNC_ALL |
3961 3958 RSM_INTR_CALLBACK | RSM_FLOWCONTROL);
3962 3959
3963 3960 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_add_credits enter\n"));
3964 3961
3965 3962 /* look up the path - incr the path refcnt */
3966 3963 path = rsm_find_path(hdlr_argp->adapter_name,
3967 3964 hdlr_argp->adapter_instance, src_hwaddr);
3968 3965
3969 3966 if (path == NULL) {
3970 3967 DBG_PRINTF((category, RSM_DEBUG,
3971 3968 "rsm_add_credits enter: path not found\n"));
3972 3969 return;
3973 3970 }
3974 3971
3975 3972 /* the path is not active - discard credits */
3976 3973 if (path->state != RSMKA_PATH_ACTIVE) {
3977 3974 PATH_RELE_NOLOCK(path);
3978 3975 mutex_exit(&path->mutex);
3979 3976 DBG_PRINTF((category, RSM_DEBUG,
3980 3977 "rsm_add_credits enter:path=%lx !ACTIVE\n", path));
3981 3978 return;
3982 3979 }
3983 3980
3984 3981 /*
3985 3982 * Check if these credits are for current incarnation of the path.
3986 3983 */
3987 3984 if (path->local_incn != msghdr->rsmipc_incn) {
3988 3985 /* decrement the refcnt */
3989 3986 PATH_RELE_NOLOCK(path);
3990 3987 mutex_exit(&path->mutex);
3991 3988 DBG_PRINTF((category, RSM_DEBUG,
3992 3989 "rsm_add_credits enter: old incn %lld\n",
3993 3990 msghdr->rsmipc_incn));
3994 3991 return;
3995 3992 }
3996 3993
3997 3994 DBG_PRINTF((category, RSM_DEBUG,
3998 3995 "rsm_add_credits:path=%lx new-creds=%d "
3999 3996 "curr credits=%d src=%lx:%llx\n", path, msg->rsmipc_credits,
4000 3997 path->sendq_token.msgbuf_avail, msghdr->rsmipc_src,
4001 3998 src_hwaddr));
4002 3999
4003 4000
4004 4001 /* add credits to the path's sendq */
4005 4002 path->sendq_token.msgbuf_avail += msg->rsmipc_credits;
4006 4003
4007 4004 ASSERT(path->sendq_token.msgbuf_avail <= RSMIPC_MAX_MESSAGES);
4008 4005
4009 4006 /* wake up any send that is waiting for credits */
4010 4007 cv_broadcast(&path->sendq_token.sendq_cv);
4011 4008
4012 4009 /* decrement the refcnt */
4013 4010 PATH_RELE_NOLOCK(path);
4014 4011
4015 4012 mutex_exit(&path->mutex);
4016 4013
4017 4014 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_add_credits done\n"));
4018 4015 }
4019 4016
4020 4017 static void
4021 4018 rsm_intr_event(rsmipc_request_t *msg)
4022 4019 {
4023 4020 rsmseg_t *seg;
4024 4021 rsmresource_t *p;
4025 4022 rsm_node_id_t src_node;
4026 4023 DBG_DEFINE(category,
4027 4024 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4028 4025
4029 4026 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_event enter\n"));
4030 4027
4031 4028 src_node = msg->rsmipc_hdr.rsmipc_src;
4032 4029
4033 4030 if ((seg = msg->rsmipc_segment_cookie) != NULL) {
4034 4031 /* This is for an import segment */
4035 4032 uint_t hashval = rsmhash(msg->rsmipc_key);
4036 4033
4037 4034 rw_enter(&rsm_import_segs.rsmhash_rw, RW_READER);
4038 4035
4039 4036 p = (rsmresource_t *)rsmhash_getbkt(&rsm_import_segs, hashval);
4040 4037
4041 4038 for (; p; p = p->rsmrc_next) {
4042 4039 if ((p->rsmrc_key == msg->rsmipc_key) &&
4043 4040 (p->rsmrc_node == src_node)) {
4044 4041 seg = (rsmseg_t *)p;
4045 4042 rsmseglock_acquire(seg);
4046 4043
4047 4044 atomic_inc_32(&seg->s_pollevent);
4048 4045
4049 4046 if (seg->s_pollflag & RSM_SEGMENT_POLL)
4050 4047 pollwakeup(&seg->s_poll, POLLRDNORM);
4051 4048
4052 4049 rsmseglock_release(seg);
4053 4050 }
4054 4051 }
4055 4052
4056 4053 rw_exit(&rsm_import_segs.rsmhash_rw);
4057 4054 } else {
4058 4055 /* This is for an export segment */
4059 4056 seg = rsmexport_lookup(msg->rsmipc_key);
4060 4057 if (!seg) {
4061 4058 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4062 4059 "rsm_intr_event done: exp seg not found\n"));
4063 4060 return;
4064 4061 }
4065 4062
4066 4063 ASSERT(rsmseglock_held(seg));
4067 4064
4068 4065 atomic_inc_32(&seg->s_pollevent);
4069 4066
4070 4067 /*
4071 4068 * We must hold the segment lock here, or else the segment
4072 4069 * can be freed while pollwakeup is using it. This implies
4073 4070 * that we MUST NOT grab the segment lock during rsm_chpoll,
4074 4071 * as outlined in the chpoll(2) man page.
4075 4072 */
4076 4073 if (seg->s_pollflag & RSM_SEGMENT_POLL)
4077 4074 pollwakeup(&seg->s_poll, POLLRDNORM);
4078 4075
4079 4076 rsmseglock_release(seg);
4080 4077 }
4081 4078
4082 4079 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_event done\n"));
4083 4080 }
4084 4081
4085 4082 /*
4086 4083 * The exporter did a republish and changed the ACL - this change is only
4087 4084 * visible to new importers.
4088 4085 */
4089 4086 static void
4090 4087 importer_update(rsm_node_id_t src_node, rsm_memseg_id_t key,
4091 4088 rsm_permission_t perm)
4092 4089 {
4093 4090
4094 4091 rsmresource_t *p;
4095 4092 rsmseg_t *seg;
4096 4093 uint_t hashval = rsmhash(key);
4097 4094 DBG_DEFINE(category,
4098 4095 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4099 4096
4100 4097 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_update enter\n"));
4101 4098
4102 4099 rw_enter(&rsm_import_segs.rsmhash_rw, RW_READER);
4103 4100
4104 4101 p = (rsmresource_t *)rsmhash_getbkt(&rsm_import_segs, hashval);
4105 4102
4106 4103 for (; p; p = p->rsmrc_next) {
4107 4104 /*
4108 4105 * find the importer and update the permission in the shared
4109 4106 * data structure. Any new importers will use the new perms
4110 4107 */
4111 4108 if ((p->rsmrc_key == key) && (p->rsmrc_node == src_node)) {
4112 4109 seg = (rsmseg_t *)p;
4113 4110
4114 4111 rsmseglock_acquire(seg);
4115 4112 rsmsharelock_acquire(seg);
4116 4113 seg->s_share->rsmsi_mode = perm;
4117 4114 rsmsharelock_release(seg);
4118 4115 rsmseglock_release(seg);
4119 4116
4120 4117 break;
4121 4118 }
4122 4119 }
4123 4120
4124 4121 rw_exit(&rsm_import_segs.rsmhash_rw);
4125 4122
4126 4123 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_update done\n"));
4127 4124 }
4128 4125
4129 4126 void
4130 4127 rsm_suspend_complete(rsm_node_id_t src_node, int flag)
4131 4128 {
4132 4129 int done = 1; /* indicate all SUSPENDS have been acked */
4133 4130 list_element_t *elem;
4134 4131 DBG_DEFINE(category,
4135 4132 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4136 4133
4137 4134 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4138 4135 "rsm_suspend_complete enter\n"));
4139 4136
4140 4137 mutex_enter(&rsm_suspend_list.list_lock);
4141 4138
4142 4139 if (rsm_suspend_list.list_head == NULL) {
4143 4140 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4144 4141 "rsm_suspend_complete done: suspend_list is empty\n"));
4145 4142 mutex_exit(&rsm_suspend_list.list_lock);
4146 4143 return;
4147 4144 }
4148 4145
4149 4146 elem = rsm_suspend_list.list_head;
4150 4147 while (elem != NULL) {
4151 4148 if (elem->nodeid == src_node) {
4152 4149 /* clear the pending flag for the node */
4153 4150 elem->flags &= ~RSM_SUSPEND_ACKPENDING;
4154 4151 elem->flags |= flag;
4155 4152 }
4156 4153
4157 4154 if (done && (elem->flags & RSM_SUSPEND_ACKPENDING))
4158 4155 done = 0; /* still some nodes have not yet ACKED */
4159 4156
4160 4157 elem = elem->next;
4161 4158 }
4162 4159
4163 4160 mutex_exit(&rsm_suspend_list.list_lock);
4164 4161
4165 4162 if (!done) {
4166 4163 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4167 4164 "rsm_suspend_complete done: acks pending\n"));
4168 4165 return;
4169 4166 }
4170 4167 /*
4171 4168 * Now that we are done with suspending all the remote importers
4172 4169 * time to quiesce the local exporters
4173 4170 */
4174 4171 exporter_quiesce();
4175 4172
4176 4173 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4177 4174 "rsm_suspend_complete done\n"));
4178 4175 }
4179 4176
4180 4177 static void
4181 4178 exporter_quiesce()
4182 4179 {
4183 4180 int i, e;
4184 4181 rsmresource_t *current;
4185 4182 rsmseg_t *seg;
4186 4183 adapter_t *adapter;
4187 4184 DBG_DEFINE(category,
4188 4185 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4189 4186
4190 4187 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "exporter_quiesce enter\n"));
4191 4188 /*
4192 4189 * The importers send a SUSPEND_COMPLETE to the exporter node
4193 4190 * Unpublish, unbind the export segment and
4194 4191 * move the segments to the EXPORT_QUIESCED state
4195 4192 */
4196 4193
4197 4194 rw_enter(&rsm_export_segs.rsmhash_rw, RW_READER);
4198 4195
4199 4196 for (i = 0; i < rsm_hash_size; i++) {
4200 4197 current = rsm_export_segs.bucket[i];
4201 4198 while (current != NULL) {
4202 4199 seg = (rsmseg_t *)current;
4203 4200 rsmseglock_acquire(seg);
4204 4201 if (current->rsmrc_state ==
4205 4202 RSM_STATE_EXPORT_QUIESCING) {
4206 4203 adapter = seg->s_adapter;
4207 4204 /*
4208 4205 * some local memory handles are not published
4209 4206 * check if it was published
4210 4207 */
4211 4208 if ((seg->s_acl == NULL) ||
4212 4209 (seg->s_acl[0].ae_node != my_nodeid) ||
4213 4210 (seg->s_acl[0].ae_permission != 0)) {
4214 4211
4215 4212 e = adapter->rsmpi_ops->rsm_unpublish(
4216 4213 seg->s_handle.out);
4217 4214 DBG_PRINTF((category, RSM_DEBUG,
4218 4215 "exporter_quiesce:unpub %d\n", e));
4219 4216
4220 4217 e = adapter->rsmpi_ops->rsm_seg_destroy(
4221 4218 seg->s_handle.out);
4222 4219
4223 4220 DBG_PRINTF((category, RSM_DEBUG,
4224 4221 "exporter_quiesce:destroy %d\n",
4225 4222 e));
4226 4223 }
4227 4224
4228 4225 (void) rsm_unbind_pages(seg);
4229 4226 seg->s_state = RSM_STATE_EXPORT_QUIESCED;
4230 4227 cv_broadcast(&seg->s_cv);
4231 4228 }
4232 4229 rsmseglock_release(seg);
4233 4230 current = current->rsmrc_next;
4234 4231 }
4235 4232 }
4236 4233 rw_exit(&rsm_export_segs.rsmhash_rw);
4237 4234
4238 4235 /*
4239 4236 * All the local segments we are done with the pre-del processing
4240 4237 * - time to move to PREDEL_COMPLETED.
4241 4238 */
4242 4239
4243 4240 mutex_enter(&rsm_drv_data.drv_lock);
4244 4241
4245 4242 ASSERT(rsm_drv_data.drv_state == RSM_DRV_PREDEL_STARTED);
4246 4243
4247 4244 rsm_drv_data.drv_state = RSM_DRV_PREDEL_COMPLETED;
4248 4245
4249 4246 cv_broadcast(&rsm_drv_data.drv_cv);
4250 4247
4251 4248 mutex_exit(&rsm_drv_data.drv_lock);
4252 4249
4253 4250 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "exporter_quiesce done\n"));
4254 4251 }
4255 4252
4256 4253 static void
4257 4254 importer_suspend(rsm_node_id_t src_node)
4258 4255 {
4259 4256 int i;
4260 4257 int susp_flg; /* true means already suspended */
4261 4258 int num_importers;
4262 4259 rsmresource_t *p = NULL, *curp;
4263 4260 rsmhash_table_t *rhash = &rsm_import_segs;
4264 4261 rsmseg_t *seg;
4265 4262 rsmipc_request_t request;
4266 4263 DBG_DEFINE(category,
4267 4264 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4268 4265
4269 4266 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_suspend enter\n"));
4270 4267
4271 4268 rw_enter(&rhash->rsmhash_rw, RW_READER);
4272 4269 for (i = 0; i < rsm_hash_size; i++) {
4273 4270 p = rhash->bucket[i];
4274 4271
4275 4272 /*
4276 4273 * Suspend all importers with same <node, key> pair.
4277 4274 * After the last one of the shared importers has been
4278 4275 * suspended - suspend the shared mappings/connection.
4279 4276 */
4280 4277 for (; p; p = p->rsmrc_next) {
4281 4278 rsmseg_t *first = (rsmseg_t *)p;
4282 4279 if ((first->s_node != src_node) ||
4283 4280 (first->s_state == RSM_STATE_DISCONNECT))
4284 4281 continue; /* go to next entry */
4285 4282 /*
4286 4283 * search the rest of the bucket for
4287 4284 * other siblings (imprtrs with the same key)
4288 4285 * of "first" and suspend them.
4289 4286 * All importers with same key fall in
4290 4287 * the same bucket.
4291 4288 */
4292 4289 num_importers = 0;
4293 4290 for (curp = p; curp; curp = curp->rsmrc_next) {
4294 4291 seg = (rsmseg_t *)curp;
4295 4292
4296 4293 rsmseglock_acquire(seg);
4297 4294
4298 4295 if ((seg->s_node != first->s_node) ||
4299 4296 (seg->s_key != first->s_key) ||
4300 4297 (seg->s_state == RSM_STATE_DISCONNECT)) {
4301 4298 /*
4302 4299 * either not a peer segment or its a
4303 4300 * disconnected segment - skip it
4304 4301 */
4305 4302 rsmseglock_release(seg);
4306 4303 continue;
4307 4304 }
4308 4305
4309 4306 rsmseg_suspend(seg, &susp_flg);
4310 4307
4311 4308 if (susp_flg) { /* seg already suspended */
4312 4309 rsmseglock_release(seg);
4313 4310 break; /* the inner for loop */
4314 4311 }
4315 4312
4316 4313 num_importers++;
4317 4314 rsmsharelock_acquire(seg);
4318 4315 /*
4319 4316 * we've processed all importers that are
4320 4317 * siblings of "first"
4321 4318 */
4322 4319 if (num_importers ==
4323 4320 seg->s_share->rsmsi_refcnt) {
4324 4321 rsmsharelock_release(seg);
4325 4322 rsmseglock_release(seg);
4326 4323 break;
4327 4324 }
4328 4325 rsmsharelock_release(seg);
4329 4326 rsmseglock_release(seg);
4330 4327 }
4331 4328
4332 4329 /*
4333 4330 * All the importers with the same key and
4334 4331 * nodeid as "first" have been suspended.
4335 4332 * Now suspend the shared connect/mapping.
4336 4333 * This is done only once.
4337 4334 */
4338 4335 if (!susp_flg) {
4339 4336 rsmsegshare_suspend(seg);
4340 4337 }
4341 4338 }
4342 4339 }
4343 4340
4344 4341 rw_exit(&rhash->rsmhash_rw);
4345 4342
4346 4343 /* send an ACK for SUSPEND message */
4347 4344 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_SUSPEND_DONE;
4348 4345 (void) rsmipc_send(src_node, &request, RSM_NO_REPLY);
4349 4346
4350 4347
4351 4348 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_suspend done\n"));
4352 4349
4353 4350 }
4354 4351
4355 4352 static void
4356 4353 rsmseg_suspend(rsmseg_t *seg, int *susp_flg)
4357 4354 {
4358 4355 int recheck_state;
4359 4356 rsmcookie_t *hdl;
4360 4357 DBG_DEFINE(category,
4361 4358 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4362 4359
4363 4360 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4364 4361 "rsmseg_suspend enter: key=%u\n", seg->s_key));
4365 4362
4366 4363 *susp_flg = 0;
4367 4364
4368 4365 ASSERT(rsmseglock_held(seg));
4369 4366 /* wait if putv/getv is in progress */
4370 4367 while (seg->s_rdmacnt > 0)
4371 4368 cv_wait(&seg->s_cv, &seg->s_lock);
4372 4369
4373 4370 do {
4374 4371 recheck_state = 0;
4375 4372
4376 4373 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4377 4374 "rsmseg_suspend:segment %x state=%d\n",
4378 4375 seg->s_key, seg->s_state));
4379 4376
4380 4377 switch (seg->s_state) {
4381 4378 case RSM_STATE_NEW:
4382 4379 /* not a valid state */
4383 4380 break;
4384 4381 case RSM_STATE_CONNECTING:
4385 4382 seg->s_state = RSM_STATE_ABORT_CONNECT;
4386 4383 break;
4387 4384 case RSM_STATE_ABORT_CONNECT:
4388 4385 break;
4389 4386 case RSM_STATE_CONNECT:
4390 4387 seg->s_handle.in = NULL;
4391 4388 seg->s_state = RSM_STATE_CONN_QUIESCE;
4392 4389 break;
4393 4390 case RSM_STATE_MAPPING:
4394 4391 /* wait until segment leaves the mapping state */
4395 4392 while (seg->s_state == RSM_STATE_MAPPING)
4396 4393 cv_wait(&seg->s_cv, &seg->s_lock);
4397 4394 recheck_state = 1;
4398 4395 break;
4399 4396 case RSM_STATE_ACTIVE:
4400 4397 /* unload the mappings */
4401 4398 if (seg->s_ckl != NULL) {
4402 4399 hdl = seg->s_ckl;
4403 4400 for (; hdl != NULL; hdl = hdl->c_next) {
4404 4401 (void) devmap_unload(hdl->c_dhp,
4405 4402 hdl->c_off, hdl->c_len);
4406 4403 }
4407 4404 }
4408 4405 seg->s_mapinfo = NULL;
4409 4406 seg->s_state = RSM_STATE_MAP_QUIESCE;
4410 4407 break;
4411 4408 case RSM_STATE_CONN_QUIESCE:
4412 4409 /* FALLTHRU */
4413 4410 case RSM_STATE_MAP_QUIESCE:
4414 4411 /* rsmseg_suspend already done for seg */
4415 4412 *susp_flg = 1;
4416 4413 break;
4417 4414 case RSM_STATE_DISCONNECT:
4418 4415 break;
4419 4416 default:
4420 4417 ASSERT(0); /* invalid state */
4421 4418 }
4422 4419 } while (recheck_state);
4423 4420
4424 4421 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_suspend done\n"));
4425 4422 }
4426 4423
4427 4424 static void
4428 4425 rsmsegshare_suspend(rsmseg_t *seg)
4429 4426 {
4430 4427 int e;
4431 4428 adapter_t *adapter;
4432 4429 rsm_import_share_t *sharedp;
4433 4430 DBG_DEFINE(category,
4434 4431 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4435 4432
4436 4433 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4437 4434 "rsmsegshare_suspend enter\n"));
4438 4435
4439 4436 rsmseglock_acquire(seg);
4440 4437 rsmsharelock_acquire(seg);
4441 4438
4442 4439 sharedp = seg->s_share;
4443 4440 adapter = seg->s_adapter;
4444 4441 switch (sharedp->rsmsi_state) {
4445 4442 case RSMSI_STATE_NEW:
4446 4443 break;
4447 4444 case RSMSI_STATE_CONNECTING:
4448 4445 sharedp->rsmsi_state = RSMSI_STATE_ABORT_CONNECT;
4449 4446 break;
4450 4447 case RSMSI_STATE_ABORT_CONNECT:
4451 4448 break;
4452 4449 case RSMSI_STATE_CONNECTED:
4453 4450 /* do the rsmpi disconnect */
4454 4451 if (sharedp->rsmsi_node != my_nodeid) {
4455 4452 e = adapter->rsmpi_ops->
4456 4453 rsm_disconnect(sharedp->rsmsi_handle);
4457 4454
4458 4455 DBG_PRINTF((category, RSM_DEBUG,
4459 4456 "rsm:rsmpi disconnect seg=%x:err=%d\n",
4460 4457 sharedp->rsmsi_segid, e));
4461 4458 }
4462 4459
4463 4460 sharedp->rsmsi_handle = NULL;
4464 4461
4465 4462 sharedp->rsmsi_state = RSMSI_STATE_CONN_QUIESCE;
4466 4463 break;
4467 4464 case RSMSI_STATE_CONN_QUIESCE:
4468 4465 break;
4469 4466 case RSMSI_STATE_MAPPED:
4470 4467 /* do the rsmpi unmap and disconnect */
4471 4468 if (sharedp->rsmsi_node != my_nodeid) {
4472 4469 e = adapter->rsmpi_ops->rsm_unmap(seg->s_handle.in);
4473 4470
4474 4471 DBG_PRINTF((category, RSM_DEBUG,
4475 4472 "rsmshare_suspend: rsmpi unmap %d\n", e));
4476 4473
4477 4474 e = adapter->rsmpi_ops->
4478 4475 rsm_disconnect(sharedp->rsmsi_handle);
4479 4476 DBG_PRINTF((category, RSM_DEBUG,
4480 4477 "rsm:rsmpi disconnect seg=%x:err=%d\n",
4481 4478 sharedp->rsmsi_segid, e));
4482 4479 }
4483 4480
4484 4481 sharedp->rsmsi_handle = NULL;
4485 4482
4486 4483 sharedp->rsmsi_state = RSMSI_STATE_MAP_QUIESCE;
4487 4484 break;
4488 4485 case RSMSI_STATE_MAP_QUIESCE:
4489 4486 break;
4490 4487 case RSMSI_STATE_DISCONNECTED:
4491 4488 break;
4492 4489 default:
4493 4490 ASSERT(0); /* invalid state */
4494 4491 }
4495 4492
4496 4493 rsmsharelock_release(seg);
4497 4494 rsmseglock_release(seg);
4498 4495
4499 4496 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4500 4497 "rsmsegshare_suspend done\n"));
4501 4498 }
4502 4499
4503 4500 /*
4504 4501 * This should get called on receiving a RESUME message or from
4505 4502 * the pathmanger if the node undergoing DR dies.
4506 4503 */
4507 4504 static void
4508 4505 importer_resume(rsm_node_id_t src_node)
4509 4506 {
4510 4507 int i;
4511 4508 rsmresource_t *p = NULL;
4512 4509 rsmhash_table_t *rhash = &rsm_import_segs;
4513 4510 void *cookie;
4514 4511 DBG_DEFINE(category,
4515 4512 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4516 4513
4517 4514 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_resume enter\n"));
4518 4515
4519 4516 rw_enter(&rhash->rsmhash_rw, RW_READER);
4520 4517
4521 4518 for (i = 0; i < rsm_hash_size; i++) {
4522 4519 p = rhash->bucket[i];
4523 4520
4524 4521 for (; p; p = p->rsmrc_next) {
4525 4522 rsmseg_t *seg = (rsmseg_t *)p;
4526 4523
4527 4524 rsmseglock_acquire(seg);
4528 4525
4529 4526 /* process only importers of node undergoing DR */
4530 4527 if (seg->s_node != src_node) {
4531 4528 rsmseglock_release(seg);
4532 4529 continue;
4533 4530 }
4534 4531
4535 4532 if (rsmseg_resume(seg, &cookie) != RSM_SUCCESS) {
4536 4533 rsmipc_request_t request;
4537 4534 /*
4538 4535 * rsmpi map/connect failed
4539 4536 * inform the exporter so that it can
4540 4537 * remove the importer.
4541 4538 */
4542 4539 request.rsmipc_hdr.rsmipc_type =
4543 4540 RSMIPC_MSG_NOTIMPORTING;
4544 4541 request.rsmipc_key = seg->s_segid;
4545 4542 request.rsmipc_segment_cookie = cookie;
4546 4543 rsmseglock_release(seg);
4547 4544 (void) rsmipc_send(seg->s_node, &request,
4548 4545 RSM_NO_REPLY);
4549 4546 } else {
4550 4547 rsmseglock_release(seg);
4551 4548 }
4552 4549 }
4553 4550 }
4554 4551
4555 4552 rw_exit(&rhash->rsmhash_rw);
4556 4553
4557 4554 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_resume done\n"));
4558 4555 }
4559 4556
4560 4557 static int
4561 4558 rsmseg_resume(rsmseg_t *seg, void **cookie)
4562 4559 {
4563 4560 int e;
4564 4561 int retc;
4565 4562 off_t dev_offset;
4566 4563 size_t maplen;
4567 4564 uint_t maxprot;
4568 4565 rsm_mapinfo_t *p;
4569 4566 rsmcookie_t *hdl;
4570 4567 rsm_import_share_t *sharedp;
4571 4568 DBG_DEFINE(category,
4572 4569 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4573 4570
4574 4571 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4575 4572 "rsmseg_resume enter: key=%u\n", seg->s_key));
4576 4573
4577 4574 *cookie = NULL;
4578 4575
4579 4576 ASSERT(rsmseglock_held(seg));
4580 4577
4581 4578 if ((seg->s_state != RSM_STATE_CONN_QUIESCE) &&
4582 4579 (seg->s_state != RSM_STATE_MAP_QUIESCE)) {
4583 4580 return (RSM_SUCCESS);
4584 4581 }
4585 4582
4586 4583 sharedp = seg->s_share;
4587 4584
4588 4585 rsmsharelock_acquire(seg);
4589 4586
4590 4587 /* resume the shared connection and/or mapping */
4591 4588 retc = rsmsegshare_resume(seg);
4592 4589
4593 4590 if (seg->s_state == RSM_STATE_CONN_QUIESCE) {
4594 4591 /* shared state can either be connected or mapped */
4595 4592 if ((sharedp->rsmsi_state == RSMSI_STATE_CONNECTED) ||
4596 4593 (sharedp->rsmsi_state == RSMSI_STATE_MAPPED)) {
4597 4594 ASSERT(retc == RSM_SUCCESS);
4598 4595 seg->s_handle.in = sharedp->rsmsi_handle;
4599 4596 rsmsharelock_release(seg);
4600 4597 seg->s_state = RSM_STATE_CONNECT;
4601 4598
4602 4599 } else { /* error in rsmpi connect during resume */
4603 4600 seg->s_handle.in = NULL;
4604 4601 seg->s_state = RSM_STATE_DISCONNECT;
4605 4602
4606 4603 sharedp->rsmsi_refcnt--;
4607 4604 cookie = (void *)sharedp->rsmsi_cookie;
4608 4605
4609 4606 if (sharedp->rsmsi_refcnt == 0) {
4610 4607 ASSERT(sharedp->rsmsi_mapcnt == 0);
4611 4608 rsmsharelock_release(seg);
4612 4609
4613 4610 /* clean up the shared data structure */
4614 4611 mutex_destroy(&sharedp->rsmsi_lock);
4615 4612 cv_destroy(&sharedp->rsmsi_cv);
4616 4613 kmem_free((void *)(sharedp),
4617 4614 sizeof (rsm_import_share_t));
4618 4615
4619 4616 } else {
4620 4617 rsmsharelock_release(seg);
4621 4618 }
4622 4619 /*
4623 4620 * The following needs to be done after any
4624 4621 * rsmsharelock calls which use seg->s_share.
4625 4622 */
4626 4623 seg->s_share = NULL;
4627 4624 }
4628 4625
4629 4626 /* signal any waiting segment */
4630 4627 cv_broadcast(&seg->s_cv);
4631 4628
4632 4629 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4633 4630 "rsmseg_resume done:state=%d\n", seg->s_state));
4634 4631 return (retc);
4635 4632 }
4636 4633
4637 4634 ASSERT(seg->s_state == RSM_STATE_MAP_QUIESCE);
4638 4635
4639 4636 /* Setup protections for remap */
4640 4637 maxprot = PROT_USER;
4641 4638 if (seg->s_mode & RSM_PERM_READ) {
4642 4639 maxprot |= PROT_READ;
4643 4640 }
4644 4641 if (seg->s_mode & RSM_PERM_WRITE) {
4645 4642 maxprot |= PROT_WRITE;
4646 4643 }
4647 4644
4648 4645 if (sharedp->rsmsi_state != RSMSI_STATE_MAPPED) {
4649 4646 /* error in rsmpi connect or map during resume */
4650 4647
4651 4648 /* remap to trash page */
4652 4649 ASSERT(seg->s_ckl != NULL);
4653 4650
4654 4651 for (hdl = seg->s_ckl; hdl != NULL; hdl = hdl->c_next) {
4655 4652 e = devmap_umem_remap(hdl->c_dhp, rsm_dip,
4656 4653 remap_cookie, hdl->c_off, hdl->c_len,
4657 4654 maxprot, 0, NULL);
4658 4655
4659 4656 DBG_PRINTF((category, RSM_ERR,
4660 4657 "rsmseg_resume:remap=%d\n", e));
4661 4658 }
4662 4659
4663 4660 seg->s_handle.in = NULL;
4664 4661 seg->s_state = RSM_STATE_DISCONNECT;
4665 4662
4666 4663 sharedp->rsmsi_refcnt--;
4667 4664
4668 4665 sharedp->rsmsi_mapcnt--;
4669 4666 seg->s_mapinfo = NULL;
4670 4667
4671 4668 if (sharedp->rsmsi_refcnt == 0) {
4672 4669 ASSERT(sharedp->rsmsi_mapcnt == 0);
4673 4670 rsmsharelock_release(seg);
4674 4671
4675 4672 /* clean up the shared data structure */
4676 4673 mutex_destroy(&sharedp->rsmsi_lock);
4677 4674 cv_destroy(&sharedp->rsmsi_cv);
4678 4675 kmem_free((void *)(sharedp),
4679 4676 sizeof (rsm_import_share_t));
4680 4677
4681 4678 } else {
4682 4679 rsmsharelock_release(seg);
4683 4680 }
4684 4681 /*
4685 4682 * The following needs to be done after any
4686 4683 * rsmsharelock calls which use seg->s_share.
4687 4684 */
4688 4685 seg->s_share = NULL;
4689 4686
4690 4687 /* signal any waiting segment */
4691 4688 cv_broadcast(&seg->s_cv);
4692 4689
4693 4690 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4694 4691 "rsmseg_resume done:seg=%x,err=%d\n",
4695 4692 seg->s_key, retc));
4696 4693 return (retc);
4697 4694
4698 4695 }
4699 4696
4700 4697 seg->s_handle.in = sharedp->rsmsi_handle;
4701 4698
4702 4699 if (seg->s_node == my_nodeid) { /* loopback */
4703 4700 ASSERT(seg->s_mapinfo == NULL);
4704 4701
4705 4702 for (hdl = seg->s_ckl; hdl != NULL; hdl = hdl->c_next) {
4706 4703 e = devmap_umem_remap(hdl->c_dhp,
4707 4704 rsm_dip, seg->s_cookie,
4708 4705 hdl->c_off, hdl->c_len,
4709 4706 maxprot, 0, NULL);
4710 4707
4711 4708 DBG_PRINTF((category, RSM_ERR,
4712 4709 "rsmseg_resume:remap=%d\n", e));
4713 4710 }
4714 4711 } else { /* remote exporter */
4715 4712 /* remap to the new rsmpi maps */
4716 4713 seg->s_mapinfo = sharedp->rsmsi_mapinfo;
4717 4714
4718 4715 for (hdl = seg->s_ckl; hdl != NULL; hdl = hdl->c_next) {
4719 4716 p = rsm_get_mapinfo(seg, hdl->c_off, hdl->c_len,
4720 4717 &dev_offset, &maplen);
4721 4718 e = devmap_devmem_remap(hdl->c_dhp,
4722 4719 p->dip, p->dev_register, dev_offset,
4723 4720 maplen, maxprot, 0, NULL);
4724 4721
4725 4722 DBG_PRINTF((category, RSM_ERR,
4726 4723 "rsmseg_resume:remap=%d\n", e));
4727 4724 }
4728 4725 }
4729 4726
4730 4727 rsmsharelock_release(seg);
4731 4728
4732 4729 seg->s_state = RSM_STATE_ACTIVE;
4733 4730 cv_broadcast(&seg->s_cv);
4734 4731
4735 4732 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_resume done\n"));
4736 4733
4737 4734 return (retc);
4738 4735 }
4739 4736
4740 4737 static int
4741 4738 rsmsegshare_resume(rsmseg_t *seg)
4742 4739 {
4743 4740 int e = RSM_SUCCESS;
4744 4741 adapter_t *adapter;
4745 4742 rsm_import_share_t *sharedp;
4746 4743 DBG_DEFINE(category,
4747 4744 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4748 4745
4749 4746 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmsegshare_resume enter\n"));
4750 4747
4751 4748 ASSERT(rsmseglock_held(seg));
4752 4749 ASSERT(rsmsharelock_held(seg));
4753 4750
4754 4751 sharedp = seg->s_share;
4755 4752
4756 4753 /*
4757 4754 * If we are not in a xxxx_QUIESCE state that means shared
4758 4755 * connect/mapping processing has been already been done
4759 4756 * so return success.
4760 4757 */
4761 4758 if ((sharedp->rsmsi_state != RSMSI_STATE_CONN_QUIESCE) &&
4762 4759 (sharedp->rsmsi_state != RSMSI_STATE_MAP_QUIESCE)) {
4763 4760 return (RSM_SUCCESS);
4764 4761 }
4765 4762
4766 4763 adapter = seg->s_adapter;
4767 4764
4768 4765 if (sharedp->rsmsi_node != my_nodeid) {
4769 4766 rsm_addr_t hwaddr;
4770 4767 hwaddr = get_remote_hwaddr(adapter, sharedp->rsmsi_node);
4771 4768
4772 4769 e = adapter->rsmpi_ops->rsm_connect(
4773 4770 adapter->rsmpi_handle, hwaddr,
4774 4771 sharedp->rsmsi_segid, &sharedp->rsmsi_handle);
4775 4772
4776 4773 DBG_PRINTF((category, RSM_DEBUG,
4777 4774 "rsmsegshare_resume:rsmpi connect seg=%x:err=%d\n",
4778 4775 sharedp->rsmsi_segid, e));
4779 4776
4780 4777 if (e != RSM_SUCCESS) {
4781 4778 /* when do we send the NOT_IMPORTING message */
4782 4779 sharedp->rsmsi_handle = NULL;
4783 4780 sharedp->rsmsi_state = RSMSI_STATE_DISCONNECTED;
4784 4781 /* signal any waiting segment */
4785 4782 cv_broadcast(&sharedp->rsmsi_cv);
4786 4783 return (e);
4787 4784 }
4788 4785 }
4789 4786
4790 4787 if (sharedp->rsmsi_state == RSMSI_STATE_CONN_QUIESCE) {
4791 4788 sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
4792 4789 /* signal any waiting segment */
4793 4790 cv_broadcast(&sharedp->rsmsi_cv);
4794 4791 return (e);
4795 4792 }
4796 4793
4797 4794 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAP_QUIESCE);
4798 4795
4799 4796 /* do the rsmpi map of the whole segment here */
4800 4797 if (sharedp->rsmsi_node != my_nodeid) {
4801 4798 size_t mapped_len;
4802 4799 rsm_mapinfo_t *p;
4803 4800
4804 4801 /*
4805 4802 * We need to do rsmpi maps with <off, lens> identical to
4806 4803 * the old mapinfo list because the segment mapping handles
4807 4804 * dhp and such need the fragmentation of rsmpi maps to be
4808 4805 * identical to what it was during the mmap of the segment
4809 4806 */
4810 4807 p = sharedp->rsmsi_mapinfo;
4811 4808
4812 4809 while (p != NULL) {
4813 4810 mapped_len = 0;
4814 4811
4815 4812 e = adapter->rsmpi_ops->rsm_map(
4816 4813 sharedp->rsmsi_handle, p->start_offset,
4817 4814 p->individual_len, &mapped_len,
4818 4815 &p->dip, &p->dev_register, &p->dev_offset,
4819 4816 NULL, NULL);
4820 4817
4821 4818 if (e != 0) {
4822 4819 DBG_PRINTF((category, RSM_ERR,
4823 4820 "rsmsegshare_resume: rsmpi map err=%d\n",
4824 4821 e));
4825 4822 break;
4826 4823 }
4827 4824
4828 4825 if (mapped_len != p->individual_len) {
4829 4826 DBG_PRINTF((category, RSM_ERR,
4830 4827 "rsmsegshare_resume: rsmpi maplen"
4831 4828 "< reqlen=%lx\n", mapped_len));
4832 4829 e = RSMERR_BAD_LENGTH;
4833 4830 break;
4834 4831 }
4835 4832
4836 4833 p = p->next;
4837 4834
4838 4835 }
4839 4836
4840 4837
4841 4838 if (e != RSM_SUCCESS) { /* rsmpi map failed */
4842 4839 int err;
4843 4840 /* Check if this is the first rsm_map */
4844 4841 if (p != sharedp->rsmsi_mapinfo) {
4845 4842 /*
4846 4843 * A single rsm_unmap undoes multiple rsm_maps.
4847 4844 */
4848 4845 (void) seg->s_adapter->rsmpi_ops->
4849 4846 rsm_unmap(sharedp->rsmsi_handle);
4850 4847 }
4851 4848
4852 4849 rsm_free_mapinfo(sharedp->rsmsi_mapinfo);
4853 4850 sharedp->rsmsi_mapinfo = NULL;
4854 4851
4855 4852 err = adapter->rsmpi_ops->
4856 4853 rsm_disconnect(sharedp->rsmsi_handle);
4857 4854
4858 4855 DBG_PRINTF((category, RSM_DEBUG,
4859 4856 "rsmsegshare_resume:disconn seg=%x:err=%d\n",
4860 4857 sharedp->rsmsi_segid, err));
4861 4858
4862 4859 sharedp->rsmsi_handle = NULL;
4863 4860 sharedp->rsmsi_state = RSMSI_STATE_DISCONNECTED;
4864 4861
4865 4862 /* signal the waiting segments */
4866 4863 cv_broadcast(&sharedp->rsmsi_cv);
4867 4864 DBG_PRINTF((category, RSM_DEBUG,
4868 4865 "rsmsegshare_resume done: rsmpi map err\n"));
4869 4866 return (e);
4870 4867 }
4871 4868 }
4872 4869
4873 4870 sharedp->rsmsi_state = RSMSI_STATE_MAPPED;
4874 4871
4875 4872 /* signal any waiting segment */
4876 4873 cv_broadcast(&sharedp->rsmsi_cv);
4877 4874
4878 4875 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmsegshare_resume done\n"));
4879 4876
4880 4877 return (e);
4881 4878 }
4882 4879
4883 4880 /*
4884 4881 * this is the routine that gets called by recv_taskq which is the
4885 4882 * thread that processes messages that are flow-controlled.
4886 4883 */
4887 4884 static void
4888 4885 rsm_intr_proc_deferred(void *arg)
4889 4886 {
4890 4887 path_t *path = (path_t *)arg;
4891 4888 rsmipc_request_t *msg;
4892 4889 rsmipc_msghdr_t *msghdr;
4893 4890 rsm_node_id_t src_node;
4894 4891 msgbuf_elem_t *head;
4895 4892 int e;
4896 4893 DBG_DEFINE(category,
4897 4894 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4898 4895
4899 4896 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4900 4897 "rsm_intr_proc_deferred enter\n"));
4901 4898
4902 4899 mutex_enter(&path->mutex);
4903 4900
4904 4901 /* use the head of the msgbuf_queue */
4905 4902 head = rsmka_gethead_msgbuf(path);
4906 4903
4907 4904 mutex_exit(&path->mutex);
4908 4905
4909 4906 msg = (rsmipc_request_t *)&(head->msg);
4910 4907 msghdr = (rsmipc_msghdr_t *)msg;
4911 4908
4912 4909 src_node = msghdr->rsmipc_src;
4913 4910
4914 4911 /*
4915 4912 * messages that need to send a reply should check the message version
4916 4913 * before processing the message. And all messages that need to
4917 4914 * send a reply should be processed here by the worker thread.
4918 4915 */
4919 4916 switch (msghdr->rsmipc_type) {
4920 4917 case RSMIPC_MSG_SEGCONNECT:
4921 4918 if (msghdr->rsmipc_version != RSM_VERSION) {
4922 4919 rsmipc_reply_t reply;
4923 4920 reply.rsmipc_status = RSMERR_BAD_DRIVER_VERSION;
4924 4921 reply.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_REPLY;
4925 4922 reply.rsmipc_hdr.rsmipc_cookie = msghdr->rsmipc_cookie;
4926 4923 (void) rsmipc_send(msghdr->rsmipc_src, NULL, &reply);
4927 4924 } else {
4928 4925 rsm_intr_segconnect(src_node, msg);
4929 4926 }
4930 4927 break;
4931 4928 case RSMIPC_MSG_DISCONNECT:
4932 4929 rsm_force_unload(src_node, msg->rsmipc_key, DISCONNECT);
4933 4930 break;
4934 4931 case RSMIPC_MSG_SUSPEND:
4935 4932 importer_suspend(src_node);
4936 4933 break;
4937 4934 case RSMIPC_MSG_SUSPEND_DONE:
4938 4935 rsm_suspend_complete(src_node, 0);
4939 4936 break;
4940 4937 case RSMIPC_MSG_RESUME:
4941 4938 importer_resume(src_node);
4942 4939 break;
4943 4940 default:
4944 4941 ASSERT(0);
4945 4942 }
4946 4943
4947 4944 mutex_enter(&path->mutex);
4948 4945
4949 4946 rsmka_dequeue_msgbuf(path);
4950 4947
4951 4948 /* incr procmsg_cnt can be at most RSMIPC_MAX_MESSAGES */
4952 4949 if (path->procmsg_cnt < RSMIPC_MAX_MESSAGES)
4953 4950 path->procmsg_cnt++;
4954 4951
4955 4952 ASSERT(path->procmsg_cnt <= RSMIPC_MAX_MESSAGES);
4956 4953
4957 4954 /* No need to send credits if path is going down */
4958 4955 if ((path->state == RSMKA_PATH_ACTIVE) &&
4959 4956 (path->procmsg_cnt >= RSMIPC_LOTSFREE_MSGBUFS)) {
4960 4957 /*
4961 4958 * send credits and reset procmsg_cnt if success otherwise
4962 4959 * credits will be sent after processing the next message
4963 4960 */
4964 4961 e = rsmipc_send_controlmsg(path, RSMIPC_MSG_CREDIT);
4965 4962 if (e == 0)
4966 4963 path->procmsg_cnt = 0;
4967 4964 else
4968 4965 DBG_PRINTF((category, RSM_ERR,
4969 4966 "rsm_intr_proc_deferred:send credits err=%d\n", e));
4970 4967 }
4971 4968
4972 4969 /*
4973 4970 * decrement the path refcnt since we incremented it in
4974 4971 * rsm_intr_callback_dispatch
4975 4972 */
4976 4973 PATH_RELE_NOLOCK(path);
4977 4974
4978 4975 mutex_exit(&path->mutex);
4979 4976
4980 4977 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4981 4978 "rsm_intr_proc_deferred done\n"));
4982 4979 }
4983 4980
4984 4981 /*
4985 4982 * Flow-controlled messages are enqueued and dispatched onto a taskq here
4986 4983 */
4987 4984 static void
4988 4985 rsm_intr_callback_dispatch(void *data, rsm_addr_t src_hwaddr,
4989 4986 rsm_intr_hand_arg_t arg)
4990 4987 {
4991 4988 srv_handler_arg_t *hdlr_argp = (srv_handler_arg_t *)arg;
4992 4989 path_t *path;
4993 4990 rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)data;
4994 4991 DBG_DEFINE(category,
4995 4992 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4996 4993
4997 4994 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4998 4995 "rsm_intr_callback_dispatch enter\n"));
4999 4996 ASSERT(data && hdlr_argp);
5000 4997
5001 4998 /* look up the path - incr the path refcnt */
5002 4999 path = rsm_find_path(hdlr_argp->adapter_name,
5003 5000 hdlr_argp->adapter_instance, src_hwaddr);
5004 5001
5005 5002 /* the path has been removed - drop this message */
5006 5003 if (path == NULL) {
5007 5004 DBG_PRINTF((category, RSM_DEBUG,
5008 5005 "rsm_intr_callback_dispatch done: msg dropped\n"));
5009 5006 return;
5010 5007 }
5011 5008 /* the path is not active - don't accept new messages */
5012 5009 if (path->state != RSMKA_PATH_ACTIVE) {
5013 5010 PATH_RELE_NOLOCK(path);
5014 5011 mutex_exit(&path->mutex);
5015 5012 DBG_PRINTF((category, RSM_DEBUG,
5016 5013 "rsm_intr_callback_dispatch done: msg dropped"
5017 5014 " path=%lx !ACTIVE\n", path));
5018 5015 return;
5019 5016 }
5020 5017
5021 5018 /*
5022 5019 * Check if this message was sent to an older incarnation
5023 5020 * of the path/sendq.
5024 5021 */
5025 5022 if (path->local_incn != msghdr->rsmipc_incn) {
5026 5023 /* decrement the refcnt */
5027 5024 PATH_RELE_NOLOCK(path);
5028 5025 mutex_exit(&path->mutex);
5029 5026 DBG_PRINTF((category, RSM_DEBUG,
5030 5027 "rsm_intr_callback_dispatch done: old incn %lld\n",
5031 5028 msghdr->rsmipc_incn));
5032 5029 return;
5033 5030 }
5034 5031
5035 5032 /* copy and enqueue msg on the path's msgbuf queue */
5036 5033 rsmka_enqueue_msgbuf(path, data);
5037 5034
5038 5035 /*
5039 5036 * schedule task to process messages - ignore retval from
5040 5037 * task_dispatch because we sender cannot send more than
5041 5038 * what receiver can handle.
5042 5039 */
5043 5040 (void) taskq_dispatch(path->recv_taskq,
5044 5041 rsm_intr_proc_deferred, path, KM_NOSLEEP);
5045 5042
5046 5043 mutex_exit(&path->mutex);
5047 5044
5048 5045 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5049 5046 "rsm_intr_callback_dispatch done\n"));
5050 5047 }
5051 5048
5052 5049 /*
5053 5050 * This procedure is called from rsm_srv_func when a remote node creates a
5054 5051 * a send queue. This event is used as a hint that an earlier failed
5055 5052 * attempt to create a send queue to that remote node may now succeed and
5056 5053 * should be retried. Indication of an earlier failed attempt is provided
5057 5054 * by the RSMKA_SQCREATE_PENDING flag.
5058 5055 */
5059 5056 static void
5060 5057 rsm_sqcreateop_callback(rsm_addr_t src_hwaddr, rsm_intr_hand_arg_t arg)
5061 5058 {
5062 5059 srv_handler_arg_t *hdlr_argp = (srv_handler_arg_t *)arg;
5063 5060 path_t *path;
5064 5061 DBG_DEFINE(category,
5065 5062 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
5066 5063
5067 5064 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5068 5065 "rsm_sqcreateop_callback enter\n"));
5069 5066
5070 5067 /* look up the path - incr the path refcnt */
5071 5068 path = rsm_find_path(hdlr_argp->adapter_name,
5072 5069 hdlr_argp->adapter_instance, src_hwaddr);
5073 5070
5074 5071 if (path == NULL) {
5075 5072 DBG_PRINTF((category, RSM_DEBUG,
5076 5073 "rsm_sqcreateop_callback done: no path\n"));
5077 5074 return;
5078 5075 }
5079 5076
5080 5077 if ((path->state == RSMKA_PATH_UP) &&
5081 5078 (path->flags & RSMKA_SQCREATE_PENDING)) {
5082 5079 /*
5083 5080 * previous attempt to create sendq had failed, retry
5084 5081 * it and move to RSMKA_PATH_ACTIVE state if successful.
5085 5082 * the refcnt will be decremented in the do_deferred_work
5086 5083 */
5087 5084 (void) rsmka_do_path_active(path, RSMKA_NO_SLEEP);
5088 5085 } else {
5089 5086 /* decrement the refcnt */
5090 5087 PATH_RELE_NOLOCK(path);
5091 5088 }
5092 5089 mutex_exit(&path->mutex);
5093 5090
5094 5091 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5095 5092 "rsm_sqcreateop_callback done\n"));
5096 5093 }
5097 5094
5098 5095 static void
5099 5096 rsm_intr_callback(void *data, rsm_addr_t src_hwaddr, rsm_intr_hand_arg_t arg)
5100 5097 {
5101 5098 rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)data;
5102 5099 rsmipc_request_t *msg = (rsmipc_request_t *)data;
5103 5100 rsmipc_controlmsg_t *ctrlmsg = (rsmipc_controlmsg_t *)data;
5104 5101 rsm_node_id_t src_node;
5105 5102 DBG_DEFINE(category,
5106 5103 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
5107 5104
5108 5105 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_callback enter:"
5109 5106 "src=%d, type=%d\n", msghdr->rsmipc_src,
5110 5107 msghdr->rsmipc_type));
5111 5108
5112 5109 /*
5113 5110 * Check for the version number in the msg header. If it is not
5114 5111 * RSM_VERSION, drop the message. In the future, we need to manage
5115 5112 * incompatible version numbers in some way
5116 5113 */
5117 5114 if (msghdr->rsmipc_version != RSM_VERSION) {
5118 5115 DBG_PRINTF((category, RSM_ERR, "wrong KA version\n"));
5119 5116 /*
5120 5117 * Drop requests that don't have a reply right here
5121 5118 * Request with reply will send a BAD_VERSION reply
5122 5119 * when they get processed by the worker thread.
5123 5120 */
5124 5121 if (msghdr->rsmipc_type != RSMIPC_MSG_SEGCONNECT) {
5125 5122 return;
5126 5123 }
5127 5124
5128 5125 }
5129 5126
5130 5127 src_node = msghdr->rsmipc_src;
5131 5128
5132 5129 switch (msghdr->rsmipc_type) {
5133 5130 case RSMIPC_MSG_SEGCONNECT:
5134 5131 case RSMIPC_MSG_DISCONNECT:
5135 5132 case RSMIPC_MSG_SUSPEND:
5136 5133 case RSMIPC_MSG_SUSPEND_DONE:
5137 5134 case RSMIPC_MSG_RESUME:
5138 5135 /*
5139 5136 * These message types are handled by a worker thread using
5140 5137 * the flow-control algorithm.
5141 5138 * Any message processing that does one or more of the
5142 5139 * following should be handled in a worker thread.
5143 5140 * - allocates resources and might sleep
5144 5141 * - makes RSMPI calls down to the interconnect driver
5145 5142 * this by defn include requests with reply.
5146 5143 * - takes a long duration of time
5147 5144 */
5148 5145 rsm_intr_callback_dispatch(data, src_hwaddr, arg);
5149 5146 break;
5150 5147 case RSMIPC_MSG_NOTIMPORTING:
5151 5148 importer_list_rm(src_node, msg->rsmipc_key,
5152 5149 msg->rsmipc_segment_cookie);
5153 5150 break;
5154 5151 case RSMIPC_MSG_SQREADY:
5155 5152 rsm_proc_sqready(data, src_hwaddr, arg);
5156 5153 break;
5157 5154 case RSMIPC_MSG_SQREADY_ACK:
5158 5155 rsm_proc_sqready_ack(data, src_hwaddr, arg);
5159 5156 break;
5160 5157 case RSMIPC_MSG_CREDIT:
5161 5158 rsm_add_credits(ctrlmsg, src_hwaddr, arg);
5162 5159 break;
5163 5160 case RSMIPC_MSG_REPLY:
5164 5161 rsm_intr_reply(msghdr);
5165 5162 break;
5166 5163 case RSMIPC_MSG_BELL:
5167 5164 rsm_intr_event(msg);
5168 5165 break;
5169 5166 case RSMIPC_MSG_IMPORTING:
5170 5167 importer_list_add(src_node, msg->rsmipc_key,
5171 5168 msg->rsmipc_adapter_hwaddr,
5172 5169 msg->rsmipc_segment_cookie);
5173 5170 break;
5174 5171 case RSMIPC_MSG_REPUBLISH:
5175 5172 importer_update(src_node, msg->rsmipc_key, msg->rsmipc_perm);
5176 5173 break;
5177 5174 default:
5178 5175 DBG_PRINTF((category, RSM_DEBUG,
5179 5176 "rsm_intr_callback: bad msg %lx type %d data %lx\n",
5180 5177 (size_t)msg, (int)(msghdr->rsmipc_type), (size_t)data));
5181 5178 }
5182 5179
5183 5180 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_callback done\n"));
5184 5181
5185 5182 }
5186 5183
5187 5184 rsm_intr_hand_ret_t rsm_srv_func(rsm_controller_object_t *chd,
5188 5185 rsm_intr_q_op_t opcode, rsm_addr_t src,
5189 5186 void *data, size_t size, rsm_intr_hand_arg_t arg)
5190 5187 {
5191 5188 DBG_DEFINE(category,
5192 5189 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
5193 5190
5194 5191 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_srv_func enter\n"));
5195 5192
5196 5193 switch (opcode) {
5197 5194 case RSM_INTR_Q_OP_CREATE:
5198 5195 DBG_PRINTF((category, RSM_DEBUG, "rsm_srv_func:OP_CREATE\n"));
5199 5196 rsm_sqcreateop_callback(src, arg);
5200 5197 break;
5201 5198 case RSM_INTR_Q_OP_DESTROY:
5202 5199 DBG_PRINTF((category, RSM_DEBUG, "rsm_srv_func:OP_DESTROY\n"));
5203 5200 break;
5204 5201 case RSM_INTR_Q_OP_RECEIVE:
5205 5202 rsm_intr_callback(data, src, arg);
5206 5203 break;
5207 5204 default:
5208 5205 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5209 5206 "rsm_srv_func: unknown opcode = %x\n", opcode));
5210 5207 }
5211 5208
5212 5209 chd = chd;
5213 5210 size = size;
5214 5211
5215 5212 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_srv_func done\n"));
5216 5213
5217 5214 return (RSM_INTR_HAND_CLAIMED);
5218 5215 }
5219 5216
5220 5217 /* *************************** IPC slots ************************* */
5221 5218 static rsmipc_slot_t *
5222 5219 rsmipc_alloc()
5223 5220 {
5224 5221 int i;
5225 5222 rsmipc_slot_t *slot;
5226 5223 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
5227 5224
5228 5225 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_alloc enter\n"));
5229 5226
5230 5227 /* try to find a free slot, if not wait */
5231 5228 mutex_enter(&rsm_ipc.lock);
5232 5229
5233 5230 while (rsm_ipc.count == 0) {
5234 5231 rsm_ipc.wanted = 1;
5235 5232 cv_wait(&rsm_ipc.cv, &rsm_ipc.lock);
5236 5233 }
5237 5234
5238 5235 /* An empty slot is available, find it */
5239 5236 slot = &rsm_ipc.slots[0];
5240 5237 for (i = 0; i < RSMIPC_SZ; i++, slot++) {
5241 5238 if (RSMIPC_GET(slot, RSMIPC_FREE)) {
5242 5239 RSMIPC_CLEAR(slot, RSMIPC_FREE);
5243 5240 break;
5244 5241 }
5245 5242 }
5246 5243
5247 5244 ASSERT(i < RSMIPC_SZ);
5248 5245 rsm_ipc.count--; /* one less is available */
5249 5246 rsm_ipc.sequence++; /* new sequence */
5250 5247
5251 5248 slot->rsmipc_cookie.ic.sequence = (uint_t)rsm_ipc.sequence;
5252 5249 slot->rsmipc_cookie.ic.index = (uint_t)i;
5253 5250
5254 5251 mutex_exit(&rsm_ipc.lock);
5255 5252
5256 5253 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_alloc done\n"));
5257 5254
5258 5255 return (slot);
5259 5256 }
5260 5257
5261 5258 static void
5262 5259 rsmipc_free(rsmipc_slot_t *slot)
5263 5260 {
5264 5261 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
5265 5262
5266 5263 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_free enter\n"));
5267 5264
5268 5265 ASSERT(MUTEX_HELD(&slot->rsmipc_lock));
5269 5266 ASSERT(&rsm_ipc.slots[slot->rsmipc_cookie.ic.index] == slot);
5270 5267
5271 5268 mutex_enter(&rsm_ipc.lock);
5272 5269
5273 5270 RSMIPC_SET(slot, RSMIPC_FREE);
5274 5271
5275 5272 slot->rsmipc_cookie.ic.sequence = 0;
5276 5273
5277 5274 mutex_exit(&slot->rsmipc_lock);
5278 5275 rsm_ipc.count++;
5279 5276 ASSERT(rsm_ipc.count <= RSMIPC_SZ);
5280 5277 if (rsm_ipc.wanted) {
5281 5278 rsm_ipc.wanted = 0;
5282 5279 cv_broadcast(&rsm_ipc.cv);
5283 5280 }
5284 5281
5285 5282 mutex_exit(&rsm_ipc.lock);
5286 5283
5287 5284 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_free done\n"));
5288 5285 }
5289 5286
5290 5287 static int
5291 5288 rsmipc_send(rsm_node_id_t dest, rsmipc_request_t *req, rsmipc_reply_t *reply)
5292 5289 {
5293 5290 int e = 0;
5294 5291 int credit_check = 0;
5295 5292 int retry_cnt = 0;
5296 5293 int min_retry_cnt = 10;
5297 5294 rsm_send_t is;
5298 5295 rsmipc_slot_t *rslot;
5299 5296 adapter_t *adapter;
5300 5297 path_t *path;
5301 5298 sendq_token_t *sendq_token;
5302 5299 sendq_token_t *used_sendq_token = NULL;
5303 5300 rsm_send_q_handle_t ipc_handle;
5304 5301 DBG_DEFINE(category,
5305 5302 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
5306 5303
5307 5304 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_send enter:dest=%d",
5308 5305 dest));
5309 5306
5310 5307 /*
5311 5308 * Check if this is a local case
5312 5309 */
5313 5310 if (dest == my_nodeid) {
5314 5311 switch (req->rsmipc_hdr.rsmipc_type) {
5315 5312 case RSMIPC_MSG_SEGCONNECT:
5316 5313 reply->rsmipc_status = (short)rsmsegacl_validate(
5317 5314 req, dest, reply);
5318 5315 break;
5319 5316 case RSMIPC_MSG_BELL:
5320 5317 req->rsmipc_hdr.rsmipc_src = dest;
5321 5318 rsm_intr_event(req);
5322 5319 break;
5323 5320 case RSMIPC_MSG_IMPORTING:
5324 5321 importer_list_add(dest, req->rsmipc_key,
5325 5322 req->rsmipc_adapter_hwaddr,
5326 5323 req->rsmipc_segment_cookie);
5327 5324 break;
5328 5325 case RSMIPC_MSG_NOTIMPORTING:
5329 5326 importer_list_rm(dest, req->rsmipc_key,
5330 5327 req->rsmipc_segment_cookie);
5331 5328 break;
5332 5329 case RSMIPC_MSG_REPUBLISH:
5333 5330 importer_update(dest, req->rsmipc_key,
5334 5331 req->rsmipc_perm);
5335 5332 break;
5336 5333 case RSMIPC_MSG_SUSPEND:
5337 5334 importer_suspend(dest);
5338 5335 break;
5339 5336 case RSMIPC_MSG_SUSPEND_DONE:
5340 5337 rsm_suspend_complete(dest, 0);
5341 5338 break;
5342 5339 case RSMIPC_MSG_RESUME:
5343 5340 importer_resume(dest);
5344 5341 break;
5345 5342 default:
5346 5343 ASSERT(0);
5347 5344 }
5348 5345 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5349 5346 "rsmipc_send done\n"));
5350 5347 return (0);
5351 5348 }
5352 5349
5353 5350 if (dest >= MAX_NODES) {
5354 5351 DBG_PRINTF((category, RSM_ERR,
5355 5352 "rsm: rsmipc_send bad node number %x\n", dest));
5356 5353 return (RSMERR_REMOTE_NODE_UNREACHABLE);
5357 5354 }
5358 5355
5359 5356 /*
5360 5357 * Oh boy! we are going remote.
5361 5358 */
5362 5359
5363 5360 /*
5364 5361 * identify if we need to have credits to send this message
5365 5362 * - only selected requests are flow controlled
5366 5363 */
5367 5364 if (req != NULL) {
5368 5365 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5369 5366 "rsmipc_send:request type=%d\n",
5370 5367 req->rsmipc_hdr.rsmipc_type));
5371 5368
5372 5369 switch (req->rsmipc_hdr.rsmipc_type) {
5373 5370 case RSMIPC_MSG_SEGCONNECT:
5374 5371 case RSMIPC_MSG_DISCONNECT:
5375 5372 case RSMIPC_MSG_IMPORTING:
5376 5373 case RSMIPC_MSG_SUSPEND:
5377 5374 case RSMIPC_MSG_SUSPEND_DONE:
5378 5375 case RSMIPC_MSG_RESUME:
5379 5376 credit_check = 1;
5380 5377 break;
5381 5378 default:
5382 5379 credit_check = 0;
5383 5380 }
5384 5381 }
5385 5382
5386 5383 again:
5387 5384 if (retry_cnt++ == min_retry_cnt) {
5388 5385 /* backoff before further retries for 10ms */
5389 5386 delay(drv_usectohz(10000));
5390 5387 retry_cnt = 0; /* reset retry_cnt */
5391 5388 }
5392 5389 sendq_token = rsmka_get_sendq_token(dest, used_sendq_token);
5393 5390 if (sendq_token == NULL) {
5394 5391 DBG_PRINTF((category, RSM_ERR,
5395 5392 "rsm: rsmipc_send no device to reach node %d\n", dest));
5396 5393 return (RSMERR_REMOTE_NODE_UNREACHABLE);
5397 5394 }
5398 5395
5399 5396 if ((sendq_token == used_sendq_token) &&
5400 5397 ((e == RSMERR_CONN_ABORTED) || (e == RSMERR_TIMEOUT) ||
5401 5398 (e == RSMERR_COMM_ERR_MAYBE_DELIVERED))) {
5402 5399 rele_sendq_token(sendq_token);
5403 5400 DBG_PRINTF((category, RSM_DEBUG, "rsmipc_send done=%d\n", e));
5404 5401 return (RSMERR_CONN_ABORTED);
5405 5402 } else
5406 5403 used_sendq_token = sendq_token;
5407 5404
5408 5405 /* lint -save -e413 */
5409 5406 path = SQ_TOKEN_TO_PATH(sendq_token);
5410 5407 adapter = path->local_adapter;
5411 5408 /* lint -restore */
5412 5409 ipc_handle = sendq_token->rsmpi_sendq_handle;
5413 5410
5414 5411 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5415 5412 "rsmipc_send: path=%lx sendq_hdl=%lx\n", path, ipc_handle));
5416 5413
5417 5414 if (reply == NULL) {
5418 5415 /* Send request without ack */
5419 5416 /*
5420 5417 * Set the rsmipc_version number in the msghdr for KA
5421 5418 * communication versioning
5422 5419 */
5423 5420 req->rsmipc_hdr.rsmipc_version = RSM_VERSION;
5424 5421 req->rsmipc_hdr.rsmipc_src = my_nodeid;
5425 5422 /*
5426 5423 * remote endpoints incn should match the value in our
5427 5424 * path's remote_incn field. No need to grab any lock
5428 5425 * since we have refcnted the path in rsmka_get_sendq_token
5429 5426 */
5430 5427 req->rsmipc_hdr.rsmipc_incn = path->remote_incn;
5431 5428
5432 5429 is.is_data = (void *)req;
5433 5430 is.is_size = sizeof (*req);
5434 5431 is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP;
5435 5432 is.is_wait = 0;
5436 5433
5437 5434 if (credit_check) {
5438 5435 mutex_enter(&path->mutex);
5439 5436 /*
5440 5437 * wait till we recv credits or path goes down. If path
5441 5438 * goes down rsm_send will fail and we handle the error
5442 5439 * then
5443 5440 */
5444 5441 while ((sendq_token->msgbuf_avail == 0) &&
5445 5442 (path->state == RSMKA_PATH_ACTIVE)) {
5446 5443 e = cv_wait_sig(&sendq_token->sendq_cv,
5447 5444 &path->mutex);
5448 5445 if (e == 0) {
5449 5446 mutex_exit(&path->mutex);
5450 5447 no_reply_cnt++;
5451 5448 rele_sendq_token(sendq_token);
5452 5449 DBG_PRINTF((category, RSM_DEBUG,
5453 5450 "rsmipc_send done: "
5454 5451 "cv_wait INTERRUPTED"));
5455 5452 return (RSMERR_INTERRUPTED);
5456 5453 }
5457 5454 }
5458 5455
5459 5456 /*
5460 5457 * path is not active retry on another path.
5461 5458 */
5462 5459 if (path->state != RSMKA_PATH_ACTIVE) {
5463 5460 mutex_exit(&path->mutex);
5464 5461 rele_sendq_token(sendq_token);
5465 5462 e = RSMERR_CONN_ABORTED;
5466 5463 DBG_PRINTF((category, RSM_ERR,
5467 5464 "rsm: rsmipc_send: path !ACTIVE"));
5468 5465 goto again;
5469 5466 }
5470 5467
5471 5468 ASSERT(sendq_token->msgbuf_avail > 0);
5472 5469
5473 5470 /*
5474 5471 * reserve a msgbuf
5475 5472 */
5476 5473 sendq_token->msgbuf_avail--;
5477 5474
5478 5475 mutex_exit(&path->mutex);
5479 5476
5480 5477 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is,
5481 5478 NULL);
5482 5479
5483 5480 if (e != RSM_SUCCESS) {
5484 5481 mutex_enter(&path->mutex);
5485 5482 /*
5486 5483 * release the reserved msgbuf since
5487 5484 * the send failed
5488 5485 */
5489 5486 sendq_token->msgbuf_avail++;
5490 5487 cv_broadcast(&sendq_token->sendq_cv);
5491 5488 mutex_exit(&path->mutex);
5492 5489 }
5493 5490 } else
5494 5491 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is,
5495 5492 NULL);
5496 5493
5497 5494 no_reply_cnt++;
5498 5495 rele_sendq_token(sendq_token);
5499 5496 if (e != RSM_SUCCESS) {
5500 5497 DBG_PRINTF((category, RSM_ERR,
5501 5498 "rsm: rsmipc_send no reply send"
5502 5499 " err = %d no reply count = %d\n",
5503 5500 e, no_reply_cnt));
5504 5501 ASSERT(e != RSMERR_QUEUE_FENCE_UP &&
5505 5502 e != RSMERR_BAD_BARRIER_HNDL);
5506 5503 atomic_inc_64(&rsm_ipcsend_errcnt);
5507 5504 goto again;
5508 5505 } else {
5509 5506 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5510 5507 "rsmipc_send done\n"));
5511 5508 return (e);
5512 5509 }
5513 5510
5514 5511 }
5515 5512
5516 5513 if (req == NULL) {
5517 5514 /* Send reply - No flow control is done for reply */
5518 5515 /*
5519 5516 * Set the version in the msg header for KA communication
5520 5517 * versioning
5521 5518 */
5522 5519 reply->rsmipc_hdr.rsmipc_version = RSM_VERSION;
5523 5520 reply->rsmipc_hdr.rsmipc_src = my_nodeid;
5524 5521 /* incn number is not used for reply msgs currently */
5525 5522 reply->rsmipc_hdr.rsmipc_incn = path->remote_incn;
5526 5523
5527 5524 is.is_data = (void *)reply;
5528 5525 is.is_size = sizeof (*reply);
5529 5526 is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP;
5530 5527 is.is_wait = 0;
5531 5528 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is, NULL);
5532 5529 rele_sendq_token(sendq_token);
5533 5530 if (e != RSM_SUCCESS) {
5534 5531 DBG_PRINTF((category, RSM_ERR,
5535 5532 "rsm: rsmipc_send reply send"
5536 5533 " err = %d\n", e));
5537 5534 atomic_inc_64(&rsm_ipcsend_errcnt);
5538 5535 goto again;
5539 5536 } else {
5540 5537 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5541 5538 "rsmipc_send done\n"));
5542 5539 return (e);
5543 5540 }
5544 5541 }
5545 5542
5546 5543 /* Reply needed */
5547 5544 rslot = rsmipc_alloc(); /* allocate a new ipc slot */
5548 5545
5549 5546 mutex_enter(&rslot->rsmipc_lock);
5550 5547
5551 5548 rslot->rsmipc_data = (void *)reply;
5552 5549 RSMIPC_SET(rslot, RSMIPC_PENDING);
5553 5550
5554 5551 while (RSMIPC_GET(rslot, RSMIPC_PENDING)) {
5555 5552 /*
5556 5553 * Set the rsmipc_version number in the msghdr for KA
5557 5554 * communication versioning
5558 5555 */
5559 5556 req->rsmipc_hdr.rsmipc_version = RSM_VERSION;
5560 5557 req->rsmipc_hdr.rsmipc_src = my_nodeid;
5561 5558 req->rsmipc_hdr.rsmipc_cookie = rslot->rsmipc_cookie;
5562 5559 /*
5563 5560 * remote endpoints incn should match the value in our
5564 5561 * path's remote_incn field. No need to grab any lock
5565 5562 * since we have refcnted the path in rsmka_get_sendq_token
5566 5563 */
5567 5564 req->rsmipc_hdr.rsmipc_incn = path->remote_incn;
5568 5565
5569 5566 is.is_data = (void *)req;
5570 5567 is.is_size = sizeof (*req);
5571 5568 is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP;
5572 5569 is.is_wait = 0;
5573 5570 if (credit_check) {
5574 5571
5575 5572 mutex_enter(&path->mutex);
5576 5573 /*
5577 5574 * wait till we recv credits or path goes down. If path
5578 5575 * goes down rsm_send will fail and we handle the error
5579 5576 * then.
5580 5577 */
5581 5578 while ((sendq_token->msgbuf_avail == 0) &&
5582 5579 (path->state == RSMKA_PATH_ACTIVE)) {
5583 5580 e = cv_wait_sig(&sendq_token->sendq_cv,
5584 5581 &path->mutex);
5585 5582 if (e == 0) {
5586 5583 mutex_exit(&path->mutex);
5587 5584 RSMIPC_CLEAR(rslot, RSMIPC_PENDING);
5588 5585 rsmipc_free(rslot);
5589 5586 rele_sendq_token(sendq_token);
5590 5587 DBG_PRINTF((category, RSM_DEBUG,
5591 5588 "rsmipc_send done: "
5592 5589 "cv_wait INTERRUPTED"));
5593 5590 return (RSMERR_INTERRUPTED);
5594 5591 }
5595 5592 }
5596 5593
5597 5594 /*
5598 5595 * path is not active retry on another path.
5599 5596 */
5600 5597 if (path->state != RSMKA_PATH_ACTIVE) {
5601 5598 mutex_exit(&path->mutex);
5602 5599 RSMIPC_CLEAR(rslot, RSMIPC_PENDING);
5603 5600 rsmipc_free(rslot);
5604 5601 rele_sendq_token(sendq_token);
5605 5602 e = RSMERR_CONN_ABORTED;
5606 5603 DBG_PRINTF((category, RSM_ERR,
5607 5604 "rsm: rsmipc_send: path !ACTIVE"));
5608 5605 goto again;
5609 5606 }
5610 5607
5611 5608 ASSERT(sendq_token->msgbuf_avail > 0);
5612 5609
5613 5610 /*
5614 5611 * reserve a msgbuf
5615 5612 */
5616 5613 sendq_token->msgbuf_avail--;
5617 5614
5618 5615 mutex_exit(&path->mutex);
5619 5616
5620 5617 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is,
5621 5618 NULL);
5622 5619
5623 5620 if (e != RSM_SUCCESS) {
5624 5621 mutex_enter(&path->mutex);
5625 5622 /*
5626 5623 * release the reserved msgbuf since
5627 5624 * the send failed
5628 5625 */
5629 5626 sendq_token->msgbuf_avail++;
5630 5627 cv_broadcast(&sendq_token->sendq_cv);
5631 5628 mutex_exit(&path->mutex);
5632 5629 }
5633 5630 } else
5634 5631 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is,
5635 5632 NULL);
5636 5633
5637 5634 if (e != RSM_SUCCESS) {
5638 5635 DBG_PRINTF((category, RSM_ERR,
5639 5636 "rsm: rsmipc_send rsmpi send err = %d\n", e));
5640 5637 RSMIPC_CLEAR(rslot, RSMIPC_PENDING);
5641 5638 rsmipc_free(rslot);
5642 5639 rele_sendq_token(sendq_token);
5643 5640 atomic_inc_64(&rsm_ipcsend_errcnt);
5644 5641 goto again;
5645 5642 }
5646 5643
5647 5644 /* wait for a reply signal, a SIGINT, or 5 sec. timeout */
5648 5645 e = cv_reltimedwait_sig(&rslot->rsmipc_cv, &rslot->rsmipc_lock,
5649 5646 drv_usectohz(5000000), TR_CLOCK_TICK);
5650 5647 if (e < 0) {
5651 5648 /* timed out - retry */
5652 5649 e = RSMERR_TIMEOUT;
5653 5650 } else if (e == 0) {
5654 5651 /* signalled - return error */
5655 5652 e = RSMERR_INTERRUPTED;
5656 5653 break;
5657 5654 } else {
5658 5655 e = RSM_SUCCESS;
5659 5656 }
5660 5657 }
5661 5658
5662 5659 RSMIPC_CLEAR(rslot, RSMIPC_PENDING);
5663 5660 rsmipc_free(rslot);
5664 5661 rele_sendq_token(sendq_token);
5665 5662
5666 5663 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_send done=%d\n", e));
5667 5664 return (e);
5668 5665 }
5669 5666
5670 5667 static int
5671 5668 rsm_send_notimporting(rsm_node_id_t dest, rsm_memseg_id_t segid, void *cookie)
5672 5669 {
5673 5670 rsmipc_request_t request;
5674 5671
5675 5672 /*
5676 5673 * inform the exporter to delete this importer
5677 5674 */
5678 5675 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_NOTIMPORTING;
5679 5676 request.rsmipc_key = segid;
5680 5677 request.rsmipc_segment_cookie = cookie;
5681 5678 return (rsmipc_send(dest, &request, RSM_NO_REPLY));
5682 5679 }
5683 5680
5684 5681 static void
5685 5682 rsm_send_republish(rsm_memseg_id_t segid, rsmapi_access_entry_t *acl,
5686 5683 int acl_len, rsm_permission_t default_permission)
5687 5684 {
5688 5685 int i;
5689 5686 importing_token_t *token;
5690 5687 rsmipc_request_t request;
5691 5688 republish_token_t *republish_list = NULL;
5692 5689 republish_token_t *rp;
5693 5690 rsm_permission_t permission;
5694 5691 int index;
5695 5692
5696 5693 /*
5697 5694 * send the new access mode to all the nodes that have imported
5698 5695 * this segment.
5699 5696 * If the new acl does not have a node that was present in
5700 5697 * the old acl a access permission of 0 is sent.
5701 5698 */
5702 5699
5703 5700 index = rsmhash(segid);
5704 5701
5705 5702 /*
5706 5703 * create a list of node/permissions to send the republish message
5707 5704 */
5708 5705 mutex_enter(&importer_list.lock);
5709 5706
5710 5707 token = importer_list.bucket[index];
5711 5708 while (token != NULL) {
5712 5709 if (segid == token->key) {
5713 5710 permission = default_permission;
5714 5711
5715 5712 for (i = 0; i < acl_len; i++) {
5716 5713 if (token->importing_node == acl[i].ae_node) {
5717 5714 permission = acl[i].ae_permission;
5718 5715 break;
5719 5716 }
5720 5717 }
5721 5718 rp = kmem_zalloc(sizeof (republish_token_t), KM_SLEEP);
5722 5719
5723 5720 rp->key = segid;
5724 5721 rp->importing_node = token->importing_node;
5725 5722 rp->permission = permission;
5726 5723 rp->next = republish_list;
5727 5724 republish_list = rp;
5728 5725 }
5729 5726 token = token->next;
5730 5727 }
5731 5728
5732 5729 mutex_exit(&importer_list.lock);
5733 5730
5734 5731 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_REPUBLISH;
5735 5732 request.rsmipc_key = segid;
5736 5733
5737 5734 while (republish_list != NULL) {
5738 5735 request.rsmipc_perm = republish_list->permission;
5739 5736 (void) rsmipc_send(republish_list->importing_node,
5740 5737 &request, RSM_NO_REPLY);
5741 5738 rp = republish_list;
5742 5739 republish_list = republish_list->next;
5743 5740 kmem_free(rp, sizeof (republish_token_t));
5744 5741 }
5745 5742 }
5746 5743
5747 5744 static void
5748 5745 rsm_send_suspend()
5749 5746 {
5750 5747 int i, e;
5751 5748 rsmipc_request_t request;
5752 5749 list_element_t *tokp;
5753 5750 list_element_t *head = NULL;
5754 5751 importing_token_t *token;
5755 5752 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
5756 5753 "rsm_send_suspend enter\n"));
5757 5754
5758 5755 /*
5759 5756 * create a list of node to send the suspend message
5760 5757 *
5761 5758 * Currently the whole importer list is scanned and we obtain
5762 5759 * all the nodes - this basically gets all nodes that at least
5763 5760 * import one segment from the local node.
5764 5761 *
5765 5762 * no need to grab the rsm_suspend_list lock here since we are
5766 5763 * single threaded when suspend is called.
5767 5764 */
5768 5765
5769 5766 mutex_enter(&importer_list.lock);
5770 5767 for (i = 0; i < rsm_hash_size; i++) {
5771 5768
5772 5769 token = importer_list.bucket[i];
5773 5770
5774 5771 while (token != NULL) {
5775 5772
5776 5773 tokp = head;
5777 5774
5778 5775 /*
5779 5776 * make sure that the token's node
5780 5777 * is not already on the suspend list
5781 5778 */
5782 5779 while (tokp != NULL) {
5783 5780 if (tokp->nodeid == token->importing_node) {
5784 5781 break;
5785 5782 }
5786 5783 tokp = tokp->next;
5787 5784 }
5788 5785
5789 5786 if (tokp == NULL) { /* not in suspend list */
5790 5787 tokp = kmem_zalloc(sizeof (list_element_t),
5791 5788 KM_SLEEP);
5792 5789 tokp->nodeid = token->importing_node;
5793 5790 tokp->next = head;
5794 5791 head = tokp;
5795 5792 }
5796 5793
5797 5794 token = token->next;
5798 5795 }
5799 5796 }
5800 5797 mutex_exit(&importer_list.lock);
5801 5798
5802 5799 if (head == NULL) { /* no importers so go ahead and quiesce segments */
5803 5800 exporter_quiesce();
5804 5801 return;
5805 5802 }
5806 5803
5807 5804 mutex_enter(&rsm_suspend_list.list_lock);
5808 5805 ASSERT(rsm_suspend_list.list_head == NULL);
5809 5806 /*
5810 5807 * update the suspend list righaway so that if a node dies the
5811 5808 * pathmanager can set the NODE dead flag
5812 5809 */
5813 5810 rsm_suspend_list.list_head = head;
5814 5811 mutex_exit(&rsm_suspend_list.list_lock);
5815 5812
5816 5813 tokp = head;
5817 5814
5818 5815 while (tokp != NULL) {
5819 5816 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_SUSPEND;
5820 5817 e = rsmipc_send(tokp->nodeid, &request, RSM_NO_REPLY);
5821 5818 /*
5822 5819 * Error in rsmipc_send currently happens due to inaccessibility
5823 5820 * of the remote node.
5824 5821 */
5825 5822 if (e == RSM_SUCCESS) { /* send failed - don't wait for ack */
5826 5823 tokp->flags |= RSM_SUSPEND_ACKPENDING;
5827 5824 }
5828 5825
5829 5826 tokp = tokp->next;
5830 5827 }
5831 5828
5832 5829 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
5833 5830 "rsm_send_suspend done\n"));
5834 5831
5835 5832 }
5836 5833
5837 5834 static void
5838 5835 rsm_send_resume()
5839 5836 {
5840 5837 rsmipc_request_t request;
5841 5838 list_element_t *elem, *head;
5842 5839
5843 5840 /*
5844 5841 * save the suspend list so that we know where to send
5845 5842 * the resume messages and make the suspend list head
5846 5843 * NULL.
5847 5844 */
5848 5845 mutex_enter(&rsm_suspend_list.list_lock);
5849 5846 head = rsm_suspend_list.list_head;
5850 5847 rsm_suspend_list.list_head = NULL;
5851 5848 mutex_exit(&rsm_suspend_list.list_lock);
5852 5849
5853 5850 while (head != NULL) {
5854 5851 elem = head;
5855 5852 head = head->next;
5856 5853
5857 5854 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_RESUME;
5858 5855
5859 5856 (void) rsmipc_send(elem->nodeid, &request, RSM_NO_REPLY);
5860 5857
5861 5858 kmem_free((void *)elem, sizeof (list_element_t));
5862 5859
5863 5860 }
5864 5861
5865 5862 }
5866 5863
5867 5864 /*
5868 5865 * This function takes path and sends a message using the sendq
5869 5866 * corresponding to it. The RSMIPC_MSG_SQREADY, RSMIPC_MSG_SQREADY_ACK
5870 5867 * and RSMIPC_MSG_CREDIT are sent using this function.
5871 5868 */
5872 5869 int
5873 5870 rsmipc_send_controlmsg(path_t *path, int msgtype)
5874 5871 {
5875 5872 int e;
5876 5873 int retry_cnt = 0;
5877 5874 int min_retry_cnt = 10;
5878 5875 adapter_t *adapter;
5879 5876 rsm_send_t is;
5880 5877 rsm_send_q_handle_t ipc_handle;
5881 5878 rsmipc_controlmsg_t msg;
5882 5879 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_FLOWCONTROL);
5883 5880
5884 5881 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5885 5882 "rsmipc_send_controlmsg enter\n"));
5886 5883
5887 5884 ASSERT(MUTEX_HELD(&path->mutex));
5888 5885
5889 5886 adapter = path->local_adapter;
5890 5887
5891 5888 DBG_PRINTF((category, RSM_DEBUG, "rsmipc_send_controlmsg:path=%lx "
5892 5889 "msgtype=%d %lx:%llx->%lx:%llx procmsg=%d\n", path, msgtype,
5893 5890 my_nodeid, adapter->hwaddr, path->remote_node,
5894 5891 path->remote_hwaddr, path->procmsg_cnt));
5895 5892
5896 5893 if (path->state != RSMKA_PATH_ACTIVE) {
5897 5894 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5898 5895 "rsmipc_send_controlmsg done: ! RSMKA_PATH_ACTIVE"));
5899 5896 return (1);
5900 5897 }
5901 5898
5902 5899 ipc_handle = path->sendq_token.rsmpi_sendq_handle;
5903 5900
5904 5901 msg.rsmipc_hdr.rsmipc_version = RSM_VERSION;
5905 5902 msg.rsmipc_hdr.rsmipc_src = my_nodeid;
5906 5903 msg.rsmipc_hdr.rsmipc_type = msgtype;
5907 5904 msg.rsmipc_hdr.rsmipc_incn = path->remote_incn;
5908 5905
5909 5906 if (msgtype == RSMIPC_MSG_CREDIT)
5910 5907 msg.rsmipc_credits = path->procmsg_cnt;
5911 5908
5912 5909 msg.rsmipc_local_incn = path->local_incn;
5913 5910
5914 5911 msg.rsmipc_adapter_hwaddr = adapter->hwaddr;
5915 5912 /* incr the sendq, path refcnt */
5916 5913 PATH_HOLD_NOLOCK(path);
5917 5914 SENDQ_TOKEN_HOLD(path);
5918 5915
5919 5916 do {
5920 5917 /* drop the path lock before doing the rsm_send */
5921 5918 mutex_exit(&path->mutex);
5922 5919
5923 5920 is.is_data = (void *)&msg;
5924 5921 is.is_size = sizeof (msg);
5925 5922 is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP;
5926 5923 is.is_wait = 0;
5927 5924
5928 5925 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is, NULL);
5929 5926
5930 5927 ASSERT(e != RSMERR_QUEUE_FENCE_UP &&
5931 5928 e != RSMERR_BAD_BARRIER_HNDL);
5932 5929
5933 5930 mutex_enter(&path->mutex);
5934 5931
5935 5932 if (e == RSM_SUCCESS) {
5936 5933 break;
5937 5934 }
5938 5935 /* error counter for statistics */
5939 5936 atomic_inc_64(&rsm_ctrlmsg_errcnt);
5940 5937
5941 5938 DBG_PRINTF((category, RSM_ERR,
5942 5939 "rsmipc_send_controlmsg:rsm_send error=%d", e));
5943 5940
5944 5941 if (++retry_cnt == min_retry_cnt) { /* backoff before retry */
5945 5942 (void) cv_reltimedwait(&path->sendq_token.sendq_cv,
5946 5943 &path->mutex, drv_usectohz(10000), TR_CLOCK_TICK);
5947 5944 retry_cnt = 0;
5948 5945 }
5949 5946 } while (path->state == RSMKA_PATH_ACTIVE);
5950 5947
5951 5948 /* decrement the sendq,path refcnt that we incr before rsm_send */
5952 5949 SENDQ_TOKEN_RELE(path);
5953 5950 PATH_RELE_NOLOCK(path);
5954 5951
5955 5952 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5956 5953 "rsmipc_send_controlmsg done=%d", e));
5957 5954 return (e);
5958 5955 }
5959 5956
5960 5957 /*
5961 5958 * Called from rsm_force_unload and path_importer_disconnect. The memory
5962 5959 * mapping for the imported segment is removed and the segment is
5963 5960 * disconnected at the interconnect layer if disconnect_flag is TRUE.
5964 5961 * rsm_force_unload will get disconnect_flag TRUE from rsm_intr_callback
5965 5962 * and FALSE from rsm_rebind.
5966 5963 *
5967 5964 * When subsequent accesses cause page faulting, the dummy page is mapped
5968 5965 * to resolve the fault, and the mapping generation number is incremented
5969 5966 * so that the application can be notified on a close barrier operation.
5970 5967 *
5971 5968 * It is important to note that the caller of rsmseg_unload is responsible for
5972 5969 * acquiring the segment lock before making a call to rsmseg_unload. This is
5973 5970 * required to make the caller and rsmseg_unload thread safe. The segment lock
5974 5971 * will be released by the rsmseg_unload function.
5975 5972 */
5976 5973 void
5977 5974 rsmseg_unload(rsmseg_t *im_seg)
5978 5975 {
5979 5976 rsmcookie_t *hdl;
5980 5977 void *shared_cookie;
5981 5978 rsmipc_request_t request;
5982 5979 uint_t maxprot;
5983 5980
5984 5981 DBG_DEFINE(category,
5985 5982 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
5986 5983
5987 5984 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_unload enter\n"));
5988 5985
5989 5986 ASSERT(im_seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
5990 5987
5991 5988 /* wait until segment leaves the mapping state */
5992 5989 while (im_seg->s_state == RSM_STATE_MAPPING)
5993 5990 cv_wait(&im_seg->s_cv, &im_seg->s_lock);
5994 5991 /*
5995 5992 * An unload is only necessary if the segment is connected. However,
5996 5993 * if the segment was on the import list in state RSM_STATE_CONNECTING
5997 5994 * then a connection was in progress. Change to RSM_STATE_NEW
5998 5995 * here to cause an early exit from the connection process.
5999 5996 */
6000 5997 if (im_seg->s_state == RSM_STATE_NEW) {
6001 5998 rsmseglock_release(im_seg);
6002 5999 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6003 6000 "rsmseg_unload done: RSM_STATE_NEW\n"));
6004 6001 return;
6005 6002 } else if (im_seg->s_state == RSM_STATE_CONNECTING) {
6006 6003 im_seg->s_state = RSM_STATE_ABORT_CONNECT;
6007 6004 rsmsharelock_acquire(im_seg);
6008 6005 im_seg->s_share->rsmsi_state = RSMSI_STATE_ABORT_CONNECT;
6009 6006 rsmsharelock_release(im_seg);
6010 6007 rsmseglock_release(im_seg);
6011 6008 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6012 6009 "rsmseg_unload done: RSM_STATE_CONNECTING\n"));
6013 6010 return;
6014 6011 }
6015 6012
6016 6013 if (im_seg->s_flags & RSM_FORCE_DISCONNECT) {
6017 6014 if (im_seg->s_ckl != NULL) {
6018 6015 int e;
6019 6016 /* Setup protections for remap */
6020 6017 maxprot = PROT_USER;
6021 6018 if (im_seg->s_mode & RSM_PERM_READ) {
6022 6019 maxprot |= PROT_READ;
6023 6020 }
6024 6021 if (im_seg->s_mode & RSM_PERM_WRITE) {
6025 6022 maxprot |= PROT_WRITE;
6026 6023 }
6027 6024 hdl = im_seg->s_ckl;
6028 6025 for (; hdl != NULL; hdl = hdl->c_next) {
6029 6026 e = devmap_umem_remap(hdl->c_dhp, rsm_dip,
6030 6027 remap_cookie,
6031 6028 hdl->c_off, hdl->c_len,
6032 6029 maxprot, 0, NULL);
6033 6030
6034 6031 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6035 6032 "remap returns %d\n", e));
6036 6033 }
6037 6034 }
6038 6035
6039 6036 (void) rsm_closeconnection(im_seg, &shared_cookie);
6040 6037
6041 6038 if (shared_cookie != NULL) {
6042 6039 /*
6043 6040 * inform the exporting node so this import
6044 6041 * can be deleted from the list of importers.
6045 6042 */
6046 6043 request.rsmipc_hdr.rsmipc_type =
6047 6044 RSMIPC_MSG_NOTIMPORTING;
6048 6045 request.rsmipc_key = im_seg->s_segid;
6049 6046 request.rsmipc_segment_cookie = shared_cookie;
6050 6047 rsmseglock_release(im_seg);
6051 6048 (void) rsmipc_send(im_seg->s_node, &request,
6052 6049 RSM_NO_REPLY);
6053 6050 } else {
6054 6051 rsmseglock_release(im_seg);
6055 6052 }
6056 6053 }
6057 6054 else
6058 6055 rsmseglock_release(im_seg);
6059 6056
6060 6057 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_unload done\n"));
6061 6058
6062 6059 }
6063 6060
6064 6061 /* ****************************** Importer Calls ************************ */
6065 6062
6066 6063 static int
6067 6064 rsm_access(uid_t owner, gid_t group, int perm, int mode, const struct cred *cr)
6068 6065 {
6069 6066 int shifts = 0;
6070 6067
6071 6068 if (crgetuid(cr) != owner) {
6072 6069 shifts += 3;
6073 6070 if (!groupmember(group, cr))
6074 6071 shifts += 3;
6075 6072 }
6076 6073
6077 6074 mode &= ~(perm << shifts);
6078 6075
6079 6076 if (mode == 0)
6080 6077 return (0);
6081 6078
6082 6079 return (secpolicy_rsm_access(cr, owner, mode));
6083 6080 }
6084 6081
6085 6082
6086 6083 static int
6087 6084 rsm_connect(rsmseg_t *seg, rsm_ioctlmsg_t *msg, cred_t *cred,
6088 6085 intptr_t dataptr, int mode)
6089 6086 {
6090 6087 int e;
6091 6088 int recheck_state = 0;
6092 6089 void *shared_cookie;
6093 6090 rsmipc_request_t request;
6094 6091 rsmipc_reply_t reply;
6095 6092 rsm_permission_t access;
6096 6093 adapter_t *adapter;
6097 6094 rsm_addr_t addr = 0;
6098 6095 rsm_import_share_t *sharedp;
6099 6096 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT);
6100 6097
6101 6098 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_connect enter\n"));
6102 6099
6103 6100 adapter = rsm_getadapter(msg, mode);
6104 6101 if (adapter == NULL) {
6105 6102 DBG_PRINTF((category, RSM_ERR,
6106 6103 "rsm_connect done:ENODEV adapter=NULL\n"));
6107 6104 return (RSMERR_CTLR_NOT_PRESENT);
6108 6105 }
6109 6106
6110 6107 if ((adapter == &loopback_adapter) && (msg->nodeid != my_nodeid)) {
6111 6108 rsmka_release_adapter(adapter);
6112 6109 DBG_PRINTF((category, RSM_ERR,
6113 6110 "rsm_connect done:ENODEV loopback\n"));
6114 6111 return (RSMERR_CTLR_NOT_PRESENT);
6115 6112 }
6116 6113
6117 6114
6118 6115 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
6119 6116 ASSERT(seg->s_state == RSM_STATE_NEW);
6120 6117
6121 6118 /*
6122 6119 * Translate perm to access
6123 6120 */
6124 6121 if (msg->perm & ~RSM_PERM_RDWR) {
6125 6122 rsmka_release_adapter(adapter);
6126 6123 DBG_PRINTF((category, RSM_ERR,
6127 6124 "rsm_connect done:EINVAL invalid perms\n"));
6128 6125 return (RSMERR_BAD_PERMS);
6129 6126 }
6130 6127 access = 0;
6131 6128 if (msg->perm & RSM_PERM_READ)
6132 6129 access |= RSM_ACCESS_READ;
6133 6130 if (msg->perm & RSM_PERM_WRITE)
6134 6131 access |= RSM_ACCESS_WRITE;
6135 6132
6136 6133 seg->s_node = msg->nodeid;
6137 6134
6138 6135 /*
6139 6136 * Adding to the import list locks the segment; release the segment
6140 6137 * lock so we can get the reply for the send.
6141 6138 */
6142 6139 e = rsmimport_add(seg, msg->key);
6143 6140 if (e) {
6144 6141 rsmka_release_adapter(adapter);
6145 6142 DBG_PRINTF((category, RSM_ERR,
6146 6143 "rsm_connect done:rsmimport_add failed %d\n", e));
6147 6144 return (e);
6148 6145 }
6149 6146 seg->s_state = RSM_STATE_CONNECTING;
6150 6147
6151 6148 /*
6152 6149 * Set the s_adapter field here so as to have a valid comparison of
6153 6150 * the adapter and the s_adapter value during rsmshare_get. For
6154 6151 * any error, set s_adapter to NULL before doing a release_adapter
6155 6152 */
6156 6153 seg->s_adapter = adapter;
6157 6154
6158 6155 rsmseglock_release(seg);
6159 6156
6160 6157 /*
6161 6158 * get the pointer to the shared data structure; the
6162 6159 * shared data is locked and refcount has been incremented
6163 6160 */
6164 6161 sharedp = rsmshare_get(msg->key, msg->nodeid, adapter, seg);
6165 6162
6166 6163 ASSERT(rsmsharelock_held(seg));
6167 6164
6168 6165 do {
6169 6166 /* flag indicates whether we need to recheck the state */
6170 6167 recheck_state = 0;
6171 6168 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6172 6169 "rsm_connect:RSMSI_STATE=%d\n", sharedp->rsmsi_state));
6173 6170 switch (sharedp->rsmsi_state) {
6174 6171 case RSMSI_STATE_NEW:
6175 6172 sharedp->rsmsi_state = RSMSI_STATE_CONNECTING;
6176 6173 break;
6177 6174 case RSMSI_STATE_CONNECTING:
6178 6175 /* FALLTHRU */
6179 6176 case RSMSI_STATE_CONN_QUIESCE:
6180 6177 /* FALLTHRU */
6181 6178 case RSMSI_STATE_MAP_QUIESCE:
6182 6179 /* wait for the state to change */
6183 6180 while ((sharedp->rsmsi_state ==
6184 6181 RSMSI_STATE_CONNECTING) ||
6185 6182 (sharedp->rsmsi_state ==
6186 6183 RSMSI_STATE_CONN_QUIESCE) ||
6187 6184 (sharedp->rsmsi_state ==
6188 6185 RSMSI_STATE_MAP_QUIESCE)) {
6189 6186 if (cv_wait_sig(&sharedp->rsmsi_cv,
6190 6187 &sharedp->rsmsi_lock) == 0) {
6191 6188 /* signalled - clean up and return */
6192 6189 rsmsharelock_release(seg);
6193 6190 rsmimport_rm(seg);
6194 6191 seg->s_adapter = NULL;
6195 6192 rsmka_release_adapter(adapter);
6196 6193 seg->s_state = RSM_STATE_NEW;
6197 6194 DBG_PRINTF((category, RSM_ERR,
6198 6195 "rsm_connect done: INTERRUPTED\n"));
6199 6196 return (RSMERR_INTERRUPTED);
6200 6197 }
6201 6198 }
6202 6199 /*
6203 6200 * the state changed, loop back and check what it is
6204 6201 */
6205 6202 recheck_state = 1;
6206 6203 break;
6207 6204 case RSMSI_STATE_ABORT_CONNECT:
6208 6205 /* exit the loop and clean up further down */
6209 6206 break;
6210 6207 case RSMSI_STATE_CONNECTED:
6211 6208 /* already connected, good - fall through */
6212 6209 case RSMSI_STATE_MAPPED:
6213 6210 /* already mapped, wow - fall through */
6214 6211 /* access validation etc is done further down */
6215 6212 break;
6216 6213 case RSMSI_STATE_DISCONNECTED:
6217 6214 /* disconnected - so reconnect now */
6218 6215 sharedp->rsmsi_state = RSMSI_STATE_CONNECTING;
6219 6216 break;
6220 6217 default:
6221 6218 ASSERT(0); /* Invalid State */
6222 6219 }
6223 6220 } while (recheck_state);
6224 6221
6225 6222 if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTING) {
6226 6223 /* we are the first to connect */
6227 6224 rsmsharelock_release(seg);
6228 6225
6229 6226 if (msg->nodeid != my_nodeid) {
6230 6227 addr = get_remote_hwaddr(adapter, msg->nodeid);
6231 6228
6232 6229 if ((int64_t)addr < 0) {
6233 6230 rsmsharelock_acquire(seg);
6234 6231 rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING,
6235 6232 RSMSI_STATE_NEW);
6236 6233 rsmsharelock_release(seg);
6237 6234 rsmimport_rm(seg);
6238 6235 seg->s_adapter = NULL;
6239 6236 rsmka_release_adapter(adapter);
6240 6237 seg->s_state = RSM_STATE_NEW;
6241 6238 DBG_PRINTF((category, RSM_ERR,
6242 6239 "rsm_connect done: hwaddr<0\n"));
6243 6240 return (RSMERR_INTERNAL_ERROR);
6244 6241 }
6245 6242 } else {
6246 6243 addr = adapter->hwaddr;
6247 6244 }
6248 6245
6249 6246 /*
6250 6247 * send request to node [src, dest, key, msgid] and get back
6251 6248 * [status, msgid, cookie]
6252 6249 */
6253 6250 request.rsmipc_key = msg->key;
6254 6251 /*
6255 6252 * we need the s_mode of the exporter so pass
6256 6253 * RSM_ACCESS_TRUSTED
6257 6254 */
6258 6255 request.rsmipc_perm = RSM_ACCESS_TRUSTED;
6259 6256 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_SEGCONNECT;
6260 6257 request.rsmipc_adapter_hwaddr = addr;
6261 6258 request.rsmipc_segment_cookie = sharedp;
6262 6259
6263 6260 e = (int)rsmipc_send(msg->nodeid, &request, &reply);
6264 6261 if (e) {
6265 6262 rsmsharelock_acquire(seg);
6266 6263 rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING,
6267 6264 RSMSI_STATE_NEW);
6268 6265 rsmsharelock_release(seg);
6269 6266 rsmimport_rm(seg);
6270 6267 seg->s_adapter = NULL;
6271 6268 rsmka_release_adapter(adapter);
6272 6269 seg->s_state = RSM_STATE_NEW;
6273 6270 DBG_PRINTF((category, RSM_ERR,
6274 6271 "rsm_connect done:rsmipc_send failed %d\n", e));
6275 6272 return (e);
6276 6273 }
6277 6274
6278 6275 if (reply.rsmipc_status != RSM_SUCCESS) {
6279 6276 rsmsharelock_acquire(seg);
6280 6277 rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING,
6281 6278 RSMSI_STATE_NEW);
6282 6279 rsmsharelock_release(seg);
6283 6280 rsmimport_rm(seg);
6284 6281 seg->s_adapter = NULL;
6285 6282 rsmka_release_adapter(adapter);
6286 6283 seg->s_state = RSM_STATE_NEW;
6287 6284 DBG_PRINTF((category, RSM_ERR,
6288 6285 "rsm_connect done:rsmipc_send reply err %d\n",
6289 6286 reply.rsmipc_status));
6290 6287 return (reply.rsmipc_status);
6291 6288 }
6292 6289
6293 6290 rsmsharelock_acquire(seg);
6294 6291 /* store the information recvd into the shared data struct */
6295 6292 sharedp->rsmsi_mode = reply.rsmipc_mode;
6296 6293 sharedp->rsmsi_uid = reply.rsmipc_uid;
6297 6294 sharedp->rsmsi_gid = reply.rsmipc_gid;
6298 6295 sharedp->rsmsi_seglen = reply.rsmipc_seglen;
6299 6296 sharedp->rsmsi_cookie = sharedp;
6300 6297 }
6301 6298
6302 6299 rsmsharelock_release(seg);
6303 6300
6304 6301 /*
6305 6302 * Get the segment lock and check for a force disconnect
6306 6303 * from the export side which would have changed the state
6307 6304 * back to RSM_STATE_NEW. Once the segment lock is acquired a
6308 6305 * force disconnect will be held off until the connection
6309 6306 * has completed.
6310 6307 */
6311 6308 rsmseglock_acquire(seg);
6312 6309 rsmsharelock_acquire(seg);
6313 6310 ASSERT(seg->s_state == RSM_STATE_CONNECTING ||
6314 6311 seg->s_state == RSM_STATE_ABORT_CONNECT);
6315 6312
6316 6313 shared_cookie = sharedp->rsmsi_cookie;
6317 6314
6318 6315 if ((seg->s_state == RSM_STATE_ABORT_CONNECT) ||
6319 6316 (sharedp->rsmsi_state == RSMSI_STATE_ABORT_CONNECT)) {
6320 6317 seg->s_state = RSM_STATE_NEW;
6321 6318 seg->s_adapter = NULL;
6322 6319 rsmsharelock_release(seg);
6323 6320 rsmseglock_release(seg);
6324 6321 rsmimport_rm(seg);
6325 6322 rsmka_release_adapter(adapter);
6326 6323
6327 6324 rsmsharelock_acquire(seg);
6328 6325 if (!(sharedp->rsmsi_flags & RSMSI_FLAGS_ABORTDONE)) {
6329 6326 /*
6330 6327 * set a flag indicating abort handling has been
6331 6328 * done
6332 6329 */
6333 6330 sharedp->rsmsi_flags |= RSMSI_FLAGS_ABORTDONE;
6334 6331 rsmsharelock_release(seg);
6335 6332 /* send a message to exporter - only once */
6336 6333 (void) rsm_send_notimporting(msg->nodeid,
6337 6334 msg->key, shared_cookie);
6338 6335 rsmsharelock_acquire(seg);
6339 6336 /*
6340 6337 * wake up any waiting importers and inform that
6341 6338 * connection has been aborted
6342 6339 */
6343 6340 cv_broadcast(&sharedp->rsmsi_cv);
6344 6341 }
6345 6342 rsmsharelock_release(seg);
6346 6343
6347 6344 DBG_PRINTF((category, RSM_ERR,
6348 6345 "rsm_connect done: RSM_STATE_ABORT_CONNECT\n"));
6349 6346 return (RSMERR_INTERRUPTED);
6350 6347 }
6351 6348
6352 6349
6353 6350 /*
6354 6351 * We need to verify that this process has access
6355 6352 */
6356 6353 e = rsm_access(sharedp->rsmsi_uid, sharedp->rsmsi_gid,
6357 6354 access & sharedp->rsmsi_mode,
6358 6355 (int)(msg->perm & RSM_PERM_RDWR), cred);
6359 6356 if (e) {
6360 6357 rsmsharelock_release(seg);
6361 6358 seg->s_state = RSM_STATE_NEW;
6362 6359 seg->s_adapter = NULL;
6363 6360 rsmseglock_release(seg);
6364 6361 rsmimport_rm(seg);
6365 6362 rsmka_release_adapter(adapter);
6366 6363 /*
6367 6364 * No need to lock segment it has been removed
6368 6365 * from the hash table
6369 6366 */
6370 6367 rsmsharelock_acquire(seg);
6371 6368 if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTING) {
6372 6369 rsmsharelock_release(seg);
6373 6370 /* this is the first importer */
6374 6371
6375 6372 (void) rsm_send_notimporting(msg->nodeid, msg->key,
6376 6373 shared_cookie);
6377 6374 rsmsharelock_acquire(seg);
6378 6375 sharedp->rsmsi_state = RSMSI_STATE_NEW;
6379 6376 cv_broadcast(&sharedp->rsmsi_cv);
6380 6377 }
6381 6378 rsmsharelock_release(seg);
6382 6379
6383 6380 DBG_PRINTF((category, RSM_ERR,
6384 6381 "rsm_connect done: ipcaccess failed\n"));
6385 6382 return (RSMERR_PERM_DENIED);
6386 6383 }
6387 6384
6388 6385 /* update state and cookie */
6389 6386 seg->s_segid = sharedp->rsmsi_segid;
6390 6387 seg->s_len = sharedp->rsmsi_seglen;
6391 6388 seg->s_mode = access & sharedp->rsmsi_mode;
6392 6389 seg->s_pid = ddi_get_pid();
6393 6390 seg->s_mapinfo = NULL;
6394 6391
6395 6392 if (seg->s_node != my_nodeid) {
6396 6393 if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTING) {
6397 6394 e = adapter->rsmpi_ops->rsm_connect(
6398 6395 adapter->rsmpi_handle,
6399 6396 addr, seg->s_segid, &sharedp->rsmsi_handle);
6400 6397
6401 6398 if (e != RSM_SUCCESS) {
6402 6399 seg->s_state = RSM_STATE_NEW;
6403 6400 seg->s_adapter = NULL;
6404 6401 rsmsharelock_release(seg);
6405 6402 rsmseglock_release(seg);
6406 6403 rsmimport_rm(seg);
6407 6404 rsmka_release_adapter(adapter);
6408 6405 /*
6409 6406 * inform the exporter to delete this importer
6410 6407 */
6411 6408 (void) rsm_send_notimporting(msg->nodeid,
6412 6409 msg->key, shared_cookie);
6413 6410
6414 6411 /*
6415 6412 * Now inform any waiting importers to
6416 6413 * retry connect. This needs to be done
6417 6414 * after sending notimporting so that
6418 6415 * the notimporting is sent before a waiting
6419 6416 * importer sends a segconnect while retrying
6420 6417 *
6421 6418 * No need to lock segment it has been removed
6422 6419 * from the hash table
6423 6420 */
6424 6421
6425 6422 rsmsharelock_acquire(seg);
6426 6423 sharedp->rsmsi_state = RSMSI_STATE_NEW;
6427 6424 cv_broadcast(&sharedp->rsmsi_cv);
6428 6425 rsmsharelock_release(seg);
6429 6426
6430 6427 DBG_PRINTF((category, RSM_ERR,
6431 6428 "rsm_connect error %d\n", e));
6432 6429 if (e == RSMERR_SEG_NOT_PUBLISHED_TO_RSM_ADDR)
6433 6430 return (
6434 6431 RSMERR_SEG_NOT_PUBLISHED_TO_NODE);
6435 6432 else if ((e == RSMERR_RSM_ADDR_UNREACHABLE) ||
6436 6433 (e == RSMERR_UNKNOWN_RSM_ADDR))
6437 6434 return (RSMERR_REMOTE_NODE_UNREACHABLE);
6438 6435 else
6439 6436 return (e);
6440 6437 }
6441 6438
6442 6439 }
6443 6440 seg->s_handle.in = sharedp->rsmsi_handle;
6444 6441
6445 6442 }
6446 6443
6447 6444 seg->s_state = RSM_STATE_CONNECT;
6448 6445
6449 6446
6450 6447 seg->s_flags &= ~RSM_IMPORT_DUMMY; /* clear dummy flag */
6451 6448 if (bar_va) {
6452 6449 /* increment generation number on barrier page */
6453 6450 atomic_inc_16(bar_va + seg->s_hdr.rsmrc_num);
6454 6451 /* return user off into barrier page where status will be */
6455 6452 msg->off = (int)seg->s_hdr.rsmrc_num;
6456 6453 msg->gnum = bar_va[msg->off]; /* gnum race */
6457 6454 } else {
6458 6455 msg->off = 0;
6459 6456 msg->gnum = 0; /* gnum race */
6460 6457 }
6461 6458
6462 6459 msg->len = (int)sharedp->rsmsi_seglen;
6463 6460 msg->rnum = seg->s_minor;
6464 6461 rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING, RSMSI_STATE_CONNECTED);
6465 6462 rsmsharelock_release(seg);
6466 6463 rsmseglock_release(seg);
6467 6464
6468 6465 /* Return back to user the segment size & perm in case it's needed */
6469 6466
6470 6467 #ifdef _MULTI_DATAMODEL
6471 6468 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
6472 6469 rsm_ioctlmsg32_t msg32;
6473 6470
6474 6471 if (msg->len > UINT_MAX)
6475 6472 msg32.len = RSM_MAXSZ_PAGE_ALIGNED;
6476 6473 else
6477 6474 msg32.len = msg->len;
6478 6475 msg32.off = msg->off;
6479 6476 msg32.perm = msg->perm;
6480 6477 msg32.gnum = msg->gnum;
6481 6478 msg32.rnum = msg->rnum;
6482 6479
6483 6480 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6484 6481 "rsm_connect done\n"));
6485 6482
6486 6483 if (ddi_copyout((caddr_t)&msg32, (caddr_t)dataptr,
6487 6484 sizeof (msg32), mode))
6488 6485 return (RSMERR_BAD_ADDR);
6489 6486 else
6490 6487 return (RSM_SUCCESS);
6491 6488 }
6492 6489 #endif
6493 6490 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_connect done\n"));
6494 6491
6495 6492 if (ddi_copyout((caddr_t)msg, (caddr_t)dataptr, sizeof (*msg),
6496 6493 mode))
6497 6494 return (RSMERR_BAD_ADDR);
6498 6495 else
6499 6496 return (RSM_SUCCESS);
6500 6497 }
6501 6498
6502 6499 static int
6503 6500 rsm_unmap(rsmseg_t *seg)
6504 6501 {
6505 6502 int err;
6506 6503 adapter_t *adapter;
6507 6504 rsm_import_share_t *sharedp;
6508 6505 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT);
6509 6506
6510 6507 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6511 6508 "rsm_unmap enter %u\n", seg->s_segid));
6512 6509
6513 6510 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
6514 6511
6515 6512 /* assert seg is locked */
6516 6513 ASSERT(rsmseglock_held(seg));
6517 6514 ASSERT(seg->s_state != RSM_STATE_MAPPING);
6518 6515
6519 6516 if ((seg->s_state != RSM_STATE_ACTIVE) &&
6520 6517 (seg->s_state != RSM_STATE_MAP_QUIESCE)) {
6521 6518 /* segment unmap has already been done */
6522 6519 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unmap done\n"));
6523 6520 return (RSM_SUCCESS);
6524 6521 }
6525 6522
6526 6523 sharedp = seg->s_share;
6527 6524
6528 6525 rsmsharelock_acquire(seg);
6529 6526
6530 6527 /*
6531 6528 * - shared data struct is in MAPPED or MAP_QUIESCE state
6532 6529 */
6533 6530
6534 6531 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAPPED ||
6535 6532 sharedp->rsmsi_state == RSMSI_STATE_MAP_QUIESCE);
6536 6533
6537 6534 /*
6538 6535 * Unmap pages - previously rsm_memseg_import_unmap was called only if
6539 6536 * the segment cookie list was NULL; but it is always NULL when
6540 6537 * called from rsmmap_unmap and won't be NULL when called for
6541 6538 * a force disconnect - so the check for NULL cookie list was removed
6542 6539 */
6543 6540
6544 6541 ASSERT(sharedp->rsmsi_mapcnt > 0);
6545 6542
6546 6543 sharedp->rsmsi_mapcnt--;
6547 6544
6548 6545 if (sharedp->rsmsi_mapcnt == 0) {
6549 6546 if (sharedp->rsmsi_state == RSMSI_STATE_MAPPED) {
6550 6547 /* unmap the shared RSMPI mapping */
6551 6548 adapter = seg->s_adapter;
6552 6549 if (seg->s_node != my_nodeid) {
6553 6550 ASSERT(sharedp->rsmsi_handle != NULL);
6554 6551 err = adapter->rsmpi_ops->
6555 6552 rsm_unmap(sharedp->rsmsi_handle);
6556 6553 DBG_PRINTF((category, RSM_DEBUG,
6557 6554 "rsm_unmap: rsmpi unmap %d\n", err));
6558 6555 rsm_free_mapinfo(sharedp->rsmsi_mapinfo);
6559 6556 sharedp->rsmsi_mapinfo = NULL;
6560 6557 }
6561 6558 sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
6562 6559 } else { /* MAP_QUIESCE --munmap()--> CONN_QUIESCE */
6563 6560 sharedp->rsmsi_state = RSMSI_STATE_CONN_QUIESCE;
6564 6561 }
6565 6562 }
6566 6563
6567 6564 rsmsharelock_release(seg);
6568 6565
6569 6566 /*
6570 6567 * The s_cookie field is used to store the cookie returned from the
6571 6568 * ddi_umem_lock when binding the pages for an export segment. This
6572 6569 * is the primary use of the s_cookie field and does not normally
6573 6570 * pertain to any importing segment except in the loopback case.
6574 6571 * For the loopback case, the import segment and export segment are
6575 6572 * on the same node, the s_cookie field of the segment structure for
6576 6573 * the importer is initialized to the s_cookie field in the exported
6577 6574 * segment during the map operation and is used during the call to
6578 6575 * devmap_umem_setup for the import mapping.
6579 6576 * Thus, during unmap, we simply need to set s_cookie to NULL to
6580 6577 * indicate that the mapping no longer exists.
6581 6578 */
6582 6579 seg->s_cookie = NULL;
6583 6580
6584 6581 seg->s_mapinfo = NULL;
6585 6582
6586 6583 if (seg->s_state == RSM_STATE_ACTIVE)
6587 6584 seg->s_state = RSM_STATE_CONNECT;
6588 6585 else
6589 6586 seg->s_state = RSM_STATE_CONN_QUIESCE;
6590 6587
6591 6588 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unmap done\n"));
6592 6589
6593 6590 return (RSM_SUCCESS);
6594 6591 }
6595 6592
6596 6593 /*
6597 6594 * cookie returned here if not null indicates that it is
6598 6595 * the last importer and it can be used in the RSMIPC_NOT_IMPORTING
6599 6596 * message.
6600 6597 */
6601 6598 static int
6602 6599 rsm_closeconnection(rsmseg_t *seg, void **cookie)
6603 6600 {
6604 6601 int e;
6605 6602 adapter_t *adapter;
6606 6603 rsm_import_share_t *sharedp;
6607 6604 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT);
6608 6605
6609 6606 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6610 6607 "rsm_closeconnection enter\n"));
6611 6608
6612 6609 *cookie = (void *)NULL;
6613 6610
6614 6611 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
6615 6612
6616 6613 /* assert seg is locked */
6617 6614 ASSERT(rsmseglock_held(seg));
6618 6615
6619 6616 if (seg->s_state == RSM_STATE_DISCONNECT) {
6620 6617 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6621 6618 "rsm_closeconnection done: already disconnected\n"));
6622 6619 return (RSM_SUCCESS);
6623 6620 }
6624 6621
6625 6622 /* wait for all putv/getv ops to get done */
6626 6623 while (seg->s_rdmacnt > 0) {
6627 6624 cv_wait(&seg->s_cv, &seg->s_lock);
6628 6625 }
6629 6626
6630 6627 (void) rsm_unmap(seg);
6631 6628
6632 6629 ASSERT(seg->s_state == RSM_STATE_CONNECT ||
6633 6630 seg->s_state == RSM_STATE_CONN_QUIESCE);
6634 6631
6635 6632 adapter = seg->s_adapter;
6636 6633 sharedp = seg->s_share;
6637 6634
6638 6635 ASSERT(sharedp != NULL);
6639 6636
6640 6637 rsmsharelock_acquire(seg);
6641 6638
6642 6639 /*
6643 6640 * Disconnect on adapter
6644 6641 *
6645 6642 * The current algorithm is stateless, I don't have to contact
6646 6643 * server when I go away. It only gives me permissions. Of course,
6647 6644 * the adapters will talk to terminate the connect.
6648 6645 *
6649 6646 * disconnect is needed only if we are CONNECTED not in CONN_QUIESCE
6650 6647 */
6651 6648 if ((sharedp->rsmsi_state == RSMSI_STATE_CONNECTED) &&
6652 6649 (sharedp->rsmsi_node != my_nodeid)) {
6653 6650
6654 6651 if (sharedp->rsmsi_refcnt == 1) {
6655 6652 /* this is the last importer */
6656 6653 ASSERT(sharedp->rsmsi_mapcnt == 0);
6657 6654
6658 6655 e = adapter->rsmpi_ops->
6659 6656 rsm_disconnect(sharedp->rsmsi_handle);
6660 6657 if (e != RSM_SUCCESS) {
6661 6658 DBG_PRINTF((category, RSM_DEBUG,
6662 6659 "rsm:disconnect failed seg=%x:err=%d\n",
6663 6660 seg->s_key, e));
6664 6661 }
6665 6662 }
6666 6663 }
6667 6664
6668 6665 seg->s_handle.in = NULL;
6669 6666
6670 6667 sharedp->rsmsi_refcnt--;
6671 6668
6672 6669 if (sharedp->rsmsi_refcnt == 0) {
6673 6670 *cookie = (void *)sharedp->rsmsi_cookie;
6674 6671 sharedp->rsmsi_state = RSMSI_STATE_DISCONNECTED;
6675 6672 sharedp->rsmsi_handle = NULL;
6676 6673 rsmsharelock_release(seg);
6677 6674
6678 6675 /* clean up the shared data structure */
6679 6676 mutex_destroy(&sharedp->rsmsi_lock);
6680 6677 cv_destroy(&sharedp->rsmsi_cv);
6681 6678 kmem_free((void *)(sharedp), sizeof (rsm_import_share_t));
6682 6679
6683 6680 } else {
6684 6681 rsmsharelock_release(seg);
6685 6682 }
6686 6683
6687 6684 /* increment generation number on barrier page */
6688 6685 if (bar_va) {
6689 6686 atomic_inc_16(bar_va + seg->s_hdr.rsmrc_num);
6690 6687 }
6691 6688
6692 6689 /*
6693 6690 * The following needs to be done after any
6694 6691 * rsmsharelock calls which use seg->s_share.
6695 6692 */
6696 6693 seg->s_share = NULL;
6697 6694
6698 6695 seg->s_state = RSM_STATE_DISCONNECT;
6699 6696 /* signal anyone waiting in the CONN_QUIESCE state */
6700 6697 cv_broadcast(&seg->s_cv);
6701 6698
6702 6699 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6703 6700 "rsm_closeconnection done\n"));
6704 6701
6705 6702 return (RSM_SUCCESS);
6706 6703 }
6707 6704
6708 6705 int
6709 6706 rsm_disconnect(rsmseg_t *seg)
6710 6707 {
6711 6708 rsmipc_request_t request;
6712 6709 void *shared_cookie;
6713 6710 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT);
6714 6711
6715 6712 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_disconnect enter\n"));
6716 6713
6717 6714 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
6718 6715
6719 6716 /* assert seg isn't locked */
6720 6717 ASSERT(!rsmseglock_held(seg));
6721 6718
6722 6719
6723 6720 /* Remove segment from imported list */
6724 6721 rsmimport_rm(seg);
6725 6722
6726 6723 /* acquire the segment */
6727 6724 rsmseglock_acquire(seg);
6728 6725
6729 6726 /* wait until segment leaves the mapping state */
6730 6727 while (seg->s_state == RSM_STATE_MAPPING)
6731 6728 cv_wait(&seg->s_cv, &seg->s_lock);
6732 6729
6733 6730 if (seg->s_state == RSM_STATE_DISCONNECT) {
6734 6731 seg->s_state = RSM_STATE_NEW;
6735 6732 rsmseglock_release(seg);
6736 6733 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6737 6734 "rsm_disconnect done: already disconnected\n"));
6738 6735 return (RSM_SUCCESS);
6739 6736 }
6740 6737
6741 6738 (void) rsm_closeconnection(seg, &shared_cookie);
6742 6739
6743 6740 /* update state */
6744 6741 seg->s_state = RSM_STATE_NEW;
6745 6742
6746 6743 if (shared_cookie != NULL) {
6747 6744 /*
6748 6745 * This is the last importer so inform the exporting node
6749 6746 * so this import can be deleted from the list of importers.
6750 6747 */
6751 6748 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_NOTIMPORTING;
6752 6749 request.rsmipc_key = seg->s_segid;
6753 6750 request.rsmipc_segment_cookie = shared_cookie;
6754 6751 rsmseglock_release(seg);
|
↓ open down ↓ |
3094 lines elided |
↑ open up ↑ |
6755 6752 (void) rsmipc_send(seg->s_node, &request, RSM_NO_REPLY);
6756 6753 } else {
6757 6754 rsmseglock_release(seg);
6758 6755 }
6759 6756
6760 6757 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_disconnect done\n"));
6761 6758
6762 6759 return (DDI_SUCCESS);
6763 6760 }
6764 6761
6765 -/*ARGSUSED*/
6766 6762 static int
6767 6763 rsm_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
6768 6764 struct pollhead **phpp)
6769 6765 {
6770 6766 minor_t rnum;
6771 6767 rsmresource_t *res;
6772 6768 rsmseg_t *seg;
6773 6769 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
6774 6770
6775 6771 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_chpoll enter\n"));
6776 6772
|
↓ open down ↓ |
1 lines elided |
↑ open up ↑ |
6777 6773 /* find minor, no lock */
6778 6774 rnum = getminor(dev);
6779 6775 res = rsmresource_lookup(rnum, RSM_NOLOCK);
6780 6776
6781 6777 /* poll is supported only for export/import segments */
6782 6778 if ((res == NULL) || (res == RSMRC_RESERVED) ||
6783 6779 (res->rsmrc_type == RSM_RESOURCE_BAR)) {
6784 6780 return (ENXIO);
6785 6781 }
6786 6782
6787 - *reventsp = 0;
6788 -
6789 6783 /*
6790 6784 * An exported segment must be in state RSM_STATE_EXPORT; an
6791 6785 * imported segment must be in state RSM_STATE_ACTIVE.
6792 6786 */
6793 6787 seg = (rsmseg_t *)res;
6794 6788
6795 6789 if (seg->s_pollevent) {
6796 6790 *reventsp = POLLRDNORM;
6797 - } else if (!anyyet) {
6791 + } else {
6792 + *reventsp = 0;
6793 + }
6794 +
6795 + if ((*reventsp == 0 && !anyyet) || (events & POLLET)) {
6798 6796 /* cannot take segment lock here */
6799 6797 *phpp = &seg->s_poll;
6800 6798 seg->s_pollflag |= RSM_SEGMENT_POLL;
6801 6799 }
6802 6800 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_chpoll done\n"));
6803 6801 return (0);
6804 6802 }
6805 6803
6806 6804
6807 6805
6808 6806 /* ************************* IOCTL Commands ********************* */
6809 6807
6810 6808 static rsmseg_t *
6811 6809 rsmresource_seg(rsmresource_t *res, minor_t rnum, cred_t *credp,
6812 6810 rsm_resource_type_t type)
6813 6811 {
6814 6812 /* get segment from resource handle */
6815 6813 rsmseg_t *seg;
6816 6814 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL);
6817 6815
6818 6816 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmresource_seg enter\n"));
6819 6817
6820 6818
6821 6819 if (res != RSMRC_RESERVED) {
6822 6820 seg = (rsmseg_t *)res;
6823 6821 } else {
6824 6822 /* Allocate segment now and bind it */
6825 6823 seg = rsmseg_alloc(rnum, credp);
6826 6824
6827 6825 /*
6828 6826 * if DR pre-processing is going on or DR is in progress
6829 6827 * then the new export segments should be in the NEW_QSCD state
6830 6828 */
6831 6829 if (type == RSM_RESOURCE_EXPORT_SEGMENT) {
6832 6830 mutex_enter(&rsm_drv_data.drv_lock);
6833 6831 if ((rsm_drv_data.drv_state ==
6834 6832 RSM_DRV_PREDEL_STARTED) ||
6835 6833 (rsm_drv_data.drv_state ==
6836 6834 RSM_DRV_PREDEL_COMPLETED) ||
6837 6835 (rsm_drv_data.drv_state ==
6838 6836 RSM_DRV_DR_IN_PROGRESS)) {
6839 6837 seg->s_state = RSM_STATE_NEW_QUIESCED;
6840 6838 }
6841 6839 mutex_exit(&rsm_drv_data.drv_lock);
6842 6840 }
6843 6841
6844 6842 rsmresource_insert(rnum, (rsmresource_t *)seg, type);
6845 6843 }
6846 6844
6847 6845 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmresource_seg done\n"));
6848 6846
6849 6847 return (seg);
6850 6848 }
6851 6849
6852 6850 static int
6853 6851 rsmexport_ioctl(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int cmd, intptr_t arg,
6854 6852 int mode, cred_t *credp)
6855 6853 {
6856 6854 int error;
6857 6855 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT | RSM_IOCTL);
6858 6856
6859 6857 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmexport_ioctl enter\n"));
6860 6858
6861 6859 arg = arg;
6862 6860 credp = credp;
6863 6861
6864 6862 ASSERT(seg != NULL);
6865 6863
6866 6864 switch (cmd) {
6867 6865 case RSM_IOCTL_BIND:
6868 6866 error = rsm_bind(seg, msg, arg, mode);
6869 6867 break;
6870 6868 case RSM_IOCTL_REBIND:
6871 6869 error = rsm_rebind(seg, msg);
6872 6870 break;
6873 6871 case RSM_IOCTL_UNBIND:
6874 6872 error = ENOTSUP;
6875 6873 break;
6876 6874 case RSM_IOCTL_PUBLISH:
6877 6875 error = rsm_publish(seg, msg, arg, mode);
6878 6876 break;
6879 6877 case RSM_IOCTL_REPUBLISH:
6880 6878 error = rsm_republish(seg, msg, mode);
6881 6879 break;
6882 6880 case RSM_IOCTL_UNPUBLISH:
6883 6881 error = rsm_unpublish(seg, 1);
6884 6882 break;
6885 6883 default:
6886 6884 error = EINVAL;
6887 6885 break;
6888 6886 }
6889 6887
6890 6888 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmexport_ioctl done: %d\n",
6891 6889 error));
6892 6890
6893 6891 return (error);
6894 6892 }
6895 6893 static int
6896 6894 rsmimport_ioctl(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int cmd, intptr_t arg,
6897 6895 int mode, cred_t *credp)
6898 6896 {
6899 6897 int error;
6900 6898 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
6901 6899
6902 6900 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmimport_ioctl enter\n"));
6903 6901
6904 6902 ASSERT(seg);
6905 6903
6906 6904 switch (cmd) {
6907 6905 case RSM_IOCTL_CONNECT:
6908 6906 error = rsm_connect(seg, msg, credp, arg, mode);
6909 6907 break;
6910 6908 default:
6911 6909 error = EINVAL;
6912 6910 break;
6913 6911 }
6914 6912
6915 6913 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmimport_ioctl done: %d\n",
6916 6914 error));
6917 6915 return (error);
6918 6916 }
6919 6917
6920 6918 static int
6921 6919 rsmbar_ioctl(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int cmd, intptr_t arg,
6922 6920 int mode)
6923 6921 {
6924 6922 int e;
6925 6923 adapter_t *adapter;
6926 6924 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
6927 6925
6928 6926 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmbar_ioctl enter\n"));
6929 6927
6930 6928
6931 6929 if ((seg->s_flags & RSM_IMPORT_DUMMY) != 0) {
6932 6930 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6933 6931 "rsmbar_ioctl done: RSM_IMPORT_DUMMY\n"));
6934 6932 return (RSMERR_CONN_ABORTED);
6935 6933 } else if (seg->s_node == my_nodeid) {
6936 6934 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6937 6935 "rsmbar_ioctl done: loopback\n"));
6938 6936 return (RSM_SUCCESS);
6939 6937 }
6940 6938
6941 6939 adapter = seg->s_adapter;
6942 6940
6943 6941 switch (cmd) {
6944 6942 case RSM_IOCTL_BAR_CHECK:
6945 6943 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6946 6944 "rsmbar_ioctl done: RSM_BAR_CHECK %d\n", bar_va));
6947 6945 return (bar_va ? RSM_SUCCESS : EINVAL);
6948 6946 case RSM_IOCTL_BAR_OPEN:
6949 6947 e = adapter->rsmpi_ops->
6950 6948 rsm_open_barrier_ctrl(adapter->rsmpi_handle, &msg->bar);
6951 6949 break;
6952 6950 case RSM_IOCTL_BAR_ORDER:
6953 6951 e = adapter->rsmpi_ops->rsm_order_barrier(&msg->bar);
6954 6952 break;
6955 6953 case RSM_IOCTL_BAR_CLOSE:
6956 6954 e = adapter->rsmpi_ops->rsm_close_barrier(&msg->bar);
6957 6955 break;
6958 6956 default:
6959 6957 e = EINVAL;
6960 6958 break;
6961 6959 }
6962 6960
6963 6961 if (e == RSM_SUCCESS) {
6964 6962 #ifdef _MULTI_DATAMODEL
6965 6963 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
6966 6964 rsm_ioctlmsg32_t msg32;
6967 6965 int i;
6968 6966
6969 6967 for (i = 0; i < 4; i++) {
6970 6968 msg32.bar.comp[i].u64 = msg->bar.comp[i].u64;
6971 6969 }
6972 6970
6973 6971 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6974 6972 "rsmbar_ioctl done\n"));
6975 6973 if (ddi_copyout((caddr_t)&msg32, (caddr_t)arg,
6976 6974 sizeof (msg32), mode))
6977 6975 return (RSMERR_BAD_ADDR);
6978 6976 else
6979 6977 return (RSM_SUCCESS);
6980 6978 }
6981 6979 #endif
6982 6980 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6983 6981 "rsmbar_ioctl done\n"));
6984 6982 if (ddi_copyout((caddr_t)&msg->bar, (caddr_t)arg,
6985 6983 sizeof (*msg), mode))
6986 6984 return (RSMERR_BAD_ADDR);
6987 6985 else
6988 6986 return (RSM_SUCCESS);
6989 6987 }
6990 6988
6991 6989 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6992 6990 "rsmbar_ioctl done: error=%d\n", e));
6993 6991
6994 6992 return (e);
6995 6993 }
6996 6994
6997 6995 /*
6998 6996 * Ring the doorbell of the export segment to which this segment is
6999 6997 * connected.
7000 6998 */
7001 6999 static int
7002 7000 exportbell_ioctl(rsmseg_t *seg, int cmd /*ARGSUSED*/)
7003 7001 {
7004 7002 int e = 0;
7005 7003 rsmipc_request_t request;
7006 7004
7007 7005 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
7008 7006
7009 7007 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "exportbell_ioctl enter\n"));
7010 7008
7011 7009 request.rsmipc_key = seg->s_segid;
7012 7010 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_BELL;
7013 7011 request.rsmipc_segment_cookie = NULL;
7014 7012 e = rsmipc_send(seg->s_node, &request, RSM_NO_REPLY);
7015 7013
7016 7014 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7017 7015 "exportbell_ioctl done: %d\n", e));
7018 7016
7019 7017 return (e);
7020 7018 }
7021 7019
7022 7020 /*
7023 7021 * Ring the doorbells of all segments importing this segment
7024 7022 */
7025 7023 static int
7026 7024 importbell_ioctl(rsmseg_t *seg, int cmd /*ARGSUSED*/)
7027 7025 {
7028 7026 importing_token_t *token = NULL;
7029 7027 rsmipc_request_t request;
7030 7028 int index;
7031 7029
7032 7030 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT | RSM_IOCTL);
7033 7031
7034 7032 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importbell_ioctl enter\n"));
7035 7033
7036 7034 ASSERT(seg->s_state != RSM_STATE_NEW &&
7037 7035 seg->s_state != RSM_STATE_NEW_QUIESCED);
7038 7036
7039 7037 request.rsmipc_key = seg->s_segid;
7040 7038 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_BELL;
7041 7039
7042 7040 index = rsmhash(seg->s_segid);
7043 7041
7044 7042 token = importer_list.bucket[index];
7045 7043
7046 7044 while (token != NULL) {
7047 7045 if (seg->s_key == token->key) {
7048 7046 request.rsmipc_segment_cookie =
7049 7047 token->import_segment_cookie;
7050 7048 (void) rsmipc_send(token->importing_node,
7051 7049 &request, RSM_NO_REPLY);
7052 7050 }
7053 7051 token = token->next;
7054 7052 }
7055 7053
7056 7054 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7057 7055 "importbell_ioctl done\n"));
7058 7056 return (RSM_SUCCESS);
7059 7057 }
7060 7058
7061 7059 static int
7062 7060 rsm_consumeevent_copyin(caddr_t arg, rsm_consume_event_msg_t *msgp,
7063 7061 rsm_poll_event_t **eventspp, int mode)
7064 7062 {
7065 7063 rsm_poll_event_t *evlist = NULL;
7066 7064 size_t evlistsz;
7067 7065 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IOCTL);
7068 7066
7069 7067 #ifdef _MULTI_DATAMODEL
7070 7068 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7071 7069 int i;
7072 7070 rsm_consume_event_msg32_t cemsg32 = {0};
7073 7071 rsm_poll_event32_t event32[RSM_MAX_POLLFDS];
7074 7072 rsm_poll_event32_t *evlist32;
7075 7073 size_t evlistsz32;
7076 7074
7077 7075 /* copyin the ioctl message */
7078 7076 if (ddi_copyin(arg, (caddr_t)&cemsg32,
7079 7077 sizeof (rsm_consume_event_msg32_t), mode)) {
7080 7078 DBG_PRINTF((category, RSM_ERR,
7081 7079 "consumeevent_copyin msgp: RSMERR_BAD_ADDR\n"));
7082 7080 return (RSMERR_BAD_ADDR);
7083 7081 }
7084 7082 msgp->seglist = (caddr_t)(uintptr_t)cemsg32.seglist;
7085 7083 msgp->numents = (int)cemsg32.numents;
7086 7084
7087 7085 evlistsz32 = sizeof (rsm_poll_event32_t) * msgp->numents;
7088 7086 /*
7089 7087 * If numents is large alloc events list on heap otherwise
7090 7088 * use the address of array that was passed in.
7091 7089 */
7092 7090 if (msgp->numents > RSM_MAX_POLLFDS) {
7093 7091 if (msgp->numents > max_segs) { /* validate numents */
7094 7092 DBG_PRINTF((category, RSM_ERR,
7095 7093 "consumeevent_copyin: "
7096 7094 "RSMERR_BAD_ARGS_ERRORS\n"));
7097 7095 return (RSMERR_BAD_ARGS_ERRORS);
7098 7096 }
7099 7097 evlist32 = kmem_zalloc(evlistsz32, KM_SLEEP);
7100 7098 } else {
7101 7099 evlist32 = event32;
7102 7100 }
7103 7101
7104 7102 /* copyin the seglist into the rsm_poll_event32_t array */
7105 7103 if (ddi_copyin((caddr_t)msgp->seglist, (caddr_t)evlist32,
7106 7104 evlistsz32, mode)) {
7107 7105 if ((msgp->numents > RSM_MAX_POLLFDS) && evlist32) {
7108 7106 kmem_free(evlist32, evlistsz32);
7109 7107 }
7110 7108 DBG_PRINTF((category, RSM_ERR,
7111 7109 "consumeevent_copyin evlist: RSMERR_BAD_ADDR\n"));
7112 7110 return (RSMERR_BAD_ADDR);
7113 7111 }
7114 7112
7115 7113 /* evlist and evlistsz are based on rsm_poll_event_t type */
7116 7114 evlistsz = sizeof (rsm_poll_event_t)* msgp->numents;
7117 7115
7118 7116 if (msgp->numents > RSM_MAX_POLLFDS) {
7119 7117 evlist = kmem_zalloc(evlistsz, KM_SLEEP);
7120 7118 *eventspp = evlist;
7121 7119 } else {
7122 7120 evlist = *eventspp;
7123 7121 }
7124 7122 /*
7125 7123 * copy the rsm_poll_event32_t array to the rsm_poll_event_t
7126 7124 * array
7127 7125 */
7128 7126 for (i = 0; i < msgp->numents; i++) {
7129 7127 evlist[i].rnum = evlist32[i].rnum;
7130 7128 evlist[i].fdsidx = evlist32[i].fdsidx;
7131 7129 evlist[i].revent = evlist32[i].revent;
7132 7130 }
7133 7131 /* free the temp 32-bit event list */
7134 7132 if ((msgp->numents > RSM_MAX_POLLFDS) && evlist32) {
7135 7133 kmem_free(evlist32, evlistsz32);
7136 7134 }
7137 7135
7138 7136 return (RSM_SUCCESS);
7139 7137 }
7140 7138 #endif
7141 7139 /* copyin the ioctl message */
7142 7140 if (ddi_copyin(arg, (caddr_t)msgp, sizeof (rsm_consume_event_msg_t),
7143 7141 mode)) {
7144 7142 DBG_PRINTF((category, RSM_ERR,
7145 7143 "consumeevent_copyin msgp: RSMERR_BAD_ADDR\n"));
7146 7144 return (RSMERR_BAD_ADDR);
7147 7145 }
7148 7146 /*
7149 7147 * If numents is large alloc events list on heap otherwise
7150 7148 * use the address of array that was passed in.
7151 7149 */
7152 7150 if (msgp->numents > RSM_MAX_POLLFDS) {
7153 7151 if (msgp->numents > max_segs) { /* validate numents */
7154 7152 DBG_PRINTF((category, RSM_ERR,
7155 7153 "consumeevent_copyin: RSMERR_BAD_ARGS_ERRORS\n"));
7156 7154 return (RSMERR_BAD_ARGS_ERRORS);
7157 7155 }
7158 7156 evlistsz = sizeof (rsm_poll_event_t)*msgp->numents;
7159 7157 evlist = kmem_zalloc(evlistsz, KM_SLEEP);
7160 7158 *eventspp = evlist;
7161 7159 }
7162 7160
7163 7161 /* copyin the seglist */
7164 7162 if (ddi_copyin((caddr_t)msgp->seglist, (caddr_t)(*eventspp),
7165 7163 sizeof (rsm_poll_event_t)*msgp->numents, mode)) {
7166 7164 if (evlist) {
7167 7165 kmem_free(evlist, evlistsz);
7168 7166 *eventspp = NULL;
7169 7167 }
7170 7168 DBG_PRINTF((category, RSM_ERR,
7171 7169 "consumeevent_copyin evlist: RSMERR_BAD_ADDR\n"));
7172 7170 return (RSMERR_BAD_ADDR);
7173 7171 }
7174 7172
7175 7173 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7176 7174 "consumeevent_copyin done\n"));
7177 7175 return (RSM_SUCCESS);
7178 7176 }
7179 7177
7180 7178 static int
7181 7179 rsm_consumeevent_copyout(rsm_consume_event_msg_t *msgp,
7182 7180 rsm_poll_event_t *eventsp, int mode)
7183 7181 {
7184 7182 size_t evlistsz;
7185 7183 int err = RSM_SUCCESS;
7186 7184 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IOCTL);
7187 7185
7188 7186 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7189 7187 "consumeevent_copyout enter: numents(%d) eventsp(%p)\n",
7190 7188 msgp->numents, eventsp));
7191 7189
7192 7190 #ifdef _MULTI_DATAMODEL
7193 7191 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7194 7192 int i;
7195 7193 rsm_poll_event32_t event32[RSM_MAX_POLLFDS];
7196 7194 rsm_poll_event32_t *evlist32;
7197 7195 size_t evlistsz32;
7198 7196
7199 7197 evlistsz32 = sizeof (rsm_poll_event32_t)*msgp->numents;
7200 7198 if (msgp->numents > RSM_MAX_POLLFDS) {
7201 7199 evlist32 = kmem_zalloc(evlistsz32, KM_SLEEP);
7202 7200 } else {
7203 7201 evlist32 = event32;
7204 7202 }
7205 7203
7206 7204 /*
7207 7205 * copy the rsm_poll_event_t array to the rsm_poll_event32_t
7208 7206 * array
7209 7207 */
7210 7208 for (i = 0; i < msgp->numents; i++) {
7211 7209 evlist32[i].rnum = eventsp[i].rnum;
7212 7210 evlist32[i].fdsidx = eventsp[i].fdsidx;
7213 7211 evlist32[i].revent = eventsp[i].revent;
7214 7212 }
7215 7213
7216 7214 if (ddi_copyout((caddr_t)evlist32, (caddr_t)msgp->seglist,
7217 7215 evlistsz32, mode)) {
7218 7216 err = RSMERR_BAD_ADDR;
7219 7217 }
7220 7218
7221 7219 if (msgp->numents > RSM_MAX_POLLFDS) {
7222 7220 if (evlist32) { /* free the temp 32-bit event list */
7223 7221 kmem_free(evlist32, evlistsz32);
7224 7222 }
7225 7223 /*
7226 7224 * eventsp and evlistsz are based on rsm_poll_event_t
7227 7225 * type
7228 7226 */
7229 7227 evlistsz = sizeof (rsm_poll_event_t)*msgp->numents;
7230 7228 /* event list on the heap and needs to be freed here */
7231 7229 if (eventsp) {
7232 7230 kmem_free(eventsp, evlistsz);
7233 7231 }
7234 7232 }
7235 7233
7236 7234 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7237 7235 "consumeevent_copyout done: err=%d\n", err));
7238 7236 return (err);
7239 7237 }
7240 7238 #endif
7241 7239 evlistsz = sizeof (rsm_poll_event_t)*msgp->numents;
7242 7240
7243 7241 if (ddi_copyout((caddr_t)eventsp, (caddr_t)msgp->seglist, evlistsz,
7244 7242 mode)) {
7245 7243 err = RSMERR_BAD_ADDR;
7246 7244 }
7247 7245
7248 7246 if ((msgp->numents > RSM_MAX_POLLFDS) && eventsp) {
7249 7247 /* event list on the heap and needs to be freed here */
7250 7248 kmem_free(eventsp, evlistsz);
7251 7249 }
7252 7250
7253 7251 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7254 7252 "consumeevent_copyout done: err=%d\n", err));
7255 7253 return (err);
7256 7254 }
7257 7255
7258 7256 static int
7259 7257 rsm_consumeevent_ioctl(caddr_t arg, int mode)
7260 7258 {
7261 7259 int rc;
7262 7260 int i;
7263 7261 minor_t rnum;
7264 7262 rsm_consume_event_msg_t msg = {0};
7265 7263 rsmseg_t *seg;
7266 7264 rsm_poll_event_t *event_list;
7267 7265 rsm_poll_event_t events[RSM_MAX_POLLFDS];
7268 7266 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IOCTL);
7269 7267
7270 7268 event_list = events;
7271 7269
7272 7270 if ((rc = rsm_consumeevent_copyin(arg, &msg, &event_list, mode)) !=
7273 7271 RSM_SUCCESS) {
7274 7272 return (rc);
7275 7273 }
7276 7274
7277 7275 for (i = 0; i < msg.numents; i++) {
7278 7276 rnum = event_list[i].rnum;
7279 7277 event_list[i].revent = 0;
7280 7278 /* get the segment structure */
7281 7279 seg = (rsmseg_t *)rsmresource_lookup(rnum, RSM_LOCK);
7282 7280 if (seg) {
7283 7281 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7284 7282 "consumeevent_ioctl: rnum(%d) seg(%p)\n", rnum,
7285 7283 seg));
7286 7284 if (seg->s_pollevent) {
7287 7285 /* consume the event */
7288 7286 atomic_dec_32(&seg->s_pollevent);
7289 7287 event_list[i].revent = POLLRDNORM;
7290 7288 }
7291 7289 rsmseglock_release(seg);
7292 7290 }
7293 7291 }
7294 7292
7295 7293 if ((rc = rsm_consumeevent_copyout(&msg, event_list, mode)) !=
7296 7294 RSM_SUCCESS) {
7297 7295 return (rc);
7298 7296 }
7299 7297
7300 7298 return (RSM_SUCCESS);
7301 7299 }
7302 7300
7303 7301 static int
7304 7302 iovec_copyin(caddr_t user_vec, rsmka_iovec_t *iovec, int count, int mode)
7305 7303 {
7306 7304 int size;
7307 7305 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
7308 7306
7309 7307 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "iovec_copyin enter\n"));
7310 7308
7311 7309 #ifdef _MULTI_DATAMODEL
7312 7310 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7313 7311 rsmka_iovec32_t *iovec32, *iovec32_base;
7314 7312 int i;
7315 7313
7316 7314 size = count * sizeof (rsmka_iovec32_t);
7317 7315 iovec32_base = iovec32 = kmem_zalloc(size, KM_SLEEP);
7318 7316 if (ddi_copyin((caddr_t)user_vec,
7319 7317 (caddr_t)iovec32, size, mode)) {
7320 7318 kmem_free(iovec32, size);
7321 7319 DBG_PRINTF((category, RSM_DEBUG,
7322 7320 "iovec_copyin: returning RSMERR_BAD_ADDR\n"));
7323 7321 return (RSMERR_BAD_ADDR);
7324 7322 }
7325 7323
7326 7324 for (i = 0; i < count; i++, iovec++, iovec32++) {
7327 7325 iovec->io_type = (int)iovec32->io_type;
7328 7326 if (iovec->io_type == RSM_HANDLE_TYPE)
7329 7327 iovec->local.segid = (rsm_memseg_id_t)
7330 7328 iovec32->local;
7331 7329 else
7332 7330 iovec->local.vaddr =
7333 7331 (caddr_t)(uintptr_t)iovec32->local;
7334 7332 iovec->local_offset = (size_t)iovec32->local_offset;
7335 7333 iovec->remote_offset = (size_t)iovec32->remote_offset;
7336 7334 iovec->transfer_len = (size_t)iovec32->transfer_len;
7337 7335
7338 7336 }
7339 7337 kmem_free(iovec32_base, size);
7340 7338 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7341 7339 "iovec_copyin done\n"));
7342 7340 return (DDI_SUCCESS);
7343 7341 }
7344 7342 #endif
7345 7343
7346 7344 size = count * sizeof (rsmka_iovec_t);
7347 7345 if (ddi_copyin((caddr_t)user_vec, (caddr_t)iovec, size, mode)) {
7348 7346 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7349 7347 "iovec_copyin done: RSMERR_BAD_ADDR\n"));
7350 7348 return (RSMERR_BAD_ADDR);
7351 7349 }
7352 7350
7353 7351 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "iovec_copyin done\n"));
7354 7352
7355 7353 return (DDI_SUCCESS);
7356 7354 }
7357 7355
7358 7356
7359 7357 static int
7360 7358 sgio_copyin(caddr_t arg, rsmka_scat_gath_t *sg_io, int mode)
7361 7359 {
7362 7360 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
7363 7361
7364 7362 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "sgio_copyin enter\n"));
7365 7363
7366 7364 #ifdef _MULTI_DATAMODEL
7367 7365 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7368 7366 rsmka_scat_gath32_t sg_io32;
7369 7367
7370 7368 if (ddi_copyin(arg, (caddr_t)&sg_io32, sizeof (sg_io32),
7371 7369 mode)) {
7372 7370 DBG_PRINTF((category, RSM_DEBUG,
7373 7371 "sgio_copyin done: returning EFAULT\n"));
7374 7372 return (RSMERR_BAD_ADDR);
7375 7373 }
7376 7374 sg_io->local_nodeid = (rsm_node_id_t)sg_io32.local_nodeid;
7377 7375 sg_io->io_request_count = (size_t)sg_io32.io_request_count;
7378 7376 sg_io->io_residual_count = (size_t)sg_io32.io_residual_count;
7379 7377 sg_io->flags = (size_t)sg_io32.flags;
7380 7378 sg_io->remote_handle = (rsm_memseg_import_handle_t)
7381 7379 (uintptr_t)sg_io32.remote_handle;
7382 7380 sg_io->iovec = (rsmka_iovec_t *)(uintptr_t)sg_io32.iovec;
7383 7381 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7384 7382 "sgio_copyin done\n"));
7385 7383 return (DDI_SUCCESS);
7386 7384 }
7387 7385 #endif
7388 7386 if (ddi_copyin(arg, (caddr_t)sg_io, sizeof (rsmka_scat_gath_t),
7389 7387 mode)) {
7390 7388 DBG_PRINTF((category, RSM_DEBUG,
7391 7389 "sgio_copyin done: returning EFAULT\n"));
7392 7390 return (RSMERR_BAD_ADDR);
7393 7391 }
7394 7392 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "sgio_copyin done\n"));
7395 7393 return (DDI_SUCCESS);
7396 7394 }
7397 7395
7398 7396 static int
7399 7397 sgio_resid_copyout(caddr_t arg, rsmka_scat_gath_t *sg_io, int mode)
7400 7398 {
7401 7399 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
7402 7400
7403 7401 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7404 7402 "sgio_resid_copyout enter\n"));
7405 7403
7406 7404 #ifdef _MULTI_DATAMODEL
7407 7405 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7408 7406 rsmka_scat_gath32_t sg_io32;
7409 7407
7410 7408 sg_io32.io_residual_count = sg_io->io_residual_count;
7411 7409 sg_io32.flags = sg_io->flags;
7412 7410
7413 7411 if (ddi_copyout((caddr_t)&sg_io32.io_residual_count,
7414 7412 (caddr_t)&((rsmka_scat_gath32_t *)arg)->io_residual_count,
7415 7413 sizeof (uint32_t), mode)) {
7416 7414
7417 7415 DBG_PRINTF((category, RSM_ERR,
7418 7416 "sgio_resid_copyout error: rescnt\n"));
7419 7417 return (RSMERR_BAD_ADDR);
7420 7418 }
7421 7419
7422 7420 if (ddi_copyout((caddr_t)&sg_io32.flags,
7423 7421 (caddr_t)&((rsmka_scat_gath32_t *)arg)->flags,
7424 7422 sizeof (uint32_t), mode)) {
7425 7423
7426 7424 DBG_PRINTF((category, RSM_ERR,
7427 7425 "sgio_resid_copyout error: flags\n"));
7428 7426 return (RSMERR_BAD_ADDR);
7429 7427 }
7430 7428 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7431 7429 "sgio_resid_copyout done\n"));
7432 7430 return (DDI_SUCCESS);
7433 7431 }
7434 7432 #endif
7435 7433 if (ddi_copyout((caddr_t)&sg_io->io_residual_count,
7436 7434 (caddr_t)&((rsmka_scat_gath_t *)arg)->io_residual_count,
7437 7435 sizeof (ulong_t), mode)) {
7438 7436
7439 7437 DBG_PRINTF((category, RSM_ERR,
7440 7438 "sgio_resid_copyout error:rescnt\n"));
7441 7439 return (RSMERR_BAD_ADDR);
7442 7440 }
7443 7441
7444 7442 if (ddi_copyout((caddr_t)&sg_io->flags,
7445 7443 (caddr_t)&((rsmka_scat_gath_t *)arg)->flags,
7446 7444 sizeof (uint_t), mode)) {
7447 7445
7448 7446 DBG_PRINTF((category, RSM_ERR,
7449 7447 "sgio_resid_copyout error:flags\n"));
7450 7448 return (RSMERR_BAD_ADDR);
7451 7449 }
7452 7450
7453 7451 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "sgio_resid_copyout done\n"));
7454 7452 return (DDI_SUCCESS);
7455 7453 }
7456 7454
7457 7455
7458 7456 static int
7459 7457 rsm_iovec_ioctl(dev_t dev, caddr_t arg, int cmd, int mode, cred_t *credp)
7460 7458 {
7461 7459 rsmka_scat_gath_t sg_io;
7462 7460 rsmka_iovec_t ka_iovec_arr[RSM_MAX_IOVLEN];
7463 7461 rsmka_iovec_t *ka_iovec;
7464 7462 rsmka_iovec_t *ka_iovec_start;
7465 7463 rsmpi_scat_gath_t rsmpi_sg_io;
7466 7464 rsmpi_iovec_t iovec_arr[RSM_MAX_IOVLEN];
7467 7465 rsmpi_iovec_t *iovec;
7468 7466 rsmpi_iovec_t *iovec_start = NULL;
7469 7467 rsmapi_access_entry_t *acl;
7470 7468 rsmresource_t *res;
7471 7469 minor_t rnum;
7472 7470 rsmseg_t *im_seg, *ex_seg;
7473 7471 int e;
7474 7472 int error = 0;
7475 7473 uint_t i;
7476 7474 uint_t iov_proc = 0; /* num of iovecs processed */
7477 7475 size_t size = 0;
7478 7476 size_t ka_size;
7479 7477
7480 7478 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
7481 7479
7482 7480 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_iovec_ioctl enter\n"));
7483 7481
7484 7482 credp = credp;
7485 7483
7486 7484 /*
7487 7485 * Copyin the scatter/gather structure and build new structure
7488 7486 * for rsmpi.
7489 7487 */
7490 7488 e = sgio_copyin(arg, &sg_io, mode);
7491 7489 if (e != DDI_SUCCESS) {
7492 7490 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7493 7491 "rsm_iovec_ioctl done: sgio_copyin %d\n", e));
7494 7492 return (e);
7495 7493 }
7496 7494
7497 7495 if (sg_io.io_request_count > RSM_MAX_SGIOREQS) {
7498 7496 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7499 7497 "rsm_iovec_ioctl done: request_count(%d) too large\n",
7500 7498 sg_io.io_request_count));
7501 7499 return (RSMERR_BAD_SGIO);
7502 7500 }
7503 7501
7504 7502 rsmpi_sg_io.io_request_count = sg_io.io_request_count;
7505 7503 rsmpi_sg_io.io_residual_count = sg_io.io_request_count;
7506 7504 rsmpi_sg_io.io_segflg = 0;
7507 7505
7508 7506 /* Allocate memory and copyin io vector array */
7509 7507 if (sg_io.io_request_count > RSM_MAX_IOVLEN) {
7510 7508 ka_size = sg_io.io_request_count * sizeof (rsmka_iovec_t);
7511 7509 ka_iovec_start = ka_iovec = kmem_zalloc(ka_size, KM_SLEEP);
7512 7510 } else {
7513 7511 ka_iovec_start = ka_iovec = ka_iovec_arr;
7514 7512 }
7515 7513 e = iovec_copyin((caddr_t)sg_io.iovec, ka_iovec,
7516 7514 sg_io.io_request_count, mode);
7517 7515 if (e != DDI_SUCCESS) {
7518 7516 if (sg_io.io_request_count > RSM_MAX_IOVLEN)
7519 7517 kmem_free(ka_iovec, ka_size);
7520 7518 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7521 7519 "rsm_iovec_ioctl done: iovec_copyin %d\n", e));
7522 7520 return (e);
7523 7521 }
7524 7522
7525 7523 /* get the import segment descriptor */
7526 7524 rnum = getminor(dev);
7527 7525 res = rsmresource_lookup(rnum, RSM_LOCK);
7528 7526
7529 7527 /*
7530 7528 * The following sequence of locking may (or MAY NOT) cause a
7531 7529 * deadlock but this is currently not addressed here since the
7532 7530 * implementation will be changed to incorporate the use of
7533 7531 * reference counting for both the import and the export segments.
7534 7532 */
7535 7533
7536 7534 /* rsmseglock_acquire(im_seg) done in rsmresource_lookup */
7537 7535
7538 7536 im_seg = (rsmseg_t *)res;
7539 7537
7540 7538 if (im_seg == NULL) {
7541 7539 if (sg_io.io_request_count > RSM_MAX_IOVLEN)
7542 7540 kmem_free(ka_iovec, ka_size);
7543 7541 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7544 7542 "rsm_iovec_ioctl done: rsmresource_lookup failed\n"));
7545 7543 return (EINVAL);
7546 7544 }
7547 7545 /* putv/getv supported is supported only on import segments */
7548 7546 if (im_seg->s_type != RSM_RESOURCE_IMPORT_SEGMENT) {
7549 7547 rsmseglock_release(im_seg);
7550 7548 if (sg_io.io_request_count > RSM_MAX_IOVLEN)
7551 7549 kmem_free(ka_iovec, ka_size);
7552 7550 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7553 7551 "rsm_iovec_ioctl done: not an import segment\n"));
7554 7552 return (EINVAL);
7555 7553 }
7556 7554
7557 7555 /*
7558 7556 * wait for a remote DR to complete ie. for segments to get UNQUIESCED
7559 7557 * as well as wait for a local DR to complete.
7560 7558 */
7561 7559 while ((im_seg->s_state == RSM_STATE_CONN_QUIESCE) ||
7562 7560 (im_seg->s_state == RSM_STATE_MAP_QUIESCE) ||
7563 7561 (im_seg->s_flags & RSM_DR_INPROGRESS)) {
7564 7562 if (cv_wait_sig(&im_seg->s_cv, &im_seg->s_lock) == 0) {
7565 7563 DBG_PRINTF((category, RSM_DEBUG,
7566 7564 "rsm_iovec_ioctl done: cv_wait INTR"));
7567 7565 rsmseglock_release(im_seg);
7568 7566 return (RSMERR_INTERRUPTED);
7569 7567 }
7570 7568 }
7571 7569
7572 7570 if ((im_seg->s_state != RSM_STATE_CONNECT) &&
7573 7571 (im_seg->s_state != RSM_STATE_ACTIVE)) {
7574 7572
7575 7573 ASSERT(im_seg->s_state == RSM_STATE_DISCONNECT ||
7576 7574 im_seg->s_state == RSM_STATE_NEW);
7577 7575
7578 7576 DBG_PRINTF((category, RSM_DEBUG,
7579 7577 "rsm_iovec_ioctl done: im_seg not conn/map"));
7580 7578 rsmseglock_release(im_seg);
7581 7579 e = RSMERR_BAD_SGIO;
7582 7580 goto out;
7583 7581 }
7584 7582
7585 7583 im_seg->s_rdmacnt++;
7586 7584 rsmseglock_release(im_seg);
7587 7585
7588 7586 /*
7589 7587 * Allocate and set up the io vector for rsmpi
7590 7588 */
7591 7589 if (sg_io.io_request_count > RSM_MAX_IOVLEN) {
7592 7590 size = sg_io.io_request_count * sizeof (rsmpi_iovec_t);
7593 7591 iovec_start = iovec = kmem_zalloc(size, KM_SLEEP);
7594 7592 } else {
7595 7593 iovec_start = iovec = iovec_arr;
7596 7594 }
7597 7595
7598 7596 rsmpi_sg_io.iovec = iovec;
7599 7597 for (iov_proc = 0; iov_proc < sg_io.io_request_count; iov_proc++) {
7600 7598 if (ka_iovec->io_type == RSM_HANDLE_TYPE) {
7601 7599 ex_seg = rsmexport_lookup(ka_iovec->local.segid);
7602 7600
7603 7601 if (ex_seg == NULL) {
7604 7602 e = RSMERR_BAD_SGIO;
7605 7603 break;
7606 7604 }
7607 7605 ASSERT(ex_seg->s_state == RSM_STATE_EXPORT);
7608 7606
7609 7607 acl = ex_seg->s_acl;
7610 7608 if (acl[0].ae_permission == 0) {
7611 7609 struct buf *xbuf;
7612 7610 dev_t sdev = 0;
7613 7611
7614 7612 xbuf = ddi_umem_iosetup(ex_seg->s_cookie,
7615 7613 0, ex_seg->s_len, B_WRITE,
7616 7614 sdev, 0, NULL, DDI_UMEM_SLEEP);
7617 7615
7618 7616 ASSERT(xbuf != NULL);
7619 7617
7620 7618 iovec->local_mem.ms_type = RSM_MEM_BUF;
7621 7619 iovec->local_mem.ms_memory.bp = xbuf;
7622 7620 } else {
7623 7621 iovec->local_mem.ms_type = RSM_MEM_HANDLE;
7624 7622 iovec->local_mem.ms_memory.handle =
7625 7623 ex_seg->s_handle.out;
7626 7624 }
7627 7625 ex_seg->s_rdmacnt++; /* refcnt the handle */
7628 7626 rsmseglock_release(ex_seg);
7629 7627 } else {
7630 7628 iovec->local_mem.ms_type = RSM_MEM_VADDR;
7631 7629 iovec->local_mem.ms_memory.vr.vaddr =
7632 7630 ka_iovec->local.vaddr;
7633 7631 }
7634 7632
7635 7633 iovec->local_offset = ka_iovec->local_offset;
7636 7634 iovec->remote_handle = im_seg->s_handle.in;
7637 7635 iovec->remote_offset = ka_iovec->remote_offset;
7638 7636 iovec->transfer_length = ka_iovec->transfer_len;
7639 7637 iovec++;
7640 7638 ka_iovec++;
7641 7639 }
7642 7640
7643 7641 if (iov_proc < sg_io.io_request_count) {
7644 7642 /* error while processing handle */
7645 7643 rsmseglock_acquire(im_seg);
7646 7644 im_seg->s_rdmacnt--; /* decrement the refcnt for importseg */
7647 7645 if (im_seg->s_rdmacnt == 0) {
7648 7646 cv_broadcast(&im_seg->s_cv);
7649 7647 }
7650 7648 rsmseglock_release(im_seg);
7651 7649 goto out;
7652 7650 }
7653 7651
7654 7652 /* call rsmpi */
7655 7653 if (cmd == RSM_IOCTL_PUTV)
7656 7654 e = im_seg->s_adapter->rsmpi_ops->rsm_memseg_import_putv(
7657 7655 im_seg->s_adapter->rsmpi_handle,
7658 7656 &rsmpi_sg_io);
7659 7657 else if (cmd == RSM_IOCTL_GETV)
7660 7658 e = im_seg->s_adapter->rsmpi_ops->rsm_memseg_import_getv(
7661 7659 im_seg->s_adapter->rsmpi_handle,
7662 7660 &rsmpi_sg_io);
7663 7661 else {
7664 7662 e = EINVAL;
7665 7663 DBG_PRINTF((category, RSM_DEBUG,
7666 7664 "iovec_ioctl: bad command = %x\n", cmd));
7667 7665 }
7668 7666
7669 7667
7670 7668 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7671 7669 "rsm_iovec_ioctl RSMPI oper done %d\n", e));
7672 7670
7673 7671 sg_io.io_residual_count = rsmpi_sg_io.io_residual_count;
7674 7672
7675 7673 /*
7676 7674 * Check for implicit signal post flag and do the signal
7677 7675 * post if needed
7678 7676 */
7679 7677 if (sg_io.flags & RSM_IMPLICIT_SIGPOST &&
7680 7678 e == RSM_SUCCESS) {
7681 7679 rsmipc_request_t request;
7682 7680
7683 7681 request.rsmipc_key = im_seg->s_segid;
7684 7682 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_BELL;
7685 7683 request.rsmipc_segment_cookie = NULL;
7686 7684 e = rsmipc_send(im_seg->s_node, &request, RSM_NO_REPLY);
7687 7685 /*
7688 7686 * Reset the implicit signal post flag to 0 to indicate
7689 7687 * that the signal post has been done and need not be
7690 7688 * done in the RSMAPI library
7691 7689 */
7692 7690 sg_io.flags &= ~RSM_IMPLICIT_SIGPOST;
7693 7691 }
7694 7692
7695 7693 rsmseglock_acquire(im_seg);
7696 7694 im_seg->s_rdmacnt--;
7697 7695 if (im_seg->s_rdmacnt == 0) {
7698 7696 cv_broadcast(&im_seg->s_cv);
7699 7697 }
7700 7698 rsmseglock_release(im_seg);
7701 7699 error = sgio_resid_copyout(arg, &sg_io, mode);
7702 7700 out:
7703 7701 iovec = iovec_start;
7704 7702 ka_iovec = ka_iovec_start;
7705 7703 for (i = 0; i < iov_proc; i++) {
7706 7704 if (ka_iovec->io_type == RSM_HANDLE_TYPE) {
7707 7705 ex_seg = rsmexport_lookup(ka_iovec->local.segid);
7708 7706
7709 7707 ASSERT(ex_seg != NULL);
7710 7708 ASSERT(ex_seg->s_state == RSM_STATE_EXPORT);
7711 7709
7712 7710 ex_seg->s_rdmacnt--; /* unrefcnt the handle */
7713 7711 if (ex_seg->s_rdmacnt == 0) {
7714 7712 cv_broadcast(&ex_seg->s_cv);
7715 7713 }
7716 7714 rsmseglock_release(ex_seg);
7717 7715 }
7718 7716
7719 7717 ASSERT(iovec != NULL); /* true if iov_proc > 0 */
7720 7718
7721 7719 /*
7722 7720 * At present there is no dependency on the existence of xbufs
7723 7721 * created by ddi_umem_iosetup for each of the iovecs. So we
7724 7722 * can these xbufs here.
7725 7723 */
7726 7724 if (iovec->local_mem.ms_type == RSM_MEM_BUF) {
7727 7725 freerbuf(iovec->local_mem.ms_memory.bp);
7728 7726 }
7729 7727
7730 7728 iovec++;
7731 7729 ka_iovec++;
7732 7730 }
7733 7731
7734 7732 if (sg_io.io_request_count > RSM_MAX_IOVLEN) {
7735 7733 if (iovec_start)
7736 7734 kmem_free(iovec_start, size);
7737 7735 kmem_free(ka_iovec_start, ka_size);
7738 7736 }
7739 7737
7740 7738 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7741 7739 "rsm_iovec_ioctl done %d\n", e));
7742 7740 /* if RSMPI call fails return that else return copyout's retval */
7743 7741 return ((e != RSM_SUCCESS) ? e : error);
7744 7742
7745 7743 }
7746 7744
7747 7745
7748 7746 static int
7749 7747 rsmaddr_ioctl(int cmd, rsm_ioctlmsg_t *msg, int mode)
7750 7748 {
7751 7749 adapter_t *adapter;
7752 7750 rsm_addr_t addr;
7753 7751 rsm_node_id_t node;
7754 7752 int rval = DDI_SUCCESS;
7755 7753 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL);
7756 7754
7757 7755 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmaddr_ioctl enter\n"));
7758 7756
7759 7757 adapter = rsm_getadapter(msg, mode);
7760 7758 if (adapter == NULL) {
7761 7759 DBG_PRINTF((category, RSM_DEBUG,
7762 7760 "rsmaddr_ioctl done: adapter not found\n"));
7763 7761 return (RSMERR_CTLR_NOT_PRESENT);
7764 7762 }
7765 7763
7766 7764 switch (cmd) {
7767 7765 case RSM_IOCTL_MAP_TO_ADDR: /* nodeid to hwaddr mapping */
7768 7766 /* returns the hwaddr in msg->hwaddr */
7769 7767 if (msg->nodeid == my_nodeid) {
7770 7768 msg->hwaddr = adapter->hwaddr;
7771 7769 } else {
7772 7770 addr = get_remote_hwaddr(adapter, msg->nodeid);
7773 7771 if ((int64_t)addr < 0) {
7774 7772 rval = RSMERR_INTERNAL_ERROR;
7775 7773 } else {
7776 7774 msg->hwaddr = addr;
7777 7775 }
7778 7776 }
7779 7777 break;
7780 7778 case RSM_IOCTL_MAP_TO_NODEID: /* hwaddr to nodeid mapping */
7781 7779 /* returns the nodeid in msg->nodeid */
7782 7780 if (msg->hwaddr == adapter->hwaddr) {
7783 7781 msg->nodeid = my_nodeid;
7784 7782 } else {
7785 7783 node = get_remote_nodeid(adapter, msg->hwaddr);
7786 7784 if ((int)node < 0) {
7787 7785 rval = RSMERR_INTERNAL_ERROR;
7788 7786 } else {
7789 7787 msg->nodeid = (rsm_node_id_t)node;
7790 7788 }
7791 7789 }
7792 7790 break;
7793 7791 default:
7794 7792 rval = EINVAL;
7795 7793 break;
7796 7794 }
7797 7795
7798 7796 rsmka_release_adapter(adapter);
7799 7797 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7800 7798 "rsmaddr_ioctl done: %d\n", rval));
7801 7799 return (rval);
7802 7800 }
7803 7801
7804 7802 static int
7805 7803 rsm_ddi_copyin(caddr_t arg, rsm_ioctlmsg_t *msg, int mode)
7806 7804 {
7807 7805 DBG_DEFINE(category,
7808 7806 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL | RSM_DDI);
7809 7807
7810 7808 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ddi_copyin enter\n"));
7811 7809
7812 7810 #ifdef _MULTI_DATAMODEL
7813 7811
7814 7812 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7815 7813 rsm_ioctlmsg32_t msg32;
7816 7814 int i;
7817 7815
7818 7816 if (ddi_copyin(arg, (caddr_t)&msg32, sizeof (msg32), mode)) {
7819 7817 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7820 7818 "rsm_ddi_copyin done: EFAULT\n"));
7821 7819 return (RSMERR_BAD_ADDR);
7822 7820 }
7823 7821 msg->len = msg32.len;
7824 7822 msg->vaddr = (caddr_t)(uintptr_t)msg32.vaddr;
7825 7823 msg->arg = (caddr_t)(uintptr_t)msg32.arg;
7826 7824 msg->key = msg32.key;
7827 7825 msg->acl_len = msg32.acl_len;
7828 7826 msg->acl = (rsmapi_access_entry_t *)(uintptr_t)msg32.acl;
7829 7827 msg->cnum = msg32.cnum;
7830 7828 msg->cname = (caddr_t)(uintptr_t)msg32.cname;
7831 7829 msg->cname_len = msg32.cname_len;
7832 7830 msg->nodeid = msg32.nodeid;
7833 7831 msg->hwaddr = msg32.hwaddr;
7834 7832 msg->perm = msg32.perm;
7835 7833 for (i = 0; i < 4; i++) {
7836 7834 msg->bar.comp[i].u64 = msg32.bar.comp[i].u64;
7837 7835 }
7838 7836 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7839 7837 "rsm_ddi_copyin done\n"));
7840 7838 return (RSM_SUCCESS);
7841 7839 }
7842 7840 #endif
7843 7841 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ddi_copyin done\n"));
7844 7842 if (ddi_copyin(arg, (caddr_t)msg, sizeof (*msg), mode))
7845 7843 return (RSMERR_BAD_ADDR);
7846 7844 else
7847 7845 return (RSM_SUCCESS);
7848 7846 }
7849 7847
7850 7848 static int
7851 7849 rsmattr_ddi_copyout(adapter_t *adapter, caddr_t arg, int mode)
7852 7850 {
7853 7851 rsmka_int_controller_attr_t rsm_cattr;
7854 7852 DBG_DEFINE(category,
7855 7853 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL | RSM_DDI);
7856 7854
7857 7855 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7858 7856 "rsmattr_ddi_copyout enter\n"));
7859 7857 /*
7860 7858 * need to copy appropriate data from rsm_controller_attr_t
7861 7859 * to rsmka_int_controller_attr_t
7862 7860 */
7863 7861 #ifdef _MULTI_DATAMODEL
7864 7862 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7865 7863 rsmka_int_controller_attr32_t rsm_cattr32;
7866 7864
7867 7865 rsm_cattr32.attr_direct_access_sizes =
7868 7866 adapter->rsm_attr.attr_direct_access_sizes;
7869 7867 rsm_cattr32.attr_atomic_sizes =
7870 7868 adapter->rsm_attr.attr_atomic_sizes;
7871 7869 rsm_cattr32.attr_page_size =
7872 7870 adapter->rsm_attr.attr_page_size;
7873 7871 if (adapter->rsm_attr.attr_max_export_segment_size >
7874 7872 UINT_MAX)
7875 7873 rsm_cattr32.attr_max_export_segment_size =
7876 7874 RSM_MAXSZ_PAGE_ALIGNED;
7877 7875 else
7878 7876 rsm_cattr32.attr_max_export_segment_size =
7879 7877 adapter->rsm_attr.attr_max_export_segment_size;
7880 7878 if (adapter->rsm_attr.attr_tot_export_segment_size >
7881 7879 UINT_MAX)
7882 7880 rsm_cattr32.attr_tot_export_segment_size =
7883 7881 RSM_MAXSZ_PAGE_ALIGNED;
7884 7882 else
7885 7883 rsm_cattr32.attr_tot_export_segment_size =
7886 7884 adapter->rsm_attr.attr_tot_export_segment_size;
7887 7885 if (adapter->rsm_attr.attr_max_export_segments >
7888 7886 UINT_MAX)
7889 7887 rsm_cattr32.attr_max_export_segments =
7890 7888 UINT_MAX;
7891 7889 else
7892 7890 rsm_cattr32.attr_max_export_segments =
7893 7891 adapter->rsm_attr.attr_max_export_segments;
7894 7892 if (adapter->rsm_attr.attr_max_import_map_size >
7895 7893 UINT_MAX)
7896 7894 rsm_cattr32.attr_max_import_map_size =
7897 7895 RSM_MAXSZ_PAGE_ALIGNED;
7898 7896 else
7899 7897 rsm_cattr32.attr_max_import_map_size =
7900 7898 adapter->rsm_attr.attr_max_import_map_size;
7901 7899 if (adapter->rsm_attr.attr_tot_import_map_size >
7902 7900 UINT_MAX)
7903 7901 rsm_cattr32.attr_tot_import_map_size =
7904 7902 RSM_MAXSZ_PAGE_ALIGNED;
7905 7903 else
7906 7904 rsm_cattr32.attr_tot_import_map_size =
7907 7905 adapter->rsm_attr.attr_tot_import_map_size;
7908 7906 if (adapter->rsm_attr.attr_max_import_segments >
7909 7907 UINT_MAX)
7910 7908 rsm_cattr32.attr_max_import_segments =
7911 7909 UINT_MAX;
7912 7910 else
7913 7911 rsm_cattr32.attr_max_import_segments =
7914 7912 adapter->rsm_attr.attr_max_import_segments;
7915 7913 rsm_cattr32.attr_controller_addr =
7916 7914 adapter->rsm_attr.attr_controller_addr;
7917 7915
7918 7916 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7919 7917 "rsmattr_ddi_copyout done\n"));
7920 7918 if (ddi_copyout((caddr_t)&rsm_cattr32, arg,
7921 7919 sizeof (rsmka_int_controller_attr32_t), mode)) {
7922 7920 return (RSMERR_BAD_ADDR);
7923 7921 }
7924 7922 else
7925 7923 return (RSM_SUCCESS);
7926 7924 }
7927 7925 #endif
7928 7926 rsm_cattr.attr_direct_access_sizes =
7929 7927 adapter->rsm_attr.attr_direct_access_sizes;
7930 7928 rsm_cattr.attr_atomic_sizes =
7931 7929 adapter->rsm_attr.attr_atomic_sizes;
7932 7930 rsm_cattr.attr_page_size =
7933 7931 adapter->rsm_attr.attr_page_size;
7934 7932 rsm_cattr.attr_max_export_segment_size =
7935 7933 adapter->rsm_attr.attr_max_export_segment_size;
7936 7934 rsm_cattr.attr_tot_export_segment_size =
7937 7935 adapter->rsm_attr.attr_tot_export_segment_size;
7938 7936 rsm_cattr.attr_max_export_segments =
7939 7937 adapter->rsm_attr.attr_max_export_segments;
7940 7938 rsm_cattr.attr_max_import_map_size =
7941 7939 adapter->rsm_attr.attr_max_import_map_size;
7942 7940 rsm_cattr.attr_tot_import_map_size =
7943 7941 adapter->rsm_attr.attr_tot_import_map_size;
7944 7942 rsm_cattr.attr_max_import_segments =
7945 7943 adapter->rsm_attr.attr_max_import_segments;
7946 7944 rsm_cattr.attr_controller_addr =
7947 7945 adapter->rsm_attr.attr_controller_addr;
7948 7946 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7949 7947 "rsmattr_ddi_copyout done\n"));
7950 7948 if (ddi_copyout((caddr_t)&rsm_cattr, arg,
7951 7949 sizeof (rsmka_int_controller_attr_t), mode)) {
7952 7950 return (RSMERR_BAD_ADDR);
7953 7951 }
7954 7952 else
7955 7953 return (RSM_SUCCESS);
7956 7954 }
7957 7955
7958 7956 /*ARGSUSED*/
7959 7957 static int
7960 7958 rsm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
7961 7959 int *rvalp)
7962 7960 {
7963 7961 rsmseg_t *seg;
7964 7962 rsmresource_t *res;
7965 7963 minor_t rnum;
7966 7964 rsm_ioctlmsg_t msg = {0};
7967 7965 int error;
7968 7966 adapter_t *adapter;
7969 7967 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL);
7970 7968
7971 7969 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ioctl enter\n"));
7972 7970
7973 7971 if (cmd == RSM_IOCTL_CONSUMEEVENT) {
7974 7972 error = rsm_consumeevent_ioctl((caddr_t)arg, mode);
7975 7973 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7976 7974 "rsm_ioctl RSM_IOCTL_CONSUMEEVENT done: %d\n", error));
7977 7975 return (error);
7978 7976 }
7979 7977
7980 7978 /* topology cmd does not use the arg common to other cmds */
7981 7979 if (RSM_IOCTL_CMDGRP(cmd) == RSM_IOCTL_TOPOLOGY) {
7982 7980 error = rsmka_topology_ioctl((caddr_t)arg, cmd, mode);
7983 7981 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7984 7982 "rsm_ioctl done: %d\n", error));
7985 7983 return (error);
7986 7984 }
7987 7985
7988 7986 if (RSM_IOCTL_CMDGRP(cmd) == RSM_IOCTL_IOVEC) {
7989 7987 error = rsm_iovec_ioctl(dev, (caddr_t)arg, cmd, mode, credp);
7990 7988 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7991 7989 "rsm_ioctl done: %d\n", error));
7992 7990 return (error);
7993 7991 }
7994 7992
7995 7993 /*
7996 7994 * try to load arguments
7997 7995 */
7998 7996 if (cmd != RSM_IOCTL_RING_BELL &&
7999 7997 rsm_ddi_copyin((caddr_t)arg, &msg, mode)) {
8000 7998 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8001 7999 "rsm_ioctl done: EFAULT\n"));
8002 8000 return (RSMERR_BAD_ADDR);
8003 8001 }
8004 8002
8005 8003 if (cmd == RSM_IOCTL_ATTR) {
8006 8004 adapter = rsm_getadapter(&msg, mode);
8007 8005 if (adapter == NULL) {
8008 8006 DBG_PRINTF((category, RSM_DEBUG,
8009 8007 "rsm_ioctl done: ENODEV\n"));
8010 8008 return (RSMERR_CTLR_NOT_PRESENT);
8011 8009 }
8012 8010 error = rsmattr_ddi_copyout(adapter, msg.arg, mode);
8013 8011 rsmka_release_adapter(adapter);
8014 8012 DBG_PRINTF((category, RSM_DEBUG,
8015 8013 "rsm_ioctl:after copyout %d\n", error));
8016 8014 return (error);
8017 8015 }
8018 8016
8019 8017 if (cmd == RSM_IOCTL_BAR_INFO) {
8020 8018 /* Return library off,len of barrier page */
8021 8019 msg.off = barrier_offset;
8022 8020 msg.len = (int)barrier_size;
8023 8021 #ifdef _MULTI_DATAMODEL
8024 8022 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
8025 8023 rsm_ioctlmsg32_t msg32;
8026 8024
8027 8025 if (msg.len > UINT_MAX)
8028 8026 msg.len = RSM_MAXSZ_PAGE_ALIGNED;
8029 8027 else
8030 8028 msg32.len = (int32_t)msg.len;
8031 8029 msg32.off = (int32_t)msg.off;
8032 8030 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8033 8031 "rsm_ioctl done\n"));
8034 8032 if (ddi_copyout((caddr_t)&msg32, (caddr_t)arg,
8035 8033 sizeof (msg32), mode))
8036 8034 return (RSMERR_BAD_ADDR);
8037 8035 else
8038 8036 return (RSM_SUCCESS);
8039 8037 }
8040 8038 #endif
8041 8039 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8042 8040 "rsm_ioctl done\n"));
8043 8041 if (ddi_copyout((caddr_t)&msg, (caddr_t)arg,
8044 8042 sizeof (msg), mode))
8045 8043 return (RSMERR_BAD_ADDR);
8046 8044 else
8047 8045 return (RSM_SUCCESS);
8048 8046 }
8049 8047
8050 8048 if (RSM_IOCTL_CMDGRP(cmd) == RSM_IOCTL_MAP_ADDR) {
8051 8049 /* map the nodeid or hwaddr */
8052 8050 error = rsmaddr_ioctl(cmd, &msg, mode);
8053 8051 if (error == RSM_SUCCESS) {
8054 8052 #ifdef _MULTI_DATAMODEL
8055 8053 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
8056 8054 rsm_ioctlmsg32_t msg32;
8057 8055
8058 8056 msg32.hwaddr = (uint64_t)msg.hwaddr;
8059 8057 msg32.nodeid = (uint32_t)msg.nodeid;
8060 8058
8061 8059 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8062 8060 "rsm_ioctl done\n"));
8063 8061 if (ddi_copyout((caddr_t)&msg32, (caddr_t)arg,
8064 8062 sizeof (msg32), mode))
8065 8063 return (RSMERR_BAD_ADDR);
8066 8064 else
8067 8065 return (RSM_SUCCESS);
8068 8066 }
8069 8067 #endif
8070 8068 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8071 8069 "rsm_ioctl done\n"));
8072 8070 if (ddi_copyout((caddr_t)&msg, (caddr_t)arg,
8073 8071 sizeof (msg), mode))
8074 8072 return (RSMERR_BAD_ADDR);
8075 8073 else
8076 8074 return (RSM_SUCCESS);
8077 8075 }
8078 8076 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8079 8077 "rsm_ioctl done: %d\n", error));
8080 8078 return (error);
8081 8079 }
8082 8080
8083 8081 /* Find resource and look it in read mode */
8084 8082 rnum = getminor(dev);
8085 8083 res = rsmresource_lookup(rnum, RSM_NOLOCK);
8086 8084 ASSERT(res != NULL);
8087 8085
8088 8086 /*
8089 8087 * Find command group
8090 8088 */
8091 8089 switch (RSM_IOCTL_CMDGRP(cmd)) {
8092 8090 case RSM_IOCTL_EXPORT_SEG:
8093 8091 /*
8094 8092 * Export list is searched during publish, loopback and
8095 8093 * remote lookup call.
8096 8094 */
8097 8095 seg = rsmresource_seg(res, rnum, credp,
8098 8096 RSM_RESOURCE_EXPORT_SEGMENT);
8099 8097 if (seg->s_type == RSM_RESOURCE_EXPORT_SEGMENT) {
8100 8098 error = rsmexport_ioctl(seg, &msg, cmd, arg, mode,
8101 8099 credp);
8102 8100 } else { /* export ioctl on an import/barrier resource */
8103 8101 error = RSMERR_BAD_SEG_HNDL;
8104 8102 }
8105 8103 break;
8106 8104 case RSM_IOCTL_IMPORT_SEG:
8107 8105 /* Import list is searched during remote unmap call. */
8108 8106 seg = rsmresource_seg(res, rnum, credp,
8109 8107 RSM_RESOURCE_IMPORT_SEGMENT);
8110 8108 if (seg->s_type == RSM_RESOURCE_IMPORT_SEGMENT) {
8111 8109 error = rsmimport_ioctl(seg, &msg, cmd, arg, mode,
8112 8110 credp);
8113 8111 } else { /* import ioctl on an export/barrier resource */
8114 8112 error = RSMERR_BAD_SEG_HNDL;
8115 8113 }
8116 8114 break;
8117 8115 case RSM_IOCTL_BAR:
8118 8116 if (res != RSMRC_RESERVED &&
8119 8117 res->rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT) {
8120 8118 error = rsmbar_ioctl((rsmseg_t *)res, &msg, cmd, arg,
8121 8119 mode);
8122 8120 } else { /* invalid res value */
8123 8121 error = RSMERR_BAD_SEG_HNDL;
8124 8122 }
8125 8123 break;
8126 8124 case RSM_IOCTL_BELL:
8127 8125 if (res != RSMRC_RESERVED) {
8128 8126 if (res->rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT)
8129 8127 error = exportbell_ioctl((rsmseg_t *)res, cmd);
8130 8128 else if (res->rsmrc_type == RSM_RESOURCE_EXPORT_SEGMENT)
8131 8129 error = importbell_ioctl((rsmseg_t *)res, cmd);
8132 8130 else /* RSM_RESOURCE_BAR */
8133 8131 error = RSMERR_BAD_SEG_HNDL;
8134 8132 } else { /* invalid res value */
8135 8133 error = RSMERR_BAD_SEG_HNDL;
8136 8134 }
8137 8135 break;
8138 8136 default:
8139 8137 error = EINVAL;
8140 8138 }
8141 8139
8142 8140 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ioctl done: %d\n",
8143 8141 error));
8144 8142 return (error);
8145 8143 }
8146 8144
8147 8145
8148 8146 /* **************************** Segment Mapping Operations ********* */
8149 8147 static rsm_mapinfo_t *
8150 8148 rsm_get_mapinfo(rsmseg_t *seg, off_t off, size_t len, off_t *dev_offset,
8151 8149 size_t *map_len)
8152 8150 {
8153 8151 rsm_mapinfo_t *p;
8154 8152 /*
8155 8153 * Find the correct mapinfo structure to use during the mapping
8156 8154 * from the seg->s_mapinfo list.
8157 8155 * The seg->s_mapinfo list contains in reverse order the mappings
8158 8156 * as returned by the RSMPI rsm_map. In rsm_devmap, we need to
8159 8157 * access the correct entry within this list for the mapping
8160 8158 * requested.
8161 8159 *
8162 8160 * The algorithm for selecting a list entry is as follows:
8163 8161 *
8164 8162 * When start_offset of an entry <= off we have found the entry
8165 8163 * we were looking for. Adjust the dev_offset and map_len (needs
8166 8164 * to be PAGESIZE aligned).
8167 8165 */
8168 8166 p = seg->s_mapinfo;
8169 8167 for (; p; p = p->next) {
8170 8168 if (p->start_offset <= off) {
8171 8169 *dev_offset = p->dev_offset + off - p->start_offset;
8172 8170 *map_len = (len > p->individual_len) ?
8173 8171 p->individual_len : ptob(btopr(len));
8174 8172 return (p);
8175 8173 }
8176 8174 p = p->next;
8177 8175 }
8178 8176
8179 8177 return (NULL);
8180 8178 }
8181 8179
8182 8180 static void
8183 8181 rsm_free_mapinfo(rsm_mapinfo_t *mapinfo)
8184 8182 {
8185 8183 rsm_mapinfo_t *p;
8186 8184
8187 8185 while (mapinfo != NULL) {
8188 8186 p = mapinfo;
8189 8187 mapinfo = mapinfo->next;
8190 8188 kmem_free(p, sizeof (*p));
8191 8189 }
8192 8190 }
8193 8191
8194 8192 static int
8195 8193 rsmmap_map(devmap_cookie_t dhp, dev_t dev, uint_t flags, offset_t off,
8196 8194 size_t len, void **pvtp)
8197 8195 {
8198 8196 rsmcookie_t *p;
8199 8197 rsmresource_t *res;
8200 8198 rsmseg_t *seg;
8201 8199 minor_t rnum;
8202 8200 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8203 8201
8204 8202 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_map enter\n"));
8205 8203
8206 8204 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8207 8205 "rsmmap_map: dhp = %x\n", dhp));
8208 8206
8209 8207 flags = flags;
8210 8208
8211 8209 rnum = getminor(dev);
8212 8210 res = (rsmresource_t *)rsmresource_lookup(rnum, RSM_NOLOCK);
8213 8211 ASSERT(res != NULL);
8214 8212
8215 8213 seg = (rsmseg_t *)res;
8216 8214
8217 8215 rsmseglock_acquire(seg);
8218 8216
8219 8217 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
8220 8218
8221 8219 /*
8222 8220 * Allocate structure and add cookie to segment list
8223 8221 */
8224 8222 p = kmem_alloc(sizeof (*p), KM_SLEEP);
8225 8223
8226 8224 p->c_dhp = dhp;
8227 8225 p->c_off = off;
8228 8226 p->c_len = len;
8229 8227 p->c_next = seg->s_ckl;
8230 8228 seg->s_ckl = p;
8231 8229
8232 8230 *pvtp = (void *)seg;
8233 8231
8234 8232 rsmseglock_release(seg);
8235 8233
8236 8234 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_map done\n"));
8237 8235 return (DDI_SUCCESS);
8238 8236 }
8239 8237
8240 8238 /*
8241 8239 * Page fault handling is done here. The prerequisite mapping setup
8242 8240 * has been done in rsm_devmap with calls to ddi_devmem_setup or
8243 8241 * ddi_umem_setup
8244 8242 */
8245 8243 static int
8246 8244 rsmmap_access(devmap_cookie_t dhp, void *pvt, offset_t offset, size_t len,
8247 8245 uint_t type, uint_t rw)
8248 8246 {
8249 8247 int e;
8250 8248 rsmseg_t *seg = (rsmseg_t *)pvt;
8251 8249 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8252 8250
8253 8251 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_access enter\n"));
8254 8252
8255 8253 rsmseglock_acquire(seg);
8256 8254
8257 8255 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
8258 8256
8259 8257 while (seg->s_state == RSM_STATE_MAP_QUIESCE) {
8260 8258 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
8261 8259 DBG_PRINTF((category, RSM_DEBUG,
8262 8260 "rsmmap_access done: cv_wait INTR"));
8263 8261 rsmseglock_release(seg);
8264 8262 return (RSMERR_INTERRUPTED);
8265 8263 }
8266 8264 }
8267 8265
8268 8266 ASSERT(seg->s_state == RSM_STATE_DISCONNECT ||
8269 8267 seg->s_state == RSM_STATE_ACTIVE);
8270 8268
8271 8269 if (seg->s_state == RSM_STATE_DISCONNECT)
8272 8270 seg->s_flags |= RSM_IMPORT_DUMMY;
8273 8271
8274 8272 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8275 8273 "rsmmap_access: dhp = %x\n", dhp));
8276 8274
8277 8275 rsmseglock_release(seg);
8278 8276
8279 8277 if (e = devmap_load(dhp, offset, len, type, rw)) {
8280 8278 DBG_PRINTF((category, RSM_ERR, "devmap_load failed\n"));
|
↓ open down ↓ |
1473 lines elided |
↑ open up ↑ |
8281 8279 }
8282 8280
8283 8281
8284 8282 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_access done\n"));
8285 8283
8286 8284 return (e);
8287 8285 }
8288 8286
8289 8287 static int
8290 8288 rsmmap_dup(devmap_cookie_t dhp, void *oldpvt, devmap_cookie_t new_dhp,
8291 - void **newpvt)
8289 + void **newpvt)
8292 8290 {
8293 8291 rsmseg_t *seg = (rsmseg_t *)oldpvt;
8294 8292 rsmcookie_t *p, *old;
8295 8293 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8296 8294
8297 8295 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_dup enter\n"));
8298 8296
8299 8297 /*
8300 8298 * Same as map, create an entry to hold cookie and add it to
8301 8299 * connect segment list. The oldpvt is a pointer to segment.
8302 8300 * Return segment pointer in newpvt.
8303 8301 */
8304 8302 rsmseglock_acquire(seg);
8305 8303
8306 8304 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
8307 8305
8308 8306 /*
8309 8307 * Find old cookie
8310 8308 */
8311 8309 for (old = seg->s_ckl; old != NULL; old = old->c_next) {
8312 8310 if (old->c_dhp == dhp) {
8313 8311 break;
8314 8312 }
8315 8313 }
8316 8314 if (old == NULL) {
8317 8315 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8318 8316 "rsmmap_dup done: EINVAL\n"));
8319 8317 rsmseglock_release(seg);
8320 8318 return (EINVAL);
8321 8319 }
8322 8320
8323 8321 p = kmem_alloc(sizeof (*p), KM_SLEEP);
8324 8322
8325 8323 p->c_dhp = new_dhp;
8326 8324 p->c_off = old->c_off;
8327 8325 p->c_len = old->c_len;
8328 8326 p->c_next = seg->s_ckl;
8329 8327 seg->s_ckl = p;
8330 8328
8331 8329 *newpvt = (void *)seg;
|
↓ open down ↓ |
30 lines elided |
↑ open up ↑ |
8332 8330
8333 8331 rsmseglock_release(seg);
8334 8332
8335 8333 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_dup done\n"));
8336 8334
8337 8335 return (DDI_SUCCESS);
8338 8336 }
8339 8337
8340 8338 static void
8341 8339 rsmmap_unmap(devmap_cookie_t dhp, void *pvtp, offset_t off, size_t len,
8342 - devmap_cookie_t new_dhp1, void **pvtp1,
8343 - devmap_cookie_t new_dhp2, void **pvtp2)
8340 + devmap_cookie_t new_dhp1, void **pvtp1,
8341 + devmap_cookie_t new_dhp2, void **pvtp2)
8344 8342 {
8345 8343 /*
8346 8344 * Remove pvtp structure from segment list.
8347 8345 */
8348 8346 rsmseg_t *seg = (rsmseg_t *)pvtp;
8349 8347 int freeflag;
8350 8348
8351 8349 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8352 8350
8353 8351 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_unmap enter\n"));
8354 8352
8355 8353 off = off; len = len;
8356 8354 pvtp1 = pvtp1; pvtp2 = pvtp2;
8357 8355
8358 8356 rsmseglock_acquire(seg);
8359 8357
8360 8358 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
8361 8359
8362 8360 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8363 8361 "rsmmap_unmap: dhp = %x\n", dhp));
8364 8362 /*
8365 8363 * We can go ahead and remove the dhps even if we are in
8366 8364 * the MAPPING state because the dhps being removed here
8367 8365 * belong to a different mmap and we are holding the segment
8368 8366 * lock.
8369 8367 */
8370 8368 if (new_dhp1 == NULL && new_dhp2 == NULL) {
8371 8369 /* find and remove dhp handle */
8372 8370 rsmcookie_t *tmp, **back = &seg->s_ckl;
8373 8371
8374 8372 while (*back != NULL) {
8375 8373 tmp = *back;
8376 8374 if (tmp->c_dhp == dhp) {
8377 8375 *back = tmp->c_next;
8378 8376 kmem_free(tmp, sizeof (*tmp));
8379 8377 break;
8380 8378 }
8381 8379 back = &tmp->c_next;
8382 8380 }
8383 8381 } else {
8384 8382 DBG_PRINTF((category, RSM_DEBUG_LVL2,
8385 8383 "rsmmap_unmap:parital unmap"
8386 8384 "new_dhp1 %lx, new_dhp2 %lx\n",
8387 8385 (size_t)new_dhp1, (size_t)new_dhp2));
8388 8386 }
8389 8387
8390 8388 /*
8391 8389 * rsmmap_unmap is called for each mapping cookie on the list.
8392 8390 * When the list becomes empty and we are not in the MAPPING
8393 8391 * state then unmap in the rsmpi driver.
8394 8392 */
8395 8393 if ((seg->s_ckl == NULL) && (seg->s_state != RSM_STATE_MAPPING))
8396 8394 (void) rsm_unmap(seg);
8397 8395
8398 8396 if (seg->s_state == RSM_STATE_END && seg->s_ckl == NULL) {
8399 8397 freeflag = 1;
8400 8398 } else {
8401 8399 freeflag = 0;
8402 8400 }
8403 8401
8404 8402 rsmseglock_release(seg);
8405 8403
8406 8404 if (freeflag) {
8407 8405 /* Free the segment structure */
8408 8406 rsmseg_free(seg);
8409 8407 }
8410 8408 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_unmap done\n"));
8411 8409
8412 8410 }
8413 8411
/*
 * devmap callback vector for import segment mappings.  rsm_devmap passes
 * a pointer to this table to devmap_devmem_setup()/devmap_umem_setup() so
 * that the rsmmap_* routines above are invoked for each mapping.  (The
 * barrier page mapping is set up with a NULL callback vector instead.)
 */
static struct devmap_callback_ctl rsmmap_ops = {
	DEVMAP_OPS_REV,	/* devmap_ops version number */
	rsmmap_map,	/* devmap_ops map routine */
	rsmmap_access,	/* devmap_ops access routine */
	rsmmap_dup,	/* devmap_ops dup routine */
	rsmmap_unmap,	/* devmap_ops unmap routine */
};
8421 8419
8422 8420 static int
8423 8421 rsm_devmap(dev_t dev, devmap_cookie_t dhc, offset_t off, size_t len,
8424 8422 size_t *maplen, uint_t model /*ARGSUSED*/)
8425 8423 {
8426 8424 struct devmap_callback_ctl *callbackops = &rsmmap_ops;
8427 8425 int err;
8428 8426 uint_t maxprot;
8429 8427 minor_t rnum;
8430 8428 rsmseg_t *seg;
8431 8429 off_t dev_offset;
8432 8430 size_t cur_len;
8433 8431 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8434 8432
8435 8433 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_devmap enter\n"));
8436 8434
8437 8435 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8438 8436 "rsm_devmap: off = %lx, len = %lx\n", off, len));
8439 8437 rnum = getminor(dev);
8440 8438 seg = (rsmseg_t *)rsmresource_lookup(rnum, RSM_NOLOCK);
8441 8439 ASSERT(seg != NULL);
8442 8440
8443 8441 if (seg->s_hdr.rsmrc_type == RSM_RESOURCE_BAR) {
8444 8442 if ((off == barrier_offset) &&
8445 8443 (len == barrier_size)) {
8446 8444
8447 8445 ASSERT(bar_va != NULL && bar_cookie != NULL);
8448 8446
8449 8447 /*
8450 8448 * The offset argument in devmap_umem_setup represents
8451 8449 * the offset within the kernel memory defined by the
8452 8450 * cookie. We use this offset as barrier_offset.
8453 8451 */
8454 8452 err = devmap_umem_setup(dhc, rsm_dip, NULL, bar_cookie,
8455 8453 barrier_offset, len, PROT_USER|PROT_READ,
8456 8454 DEVMAP_DEFAULTS, 0);
8457 8455
8458 8456 if (err != 0) {
8459 8457 DBG_PRINTF((category, RSM_ERR,
8460 8458 "rsm_devmap done: %d\n", err));
8461 8459 return (RSMERR_MAP_FAILED);
8462 8460 }
8463 8461 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8464 8462 "rsm_devmap done: %d\n", err));
8465 8463
8466 8464 *maplen = barrier_size;
8467 8465
8468 8466 return (err);
8469 8467 } else {
8470 8468 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8471 8469 "rsm_devmap done: %d\n", err));
8472 8470 return (RSMERR_MAP_FAILED);
8473 8471 }
8474 8472 }
8475 8473
8476 8474 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
8477 8475 ASSERT(seg->s_state == RSM_STATE_MAPPING);
8478 8476
8479 8477 /*
8480 8478 * Make sure we still have permission for the map operation.
8481 8479 */
8482 8480 maxprot = PROT_USER;
8483 8481 if (seg->s_mode & RSM_PERM_READ) {
8484 8482 maxprot |= PROT_READ;
8485 8483 }
8486 8484
8487 8485 if (seg->s_mode & RSM_PERM_WRITE) {
8488 8486 maxprot |= PROT_WRITE;
8489 8487 }
8490 8488
8491 8489 /*
8492 8490 * For each devmap call, rsmmap_map is called. This maintains driver
8493 8491 * private information for the mapping. Thus, if there are multiple
8494 8492 * devmap calls there will be multiple rsmmap_map calls and for each
8495 8493 * call, the mapping information will be stored.
8496 8494 * In case of an error during the processing of the devmap call, error
8497 8495 * will be returned. This error return causes the caller of rsm_devmap
8498 8496 * to undo all the mappings by calling rsmmap_unmap for each one.
8499 8497 * rsmmap_unmap will free up the private information for the requested
8500 8498 * mapping.
8501 8499 */
8502 8500 if (seg->s_node != my_nodeid) {
8503 8501 rsm_mapinfo_t *p;
8504 8502
8505 8503 p = rsm_get_mapinfo(seg, off, len, &dev_offset, &cur_len);
8506 8504 if (p == NULL) {
8507 8505 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8508 8506 "rsm_devmap: incorrect mapping info\n"));
8509 8507 return (RSMERR_MAP_FAILED);
8510 8508 }
8511 8509 err = devmap_devmem_setup(dhc, p->dip,
8512 8510 callbackops, p->dev_register,
8513 8511 dev_offset, cur_len, maxprot,
8514 8512 DEVMAP_ALLOW_REMAP | DEVMAP_DEFAULTS, 0);
8515 8513
8516 8514 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8517 8515 "rsm_devmap: dip=%lx,dreg=%lu,doff=%lx,"
8518 8516 "off=%lx,len=%lx\n",
8519 8517 p->dip, p->dev_register, dev_offset, off, cur_len));
8520 8518
8521 8519 if (err != 0) {
8522 8520 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8523 8521 "rsm_devmap: devmap_devmem_setup failed %d\n",
8524 8522 err));
8525 8523 return (RSMERR_MAP_FAILED);
8526 8524 }
8527 8525 /* cur_len is always an integral multiple pagesize */
8528 8526 ASSERT((cur_len & (PAGESIZE-1)) == 0);
8529 8527 *maplen = cur_len;
8530 8528 return (err);
8531 8529
8532 8530 } else {
8533 8531 err = devmap_umem_setup(dhc, rsm_dip, callbackops,
8534 8532 seg->s_cookie, off, len, maxprot,
8535 8533 DEVMAP_ALLOW_REMAP|DEVMAP_DEFAULTS, 0);
8536 8534 if (err != 0) {
8537 8535 DBG_PRINTF((category, RSM_DEBUG,
8538 8536 "rsm_devmap: devmap_umem_setup failed %d\n",
8539 8537 err));
8540 8538 return (RSMERR_MAP_FAILED);
8541 8539 }
8542 8540 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8543 8541 "rsm_devmap: loopback done\n"));
8544 8542
8545 8543 *maplen = ptob(btopr(len));
8546 8544
8547 8545 return (err);
8548 8546 }
8549 8547 }
8550 8548
/*
 * We can use the devmap framework for mapping device memory to user space by
 * specifying this routine in the rsm_cb_ops structure. The kernel mmap
 * processing calls this entry point and devmap_setup is called within this
 * function, which eventually calls rsm_devmap
 *
 * Arguments:
 *	dev	device whose minor number identifies the resource to map
 *	off	offset of the mapping within the segment (or barrier_offset)
 *	as	address space in which the mapping is established
 *	addrp	in/out user address, passed through to devmap_setup
 *	len	length of the mapping
 *	prot	protections requested by the caller
 *	maxprot	overwritten below from the segment's s_mode before use
 *	flags	mapping flags, passed through to devmap_setup
 *	cred	caller's credentials, passed through to devmap_setup
 *
 * Three kinds of requests are handled:
 *	- the barrier page (resource type RSM_RESOURCE_BAR);
 *	- an import segment exported by a remote node: the whole segment is
 *	  mapped through the adapter's rsm_map op on the first map and the
 *	  resulting mapinfo list is shared between importers;
 *	- an import segment exported by this node (loopback): the export
 *	  segment's s_cookie is borrowed.
 *
 * While devmap_setup runs, the import segment sits in the intermediate
 * RSM_STATE_MAPPING state with its lock dropped; on return it moves to
 * RSM_STATE_ACTIVE on success or back to its previous state on failure,
 * and waiters on s_cv are woken.
 *
 * Returns 0 on success.  Failures visible here: EINVAL (no such resource,
 * barrier cookie/va missing, bad barrier offset/length, not an import
 * segment), ENODEV (wait interrupted, segment or shared state not
 * connected/mapped, export segment not found, connection aborted), ENXIO
 * (request extends past the segment), EACCES (prot exceeds the segment's
 * permissions), ENOTSUP / EAGAIN (translated rsmpi map errors), or the
 * untranslated error from the rsmpi map op or from devmap_setup.
 */
static int
rsm_segmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp, off_t len,
    uint_t prot, uint_t maxprot, uint_t flags, struct cred *cred)
{
	int			error = 0;
	int			old_state;
	minor_t			rnum;
	rsmseg_t		*seg, *eseg;
	adapter_t		*adapter;
	rsm_import_share_t	*sharedp;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_segmap enter\n"));

	/*
	 * find segment
	 *
	 * NOTE(review): every exit path below releases the segment lock
	 * without a visible acquire, so rsmresource_lookup(..., RSM_LOCK)
	 * presumably returns with the segment lock held - confirm.
	 */
	rnum = getminor(dev);
	seg = (rsmseg_t *)rsmresource_lookup(rnum, RSM_LOCK);

	if (seg == NULL) {
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsm_segmap done: invalid segment\n"));
		return (EINVAL);
	}

	/*
	 * the user is trying to map a resource that has not been
	 * defined yet. The library uses this to map in the
	 * barrier page.
	 */
	if (seg->s_hdr.rsmrc_type == RSM_RESOURCE_BAR) {
		rsmseglock_release(seg);

		/*
		 * The mapping for the barrier page is identified
		 * by the special offset barrier_offset
		 */

		/*
		 * NOTE(review): this test accepts a request when EITHER the
		 * offset or the length matches, whereas rsm_devmap requires
		 * BOTH to match and fails the request otherwise.  Confirm
		 * whether "||" is intended here.
		 */
		if (off == (off_t)barrier_offset ||
		    len == (off_t)barrier_size) {
			if (bar_cookie == NULL || bar_va == NULL) {
				DBG_PRINTF((category, RSM_DEBUG,
				    "rsm_segmap: bar cookie/va is NULL\n"));
				return (EINVAL);
			}

			error = devmap_setup(dev, (offset_t)off, as, addrp,
			    (size_t)len, prot, maxprot, flags, cred);

			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
			    "rsm_segmap done: %d\n", error));
			return (error);
		} else {
			DBG_PRINTF((category, RSM_DEBUG,
			    "rsm_segmap: bad offset/length\n"));
			return (EINVAL);
		}
	}

	/* Make sure you can only map imported segments */
	if (seg->s_hdr.rsmrc_type != RSM_RESOURCE_IMPORT_SEGMENT) {
		rsmseglock_release(seg);
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsm_segmap done: not an import segment\n"));
		return (EINVAL);
	}
	/* check means library is broken */
	ASSERT(seg->s_hdr.rsmrc_num == rnum);

	/* wait for the segment to become unquiesced */
	while (seg->s_state == RSM_STATE_CONN_QUIESCE) {
		if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
			/*
			 * Interrupted by a signal.
			 * NOTE(review): reported as ENODEV rather than
			 * EINTR - confirm this is what callers expect.
			 */
			rsmseglock_release(seg);
			DBG_PRINTF((category, RSM_DEBUG,
			    "rsm_segmap done: cv_wait INTR"));
			return (ENODEV);
		}
	}

	/* wait until segment leaves the mapping state */
	while (seg->s_state == RSM_STATE_MAPPING)
		cv_wait(&seg->s_cv, &seg->s_lock);

	/*
	 * we allow multiple maps of the same segment in the KA
	 * and it works because we do an rsmpi map of the whole
	 * segment during the first map and all the device mapping
	 * information needed in rsm_devmap is in the mapinfo list.
	 */
	if ((seg->s_state != RSM_STATE_CONNECT) &&
	    (seg->s_state != RSM_STATE_ACTIVE)) {
		rsmseglock_release(seg);
		DBG_PRINTF((category, RSM_DEBUG,
		    "rsm_segmap done: segment not connected\n"));
		return (ENODEV);
	}

	/*
	 * Make sure we are not mapping a larger segment than what's
	 * exported
	 */
	if ((size_t)off + ptob(btopr(len)) > seg->s_len) {
		rsmseglock_release(seg);
		DBG_PRINTF((category, RSM_DEBUG,
		    "rsm_segmap done: off+len>seg size\n"));
		return (ENXIO);
	}

	/*
	 * Make sure we still have permission for the map operation.
	 * The maxprot passed in by the caller is discarded and recomputed
	 * from the segment's access mode.
	 */
	maxprot = PROT_USER;
	if (seg->s_mode & RSM_PERM_READ) {
		maxprot |= PROT_READ;
	}

	if (seg->s_mode & RSM_PERM_WRITE) {
		maxprot |= PROT_WRITE;
	}

	if ((prot & maxprot) != prot) {
		/* No permission */
		rsmseglock_release(seg);
		DBG_PRINTF((category, RSM_DEBUG,
		    "rsm_segmap done: no permission\n"));
		return (EACCES);
	}

	/* remembered so a failed map can restore the segment state */
	old_state = seg->s_state;

	ASSERT(seg->s_share != NULL);

	rsmsharelock_acquire(seg);

	sharedp = seg->s_share;

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "rsm_segmap:RSMSI_STATE=%d\n", sharedp->rsmsi_state));

	if ((sharedp->rsmsi_state != RSMSI_STATE_CONNECTED) &&
	    (sharedp->rsmsi_state != RSMSI_STATE_MAPPED)) {
		rsmsharelock_release(seg);
		rsmseglock_release(seg);
		/* NOTE(review): rsmsi_state is read here after both locks */
		/* have been dropped (debug output only). */
		DBG_PRINTF((category, RSM_DEBUG,
		    "rsm_segmap done:RSMSI_STATE %d invalid\n",
		    sharedp->rsmsi_state));
		return (ENODEV);
	}

	/*
	 * Do the map - since we want importers to share mappings
	 * we do the rsmpi map for the whole segment
	 */
	if (seg->s_node != my_nodeid) {
		uint_t dev_register;
		off_t dev_offset;
		dev_info_t *dip;
		size_t tmp_len;
		size_t total_length_mapped = 0;
		size_t length_to_map = seg->s_len;
		off_t tmp_off = 0;
		rsm_mapinfo_t *p;

		/*
		 * length_to_map = seg->s_len is always an integral
		 * multiple of PAGESIZE. Length mapped in each entry in mapinfo
		 * list is a multiple of PAGESIZE - RSMPI map ensures this
		 */

		adapter = seg->s_adapter;
		ASSERT(sharedp->rsmsi_state == RSMSI_STATE_CONNECTED ||
		    sharedp->rsmsi_state == RSMSI_STATE_MAPPED);

		if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTED) {
			error = 0;
			/* map the whole segment */
			while (total_length_mapped < seg->s_len) {
				tmp_len = 0;

				/*
				 * Each call maps as much as it can starting
				 * at tmp_off and reports the amount through
				 * tmp_len.
				 */
				error = adapter->rsmpi_ops->rsm_map(
				    seg->s_handle.in, tmp_off,
				    length_to_map, &tmp_len,
				    &dip, &dev_register, &dev_offset,
				    NULL, NULL);

				if (error != 0)
					break;

				/*
				 * Store the mapping info obtained from rsm_map
				 */
				p = kmem_alloc(sizeof (*p), KM_SLEEP);
				p->dev_register = dev_register;
				p->dev_offset = dev_offset;
				p->dip = dip;
				p->individual_len = tmp_len;
				p->start_offset = tmp_off;
				/* newest entry goes at the head of the list */
				p->next = sharedp->rsmsi_mapinfo;
				sharedp->rsmsi_mapinfo = p;

				total_length_mapped += tmp_len;
				length_to_map -= tmp_len;
				tmp_off += tmp_len;
			}
			seg->s_mapinfo = sharedp->rsmsi_mapinfo;

			if (error != RSM_SUCCESS) {
				/* Check if this is the first rsm_map */
				if (sharedp->rsmsi_mapinfo != NULL) {
					/*
					 * A single rsm_unmap undoes
					 * multiple rsm_maps.
					 */
					(void) seg->s_adapter->rsmpi_ops->
					    rsm_unmap(sharedp->rsmsi_handle);
					rsm_free_mapinfo(sharedp->
					    rsmsi_mapinfo);
				}
				sharedp->rsmsi_mapinfo = NULL;
				sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
				rsmsharelock_release(seg);
				rsmseglock_release(seg);
				DBG_PRINTF((category, RSM_DEBUG,
				    "rsm_segmap done: rsmpi map err %d\n",
				    error));
				ASSERT(error != RSMERR_BAD_LENGTH &&
				    error != RSMERR_BAD_MEM_ALIGNMENT &&
				    error != RSMERR_BAD_SEG_HNDL);
				/* translate rsmpi errors to errno values */
				if (error == RSMERR_UNSUPPORTED_OPERATION)
					return (ENOTSUP);
				else if (error == RSMERR_INSUFFICIENT_RESOURCES)
					return (EAGAIN);
				else if (error == RSMERR_CONN_ABORTED)
					return (ENODEV);
				else
					return (error);
			} else {
				sharedp->rsmsi_state = RSMSI_STATE_MAPPED;
			}
		} else {
			/* already mapped by another importer: share it */
			seg->s_mapinfo = sharedp->rsmsi_mapinfo;
		}

		sharedp->rsmsi_mapcnt++;

		rsmsharelock_release(seg);

		/* move to an intermediate mapping state */
		seg->s_state = RSM_STATE_MAPPING;
		rsmseglock_release(seg);

		/* runs without the segment lock; ends up in rsm_devmap */
		error = devmap_setup(dev, (offset_t)off, as, addrp,
		    len, prot, maxprot, flags, cred);

		rsmseglock_acquire(seg);
		ASSERT(seg->s_state == RSM_STATE_MAPPING);

		if (error == DDI_SUCCESS) {
			seg->s_state = RSM_STATE_ACTIVE;
		} else {
			rsmsharelock_acquire(seg);

			ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAPPED);

			/* drop the reference taken above */
			sharedp->rsmsi_mapcnt--;
			if (sharedp->rsmsi_mapcnt == 0) {
				/* unmap the shared RSMPI mapping */
				ASSERT(sharedp->rsmsi_handle != NULL);
				(void) adapter->rsmpi_ops->
				    rsm_unmap(sharedp->rsmsi_handle);
				rsm_free_mapinfo(sharedp->rsmsi_mapinfo);
				sharedp->rsmsi_mapinfo = NULL;
				sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
			}

			rsmsharelock_release(seg);
			seg->s_state = old_state;
			DBG_PRINTF((category, RSM_ERR,
			    "rsm: devmap_setup failed %d\n", error));
		}
		/* wake anyone waiting for the segment to leave MAPPING */
		cv_broadcast(&seg->s_cv);
		rsmseglock_release(seg);
		DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsm_segmap done: %d\n",
		    error));
		return (error);
	} else {
		/*
		 * For loopback, the export segment mapping cookie (s_cookie)
		 * is also used as the s_cookie value for its import segments
		 * during mapping.
		 * Note that reference counting for s_cookie of the export
		 * segment is not required due to the following:
		 * We never have a case of the export segment being destroyed,
		 * leaving the import segments with a stale value for the
		 * s_cookie field, since a force disconnect is done prior to a
		 * destroy of an export segment. The force disconnect causes
		 * the s_cookie value to be reset to NULL. Also for the
		 * rsm_rebind operation, we change the s_cookie value of the
		 * export segment as well as of all its local (loopback)
		 * importers.
		 */
		DBG_ADDCATEGORY(category, RSM_LOOPBACK);

		rsmsharelock_release(seg);
		/*
		 * In order to maintain the lock ordering between the export
		 * and import segment locks, we need to acquire the export
		 * segment lock first and only then acquire the import
		 * segment lock.
		 * The above is necessary to avoid any deadlock scenarios
		 * with rsm_rebind which also acquires both the export
		 * and import segment locks in the above mentioned order.
		 * Based on code inspection, there seem to be no other
		 * situations in which both the export and import segment
		 * locks are acquired either in the same or opposite order
		 * as mentioned above.
		 * Thus in order to conform to the above lock order, we
		 * need to change the state of the import segment to
		 * RSM_STATE_MAPPING, release the lock. Once this is done we
		 * can now safely acquire the export segment lock first
		 * followed by the import segment lock which is as per
		 * the lock order mentioned above.
		 */
		/* move to an intermediate mapping state */
		seg->s_state = RSM_STATE_MAPPING;
		rsmseglock_release(seg);

		/*
		 * NOTE(review): eseg's lock is released further down without
		 * a visible acquire, so rsmexport_lookup presumably returns
		 * the export segment with its lock held - confirm.
		 */
		eseg = rsmexport_lookup(seg->s_key);

		if (eseg == NULL) {
			rsmseglock_acquire(seg);
			/*
			 * Revert to old_state and signal any waiters
			 * The shared state is not changed
			 */

			seg->s_state = old_state;
			cv_broadcast(&seg->s_cv);
			rsmseglock_release(seg);
			DBG_PRINTF((category, RSM_DEBUG,
			    "rsm_segmap done: key %d not found\n", seg->s_key));
			return (ENODEV);
		}

		rsmsharelock_acquire(seg);
		ASSERT(sharedp->rsmsi_state == RSMSI_STATE_CONNECTED ||
		    sharedp->rsmsi_state == RSMSI_STATE_MAPPED);

		sharedp->rsmsi_mapcnt++;
		sharedp->rsmsi_state = RSMSI_STATE_MAPPED;
		rsmsharelock_release(seg);

		ASSERT(eseg->s_cookie != NULL);

		/*
		 * It is not required or necessary to acquire the import
		 * segment lock here to change the value of s_cookie since
		 * no one will touch the import segment as long as it is
		 * in the RSM_STATE_MAPPING state.
		 */
		seg->s_cookie = eseg->s_cookie;

		rsmseglock_release(eseg);

		/* runs without either segment lock; ends up in rsm_devmap */
		error = devmap_setup(dev, (offset_t)off, as, addrp, (size_t)len,
		    prot, maxprot, flags, cred);

		rsmseglock_acquire(seg);
		ASSERT(seg->s_state == RSM_STATE_MAPPING);
		if (error == 0) {
			seg->s_state = RSM_STATE_ACTIVE;
		} else {
			rsmsharelock_acquire(seg);

			ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAPPED);

			/* drop the reference taken above */
			sharedp->rsmsi_mapcnt--;
			if (sharedp->rsmsi_mapcnt == 0) {
				sharedp->rsmsi_mapinfo = NULL;
				sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
			}
			rsmsharelock_release(seg);
			seg->s_state = old_state;
			/* forget the borrowed export cookie */
			seg->s_cookie = NULL;
		}
		/* wake anyone waiting for the segment to leave MAPPING */
		cv_broadcast(&seg->s_cv);
		rsmseglock_release(seg);
		DBG_PRINTF((category, RSM_DEBUG_LVL2,
		    "rsm_segmap done: %d\n", error));
		return (error);
	}
}
8950 8948
8951 8949 int
8952 8950 rsmka_null_seg_create(
8953 8951 rsm_controller_handle_t argcp,
8954 8952 rsm_memseg_export_handle_t *handle,
8955 8953 size_t size,
8956 8954 uint_t flags,
8957 8955 rsm_memory_local_t *memory,
8958 8956 rsm_resource_callback_t callback,
8959 8957 rsm_resource_callback_arg_t callback_arg /*ARGSUSED*/)
8960 8958 {
8961 8959 return (RSM_SUCCESS);
8962 8960 }
8963 8961
8964 8962
8965 8963 int
8966 8964 rsmka_null_seg_destroy(
8967 8965 rsm_memseg_export_handle_t argmemseg /*ARGSUSED*/)
8968 8966 {
8969 8967 return (RSM_SUCCESS);
8970 8968 }
8971 8969
8972 8970
8973 8971 int
8974 8972 rsmka_null_bind(
8975 8973 rsm_memseg_export_handle_t argmemseg,
8976 8974 off_t offset,
8977 8975 rsm_memory_local_t *argmemory,
8978 8976 rsm_resource_callback_t callback,
8979 8977 rsm_resource_callback_arg_t callback_arg /*ARGSUSED*/)
8980 8978 {
8981 8979 return (RSM_SUCCESS);
8982 8980 }
8983 8981
8984 8982
8985 8983 int
8986 8984 rsmka_null_unbind(
8987 8985 rsm_memseg_export_handle_t argmemseg,
8988 8986 off_t offset,
8989 8987 size_t length /*ARGSUSED*/)
8990 8988 {
8991 8989 return (DDI_SUCCESS);
8992 8990 }
8993 8991
8994 8992 int
8995 8993 rsmka_null_rebind(
8996 8994 rsm_memseg_export_handle_t argmemseg,
8997 8995 off_t offset,
8998 8996 rsm_memory_local_t *memory,
8999 8997 rsm_resource_callback_t callback,
9000 8998 rsm_resource_callback_arg_t callback_arg /*ARGSUSED*/)
9001 8999 {
9002 9000 return (RSM_SUCCESS);
9003 9001 }
9004 9002
9005 9003 int
9006 9004 rsmka_null_publish(
9007 9005 rsm_memseg_export_handle_t argmemseg,
9008 9006 rsm_access_entry_t access_list[],
9009 9007 uint_t access_list_length,
9010 9008 rsm_memseg_id_t segment_id,
9011 9009 rsm_resource_callback_t callback,
9012 9010 rsm_resource_callback_arg_t callback_arg /*ARGSUSED*/)
9013 9011 {
9014 9012 return (RSM_SUCCESS);
9015 9013 }
9016 9014
9017 9015
9018 9016 int
9019 9017 rsmka_null_republish(
9020 9018 rsm_memseg_export_handle_t memseg,
9021 9019 rsm_access_entry_t access_list[],
9022 9020 uint_t access_list_length,
9023 9021 rsm_resource_callback_t callback,
9024 9022 rsm_resource_callback_arg_t callback_arg /*ARGSUSED*/)
9025 9023 {
9026 9024 return (RSM_SUCCESS);
9027 9025 }
9028 9026
9029 9027 int
9030 9028 rsmka_null_unpublish(
9031 9029 rsm_memseg_export_handle_t argmemseg /*ARGSUSED*/)
9032 9030 {
9033 9031 return (RSM_SUCCESS);
9034 9032 }
9035 9033
9036 9034
9037 9035 void
9038 9036 rsmka_init_loopback()
9039 9037 {
9040 9038 rsm_ops_t *ops = &null_rsmpi_ops;
9041 9039 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_LOOPBACK);
9042 9040
9043 9041 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9044 9042 "rsmka_init_loopback enter\n"));
9045 9043
9046 9044 /* initialize null ops vector */
9047 9045 ops->rsm_seg_create = rsmka_null_seg_create;
9048 9046 ops->rsm_seg_destroy = rsmka_null_seg_destroy;
9049 9047 ops->rsm_bind = rsmka_null_bind;
9050 9048 ops->rsm_unbind = rsmka_null_unbind;
9051 9049 ops->rsm_rebind = rsmka_null_rebind;
9052 9050 ops->rsm_publish = rsmka_null_publish;
9053 9051 ops->rsm_unpublish = rsmka_null_unpublish;
9054 9052 ops->rsm_republish = rsmka_null_republish;
9055 9053
9056 9054 /* initialize attributes for loopback adapter */
9057 9055 loopback_attr.attr_name = loopback_str;
9058 9056 loopback_attr.attr_page_size = 0x8; /* 8K */
9059 9057
9060 9058 /* initialize loopback adapter */
9061 9059 loopback_adapter.rsm_attr = loopback_attr;
9062 9060 loopback_adapter.rsmpi_ops = &null_rsmpi_ops;
9063 9061 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9064 9062 "rsmka_init_loopback done\n"));
9065 9063 }
9066 9064
9067 9065 /* ************** DR functions ********************************** */
9068 9066 static void
9069 9067 rsm_quiesce_exp_seg(rsmresource_t *resp)
9070 9068 {
9071 9069 int recheck_state;
9072 9070 rsmseg_t *segp = (rsmseg_t *)resp;
9073 9071 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9074 9072 DBG_DEFINE_STR(function, "rsm_unquiesce_exp_seg");
9075 9073
9076 9074 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9077 9075 "%s enter: key=%u\n", function, segp->s_key));
9078 9076
9079 9077 rsmseglock_acquire(segp);
9080 9078 do {
9081 9079 recheck_state = 0;
9082 9080 if ((segp->s_state == RSM_STATE_NEW_QUIESCED) ||
9083 9081 (segp->s_state == RSM_STATE_BIND_QUIESCED) ||
9084 9082 (segp->s_state == RSM_STATE_EXPORT_QUIESCING) ||
9085 9083 (segp->s_state == RSM_STATE_EXPORT_QUIESCED)) {
9086 9084 rsmseglock_release(segp);
9087 9085 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9088 9086 "%s done:state =%d\n", function,
9089 9087 segp->s_state));
9090 9088 return;
9091 9089 }
9092 9090
9093 9091 if (segp->s_state == RSM_STATE_NEW) {
9094 9092 segp->s_state = RSM_STATE_NEW_QUIESCED;
9095 9093 rsmseglock_release(segp);
9096 9094 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9097 9095 "%s done:state =%d\n", function,
9098 9096 segp->s_state));
9099 9097 return;
9100 9098 }
9101 9099
9102 9100 if (segp->s_state == RSM_STATE_BIND) {
9103 9101 /* unbind */
9104 9102 (void) rsm_unbind_pages(segp);
9105 9103 segp->s_state = RSM_STATE_BIND_QUIESCED;
9106 9104 rsmseglock_release(segp);
9107 9105 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9108 9106 "%s done:state =%d\n", function,
9109 9107 segp->s_state));
9110 9108 return;
9111 9109 }
9112 9110
9113 9111 if (segp->s_state == RSM_STATE_EXPORT) {
9114 9112 /*
9115 9113 * wait for putv/getv to complete if the segp is
9116 9114 * a local memory handle
9117 9115 */
9118 9116 while ((segp->s_state == RSM_STATE_EXPORT) &&
9119 9117 (segp->s_rdmacnt != 0)) {
9120 9118 cv_wait(&segp->s_cv, &segp->s_lock);
9121 9119 }
9122 9120
9123 9121 if (segp->s_state != RSM_STATE_EXPORT) {
9124 9122 /*
9125 9123 * state changed need to see what it
9126 9124 * should be changed to.
9127 9125 */
9128 9126 recheck_state = 1;
9129 9127 continue;
9130 9128 }
9131 9129
9132 9130 segp->s_state = RSM_STATE_EXPORT_QUIESCING;
9133 9131 rsmseglock_release(segp);
9134 9132 /*
9135 9133 * send SUSPEND messages - currently it will be
9136 9134 * done at the end
9137 9135 */
9138 9136 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9139 9137 "%s done:state =%d\n", function,
9140 9138 segp->s_state));
9141 9139 return;
9142 9140 }
9143 9141 } while (recheck_state);
9144 9142
9145 9143 rsmseglock_release(segp);
9146 9144 }
9147 9145
9148 9146 static void
9149 9147 rsm_unquiesce_exp_seg(rsmresource_t *resp)
9150 9148 {
9151 9149 int ret;
9152 9150 rsmseg_t *segp = (rsmseg_t *)resp;
9153 9151 rsmapi_access_entry_t *acl;
9154 9152 rsm_access_entry_t *rsmpi_acl;
9155 9153 int acl_len;
9156 9154 int create_flags = 0;
9157 9155 struct buf *xbuf;
9158 9156 rsm_memory_local_t mem;
9159 9157 adapter_t *adapter;
9160 9158 dev_t sdev = 0;
9161 9159 rsm_resource_callback_t callback_flag;
9162 9160 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9163 9161 DBG_DEFINE_STR(function, "rsm_unquiesce_exp_seg");
9164 9162
9165 9163 rsmseglock_acquire(segp);
9166 9164
9167 9165 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9168 9166 "%s enter: key=%u, state=%d\n", function, segp->s_key,
9169 9167 segp->s_state));
9170 9168
9171 9169 if ((segp->s_state == RSM_STATE_NEW) ||
9172 9170 (segp->s_state == RSM_STATE_BIND) ||
9173 9171 (segp->s_state == RSM_STATE_EXPORT)) {
9174 9172 rsmseglock_release(segp);
9175 9173 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done:state=%d\n",
9176 9174 function, segp->s_state));
9177 9175 return;
9178 9176 }
9179 9177
9180 9178 if (segp->s_state == RSM_STATE_NEW_QUIESCED) {
9181 9179 segp->s_state = RSM_STATE_NEW;
9182 9180 cv_broadcast(&segp->s_cv);
9183 9181 rsmseglock_release(segp);
9184 9182 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done:state=%d\n",
9185 9183 function, segp->s_state));
9186 9184 return;
9187 9185 }
9188 9186
9189 9187 if (segp->s_state == RSM_STATE_BIND_QUIESCED) {
9190 9188 /* bind the segment */
9191 9189 ret = rsm_bind_pages(&segp->s_cookie, segp->s_region.r_vaddr,
9192 9190 segp->s_len, segp->s_proc);
9193 9191 if (ret == RSM_SUCCESS) { /* bind successful */
9194 9192 segp->s_state = RSM_STATE_BIND;
9195 9193 } else { /* bind failed - resource unavailable */
9196 9194 segp->s_state = RSM_STATE_NEW;
9197 9195 }
9198 9196 cv_broadcast(&segp->s_cv);
9199 9197 rsmseglock_release(segp);
9200 9198 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9201 9199 "%s done: bind_qscd bind = %d\n", function, ret));
9202 9200 return;
9203 9201 }
9204 9202
9205 9203 while (segp->s_state == RSM_STATE_EXPORT_QUIESCING) {
9206 9204 /* wait for the segment to move to EXPORT_QUIESCED state */
9207 9205 cv_wait(&segp->s_cv, &segp->s_lock);
9208 9206 }
9209 9207
9210 9208 if (segp->s_state == RSM_STATE_EXPORT_QUIESCED) {
9211 9209 /* bind the segment */
9212 9210 ret = rsm_bind_pages(&segp->s_cookie, segp->s_region.r_vaddr,
9213 9211 segp->s_len, segp->s_proc);
9214 9212
9215 9213 if (ret != RSM_SUCCESS) {
9216 9214 /* bind failed - resource unavailable */
9217 9215 acl_len = segp->s_acl_len;
9218 9216 acl = segp->s_acl;
9219 9217 rsmpi_acl = segp->s_acl_in;
9220 9218 segp->s_acl_len = 0;
9221 9219 segp->s_acl = NULL;
9222 9220 segp->s_acl_in = NULL;
9223 9221 rsmseglock_release(segp);
9224 9222
9225 9223 rsmexport_rm(segp);
9226 9224 rsmacl_free(acl, acl_len);
9227 9225 rsmpiacl_free(rsmpi_acl, acl_len);
9228 9226
9229 9227 rsmseglock_acquire(segp);
9230 9228 segp->s_state = RSM_STATE_NEW;
9231 9229 cv_broadcast(&segp->s_cv);
9232 9230 rsmseglock_release(segp);
9233 9231 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9234 9232 "%s done: exp_qscd bind failed = %d\n",
9235 9233 function, ret));
9236 9234 return;
9237 9235 }
9238 9236 /*
9239 9237 * publish the segment
9240 9238 * if successful
9241 9239 * segp->s_state = RSM_STATE_EXPORT;
9242 9240 * else failed
9243 9241 * segp->s_state = RSM_STATE_BIND;
9244 9242 */
9245 9243
9246 9244 /* check whether it is a local_memory_handle */
9247 9245 if (segp->s_acl != (rsmapi_access_entry_t *)NULL) {
9248 9246 if ((segp->s_acl[0].ae_node == my_nodeid) &&
9249 9247 (segp->s_acl[0].ae_permission == 0)) {
9250 9248 segp->s_state = RSM_STATE_EXPORT;
9251 9249 cv_broadcast(&segp->s_cv);
9252 9250 rsmseglock_release(segp);
9253 9251 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9254 9252 "%s done:exp_qscd\n", function));
9255 9253 return;
9256 9254 }
9257 9255 }
9258 9256 xbuf = ddi_umem_iosetup(segp->s_cookie, 0, segp->s_len, B_WRITE,
9259 9257 sdev, 0, NULL, DDI_UMEM_SLEEP);
9260 9258 ASSERT(xbuf != NULL);
9261 9259
9262 9260 mem.ms_type = RSM_MEM_BUF;
9263 9261 mem.ms_bp = xbuf;
9264 9262
9265 9263 adapter = segp->s_adapter;
9266 9264
9267 9265 if (segp->s_flags & RSMKA_ALLOW_UNBIND_REBIND) {
9268 9266 create_flags = RSM_ALLOW_UNBIND_REBIND;
9269 9267 }
9270 9268
9271 9269 if (segp->s_flags & RSMKA_SET_RESOURCE_DONTWAIT) {
9272 9270 callback_flag = RSM_RESOURCE_DONTWAIT;
9273 9271 } else {
9274 9272 callback_flag = RSM_RESOURCE_SLEEP;
9275 9273 }
9276 9274
9277 9275 ret = adapter->rsmpi_ops->rsm_seg_create(
9278 9276 adapter->rsmpi_handle, &segp->s_handle.out,
9279 9277 segp->s_len, create_flags, &mem,
9280 9278 callback_flag, NULL);
9281 9279
9282 9280 if (ret != RSM_SUCCESS) {
9283 9281 acl_len = segp->s_acl_len;
9284 9282 acl = segp->s_acl;
9285 9283 rsmpi_acl = segp->s_acl_in;
9286 9284 segp->s_acl_len = 0;
9287 9285 segp->s_acl = NULL;
9288 9286 segp->s_acl_in = NULL;
9289 9287 rsmseglock_release(segp);
9290 9288
9291 9289 rsmexport_rm(segp);
9292 9290 rsmacl_free(acl, acl_len);
9293 9291 rsmpiacl_free(rsmpi_acl, acl_len);
9294 9292
9295 9293 rsmseglock_acquire(segp);
9296 9294 segp->s_state = RSM_STATE_BIND;
9297 9295 cv_broadcast(&segp->s_cv);
9298 9296 rsmseglock_release(segp);
9299 9297 DBG_PRINTF((category, RSM_ERR,
9300 9298 "%s done: exp_qscd create failed = %d\n",
9301 9299 function, ret));
9302 9300 return;
9303 9301 }
9304 9302
9305 9303 ret = adapter->rsmpi_ops->rsm_publish(
9306 9304 segp->s_handle.out, segp->s_acl_in, segp->s_acl_len,
9307 9305 segp->s_segid, RSM_RESOURCE_DONTWAIT, NULL);
9308 9306
9309 9307 if (ret != RSM_SUCCESS) {
9310 9308 acl_len = segp->s_acl_len;
9311 9309 acl = segp->s_acl;
9312 9310 rsmpi_acl = segp->s_acl_in;
9313 9311 segp->s_acl_len = 0;
9314 9312 segp->s_acl = NULL;
9315 9313 segp->s_acl_in = NULL;
9316 9314 adapter->rsmpi_ops->rsm_seg_destroy(segp->s_handle.out);
9317 9315 rsmseglock_release(segp);
9318 9316
9319 9317 rsmexport_rm(segp);
9320 9318 rsmacl_free(acl, acl_len);
9321 9319 rsmpiacl_free(rsmpi_acl, acl_len);
9322 9320
9323 9321 rsmseglock_acquire(segp);
9324 9322 segp->s_state = RSM_STATE_BIND;
9325 9323 cv_broadcast(&segp->s_cv);
9326 9324 rsmseglock_release(segp);
9327 9325 DBG_PRINTF((category, RSM_ERR,
9328 9326 "%s done: exp_qscd publish failed = %d\n",
9329 9327 function, ret));
9330 9328 return;
9331 9329 }
9332 9330
9333 9331 segp->s_state = RSM_STATE_EXPORT;
9334 9332 cv_broadcast(&segp->s_cv);
9335 9333 rsmseglock_release(segp);
9336 9334 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done: exp_qscd\n",
9337 9335 function));
9338 9336 return;
9339 9337 }
9340 9338
9341 9339 rsmseglock_release(segp);
9342 9340
9343 9341 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done\n", function));
9344 9342 }
9345 9343
9346 9344 static void
9347 9345 rsm_quiesce_imp_seg(rsmresource_t *resp)
9348 9346 {
9349 9347 rsmseg_t *segp = (rsmseg_t *)resp;
9350 9348 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9351 9349 DBG_DEFINE_STR(function, "rsm_quiesce_imp_seg");
9352 9350
9353 9351 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9354 9352 "%s enter: key=%u\n", function, segp->s_key));
9355 9353
9356 9354 rsmseglock_acquire(segp);
9357 9355 segp->s_flags |= RSM_DR_INPROGRESS;
9358 9356
9359 9357 while (segp->s_rdmacnt != 0) {
9360 9358 /* wait for the RDMA to complete */
9361 9359 cv_wait(&segp->s_cv, &segp->s_lock);
9362 9360 }
9363 9361
9364 9362 rsmseglock_release(segp);
9365 9363
9366 9364 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done\n", function));
9367 9365
9368 9366 }
9369 9367
9370 9368 static void
9371 9369 rsm_unquiesce_imp_seg(rsmresource_t *resp)
9372 9370 {
9373 9371 rsmseg_t *segp = (rsmseg_t *)resp;
9374 9372 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9375 9373 DBG_DEFINE_STR(function, "rsm_unquiesce_imp_seg");
9376 9374
9377 9375 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9378 9376 "%s enter: key=%u\n", function, segp->s_key));
9379 9377
9380 9378 rsmseglock_acquire(segp);
9381 9379
9382 9380 segp->s_flags &= ~RSM_DR_INPROGRESS;
9383 9381 /* wake up any waiting putv/getv ops */
9384 9382 cv_broadcast(&segp->s_cv);
9385 9383
9386 9384 rsmseglock_release(segp);
9387 9385
9388 9386 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done\n", function));
9389 9387
9390 9388
9391 9389 }
9392 9390
9393 9391 static void
9394 9392 rsm_process_exp_seg(rsmresource_t *resp, int event)
9395 9393 {
9396 9394 if (event == RSM_DR_QUIESCE)
9397 9395 rsm_quiesce_exp_seg(resp);
9398 9396 else /* UNQUIESCE */
9399 9397 rsm_unquiesce_exp_seg(resp);
9400 9398 }
9401 9399
9402 9400 static void
9403 9401 rsm_process_imp_seg(rsmresource_t *resp, int event)
9404 9402 {
9405 9403 if (event == RSM_DR_QUIESCE)
9406 9404 rsm_quiesce_imp_seg(resp);
9407 9405 else /* UNQUIESCE */
9408 9406 rsm_unquiesce_imp_seg(resp);
9409 9407 }
9410 9408
9411 9409 static void
9412 9410 rsm_dr_process_local_segments(int event)
9413 9411 {
9414 9412
9415 9413 int i, j;
9416 9414 rsmresource_blk_t *blk;
9417 9415 rsmresource_t *p;
9418 9416 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9419 9417
9420 9418 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9421 9419 "rsm_dr_process_local_segments enter\n"));
9422 9420
9423 9421 /* iterate through the resource structure */
9424 9422
9425 9423 rw_enter(&rsm_resource.rsmrc_lock, RW_READER);
9426 9424
9427 9425 for (i = 0; i < rsm_resource.rsmrc_len; i++) {
9428 9426 blk = rsm_resource.rsmrc_root[i];
9429 9427 if (blk != NULL) {
9430 9428 for (j = 0; j < RSMRC_BLKSZ; j++) {
9431 9429 p = blk->rsmrcblk_blks[j];
9432 9430 if ((p != NULL) && (p != RSMRC_RESERVED)) {
9433 9431 /* valid resource */
9434 9432 if (p->rsmrc_type ==
9435 9433 RSM_RESOURCE_EXPORT_SEGMENT)
9436 9434 rsm_process_exp_seg(p, event);
9437 9435 else if (p->rsmrc_type ==
9438 9436 RSM_RESOURCE_IMPORT_SEGMENT)
9439 9437 rsm_process_imp_seg(p, event);
9440 9438 }
9441 9439 }
9442 9440 }
9443 9441 }
9444 9442
9445 9443 rw_exit(&rsm_resource.rsmrc_lock);
9446 9444
9447 9445 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9448 9446 "rsm_dr_process_local_segments done\n"));
9449 9447 }
9450 9448
9451 9449 /* *************** DR callback functions ************ */
9452 9450 static void
9453 9451 rsm_dr_callback_post_add(void *arg, pgcnt_t delta /* ARGSUSED */)
9454 9452 {
9455 9453 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9456 9454 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9457 9455 "rsm_dr_callback_post_add is a no-op\n"));
9458 9456 /* Noop */
9459 9457 }
9460 9458
9461 9459 static int
9462 9460 rsm_dr_callback_pre_del(void *arg, pgcnt_t delta /* ARGSUSED */)
9463 9461 {
9464 9462 int recheck_state = 0;
9465 9463 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9466 9464
9467 9465 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9468 9466 "rsm_dr_callback_pre_del enter\n"));
9469 9467
9470 9468 mutex_enter(&rsm_drv_data.drv_lock);
9471 9469
9472 9470 do {
9473 9471 recheck_state = 0;
9474 9472 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9475 9473 "rsm_dr_callback_pre_del:state=%d\n",
9476 9474 rsm_drv_data.drv_state));
9477 9475
9478 9476 switch (rsm_drv_data.drv_state) {
9479 9477 case RSM_DRV_NEW:
9480 9478 /*
9481 9479 * The state should usually never be RSM_DRV_NEW
9482 9480 * since in this state the callbacks have not yet
9483 9481 * been registered. So, ASSERT.
9484 9482 */
9485 9483 ASSERT(0);
9486 9484 return (0);
9487 9485 case RSM_DRV_REG_PROCESSING:
9488 9486 /*
9489 9487 * The driver is in the process of registering
9490 9488 * with the DR framework. So, wait till the
9491 9489 * registration process is complete.
9492 9490 */
9493 9491 recheck_state = 1;
9494 9492 cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
9495 9493 break;
9496 9494 case RSM_DRV_UNREG_PROCESSING:
9497 9495 /*
9498 9496 * If the state is RSM_DRV_UNREG_PROCESSING, the
9499 9497 * module is in the process of detaching and
9500 9498 * unregistering the callbacks from the DR
9501 9499 * framework. So, simply return.
9502 9500 */
9503 9501 mutex_exit(&rsm_drv_data.drv_lock);
9504 9502 DBG_PRINTF((category, RSM_DEBUG,
9505 9503 "rsm_dr_callback_pre_del:"
9506 9504 "pre-del on NEW/UNREG\n"));
9507 9505 return (0);
9508 9506 case RSM_DRV_OK:
9509 9507 rsm_drv_data.drv_state = RSM_DRV_PREDEL_STARTED;
9510 9508 break;
9511 9509 case RSM_DRV_PREDEL_STARTED:
9512 9510 /* FALLTHRU */
9513 9511 case RSM_DRV_PREDEL_COMPLETED:
9514 9512 /* FALLTHRU */
9515 9513 case RSM_DRV_POSTDEL_IN_PROGRESS:
9516 9514 recheck_state = 1;
9517 9515 cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
9518 9516 break;
9519 9517 case RSM_DRV_DR_IN_PROGRESS:
9520 9518 rsm_drv_data.drv_memdel_cnt++;
9521 9519 mutex_exit(&rsm_drv_data.drv_lock);
9522 9520 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9523 9521 "rsm_dr_callback_pre_del done\n"));
9524 9522 return (0);
9525 9523 /* break; */
9526 9524 default:
9527 9525 ASSERT(0);
9528 9526 break;
9529 9527 }
9530 9528
9531 9529 } while (recheck_state);
9532 9530
9533 9531 rsm_drv_data.drv_memdel_cnt++;
9534 9532
9535 9533 mutex_exit(&rsm_drv_data.drv_lock);
9536 9534
9537 9535 /* Do all the quiescing stuff here */
9538 9536 DBG_PRINTF((category, RSM_DEBUG,
9539 9537 "rsm_dr_callback_pre_del: quiesce things now\n"));
9540 9538
9541 9539 rsm_dr_process_local_segments(RSM_DR_QUIESCE);
9542 9540
9543 9541 /*
9544 9542 * now that all local segments have been quiesced lets inform
9545 9543 * the importers
9546 9544 */
9547 9545 rsm_send_suspend();
9548 9546
9549 9547 /*
9550 9548 * In response to the suspend message the remote node(s) will process
9551 9549 * the segments and send a suspend_complete message. Till all
9552 9550 * the nodes send the suspend_complete message we wait in the
9553 9551 * RSM_DRV_PREDEL_STARTED state. In the exporter_quiesce
9554 9552 * function we transition to the RSM_DRV_PREDEL_COMPLETED state.
9555 9553 */
9556 9554 mutex_enter(&rsm_drv_data.drv_lock);
9557 9555
9558 9556 while (rsm_drv_data.drv_state == RSM_DRV_PREDEL_STARTED) {
9559 9557 cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
9560 9558 }
9561 9559
9562 9560 ASSERT(rsm_drv_data.drv_state == RSM_DRV_PREDEL_COMPLETED);
9563 9561
9564 9562 rsm_drv_data.drv_state = RSM_DRV_DR_IN_PROGRESS;
9565 9563 cv_broadcast(&rsm_drv_data.drv_cv);
9566 9564
9567 9565 mutex_exit(&rsm_drv_data.drv_lock);
9568 9566
9569 9567 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9570 9568 "rsm_dr_callback_pre_del done\n"));
9571 9569
9572 9570 return (0);
9573 9571 }
9574 9572
9575 9573 static void
9576 9574 rsm_dr_callback_post_del(void *arg, pgcnt_t delta, int cancelled /* ARGSUSED */)
9577 9575 {
9578 9576 int recheck_state = 0;
9579 9577 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9580 9578
9581 9579 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9582 9580 "rsm_dr_callback_post_del enter\n"));
9583 9581
9584 9582 mutex_enter(&rsm_drv_data.drv_lock);
9585 9583
9586 9584 do {
9587 9585 recheck_state = 0;
9588 9586 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9589 9587 "rsm_dr_callback_post_del:state=%d\n",
9590 9588 rsm_drv_data.drv_state));
9591 9589
9592 9590 switch (rsm_drv_data.drv_state) {
9593 9591 case RSM_DRV_NEW:
9594 9592 /*
9595 9593 * The driver state cannot not be RSM_DRV_NEW
9596 9594 * since in this state the callbacks have not
9597 9595 * yet been registered.
9598 9596 */
9599 9597 ASSERT(0);
9600 9598 return;
9601 9599 case RSM_DRV_REG_PROCESSING:
9602 9600 /*
9603 9601 * The driver is in the process of registering with
9604 9602 * the DR framework. Wait till the registration is
9605 9603 * complete.
9606 9604 */
9607 9605 recheck_state = 1;
9608 9606 cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
9609 9607 break;
9610 9608 case RSM_DRV_UNREG_PROCESSING:
9611 9609 /*
9612 9610 * RSM_DRV_UNREG_PROCESSING state means the module
9613 9611 * is detaching and unregistering the callbacks
9614 9612 * from the DR framework. So simply return.
9615 9613 */
9616 9614 /* FALLTHRU */
9617 9615 case RSM_DRV_OK:
9618 9616 /*
9619 9617 * RSM_DRV_OK means we missed the pre-del
9620 9618 * corresponding to this post-del coz we had not
9621 9619 * registered yet, so simply return.
9622 9620 */
9623 9621 mutex_exit(&rsm_drv_data.drv_lock);
9624 9622 DBG_PRINTF((category, RSM_DEBUG,
9625 9623 "rsm_dr_callback_post_del:"
9626 9624 "post-del on OK/UNREG\n"));
9627 9625 return;
9628 9626 /* break; */
9629 9627 case RSM_DRV_PREDEL_STARTED:
9630 9628 /* FALLTHRU */
9631 9629 case RSM_DRV_PREDEL_COMPLETED:
9632 9630 /* FALLTHRU */
9633 9631 case RSM_DRV_POSTDEL_IN_PROGRESS:
9634 9632 recheck_state = 1;
9635 9633 cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
9636 9634 break;
9637 9635 case RSM_DRV_DR_IN_PROGRESS:
9638 9636 rsm_drv_data.drv_memdel_cnt--;
9639 9637 if (rsm_drv_data.drv_memdel_cnt > 0) {
9640 9638 mutex_exit(&rsm_drv_data.drv_lock);
9641 9639 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9642 9640 "rsm_dr_callback_post_del done:\n"));
9643 9641 return;
9644 9642 }
9645 9643 rsm_drv_data.drv_state = RSM_DRV_POSTDEL_IN_PROGRESS;
9646 9644 break;
9647 9645 default:
9648 9646 ASSERT(0);
9649 9647 return;
9650 9648 /* break; */
9651 9649 }
9652 9650 } while (recheck_state);
9653 9651
9654 9652 mutex_exit(&rsm_drv_data.drv_lock);
9655 9653
9656 9654 /* Do all the unquiescing stuff here */
9657 9655 DBG_PRINTF((category, RSM_DEBUG,
9658 9656 "rsm_dr_callback_post_del: unquiesce things now\n"));
9659 9657
9660 9658 rsm_dr_process_local_segments(RSM_DR_UNQUIESCE);
9661 9659
9662 9660 /*
9663 9661 * now that all local segments have been unquiesced lets inform
9664 9662 * the importers
9665 9663 */
9666 9664 rsm_send_resume();
9667 9665
9668 9666 mutex_enter(&rsm_drv_data.drv_lock);
9669 9667
9670 9668 rsm_drv_data.drv_state = RSM_DRV_OK;
9671 9669
9672 9670 cv_broadcast(&rsm_drv_data.drv_cv);
9673 9671
9674 9672 mutex_exit(&rsm_drv_data.drv_lock);
9675 9673
9676 9674 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9677 9675 "rsm_dr_callback_post_del done\n"));
9678 9676
9679 9677 return;
9680 9678
9681 9679 }
|
↓ open down ↓ |
1328 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX