Two NLM fixes: use zone_kcred() and plug cl_auth leaks
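This webrev covers two changes to NSM handle management in nlm_impl.c.
First, nlm_nsm_init() now creates its two kernel RPC handles with
zone_kcred() rather than the global kcred, so a handle created on behalf
of a zone carries that zone's credential. Second, clnt_tli_kcreate()
attaches an AUTH to each handle's cl_auth field, and CLNT_DESTROY() does
not free it; the error path in nlm_nsm_init() and the teardown in
nlm_nsm_fini() now destroy cl_auth first, plugging the leak.

Below is a minimal sketch of the resulting create/teardown pattern, for
review context only. The helper names are hypothetical, the header
placement of zone_kcred() is an assumption, and the real changes live in
nlm_nsm_init()/nlm_nsm_fini() in the diff:

#include <rpc/rpc.h>	/* CLIENT, clnt_tli_kcreate(), auth_destroy() */
#include <sys/cred.h>	/* cred_t; zone_kcred() assumed declared here */

#include "nlm_impl.h"	/* NLM_RPC_RETRIES */

/*
 * Sketch: create a kernel RPC handle using the calling zone's
 * credential instead of the global kcred.
 */
static int
sketch_handle_create(struct knetconfig *knc, struct netbuf *addr,
    rpcprog_t prog, rpcvers_t vers, CLIENT **clntp)
{
	return (clnt_tli_kcreate(knc, addr, prog, vers, 0,
	    NLM_RPC_RETRIES, zone_kcred(), clntp));
}

/*
 * Sketch: tear down such a handle.  clnt_tli_kcreate() attached an
 * AUTH to cl_auth, and CLNT_DESTROY() does not free it, so the AUTH
 * must be destroyed first or it leaks.
 */
static void
sketch_handle_destroy(CLIENT *clnt)
{
	if (clnt->cl_auth != NULL)
		auth_destroy(clnt->cl_auth);
	CLNT_DESTROY(clnt);
}

Note the asymmetry the diff keeps: the nlm_nsm_init() error path asserts
cl_auth is non-NULL before destroying it, while nlm_nsm_fini() checks it
defensively.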
--- old/usr/src/uts/common/klm/nlm_impl.c
+++ new/usr/src/uts/common/klm/nlm_impl.c
1 1 /*
2 2 * Copyright (c) 2008 Isilon Inc http://www.isilon.com/
3 3 * Authors: Doug Rabson <dfr@rabson.org>
4 4 * Developed with Red Inc: Alfred Perlstein <alfred@freebsd.org>
5 5 *
6 6 * Redistribution and use in source and binary forms, with or without
7 7 * modification, are permitted provided that the following conditions
8 8 * are met:
9 9 * 1. Redistributions of source code must retain the above copyright
10 10 * notice, this list of conditions and the following disclaimer.
11 11 * 2. Redistributions in binary form must reproduce the above copyright
12 12 * notice, this list of conditions and the following disclaimer in the
13 13 * documentation and/or other materials provided with the distribution.
14 14 *
15 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 25 * SUCH DAMAGE.
26 26 */
27 27
28 28 /*
29 29 * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
30 30 * Copyright (c) 2012 by Delphix. All rights reserved.
31 31 * Copyright 2017 Joyent, Inc. All rights reserved.
32 32 */
33 33
34 34 /*
35 35 * NFS LockManager, start/stop, support functions, etc.
36 36 * Most of the interesting code is here.
37 37 *
38 38 * Source code derived from FreeBSD nlm_prot_impl.c
39 39 */
40 40
41 41 #include <sys/param.h>
42 42 #include <sys/systm.h>
43 43 #include <sys/thread.h>
44 44 #include <sys/fcntl.h>
45 45 #include <sys/flock.h>
46 46 #include <sys/mount.h>
47 47 #include <sys/priv.h>
48 48 #include <sys/proc.h>
49 49 #include <sys/share.h>
50 50 #include <sys/socket.h>
51 51 #include <sys/syscall.h>
52 52 #include <sys/syslog.h>
53 53 #include <sys/systm.h>
54 54 #include <sys/class.h>
55 55 #include <sys/unistd.h>
56 56 #include <sys/vnode.h>
57 57 #include <sys/vfs.h>
58 58 #include <sys/queue.h>
59 59 #include <sys/bitmap.h>
60 60 #include <sys/sdt.h>
61 61 #include <sys/brand.h>
62 62 #include <netinet/in.h>
63 63
64 64 #include <rpc/rpc.h>
65 65 #include <rpc/xdr.h>
66 66 #include <rpc/pmap_prot.h>
67 67 #include <rpc/pmap_clnt.h>
68 68 #include <rpc/rpcb_prot.h>
69 69
70 70 #include <rpcsvc/nlm_prot.h>
71 71 #include <rpcsvc/sm_inter.h>
72 72 #include <rpcsvc/nsm_addr.h>
73 73
74 74 #include <nfs/nfs.h>
75 75 #include <nfs/nfs_clnt.h>
76 76 #include <nfs/export.h>
77 77 #include <nfs/rnode.h>
78 78 #include <nfs/lm.h>
79 79
80 80 #include "nlm_impl.h"
81 81
82 82 struct nlm_knc {
83 83 struct knetconfig n_knc;
84 84 const char *n_netid;
85 85 };
86 86
87 87 /*
88 88 * Number of attempts NLM makes to obtain the RPC
89 89 * binding of the local statd.
90 90 */
91 91 #define NLM_NSM_RPCBIND_RETRIES 10
92 92
93 93 /*
94 94 * Timeout (in seconds) NLM waits before making another
95 95 * attempt to obtain the RPC binding of the local statd.
96 96 */
97 97 #define NLM_NSM_RPCBIND_TIMEOUT 5
98 98
99 99 /*
100 100 * Total number of sysids in NLM sysid bitmap
101 101 */
102 102 #define NLM_BMAP_NITEMS (LM_SYSID_MAX + 1)
103 103
104 104 /*
105 105 * Number of ulong_t words in bitmap that is used
106 106 * for allocation of sysid numbers.
107 107 */
108 108 #define NLM_BMAP_WORDS (NLM_BMAP_NITEMS / BT_NBIPUL)
109 109
110 110 /*
111 111 * Given an integer x, the macro returns
112 112 * -1 if x is negative,
113 113 * 0 if x is zero,
114 114 * 1 if x is positive
115 115 */
116 116 #define SIGN(x) (((x) > 0) - ((x) < 0))
117 117
118 118 #define ARRSIZE(arr) (sizeof (arr) / sizeof ((arr)[0]))
119 119 #define NLM_KNCS ARRSIZE(nlm_netconfigs)
120 120
121 121 krwlock_t lm_lck;
122 122
123 123 /*
124 124 * Zero timeout for asynchronous NLM RPC operations
125 125 */
126 126 static const struct timeval nlm_rpctv_zero = { 0, 0 };
127 127
128 128 /*
129 129 * List of all per-zone nlm_globals instances
130 130 * linked together.
131 131 */
132 132 static struct nlm_globals_list nlm_zones_list; /* (g) */
133 133
134 134 /*
135 135 * NLM kmem caches
136 136 */
137 137 static struct kmem_cache *nlm_hosts_cache = NULL;
138 138 static struct kmem_cache *nlm_vhold_cache = NULL;
139 139
140 140 /*
141 141 * A bitmap for allocation of new sysids.
151 151 * A sysid is a unique number between LM_SYSID
152 152 * and LM_SYSID_MAX; it represents a unique remote
153 153 * host that holds file locks on the given host.
145 145 */
146 146 static ulong_t nlm_sysid_bmap[NLM_BMAP_WORDS]; /* (g) */
147 147 static int nlm_sysid_nidx; /* (g) */
148 148
149 149 /*
150 150 * RPC service registration for all transports
151 151 */
152 152 static SVC_CALLOUT nlm_svcs[] = {
153 153 { NLM_PROG, 4, 4, nlm_prog_4 }, /* NLM4_VERS */
154 154 { NLM_PROG, 1, 3, nlm_prog_3 } /* NLM_VERS - NLM_VERSX */
155 155 };
156 156
157 157 static SVC_CALLOUT_TABLE nlm_sct = {
158 158 ARRSIZE(nlm_svcs),
159 159 FALSE,
160 160 nlm_svcs
161 161 };
162 162
163 163 /*
164 164 * Static table of all netid/knetconfig pairs the
165 165 * network lock manager can work with. The nlm_netconfigs
166 166 * table is used when we need to get a valid knetconfig
167 167 * by netid and vice versa.
168 168 *
169 169 * Knetconfigs are activated either by the call from
170 170 * user-space lockd daemon (server side) or by taking
171 171 * knetconfig from NFS mountinfo (client side)
172 172 */
173 173 static struct nlm_knc nlm_netconfigs[] = { /* (g) */
174 174 /* UDP */
175 175 {
176 176 { NC_TPI_CLTS, NC_INET, NC_UDP, NODEV },
177 177 "udp",
178 178 },
179 179 /* TCP */
180 180 {
181 181 { NC_TPI_COTS_ORD, NC_INET, NC_TCP, NODEV },
182 182 "tcp",
183 183 },
184 184 /* UDP over IPv6 */
185 185 {
186 186 { NC_TPI_CLTS, NC_INET6, NC_UDP, NODEV },
187 187 "udp6",
188 188 },
189 189 /* TCP over IPv6 */
190 190 {
191 191 { NC_TPI_COTS_ORD, NC_INET6, NC_TCP, NODEV },
192 192 "tcp6",
193 193 },
194 194 /* ticlts (loopback over UDP) */
195 195 {
196 196 { NC_TPI_CLTS, NC_LOOPBACK, NC_NOPROTO, NODEV },
197 197 "ticlts",
198 198 },
199 199 /* ticotsord (loopback over TCP) */
200 200 {
201 201 { NC_TPI_COTS_ORD, NC_LOOPBACK, NC_NOPROTO, NODEV },
202 202 "ticotsord",
203 203 },
204 204 };
205 205
206 206 /*
207 207 * NLM functions which can be called by a brand hook.
208 208 */
209 209 void nlm_netbuf_to_netobj(struct netbuf *, int *, netobj *);
210 210 void nlm_nsm_clnt_init(CLIENT *, struct nlm_nsm *);
211 211
212 212 /*
213 213 * NLM misc. functions
214 214 */
215 215 static void nlm_copy_netbuf(struct netbuf *, struct netbuf *);
216 216 static int nlm_netbuf_addrs_cmp(struct netbuf *, struct netbuf *);
217 217 static void nlm_kmem_reclaim(void *);
218 218 static void nlm_pool_shutdown(void);
219 219 static void nlm_suspend_zone(struct nlm_globals *);
220 220 static void nlm_resume_zone(struct nlm_globals *);
221 221
222 222 /*
223 223 * NLM thread functions
224 224 */
225 225 static void nlm_gc(struct nlm_globals *);
226 226 static void nlm_reclaimer(struct nlm_host *);
227 227
228 228 /*
229 229 * NLM NSM functions
230 230 */
231 231 static int nlm_init_local_knc(struct knetconfig *);
232 232 static int nlm_nsm_init_local(struct nlm_nsm *);
233 233 static int nlm_nsm_init(struct nlm_nsm *, struct knetconfig *, struct netbuf *);
234 234 static void nlm_nsm_fini(struct nlm_nsm *);
235 235 static enum clnt_stat nlm_nsm_simu_crash(struct nlm_nsm *);
236 236 static enum clnt_stat nlm_nsm_stat(struct nlm_nsm *, int32_t *);
237 237 static enum clnt_stat nlm_nsm_mon(struct nlm_nsm *, char *, uint16_t);
238 238 static enum clnt_stat nlm_nsm_unmon(struct nlm_nsm *, char *);
239 239
240 240 /*
241 241 * NLM host functions
242 242 */
243 243 static int nlm_host_ctor(void *, void *, int);
244 244 static void nlm_host_dtor(void *, void *);
245 245 static void nlm_host_destroy(struct nlm_host *);
246 246 static struct nlm_host *nlm_host_create(char *, const char *,
247 247 struct knetconfig *, struct netbuf *);
248 248 static struct nlm_host *nlm_host_find_locked(struct nlm_globals *,
249 249 const char *, struct netbuf *, avl_index_t *);
250 250 static void nlm_host_unregister(struct nlm_globals *, struct nlm_host *);
251 251 static void nlm_host_gc_vholds(struct nlm_host *);
252 252 static bool_t nlm_host_has_srv_locks(struct nlm_host *);
253 253 static bool_t nlm_host_has_cli_locks(struct nlm_host *);
254 254 static bool_t nlm_host_has_locks(struct nlm_host *);
255 255
256 256 /*
257 257 * NLM vhold functions
258 258 */
259 259 static int nlm_vhold_ctor(void *, void *, int);
260 260 static void nlm_vhold_dtor(void *, void *);
261 261 static void nlm_vhold_destroy(struct nlm_host *,
262 262 struct nlm_vhold *);
263 263 static bool_t nlm_vhold_busy(struct nlm_host *, struct nlm_vhold *);
264 264 static void nlm_vhold_clean(struct nlm_vhold *, int);
265 265
266 266 /*
267 267 * NLM client/server sleeping locks/share reservation functions
268 268 */
269 269 struct nlm_slreq *nlm_slreq_find_locked(struct nlm_host *,
270 270 struct nlm_vhold *, struct flock64 *);
271 271 static struct nlm_shres *nlm_shres_create_item(struct shrlock *, vnode_t *);
272 272 static void nlm_shres_destroy_item(struct nlm_shres *);
273 273 static bool_t nlm_shres_equal(struct shrlock *, struct shrlock *);
274 274
275 275 /*
276 276 * NLM initialization functions.
277 277 */
278 278 void
279 279 nlm_init(void)
280 280 {
281 281 nlm_hosts_cache = kmem_cache_create("nlm_host_cache",
282 282 sizeof (struct nlm_host), 0, nlm_host_ctor, nlm_host_dtor,
283 283 nlm_kmem_reclaim, NULL, NULL, 0);
284 284
285 285 nlm_vhold_cache = kmem_cache_create("nlm_vhold_cache",
286 286 sizeof (struct nlm_vhold), 0, nlm_vhold_ctor, nlm_vhold_dtor,
287 287 NULL, NULL, NULL, 0);
288 288
289 289 nlm_rpc_init();
290 290 TAILQ_INIT(&nlm_zones_list);
291 291
292 292 /* initialize sysids bitmap */
293 293 bzero(nlm_sysid_bmap, sizeof (nlm_sysid_bmap));
294 294 nlm_sysid_nidx = 1;
295 295
296 296 /*
297 297 * Reserve the sysid #0, because it's associated
298 298 * with local locks only. Don't allow it to be
299 299 * allocated for remote locks.
300 300 */
301 301 BT_SET(nlm_sysid_bmap, 0);
302 302 }
303 303
304 304 void
305 305 nlm_globals_register(struct nlm_globals *g)
306 306 {
307 307 rw_enter(&lm_lck, RW_WRITER);
308 308 TAILQ_INSERT_TAIL(&nlm_zones_list, g, nlm_link);
309 309 rw_exit(&lm_lck);
310 310 }
311 311
312 312 void
313 313 nlm_globals_unregister(struct nlm_globals *g)
314 314 {
315 315 rw_enter(&lm_lck, RW_WRITER);
316 316 TAILQ_REMOVE(&nlm_zones_list, g, nlm_link);
317 317 rw_exit(&lm_lck);
318 318 }
319 319
320 320 /* ARGSUSED */
321 321 static void
322 322 nlm_kmem_reclaim(void *cdrarg)
323 323 {
324 324 struct nlm_globals *g;
325 325
326 326 rw_enter(&lm_lck, RW_READER);
327 327 TAILQ_FOREACH(g, &nlm_zones_list, nlm_link)
328 328 cv_broadcast(&g->nlm_gc_sched_cv);
329 329
330 330 rw_exit(&lm_lck);
331 331 }
332 332
333 333 /*
334 334 * NLM garbage collector thread (GC).
335 335 *
336 336 * NLM GC periodically checks whether there are any host objects
337 337 * that can be cleaned up. It also releases stale vnodes that
338 338 * live on the server side (under protection of vhold objects).
339 339 *
340 340 * NLM host objects are cleaned up from the GC thread because the
341 341 * operations that determine whether a given host has any locks
342 342 * can be quite expensive, and it's not good to call them every
343 343 * time the very last reference to the host is dropped. Thus we
344 344 * use a "lazy" approach for host cleanup.
345 345 *
346 346 * The work of the GC is to release stale vnodes on the server side
347 347 * and destroy hosts that have had no locks and no activity for
348 348 * some time (i.e. idle hosts).
349 349 */
350 350 static void
351 351 nlm_gc(struct nlm_globals *g)
352 352 {
353 353 struct nlm_host *hostp;
354 354 clock_t now, idle_period;
355 355
356 356 idle_period = SEC_TO_TICK(g->cn_idle_tmo);
357 357 mutex_enter(&g->lock);
358 358 for (;;) {
359 359 /*
360 360 * The GC thread can be explicitly scheduled from
361 361 * the memory reclamation function.
362 362 */
363 363 (void) cv_timedwait(&g->nlm_gc_sched_cv, &g->lock,
364 364 ddi_get_lbolt() + idle_period);
365 365
366 366 /*
367 367 * NLM is shutting down, time to die.
368 368 */
369 369 if (g->run_status == NLM_ST_STOPPING)
370 370 break;
371 371
372 372 now = ddi_get_lbolt();
373 373 DTRACE_PROBE2(gc__start, struct nlm_globals *, g,
374 374 clock_t, now);
375 375
376 376 /*
377 377 * Find all obviously unused vholds and destroy them.
378 378 */
379 379 for (hostp = avl_first(&g->nlm_hosts_tree); hostp != NULL;
380 380 hostp = AVL_NEXT(&g->nlm_hosts_tree, hostp)) {
381 381 struct nlm_vhold *nvp;
382 382
383 383 mutex_enter(&hostp->nh_lock);
384 384
385 385 nvp = TAILQ_FIRST(&hostp->nh_vholds_list);
386 386 while (nvp != NULL) {
387 387 struct nlm_vhold *new_nvp;
388 388
389 389 new_nvp = TAILQ_NEXT(nvp, nv_link);
390 390
391 391 /*
392 392 * If these conditions are met, the vhold is
393 393 * obviously unused and we will destroy it. If
394 394 * either v_filocks and/or v_shrlocks is
395 395 * non-NULL, the vhold might still be unused by
396 396 * the host, but it is expensive to check that.
397 397 * We defer such check until the host is idle.
398 398 * The expensive check is done below without
399 399 * the global lock held.
400 400 */
401 401 if (nvp->nv_refcnt == 0 &&
402 402 nvp->nv_vp->v_filocks == NULL &&
403 403 nvp->nv_vp->v_shrlocks == NULL) {
404 404 nlm_vhold_destroy(hostp, nvp);
405 405 }
406 406
407 407 nvp = new_nvp;
408 408 }
409 409
410 410 mutex_exit(&hostp->nh_lock);
411 411 }
412 412
413 413 /*
414 414 * Handle all hosts that are unused at the moment
415 415 * until we meet one whose idle timeout is in the future.
416 416 */
417 417 while ((hostp = TAILQ_FIRST(&g->nlm_idle_hosts)) != NULL) {
418 418 bool_t has_locks;
419 419
420 420 if (hostp->nh_idle_timeout > now)
421 421 break;
422 422
423 423 /*
424 424 * Drop global lock while doing expensive work
425 425 * on this host. We'll re-check any conditions
426 426 * that might change after retaking the global
427 427 * lock.
428 428 */
429 429 mutex_exit(&g->lock);
430 430 mutex_enter(&hostp->nh_lock);
431 431
432 432 /*
433 433 * The nlm_globals lock was dropped earlier because
434 434 * garbage collecting of vholds and checking whether
435 435 * the host has any locks/shares are expensive operations.
436 436 */
437 437 nlm_host_gc_vholds(hostp);
438 438 has_locks = nlm_host_has_locks(hostp);
439 439
440 440 mutex_exit(&hostp->nh_lock);
441 441 mutex_enter(&g->lock);
442 442
443 443 /*
444 444 * While we were doing expensive operations
445 445 * outside of the nlm_globals critical section,
446 446 * somebody could take the host and remove it
447 447 * from the idle list. Whether it's been
448 448 * reinserted or not, our information about
449 449 * the host is outdated, and we should take no
450 450 * further action.
451 451 */
452 452 if ((hostp->nh_flags & NLM_NH_INIDLE) == 0 ||
453 453 hostp->nh_idle_timeout > now)
454 454 continue;
455 455
456 456 /*
457 457 * If the host has locks, we have to renew the
458 458 * host's timeout and put it at the end of the
459 459 * LRU list.
460 460 */
461 461 if (has_locks) {
462 462 TAILQ_REMOVE(&g->nlm_idle_hosts,
463 463 hostp, nh_link);
464 464 hostp->nh_idle_timeout = now + idle_period;
465 465 TAILQ_INSERT_TAIL(&g->nlm_idle_hosts,
466 466 hostp, nh_link);
467 467 continue;
468 468 }
469 469
470 470 /*
471 471 * We're here if all the following conditions hold:
472 472 * 1) Host has no locks or share reservations
473 473 * 2) Host is unused
474 474 * 3) Host wasn't touched by anyone for at least
475 475 * g->cn_idle_tmo seconds.
476 476 *
477 477 * So, now we can destroy it.
478 478 */
479 479 nlm_host_unregister(g, hostp);
480 480 mutex_exit(&g->lock);
481 481
482 482 nlm_host_unmonitor(g, hostp);
483 483 nlm_host_destroy(hostp);
484 484 mutex_enter(&g->lock);
485 485 if (g->run_status == NLM_ST_STOPPING)
486 486 break;
487 487
488 488 }
489 489
490 490 DTRACE_PROBE(gc__end);
491 491 }
492 492
493 493 DTRACE_PROBE1(gc__exit, struct nlm_globals *, g);
494 494
495 495 /* Let others know that GC has died */
496 496 g->nlm_gc_thread = NULL;
497 497 mutex_exit(&g->lock);
498 498
499 499 cv_broadcast(&g->nlm_gc_finish_cv);
500 500 zthread_exit();
501 501 }
502 502
503 503 /*
504 504 * Thread that reclaims locks/shares acquired by the client side
505 505 * on the given server represented by hostp.
506 506 */
507 507 static void
508 508 nlm_reclaimer(struct nlm_host *hostp)
509 509 {
510 510 struct nlm_globals *g;
511 511
512 512 mutex_enter(&hostp->nh_lock);
513 513 hostp->nh_reclaimer = curthread;
514 514 mutex_exit(&hostp->nh_lock);
515 515
516 516 g = zone_getspecific(nlm_zone_key, curzone);
517 517 nlm_reclaim_client(g, hostp);
518 518
519 519 mutex_enter(&hostp->nh_lock);
520 520 hostp->nh_flags &= ~NLM_NH_RECLAIM;
521 521 hostp->nh_reclaimer = NULL;
522 522 cv_broadcast(&hostp->nh_recl_cv);
523 523 mutex_exit(&hostp->nh_lock);
524 524
525 525 /*
526 526 * Host was explicitly referenced before
527 527 * nlm_reclaim() was called, release it
528 528 * here.
529 529 */
530 530 nlm_host_release(g, hostp);
531 531 zthread_exit();
532 532 }
533 533
534 534 /*
535 535 * Copy a struct netobj. (see xdr.h)
536 536 */
537 537 void
538 538 nlm_copy_netobj(struct netobj *dst, struct netobj *src)
539 539 {
540 540 dst->n_len = src->n_len;
541 541 dst->n_bytes = kmem_alloc(src->n_len, KM_SLEEP);
542 542 bcopy(src->n_bytes, dst->n_bytes, src->n_len);
543 543 }
544 544
545 545 /*
546 546 * An NLM-specific replacement for clnt_call().
547 547 * nlm_clnt_call() is used by all RPC functions generated
548 548 * from the nlm_prot.x specification. The function is aware
549 549 * of some pitfalls of NLM RPC procedures and has logic
550 550 * that handles them properly.
551 551 */
552 552 enum clnt_stat
553 553 nlm_clnt_call(CLIENT *clnt, rpcproc_t procnum, xdrproc_t xdr_args,
554 554 caddr_t argsp, xdrproc_t xdr_result, caddr_t resultp, struct timeval wait)
555 555 {
556 556 k_sigset_t oldmask;
557 557 enum clnt_stat stat;
558 558 bool_t sig_blocked = FALSE;
559 559
560 560 /*
561 561 * If NLM RPC procnum is one of the NLM _RES procedures
562 562 * that are used to reply to asynchronous NLM RPC
563 563 * (MSG calls), explicitly set RPC timeout to zero.
564 564 * The client doesn't send a reply to RES procedures, so
565 565 * we don't need to wait for anything.
566 566 *
567 567 * NOTE: we ignore NLM4_*_RES procnums because they are
568 568 * equal to NLM_*_RES numbers.
569 569 */
570 570 if (procnum >= NLM_TEST_RES && procnum <= NLM_GRANTED_RES)
571 571 wait = nlm_rpctv_zero;
572 572
573 573 /*
574 574 * We need to block signals in case of NLM_CANCEL RPC
575 575 * in order to prevent interruption of network RPC
576 576 * calls.
577 577 */
578 578 if (procnum == NLM_CANCEL) {
579 579 k_sigset_t newmask;
580 580
581 581 sigfillset(&newmask);
582 582 sigreplace(&newmask, &oldmask);
583 583 sig_blocked = TRUE;
584 584 }
585 585
586 586 stat = clnt_call(clnt, procnum, xdr_args,
587 587 argsp, xdr_result, resultp, wait);
588 588
589 589 /*
590 590 * Restore the signal mask if signals were blocked
591 591 */
592 592 if (sig_blocked)
593 593 sigreplace(&oldmask, (k_sigset_t *)NULL);
594 594
595 595 return (stat);
596 596 }
597 597
598 598 /*
599 599 * Suspend NLM client/server in the given zone.
600 600 *
601 601 * During the suspend operation we mark those hosts
602 602 * that have any locks with the NLM_NH_SUSPEND flag,
603 603 * so that they can be checked later, when the resume
604 604 * operation occurs.
605 605 */
606 606 static void
607 607 nlm_suspend_zone(struct nlm_globals *g)
608 608 {
609 609 struct nlm_host *hostp;
610 610 struct nlm_host_list all_hosts;
611 611
612 612 /*
613 613 * Note that while we're doing suspend, GC thread is active
614 614 * and it can destroy some hosts while we're walking through
615 615 * the hosts tree. To prevent that and make the suspend logic
616 616 * a bit simpler, we put all hosts on a local "all_hosts"
617 617 * list and increment the reference counter of each host.
618 618 * This guarantees that no hosts will be released while
619 619 * we're doing suspend.
620 620 * NOTE: reference of each host must be dropped during
621 621 * resume operation.
622 622 */
623 623 TAILQ_INIT(&all_hosts);
624 624 mutex_enter(&g->lock);
625 625 for (hostp = avl_first(&g->nlm_hosts_tree); hostp != NULL;
626 626 hostp = AVL_NEXT(&g->nlm_hosts_tree, hostp)) {
627 627 /*
628 628 * If the host is idle, remove it from the idle list and
629 629 * clear the idle flag. That is done to prevent the GC
630 630 * from touching this host.
631 631 */
632 632 if (hostp->nh_flags & NLM_NH_INIDLE) {
633 633 TAILQ_REMOVE(&g->nlm_idle_hosts, hostp, nh_link);
634 634 hostp->nh_flags &= ~NLM_NH_INIDLE;
635 635 }
636 636
637 637 hostp->nh_refs++;
638 638 TAILQ_INSERT_TAIL(&all_hosts, hostp, nh_link);
639 639 }
640 640
641 641 /*
642 642 * Now we can walk through all hosts on the system
643 643 * with the zone globals lock released. The fact that
644 644 * we have taken a reference to each host guarantees
645 645 * that no hosts can be destroyed during that process.
646 646 */
647 647 mutex_exit(&g->lock);
648 648 while ((hostp = TAILQ_FIRST(&all_hosts)) != NULL) {
649 649 mutex_enter(&hostp->nh_lock);
650 650 if (nlm_host_has_locks(hostp))
651 651 hostp->nh_flags |= NLM_NH_SUSPEND;
652 652
653 653 mutex_exit(&hostp->nh_lock);
654 654 TAILQ_REMOVE(&all_hosts, hostp, nh_link);
655 655 }
656 656 }
657 657
658 658 /*
659 659 * Resume NLM hosts for the given zone.
660 660 *
661 661 * nlm_resume_zone() is called after hosts were suspended
662 662 * (see nlm_suspend_zone) and its main purpose is to check
663 663 * whether remote locks owned by hosts are still in a consistent
664 664 * state. If they aren't, the resume function tries to reclaim
665 665 * locks (for client side hosts) and clean locks (for
666 666 * server side hosts).
667 667 */
668 668 static void
669 669 nlm_resume_zone(struct nlm_globals *g)
670 670 {
671 671 struct nlm_host *hostp, *h_next;
672 672
673 673 mutex_enter(&g->lock);
674 674 hostp = avl_first(&g->nlm_hosts_tree);
675 675
676 676 /*
677 677 * In nlm_suspend_zone() the reference counter of each
678 678 * host was incremented, so we can safely iterate through
679 679 * all hosts without worrying that any host we touch will
680 680 * be removed in the meantime.
681 681 */
682 682 while (hostp != NULL) {
683 683 struct nlm_nsm nsm;
684 684 enum clnt_stat stat;
685 685 int32_t sm_state;
686 686 int error;
687 687 bool_t resume_failed = FALSE;
688 688
689 689 h_next = AVL_NEXT(&g->nlm_hosts_tree, hostp);
690 690 mutex_exit(&g->lock);
691 691
692 692 DTRACE_PROBE1(resume__host, struct nlm_host *, hostp);
693 693
694 694 /*
695 695 * Suspend operation marked that the host doesn't
696 696 * have any locks. Skip it.
697 697 */
698 698 if (!(hostp->nh_flags & NLM_NH_SUSPEND))
699 699 goto cycle_end;
700 700
701 701 error = nlm_nsm_init(&nsm, &hostp->nh_knc, &hostp->nh_addr);
702 702 if (error != 0) {
703 703 NLM_ERR("Resume: Failed to contact NSM of host %s "
704 704 "[error=%d]\n", hostp->nh_name, error);
705 705 resume_failed = TRUE;
706 706 goto cycle_end;
707 707 }
708 708
709 709 stat = nlm_nsm_stat(&nsm, &sm_state);
710 710 if (stat != RPC_SUCCESS) {
711 711 NLM_ERR("Resume: Failed to call SM_STAT operation for "
712 712 "host %s [stat=%d]\n", hostp->nh_name, stat);
713 713 resume_failed = TRUE;
714 714 nlm_nsm_fini(&nsm);
715 715 goto cycle_end;
716 716 }
717 717
718 718 if (sm_state != hostp->nh_state) {
719 719 /*
720 720 * Current SM state of the host isn't equal
721 721 * to the one the host had when it was suspended.
722 722 * Probably it was rebooted. Try to reclaim
723 723 * locks if the host has any on its client side.
724 724 * Also try to clean up its server side locks
725 725 * (if the host has any).
726 726 */
727 727 nlm_host_notify_client(hostp, sm_state);
728 728 nlm_host_notify_server(hostp, sm_state);
729 729 }
730 730
731 731 nlm_nsm_fini(&nsm);
732 732
733 733 cycle_end:
734 734 if (resume_failed) {
735 735 /*
736 736 * Resume failed for the given host.
737 737 * Just clean up all resources it owns.
738 738 */
739 739 nlm_host_notify_server(hostp, 0);
740 740 nlm_client_cancel_all(g, hostp);
741 741 }
742 742
743 743 hostp->nh_flags &= ~NLM_NH_SUSPEND;
744 744 nlm_host_release(g, hostp);
745 745 hostp = h_next;
746 746 mutex_enter(&g->lock);
747 747 }
748 748
749 749 mutex_exit(&g->lock);
750 750 }
751 751
752 752 /*
753 753 * NLM functions responsible for operations on NSM handle.
754 754 */
755 755
756 756 /*
757 757 * Initialize knetconfig that is used for communication
758 758 * with local statd via loopback interface.
759 759 */
760 760 static int
761 761 nlm_init_local_knc(struct knetconfig *knc)
762 762 {
763 763 int error;
764 764 vnode_t *vp;
765 765
766 766 bzero(knc, sizeof (*knc));
767 767 error = lookupname("/dev/tcp", UIO_SYSSPACE,
768 768 FOLLOW, NULLVPP, &vp);
769 769 if (error != 0)
770 770 return (error);
771 771
772 772 knc->knc_semantics = NC_TPI_COTS;
773 773 knc->knc_protofmly = NC_INET;
774 774 knc->knc_proto = NC_TCP;
775 775 knc->knc_rdev = vp->v_rdev;
776 776 VN_RELE(vp);
777 777
778 778
779 779 return (0);
780 780 }
781 781
782 782 /*
783 783 * Initialize NSM handle that will be used to talk
784 784 * to local statd via loopback interface.
785 785 */
786 786 static int
787 787 nlm_nsm_init_local(struct nlm_nsm *nsm)
788 788 {
789 789 int error;
790 790 struct knetconfig knc;
791 791 struct sockaddr_in sin;
792 792 struct netbuf nb;
793 793
794 794 error = nlm_init_local_knc(&knc);
795 795 if (error != 0)
796 796 return (error);
797 797
798 798 bzero(&sin, sizeof (sin));
799 799 sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
800 800 sin.sin_family = AF_INET;
801 801
802 802 nb.buf = (char *)&sin;
803 803 nb.len = nb.maxlen = sizeof (sin);
804 804
805 805 return (nlm_nsm_init(nsm, &knc, &nb));
806 806 }
807 807
808 808 /*
809 809 * Initialize NSM handle used for talking to statd
810 810 */
811 811 static int
812 812 nlm_nsm_init(struct nlm_nsm *nsm, struct knetconfig *knc, struct netbuf *nb)
813 813 {
814 814 enum clnt_stat stat;
815 815 int error, retries;
816 816
817 817 bzero(nsm, sizeof (*nsm));
818 818 nsm->ns_knc = *knc;
819 819 nlm_copy_netbuf(&nsm->ns_addr, nb);
820 820
821 821 /*
822 822 * Try several times to get the port of the statd service.
823 823 * If rpcbind_getaddr() returns RPC_PROGNOTREGISTERED,
824 824 * retry the attempt, but wait NLM_NSM_RPCBIND_TIMEOUT
825 825 * seconds before doing so.
826 826 */
827 827 for (retries = 0; retries < NLM_NSM_RPCBIND_RETRIES; retries++) {
828 828 stat = rpcbind_getaddr(&nsm->ns_knc, SM_PROG,
829 829 SM_VERS, &nsm->ns_addr);
830 830 if (stat != RPC_SUCCESS) {
831 831 if (stat == RPC_PROGNOTREGISTERED) {
832 832 delay(SEC_TO_TICK(NLM_NSM_RPCBIND_TIMEOUT));
833 833 continue;
834 834 }
835 835 }
836 836
837 837 break;
838 838 }
839 839
840 840 if (stat != RPC_SUCCESS) {
841 841 DTRACE_PROBE2(rpcbind__error, enum clnt_stat, stat,
842 842 int, retries);
843 843 error = ENOENT;
844 844 goto error;
845 845 }
846 846
847 847 /*
848 848 * Create an RPC handle that'll be used for communication with the
849 849 * local statd using the status monitor protocol.
850 850 */
851 851 error = clnt_tli_kcreate(&nsm->ns_knc, &nsm->ns_addr, SM_PROG, SM_VERS,
852 - 0, NLM_RPC_RETRIES, kcred, &nsm->ns_handle);
852 + 0, NLM_RPC_RETRIES, zone_kcred(), &nsm->ns_handle);
853 853 if (error != 0)
854 854 goto error;
855 855
856 856 /*
857 857 * Create an RPC handle that'll be used for communication with the
858 858 * local statd using the address registration protocol.
859 859 */
860 860 error = clnt_tli_kcreate(&nsm->ns_knc, &nsm->ns_addr, NSM_ADDR_PROGRAM,
861 - NSM_ADDR_V1, 0, NLM_RPC_RETRIES, kcred, &nsm->ns_addr_handle);
861 + NSM_ADDR_V1, 0, NLM_RPC_RETRIES, zone_kcred(),
862 + &nsm->ns_addr_handle);
862 863 if (error != 0)
863 864 goto error;
864 865
865 866 sema_init(&nsm->ns_sem, 1, NULL, SEMA_DEFAULT, NULL);
866 867 return (0);
867 868
868 869 error:
869 870 kmem_free(nsm->ns_addr.buf, nsm->ns_addr.maxlen);
870 - if (nsm->ns_handle)
871 + if (nsm->ns_handle) {
872 + ASSERT(nsm->ns_handle->cl_auth != NULL);
873 + auth_destroy(nsm->ns_handle->cl_auth);
871 874 CLNT_DESTROY(nsm->ns_handle);
875 + }
872 876
873 877 return (error);
874 878 }
875 879
876 880 static void
877 881 nlm_nsm_fini(struct nlm_nsm *nsm)
878 882 {
879 883 kmem_free(nsm->ns_addr.buf, nsm->ns_addr.maxlen);
884 + if (nsm->ns_addr_handle->cl_auth != NULL)
885 + auth_destroy(nsm->ns_addr_handle->cl_auth);
880 886 CLNT_DESTROY(nsm->ns_addr_handle);
881 887 nsm->ns_addr_handle = NULL;
888 + if (nsm->ns_handle->cl_auth != NULL)
889 + auth_destroy(nsm->ns_handle->cl_auth);
882 890 CLNT_DESTROY(nsm->ns_handle);
883 891 nsm->ns_handle = NULL;
884 892 sema_destroy(&nsm->ns_sem);
885 893 }
886 894
887 895 static enum clnt_stat
888 896 nlm_nsm_simu_crash(struct nlm_nsm *nsm)
889 897 {
890 898 enum clnt_stat stat;
891 899
892 900 sema_p(&nsm->ns_sem);
893 901 nlm_nsm_clnt_init(nsm->ns_handle, nsm);
894 902 stat = sm_simu_crash_1(NULL, NULL, nsm->ns_handle);
895 903 sema_v(&nsm->ns_sem);
896 904
897 905 return (stat);
898 906 }
899 907
900 908 static enum clnt_stat
901 909 nlm_nsm_stat(struct nlm_nsm *nsm, int32_t *out_stat)
902 910 {
903 911 struct sm_name args;
904 912 struct sm_stat_res res;
905 913 enum clnt_stat stat;
906 914
907 915 args.mon_name = uts_nodename();
908 916 bzero(&res, sizeof (res));
909 917
910 918 sema_p(&nsm->ns_sem);
911 919 nlm_nsm_clnt_init(nsm->ns_handle, nsm);
912 920 stat = sm_stat_1(&args, &res, nsm->ns_handle);
913 921 sema_v(&nsm->ns_sem);
914 922
915 923 if (stat == RPC_SUCCESS)
916 924 *out_stat = res.state;
917 925
918 926 return (stat);
919 927 }
920 928
921 929 static enum clnt_stat
922 930 nlm_nsm_mon(struct nlm_nsm *nsm, char *hostname, uint16_t priv)
923 931 {
924 932 struct mon args;
925 933 struct sm_stat_res res;
926 934 enum clnt_stat stat;
927 935
928 936 bzero(&args, sizeof (args));
929 937 bzero(&res, sizeof (res));
930 938
931 939 args.mon_id.mon_name = hostname;
932 940 args.mon_id.my_id.my_name = uts_nodename();
933 941 args.mon_id.my_id.my_prog = NLM_PROG;
934 942 args.mon_id.my_id.my_vers = NLM_SM;
935 943 args.mon_id.my_id.my_proc = NLM_SM_NOTIFY1;
936 944 bcopy(&priv, args.priv, sizeof (priv));
937 945
938 946 sema_p(&nsm->ns_sem);
939 947 nlm_nsm_clnt_init(nsm->ns_handle, nsm);
940 948 stat = sm_mon_1(&args, &res, nsm->ns_handle);
941 949 sema_v(&nsm->ns_sem);
942 950
943 951 return (stat);
944 952 }
945 953
946 954 static enum clnt_stat
947 955 nlm_nsm_unmon(struct nlm_nsm *nsm, char *hostname)
948 956 {
949 957 struct mon_id args;
950 958 struct sm_stat res;
951 959 enum clnt_stat stat;
952 960
953 961 bzero(&args, sizeof (args));
954 962 bzero(&res, sizeof (res));
955 963
956 964 args.mon_name = hostname;
957 965 args.my_id.my_name = uts_nodename();
958 966 args.my_id.my_prog = NLM_PROG;
959 967 args.my_id.my_vers = NLM_SM;
960 968 args.my_id.my_proc = NLM_SM_NOTIFY1;
961 969
962 970 sema_p(&nsm->ns_sem);
963 971 nlm_nsm_clnt_init(nsm->ns_handle, nsm);
964 972 stat = sm_unmon_1(&args, &res, nsm->ns_handle);
965 973 sema_v(&nsm->ns_sem);
966 974
967 975 return (stat);
968 976 }
969 977
970 978 static enum clnt_stat
971 979 nlm_nsmaddr_reg(struct nlm_nsm *nsm, char *name, int family, netobj *address)
972 980 {
973 981 struct reg1args args = { 0 };
974 982 struct reg1res res = { 0 };
975 983 enum clnt_stat stat;
976 984
977 985 args.family = family;
978 986 args.name = name;
979 987 args.address = *address;
980 988
981 989 sema_p(&nsm->ns_sem);
982 990 nlm_nsm_clnt_init(nsm->ns_addr_handle, nsm);
983 991 stat = nsmaddrproc1_reg_1(&args, &res, nsm->ns_addr_handle);
984 992 sema_v(&nsm->ns_sem);
985 993
986 994 return (stat);
987 995 }
988 996
989 997 /*
990 998 * Get NLM vhold object corresponding to vnode "vp".
991 999 * If no such object was found, create a new one.
992 1000 *
993 1001 * The purpose of this function is to associate vhold
994 1002 * object with given vnode, so that:
995 1003 * 1) the vnode is held (VN_HOLD) while the vhold object is alive.
996 1004 * 2) the host keeps track of all vnodes it has touched by lock
997 1005 * or share operations. These vnodes are accessible
998 1006 * via collection of vhold objects.
999 1007 */
1000 1008 struct nlm_vhold *
1001 1009 nlm_vhold_get(struct nlm_host *hostp, vnode_t *vp)
1002 1010 {
1003 1011 struct nlm_vhold *nvp, *new_nvp = NULL;
1004 1012
1005 1013 mutex_enter(&hostp->nh_lock);
1006 1014 nvp = nlm_vhold_find_locked(hostp, vp);
1007 1015 if (nvp != NULL)
1008 1016 goto out;
1009 1017
1010 1018 /* nlm_vhold wasn't found, so create a new one */
1011 1019 mutex_exit(&hostp->nh_lock);
1012 1020 new_nvp = kmem_cache_alloc(nlm_vhold_cache, KM_SLEEP);
1013 1021
1014 1022 /*
1015 1023 * Check if another thread has already
1016 1024 * created the same nlm_vhold.
1017 1025 */
1018 1026 mutex_enter(&hostp->nh_lock);
1019 1027 nvp = nlm_vhold_find_locked(hostp, vp);
1020 1028 if (nvp == NULL) {
1021 1029 nvp = new_nvp;
1022 1030 new_nvp = NULL;
1023 1031
1024 1032 TAILQ_INIT(&nvp->nv_slreqs);
1025 1033 nvp->nv_vp = vp;
1026 1034 nvp->nv_refcnt = 1;
1027 1035 VN_HOLD(nvp->nv_vp);
1028 1036
1029 1037 VERIFY(mod_hash_insert(hostp->nh_vholds_by_vp,
1030 1038 (mod_hash_key_t)vp, (mod_hash_val_t)nvp) == 0);
1031 1039 TAILQ_INSERT_TAIL(&hostp->nh_vholds_list, nvp, nv_link);
1032 1040 }
1033 1041
1034 1042 out:
1035 1043 mutex_exit(&hostp->nh_lock);
1036 1044 if (new_nvp != NULL)
1037 1045 kmem_cache_free(nlm_vhold_cache, new_nvp);
1038 1046
1039 1047 return (nvp);
1040 1048 }
1041 1049
1042 1050 /*
1043 1051 * Drop a reference to vhold object nvp.
1044 1052 */
1045 1053 void
1046 1054 nlm_vhold_release(struct nlm_host *hostp, struct nlm_vhold *nvp)
1047 1055 {
1048 1056 if (nvp == NULL)
1049 1057 return;
1050 1058
1051 1059 mutex_enter(&hostp->nh_lock);
1052 1060 ASSERT(nvp->nv_refcnt > 0);
1053 1061 nvp->nv_refcnt--;
1054 1062
1055 1063 /*
1056 1064 * If these conditions are met, the vhold is obviously unused and we
1057 1065 * will destroy it. If either v_filocks and/or v_shrlocks is
1058 1066 * non-NULL, the vhold might still be unused by the host, but it is
1059 1067 * expensive to check that. We defer such check until the host is
1060 1068 * idle. The expensive check is done in the NLM garbage collector.
1061 1069 */
1062 1070 if (nvp->nv_refcnt == 0 &&
1063 1071 nvp->nv_vp->v_filocks == NULL &&
1064 1072 nvp->nv_vp->v_shrlocks == NULL) {
1065 1073 nlm_vhold_destroy(hostp, nvp);
1066 1074 }
1067 1075
1068 1076 mutex_exit(&hostp->nh_lock);
1069 1077 }
1070 1078
1071 1079 /*
1072 1080 * Clean all locks and share reservations on the
1073 1081 * given vhold object that were acquired by the
1074 1082 * given sysid
1075 1083 */
1076 1084 static void
1077 1085 nlm_vhold_clean(struct nlm_vhold *nvp, int sysid)
1078 1086 {
1079 1087 cleanlocks(nvp->nv_vp, IGN_PID, sysid);
1080 1088 cleanshares_by_sysid(nvp->nv_vp, sysid);
1081 1089 }
1082 1090
1083 1091 static void
1084 1092 nlm_vhold_destroy(struct nlm_host *hostp, struct nlm_vhold *nvp)
1085 1093 {
1086 1094 ASSERT(MUTEX_HELD(&hostp->nh_lock));
1087 1095
1088 1096 ASSERT(nvp->nv_refcnt == 0);
1089 1097 ASSERT(TAILQ_EMPTY(&nvp->nv_slreqs));
1090 1098
1091 1099 VERIFY(mod_hash_remove(hostp->nh_vholds_by_vp,
1092 1100 (mod_hash_key_t)nvp->nv_vp,
1093 1101 (mod_hash_val_t)&nvp) == 0);
1094 1102
1095 1103 TAILQ_REMOVE(&hostp->nh_vholds_list, nvp, nv_link);
1096 1104 VN_RELE(nvp->nv_vp);
1097 1105 nvp->nv_vp = NULL;
1098 1106
1099 1107 kmem_cache_free(nlm_vhold_cache, nvp);
1100 1108 }
1101 1109
1102 1110 /*
1103 1111 * Return TRUE if the given vhold is busy.
1104 1112 * A vhold object is considered to be "busy" when
1105 1113 * any of the following conditions hold:
1106 1114 * 1) Someone is using it at the moment;
1107 1115 * 2) It has locks;
1108 1116 * 3) It has share reservations.
1109 1117 */
1110 1118 static bool_t
1111 1119 nlm_vhold_busy(struct nlm_host *hostp, struct nlm_vhold *nvp)
1112 1120 {
1113 1121 vnode_t *vp;
1114 1122 int sysid;
1115 1123
1116 1124 ASSERT(MUTEX_HELD(&hostp->nh_lock));
1117 1125
1118 1126 if (nvp->nv_refcnt > 0)
1119 1127 return (TRUE);
1120 1128
1121 1129 vp = nvp->nv_vp;
1122 1130 sysid = hostp->nh_sysid;
1123 1131 if (flk_has_remote_locks_for_sysid(vp, sysid) ||
1124 1132 shr_has_remote_shares(vp, sysid))
1125 1133 return (TRUE);
1126 1134
1127 1135 return (FALSE);
1128 1136 }
1129 1137
1130 1138 /* ARGSUSED */
1131 1139 static int
1132 1140 nlm_vhold_ctor(void *datap, void *cdrarg, int kmflags)
1133 1141 {
1134 1142 struct nlm_vhold *nvp = (struct nlm_vhold *)datap;
1135 1143
1136 1144 bzero(nvp, sizeof (*nvp));
1137 1145 return (0);
1138 1146 }
1139 1147
1140 1148 /* ARGSUSED */
1141 1149 static void
1142 1150 nlm_vhold_dtor(void *datap, void *cdrarg)
1143 1151 {
1144 1152 struct nlm_vhold *nvp = (struct nlm_vhold *)datap;
1145 1153
1146 1154 ASSERT(nvp->nv_refcnt == 0);
1147 1155 ASSERT(TAILQ_EMPTY(&nvp->nv_slreqs));
1148 1156 ASSERT(nvp->nv_vp == NULL);
1149 1157 }
1150 1158
1151 1159 struct nlm_vhold *
1152 1160 nlm_vhold_find_locked(struct nlm_host *hostp, const vnode_t *vp)
1153 1161 {
1154 1162 struct nlm_vhold *nvp = NULL;
1155 1163
1156 1164 ASSERT(MUTEX_HELD(&hostp->nh_lock));
1157 1165 (void) mod_hash_find(hostp->nh_vholds_by_vp,
1158 1166 (mod_hash_key_t)vp,
1159 1167 (mod_hash_val_t)&nvp);
1160 1168
1161 1169 if (nvp != NULL)
1162 1170 nvp->nv_refcnt++;
1163 1171
1164 1172 return (nvp);
1165 1173 }
1166 1174
1167 1175 /*
1168 1176 * NLM host functions
1169 1177 */
1170 1178 static void
1171 1179 nlm_copy_netbuf(struct netbuf *dst, struct netbuf *src)
1172 1180 {
1173 1181 ASSERT(src->len <= src->maxlen);
1174 1182
1175 1183 dst->maxlen = src->maxlen;
1176 1184 dst->len = src->len;
1177 1185 dst->buf = kmem_zalloc(src->maxlen, KM_SLEEP);
1178 1186 bcopy(src->buf, dst->buf, src->len);
1179 1187 }
1180 1188
1181 1189 /* ARGSUSED */
1182 1190 static int
1183 1191 nlm_host_ctor(void *datap, void *cdrarg, int kmflags)
1184 1192 {
1185 1193 struct nlm_host *hostp = (struct nlm_host *)datap;
1186 1194
1187 1195 bzero(hostp, sizeof (*hostp));
1188 1196 return (0);
1189 1197 }
1190 1198
1191 1199 /* ARGSUSED */
1192 1200 static void
1193 1201 nlm_host_dtor(void *datap, void *cdrarg)
1194 1202 {
1195 1203 struct nlm_host *hostp = (struct nlm_host *)datap;
1196 1204 ASSERT(hostp->nh_refs == 0);
1197 1205 }
1198 1206
1199 1207 static void
1200 1208 nlm_host_unregister(struct nlm_globals *g, struct nlm_host *hostp)
1201 1209 {
1202 1210 ASSERT(hostp->nh_refs == 0);
1203 1211 ASSERT(hostp->nh_flags & NLM_NH_INIDLE);
1204 1212
1205 1213 avl_remove(&g->nlm_hosts_tree, hostp);
1206 1214 VERIFY(mod_hash_remove(g->nlm_hosts_hash,
1207 1215 (mod_hash_key_t)(uintptr_t)hostp->nh_sysid,
1208 1216 (mod_hash_val_t)&hostp) == 0);
1209 1217 TAILQ_REMOVE(&g->nlm_idle_hosts, hostp, nh_link);
1210 1218 hostp->nh_flags &= ~NLM_NH_INIDLE;
1211 1219 }
1212 1220
1213 1221 /*
1214 1222 * Free resources used by a host. This is called after the reference
1215 1223 * count has reached zero so it doesn't need to worry about locks.
1216 1224 */
1217 1225 static void
1218 1226 nlm_host_destroy(struct nlm_host *hostp)
1219 1227 {
1220 1228 ASSERT(hostp->nh_name != NULL);
1221 1229 ASSERT(hostp->nh_netid != NULL);
1222 1230 ASSERT(TAILQ_EMPTY(&hostp->nh_vholds_list));
1223 1231
1224 1232 strfree(hostp->nh_name);
1225 1233 strfree(hostp->nh_netid);
1226 1234 kmem_free(hostp->nh_addr.buf, hostp->nh_addr.maxlen);
1227 1235
1228 1236 if (hostp->nh_sysid != LM_NOSYSID)
1229 1237 nlm_sysid_free(hostp->nh_sysid);
1230 1238
1231 1239 nlm_rpc_cache_destroy(hostp);
1232 1240
1233 1241 ASSERT(TAILQ_EMPTY(&hostp->nh_vholds_list));
1234 1242 mod_hash_destroy_ptrhash(hostp->nh_vholds_by_vp);
1235 1243
1236 1244 mutex_destroy(&hostp->nh_lock);
1237 1245 cv_destroy(&hostp->nh_rpcb_cv);
1238 1246 cv_destroy(&hostp->nh_recl_cv);
1239 1247
1240 1248 kmem_cache_free(nlm_hosts_cache, hostp);
1241 1249 }
1242 1250
1243 1251 /*
1244 1252 * Cleanup SERVER-side state after a client restarts,
1245 1253 * or becomes unresponsive, or whatever.
1246 1254 *
1247 1255 * We unlock any active locks owned by the host.
1248 1256 * When rpc.lockd is shutting down,
1250 1258 * this function is called with state set to zero
1250 1258 * which allows us to cancel any pending async locks
1251 1259 * and clear the locking state.
1252 1260 *
1253 1261 * When "state" is 0, we don't update host's state,
1254 1262 * but cleanup all remote locks on the host.
1255 1263 * It's useful to call this function for resources
1256 1264 * cleanup.
1257 1265 */
1258 1266 void
1259 1267 nlm_host_notify_server(struct nlm_host *hostp, int32_t state)
1260 1268 {
1261 1269 struct nlm_vhold *nvp;
1262 1270 struct nlm_slreq *slr;
1263 1271 struct nlm_slreq_list slreqs2free;
1264 1272
1265 1273 TAILQ_INIT(&slreqs2free);
1266 1274 mutex_enter(&hostp->nh_lock);
1267 1275 if (state != 0)
1268 1276 hostp->nh_state = state;
1269 1277
1270 1278 TAILQ_FOREACH(nvp, &hostp->nh_vholds_list, nv_link) {
1271 1279
1272 1280 /* cleanup sleeping requests at first */
1273 1281 while ((slr = TAILQ_FIRST(&nvp->nv_slreqs)) != NULL) {
1274 1282 TAILQ_REMOVE(&nvp->nv_slreqs, slr, nsr_link);
1275 1283
1276 1284 /*
1277 1285 * Instead of freeing the cancelled sleeping request
1278 1286 * here, we add it to the linked list created
1279 1287 * on the stack in order to do all frees outside
1280 1288 * the critical section.
1281 1289 */
1282 1290 TAILQ_INSERT_TAIL(&slreqs2free, slr, nsr_link);
1283 1291 }
1284 1292
1285 1293 nvp->nv_refcnt++;
1286 1294 mutex_exit(&hostp->nh_lock);
1287 1295
1288 1296 nlm_vhold_clean(nvp, hostp->nh_sysid);
1289 1297
1290 1298 mutex_enter(&hostp->nh_lock);
1291 1299 nvp->nv_refcnt--;
1292 1300 }
1293 1301
1294 1302 mutex_exit(&hostp->nh_lock);
1295 1303 while ((slr = TAILQ_FIRST(&slreqs2free)) != NULL) {
1296 1304 TAILQ_REMOVE(&slreqs2free, slr, nsr_link);
1297 1305 kmem_free(slr, sizeof (*slr));
1298 1306 }
1299 1307 }
1300 1308
1301 1309 /*
1302 1310 * Cleanup CLIENT-side state after a server restarts,
1303 1311 * or becomes unresponsive, or whatever.
1304 1312 *
1305 1313 * This is called by the local NFS statd when we receive a
1306 1314 * host state change notification. (also nlm_svc_stopping)
1307 1315 *
1308 1316 * Deal with a server restart. If we are stopping the
1309 1317 * NLM service, we'll have state == 0, and will just
1310 1318 * cancel all our client-side lock requests. Otherwise,
1311 1319 * start the "recovery" process to reclaim any locks
1312 1320 * we hold on this server.
1313 1321 */
1314 1322 void
1315 1323 nlm_host_notify_client(struct nlm_host *hostp, int32_t state)
1316 1324 {
1317 1325 mutex_enter(&hostp->nh_lock);
1318 1326 hostp->nh_state = state;
1319 1327 if (hostp->nh_flags & NLM_NH_RECLAIM) {
1320 1328 /*
1321 1329 * Either host's state is up to date or
1322 1330 * host is already in recovery.
1323 1331 */
1324 1332 mutex_exit(&hostp->nh_lock);
1325 1333 return;
1326 1334 }
1327 1335
1328 1336 hostp->nh_flags |= NLM_NH_RECLAIM;
1329 1337
1330 1338 /*
1331 1339 * Host will be released by the recovery thread,
1332 1340 * thus we need to increment refcount.
1333 1341 */
1334 1342 hostp->nh_refs++;
1335 1343 mutex_exit(&hostp->nh_lock);
1336 1344
1337 1345 (void) zthread_create(NULL, 0, nlm_reclaimer,
1338 1346 hostp, 0, minclsyspri);
1339 1347 }
1340 1348
1341 1349 /*
1342 1350 * This function is called when the NLM client detects that
1343 1351 * the server has entered its grace period and the client needs
1344 1352 * to wait until the reclamation process (if any) does
1345 1353 * its job.
1346 1354 */
1347 1355 int
1348 1356 nlm_host_wait_grace(struct nlm_host *hostp)
1349 1357 {
1350 1358 struct nlm_globals *g;
1351 1359 int error = 0;
1352 1360
1353 1361 g = zone_getspecific(nlm_zone_key, curzone);
1354 1362 mutex_enter(&hostp->nh_lock);
1355 1363
1356 1364 do {
1357 1365 int rc;
1358 1366
1359 1367 rc = cv_timedwait_sig(&hostp->nh_recl_cv,
1360 1368 &hostp->nh_lock, ddi_get_lbolt() +
1361 1369 SEC_TO_TICK(g->retrans_tmo));
1362 1370
1363 1371 if (rc == 0) {
1364 1372 error = EINTR;
1365 1373 break;
1366 1374 }
1367 1375 } while (hostp->nh_flags & NLM_NH_RECLAIM);
1368 1376
1369 1377 mutex_exit(&hostp->nh_lock);
1370 1378 return (error);
1371 1379 }
1372 1380
1373 1381 /*
1374 1382 * Create a new NLM host.
1375 1383 *
1376 1384 * NOTE: The in-kernel RPC (kRPC) subsystem uses TLI/XTI,
1377 1385 * which needs both a knetconfig and an address when creating
1378 1386 * endpoints. Thus the host object stores both knetconfig and
1379 1387 * netid.
1380 1388 */
1381 1389 static struct nlm_host *
1382 1390 nlm_host_create(char *name, const char *netid,
1383 1391 struct knetconfig *knc, struct netbuf *naddr)
1384 1392 {
1385 1393 struct nlm_host *host;
1386 1394
1387 1395 host = kmem_cache_alloc(nlm_hosts_cache, KM_SLEEP);
1388 1396
1389 1397 mutex_init(&host->nh_lock, NULL, MUTEX_DEFAULT, NULL);
1390 1398 cv_init(&host->nh_rpcb_cv, NULL, CV_DEFAULT, NULL);
1391 1399 cv_init(&host->nh_recl_cv, NULL, CV_DEFAULT, NULL);
1392 1400
1393 1401 host->nh_sysid = LM_NOSYSID;
1394 1402 host->nh_refs = 1;
1395 1403 host->nh_name = strdup(name);
1396 1404 host->nh_netid = strdup(netid);
1397 1405 host->nh_knc = *knc;
1398 1406 nlm_copy_netbuf(&host->nh_addr, naddr);
1399 1407
1400 1408 host->nh_state = 0;
1401 1409 host->nh_rpcb_state = NRPCB_NEED_UPDATE;
1402 1410 host->nh_flags = 0;
1403 1411
1404 1412 host->nh_vholds_by_vp = mod_hash_create_ptrhash("nlm vholds hash",
1405 1413 32, mod_hash_null_valdtor, sizeof (vnode_t));
1406 1414
1407 1415 TAILQ_INIT(&host->nh_vholds_list);
1408 1416 TAILQ_INIT(&host->nh_rpchc);
1409 1417
1410 1418 return (host);
1411 1419 }
1412 1420
1413 1421 /*
1414 1422 * Cancel all client side sleeping locks owned by given host.
1415 1423 */
1416 1424 void
1417 1425 nlm_host_cancel_slocks(struct nlm_globals *g, struct nlm_host *hostp)
1418 1426 {
1419 1427 struct nlm_slock *nslp;
1420 1428
1421 1429 mutex_enter(&g->lock);
1422 1430 TAILQ_FOREACH(nslp, &g->nlm_slocks, nsl_link) {
1423 1431 if (nslp->nsl_host == hostp) {
1424 1432 nslp->nsl_state = NLM_SL_CANCELLED;
1425 1433 cv_broadcast(&nslp->nsl_cond);
1426 1434 }
1427 1435 }
1428 1436
1429 1437 mutex_exit(&g->lock);
1430 1438 }
1431 1439
1432 1440 /*
1433 1441 * Garbage collect stale vhold objects.
1434 1442 *
1435 1443 * In other words, check whether the vnodes that are
1436 1444 * held by vhold objects still have any locks or
1437 1445 * shares or are still in use. If they don't,
1438 1446 * just destroy them.
1439 1447 */
1440 1448 static void
1441 1449 nlm_host_gc_vholds(struct nlm_host *hostp)
1442 1450 {
1443 1451 struct nlm_vhold *nvp;
1444 1452
1445 1453 ASSERT(MUTEX_HELD(&hostp->nh_lock));
1446 1454
1447 1455 nvp = TAILQ_FIRST(&hostp->nh_vholds_list);
1448 1456 while (nvp != NULL) {
1449 1457 struct nlm_vhold *nvp_tmp;
1450 1458
1451 1459 if (nlm_vhold_busy(hostp, nvp)) {
1452 1460 nvp = TAILQ_NEXT(nvp, nv_link);
1453 1461 continue;
1454 1462 }
1455 1463
1456 1464 nvp_tmp = TAILQ_NEXT(nvp, nv_link);
1457 1465 nlm_vhold_destroy(hostp, nvp);
1458 1466 nvp = nvp_tmp;
1459 1467 }
1460 1468 }
1461 1469
1462 1470 /*
1463 1471 * Check whether the given host has any
1464 1472 * server side locks or share reservations.
1465 1473 */
1466 1474 static bool_t
1467 1475 nlm_host_has_srv_locks(struct nlm_host *hostp)
1468 1476 {
1469 1477 /*
1470 1478 * It's cheap and simple: if the server has
1471 1479 * any locks/shares, there must be a vhold
1472 1480 * object storing the affected vnode.
1473 1481 *
1474 1482 * NOTE: We don't need to check sleeping
1475 1483 * locks on the server side, because if
1476 1484 * a server-side sleeping lock is alive,
1477 1485 * there must be a vhold object corresponding
1478 1486 * to the target vnode.
1479 1487 */
1480 1488 ASSERT(MUTEX_HELD(&hostp->nh_lock));
1481 1489 if (!TAILQ_EMPTY(&hostp->nh_vholds_list))
1482 1490 return (TRUE);
1483 1491
1484 1492 return (FALSE);
1485 1493 }
1486 1494
1487 1495 /*
1488 1496 * Check whether the given host has any client side
1489 1497 * locks or share reservations.
1490 1498 */
1491 1499 static bool_t
1492 1500 nlm_host_has_cli_locks(struct nlm_host *hostp)
1493 1501 {
1494 1502 ASSERT(MUTEX_HELD(&hostp->nh_lock));
1495 1503
1496 1504 /*
1497 1505 * XXX: It's not the way I'd like to do the check,
1498 1506 * because flk_sysid_has_locks() can be very
1499 1507 * expensive by design. Unfortunately it iterates
1500 1508 * through all locks on the system, no matter
1501 1509 * whether they were made on a remote system via NLM
1502 1510 * or on the local system via reclock. To understand
1503 1511 * the problem, consider that there are tens of thousands
1504 1512 * of locks that are made on some ZFS dataset. And there's
1505 1513 * another dataset shared by NFS where the NLM client had
1506 1514 * locks some time ago, but doesn't have them now.
1507 1515 * In this case flk_sysid_has_locks() will iterate
1508 1516 * through tens of thousands of locks until it returns
1509 1517 * FALSE.
1510 1518 * One hopes that in the shiny future somebody will make
1511 1519 * the local lock manager (os/flock.c) better, so that
1512 1520 * it'd be more friendly to remote locks and
1513 1521 * flk_sysid_has_locks() wouldn't be so expensive.
1514 1522 */
1515 1523 if (flk_sysid_has_locks(hostp->nh_sysid |
1516 1524 LM_SYSID_CLIENT, FLK_QUERY_ACTIVE))
1517 1525 return (TRUE);
1518 1526
1519 1527 /*
1520 1528 * Check whether host has any share reservations
1521 1529 * registered on the client side.
1522 1530 */
1523 1531 if (hostp->nh_shrlist != NULL)
1524 1532 return (TRUE);
1525 1533
1526 1534 return (FALSE);
1527 1535 }
1528 1536
1529 1537 /*
1530 1538 * Determine whether the given host owns any
1531 1539 * locks or share reservations.
1532 1540 */
1533 1541 static bool_t
1534 1542 nlm_host_has_locks(struct nlm_host *hostp)
1535 1543 {
1536 1544 if (nlm_host_has_srv_locks(hostp))
1537 1545 return (TRUE);
1538 1546
1539 1547 return (nlm_host_has_cli_locks(hostp));
1540 1548 }
1541 1549
1542 1550 /*
1543 1551 * This function compares only addresses of two netbufs
1544 1552 * that belong to NC_TCP[6] or NC_UDP[6] protofamily.
1545 1553 * Port part of netbuf is ignored.
1546 1554 *
1547 1555 * Return values:
1548 1556 * -1: nb1's address is "smaller" than nb2's
1549 1557 * 0: addresses are equal
1550 1558 * 1: nb1's address is "greater" than nb2's
1551 1559 */
1552 1560 static int
1553 1561 nlm_netbuf_addrs_cmp(struct netbuf *nb1, struct netbuf *nb2)
1554 1562 {
1555 1563 union nlm_addr {
1556 1564 struct sockaddr sa;
1557 1565 struct sockaddr_in sin;
1558 1566 struct sockaddr_in6 sin6;
1559 1567 } *na1, *na2;
1560 1568 int res;
1561 1569
1562 1570 /* LINTED E_BAD_PTR_CAST_ALIGN */
1563 1571 na1 = (union nlm_addr *)nb1->buf;
1564 1572 /* LINTED E_BAD_PTR_CAST_ALIGN */
1565 1573 na2 = (union nlm_addr *)nb2->buf;
1566 1574
1567 1575 if (na1->sa.sa_family < na2->sa.sa_family)
1568 1576 return (-1);
1569 1577 if (na1->sa.sa_family > na2->sa.sa_family)
1570 1578 return (1);
1571 1579
1572 1580 switch (na1->sa.sa_family) {
1573 1581 case AF_INET:
1574 1582 res = memcmp(&na1->sin.sin_addr, &na2->sin.sin_addr,
1575 1583 sizeof (na1->sin.sin_addr));
1576 1584 break;
1577 1585 case AF_INET6:
1578 1586 res = memcmp(&na1->sin6.sin6_addr, &na2->sin6.sin6_addr,
1579 1587 sizeof (na1->sin6.sin6_addr));
1580 1588 break;
1581 1589 default:
1582 1590 VERIFY(0);
1583 1591 return (0);
1584 1592 }
1585 1593
1586 1594 return (SIGN(res));
1587 1595 }
1588 1596
1589 1597 /*
1590 1598 * Compare two nlm hosts.
1591 1599 * Return values:
1592 1600 * -1: host1 is "smaller" than host2
1593 1601 * 0: host1 is equal to host2
1594 1602 * 1: host1 is "greater" than host2
1595 1603 */
1596 1604 int
1597 1605 nlm_host_cmp(const void *p1, const void *p2)
1598 1606 {
1599 1607 struct nlm_host *h1 = (struct nlm_host *)p1;
1600 1608 struct nlm_host *h2 = (struct nlm_host *)p2;
1601 1609 int res;
1602 1610
1603 1611 res = strcmp(h1->nh_netid, h2->nh_netid);
1604 1612 if (res != 0)
1605 1613 return (SIGN(res));
1606 1614
1607 1615 res = nlm_netbuf_addrs_cmp(&h1->nh_addr, &h2->nh_addr);
1608 1616 return (res);
1609 1617 }
1610 1618
1611 1619 /*
1612 1620 * Find the host specified by... (see below)
1613 1621 * If found, increment the ref count.
1614 1622 */
1615 1623 static struct nlm_host *
1616 1624 nlm_host_find_locked(struct nlm_globals *g, const char *netid,
1617 1625 struct netbuf *naddr, avl_index_t *wherep)
1618 1626 {
1619 1627 struct nlm_host *hostp, key;
1620 1628 avl_index_t pos;
1621 1629
1622 1630 ASSERT(MUTEX_HELD(&g->lock));
1623 1631
1624 1632 key.nh_netid = (char *)netid;
1625 1633 key.nh_addr.buf = naddr->buf;
1626 1634 key.nh_addr.len = naddr->len;
1627 1635 key.nh_addr.maxlen = naddr->maxlen;
1628 1636
1629 1637 hostp = avl_find(&g->nlm_hosts_tree, &key, &pos);
1630 1638
1631 1639 if (hostp != NULL) {
1632 1640 /*
1633 1641 * Host is in use now. Remove it from the
1634 1642 * idle hosts list if needed.
1635 1643 */
1636 1644 if (hostp->nh_flags & NLM_NH_INIDLE) {
1637 1645 TAILQ_REMOVE(&g->nlm_idle_hosts, hostp, nh_link);
1638 1646 hostp->nh_flags &= ~NLM_NH_INIDLE;
1639 1647 }
1640 1648
1641 1649 hostp->nh_refs++;
1642 1650 }
1643 1651 if (wherep != NULL)
1644 1652 *wherep = pos;
1645 1653
1646 1654 return (hostp);
1647 1655 }
1648 1656
1649 1657 /*
1650 1658 * Find NLM host for the given name and address.
1651 1659 */
1652 1660 struct nlm_host *
1653 1661 nlm_host_find(struct nlm_globals *g, const char *netid,
1654 1662 struct netbuf *addr)
1655 1663 {
1656 1664 struct nlm_host *hostp = NULL;
1657 1665
1658 1666 mutex_enter(&g->lock);
1659 1667 if (g->run_status != NLM_ST_UP)
1660 1668 goto out;
1661 1669
1662 1670 hostp = nlm_host_find_locked(g, netid, addr, NULL);
1663 1671
1664 1672 out:
1665 1673 mutex_exit(&g->lock);
1666 1674 return (hostp);
1667 1675 }
1668 1676
1669 1677
1670 1678 /*
1671 1679 * Find or create an NLM host for the given name and address.
1672 1680 *
1673 1681 * The remote host is determined by all of: name, netid, address.
1674 1682 * Note that the netid is whatever nlm_svc_add_ep() gave to
1675 1683 * svc_tli_kcreate() for the service binding. If any of these
1676 1684 * are different, allocate a new host (new sysid).
1677 1685 */
1678 1686 struct nlm_host *
1679 1687 nlm_host_findcreate(struct nlm_globals *g, char *name,
1680 1688 const char *netid, struct netbuf *addr)
1681 1689 {
1682 1690 int err;
1683 1691 struct nlm_host *host, *newhost = NULL;
1684 1692 struct knetconfig knc;
1685 1693 avl_index_t where;
1686 1694
1687 1695 mutex_enter(&g->lock);
1688 1696 if (g->run_status != NLM_ST_UP) {
1689 1697 mutex_exit(&g->lock);
1690 1698 return (NULL);
1691 1699 }
1692 1700
1693 1701 host = nlm_host_find_locked(g, netid, addr, NULL);
1694 1702 mutex_exit(&g->lock);
1695 1703 if (host != NULL)
1696 1704 return (host);
1697 1705
1698 1706 err = nlm_knc_from_netid(netid, &knc);
1699 1707 if (err != 0)
1700 1708 return (NULL);
1701 1709 /*
1702 1710 * Do allocations (etc.) outside of mutex,
1703 1711 * and then check again before inserting.
1704 1712 */
1705 1713 newhost = nlm_host_create(name, netid, &knc, addr);
1706 1714 newhost->nh_sysid = nlm_sysid_alloc();
1707 1715 if (newhost->nh_sysid == LM_NOSYSID)
1708 1716 goto out;
1709 1717
1710 1718 mutex_enter(&g->lock);
1711 1719 host = nlm_host_find_locked(g, netid, addr, &where);
1712 1720 if (host == NULL) {
1713 1721 host = newhost;
1714 1722 newhost = NULL;
1715 1723
1716 1724 /*
1717 1725 * Insert host to the hosts AVL tree that is
1718 1726 * used to lookup by <netid, address> pair.
1719 1727 */
1720 1728 avl_insert(&g->nlm_hosts_tree, host, where);
1721 1729
1722 1730 /*
1723 1731 * Insert the host into the hosts hash table that is
1724 1732 * used to look up hosts by sysid.
1725 1733 */
1726 1734 VERIFY(mod_hash_insert(g->nlm_hosts_hash,
1727 1735 (mod_hash_key_t)(uintptr_t)host->nh_sysid,
1728 1736 (mod_hash_val_t)host) == 0);
1729 1737 }
1730 1738
1731 1739 mutex_exit(&g->lock);
1732 1740
1733 1741 out:
1734 1742 if (newhost != NULL) {
1735 1743 /*
1736 1744 * We do not need the preallocated nlm_host
1737 1745 * so decrement the reference counter
1738 1746 * and destroy it.
1739 1747 */
1740 1748 newhost->nh_refs--;
1741 1749 nlm_host_destroy(newhost);
1742 1750 }
1743 1751
1744 1752 return (host);
1745 1753 }
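
A minimal caller sketch of the find-or-create pattern above (illustrative
only; where g, name, netid and addr come from is outside this hunk):

	struct nlm_host *host;

	host = nlm_host_findcreate(g, name, netid, addr);
	if (host == NULL)
		return;		/* NLM not up, or sysid space exhausted */

	/* ... use host->nh_sysid for lock/share bookkeeping ... */

	nlm_host_release(g, host);	/* last ref moves host to idle LRU */
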
1746 1754
1747 1755 /*
1748 1756 * Find the NLM host that matches the value of 'sysid'.
1749 1757 * If found, return it with a new ref,
1750 1758 * else return NULL.
1751 1759 */
1752 1760 struct nlm_host *
1753 1761 nlm_host_find_by_sysid(struct nlm_globals *g, sysid_t sysid)
1754 1762 {
1755 1763 struct nlm_host *hostp = NULL;
1756 1764
1757 1765 mutex_enter(&g->lock);
1758 1766 if (g->run_status != NLM_ST_UP)
1759 1767 goto out;
1760 1768
1761 1769 (void) mod_hash_find(g->nlm_hosts_hash,
1762 1770 (mod_hash_key_t)(uintptr_t)sysid,
1763 1771 (mod_hash_val_t)&hostp);
1764 1772
1765 1773 if (hostp == NULL)
1766 1774 goto out;
1767 1775
1768 1776 /*
1769 1777 	 * Host is in use now. Remove it
1770 1778 	 * from the idle hosts list if needed.
1771 1779 */
1772 1780 if (hostp->nh_flags & NLM_NH_INIDLE) {
1773 1781 TAILQ_REMOVE(&g->nlm_idle_hosts, hostp, nh_link);
1774 1782 hostp->nh_flags &= ~NLM_NH_INIDLE;
1775 1783 }
1776 1784
1777 1785 hostp->nh_refs++;
1778 1786
1779 1787 out:
1780 1788 mutex_exit(&g->lock);
1781 1789 return (hostp);
1782 1790 }
1783 1791
1784 1792 /*
1785 1793 * Release the given host.
1786 1794 * I.e. drop a reference that was taken earlier by one of
1787 1795 * the following functions: nlm_host_findcreate(), nlm_host_find(),
1788 1796 * nlm_host_find_by_sysid().
1789 1797 *
1790 1798  * When the very last reference is dropped, the host is moved to
1791 1799  * the so-called "idle state". All hosts in idle state have an
1792 1800  * idle timeout. When the timeout expires, the GC thread checks
1793 1801  * whether the host holds any locks; if it doesn't hold any,
1794 1802  * the thread removes it.
1795 1803 * NOTE: only unused hosts can be in idle state.
1796 1804 */
1797 1805 static void
1798 1806 nlm_host_release_locked(struct nlm_globals *g, struct nlm_host *hostp)
1799 1807 {
1800 1808 if (hostp == NULL)
1801 1809 return;
1802 1810
1803 1811 ASSERT(MUTEX_HELD(&g->lock));
1804 1812 ASSERT(hostp->nh_refs > 0);
1805 1813
1806 1814 hostp->nh_refs--;
1807 1815 if (hostp->nh_refs != 0)
1808 1816 return;
1809 1817
1810 1818 /*
1811 1819 * The very last reference to the host was dropped,
1812 1820 * thus host is unused now. Set its idle timeout
1813 1821 * and move it to the idle hosts LRU list.
1814 1822 */
1815 1823 hostp->nh_idle_timeout = ddi_get_lbolt() +
1816 1824 SEC_TO_TICK(g->cn_idle_tmo);
1817 1825
1818 1826 ASSERT((hostp->nh_flags & NLM_NH_INIDLE) == 0);
1819 1827 TAILQ_INSERT_TAIL(&g->nlm_idle_hosts, hostp, nh_link);
1820 1828 hostp->nh_flags |= NLM_NH_INIDLE;
1821 1829 }
1822 1830
1823 1831 void
1824 1832 nlm_host_release(struct nlm_globals *g, struct nlm_host *hostp)
1825 1833 {
1826 1834 if (hostp == NULL)
1827 1835 return;
1828 1836
1829 1837 mutex_enter(&g->lock);
1830 1838 nlm_host_release_locked(g, hostp);
1831 1839 mutex_exit(&g->lock);
1832 1840 }
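
To illustrate the idle-state contract, a hedged sketch of the test a GC
pass could apply to an idle host (the real nlm_gc() body is not shown in
this hunk; locking and list manipulation are elided here):

	if ((hostp->nh_flags & NLM_NH_INIDLE) != 0 &&
	    ddi_get_lbolt() >= hostp->nh_idle_timeout &&
	    !nlm_host_has_locks(hostp)) {
		/* unused and expired: drop it */
		nlm_host_unregister(g, hostp);
		nlm_host_destroy(hostp);
	}
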
1833 1841
1834 1842 /*
1835 1843 * Unregister this NLM host (NFS client) with the local statd
1836 1844 * due to idleness (no locks held for a while).
1837 1845 */
1838 1846 void
1839 1847 nlm_host_unmonitor(struct nlm_globals *g, struct nlm_host *host)
1840 1848 {
1841 1849 enum clnt_stat stat;
1842 1850
1843 1851 VERIFY(host->nh_refs == 0);
1844 1852 if (!(host->nh_flags & NLM_NH_MONITORED))
1845 1853 return;
1846 1854
1847 1855 host->nh_flags &= ~NLM_NH_MONITORED;
1848 1856
1849 1857 if (ZONE_IS_BRANDED(curzone) && ZBROP(curzone)->b_rpc_statd != NULL) {
1850 1858 ZBROP(curzone)->b_rpc_statd(SM_UNMON, g, host);
1851 1859 return;
1852 1860 }
1853 1861
1854 1862 stat = nlm_nsm_unmon(&g->nlm_nsm, host->nh_name);
1855 1863 if (stat != RPC_SUCCESS) {
1856 1864 NLM_WARN("NLM: Failed to contact statd, stat=%d\n", stat);
1857 1865 return;
1858 1866 }
1859 1867 }
1860 1868
1861 1869 /*
1862 1870 * Ask the local NFS statd to begin monitoring this host.
1863 1871 * It will call us back when that host restarts, using the
1864 1872 * prog,vers,proc specified below, i.e. NLM_SM_NOTIFY1,
1865 1873 * which is handled in nlm_do_notify1().
1866 1874 */
1867 1875 void
1868 1876 nlm_host_monitor(struct nlm_globals *g, struct nlm_host *host, int state)
1869 1877 {
1870 1878 int family;
1871 1879 netobj obj;
1872 1880 enum clnt_stat stat;
1873 1881
1874 1882 if (state != 0 && host->nh_state == 0) {
1875 1883 /*
1876 1884 * This is the first time we have seen an NSM state
1877 1885 		 * value for this host. We record it here to help
1878 1886 * detect host reboots.
1879 1887 */
1880 1888 host->nh_state = state;
1881 1889 }
1882 1890
1883 1891 mutex_enter(&host->nh_lock);
1884 1892 if (host->nh_flags & NLM_NH_MONITORED) {
1885 1893 mutex_exit(&host->nh_lock);
1886 1894 return;
1887 1895 }
1888 1896
1889 1897 host->nh_flags |= NLM_NH_MONITORED;
1890 1898 mutex_exit(&host->nh_lock);
1891 1899
1892 1900 if (ZONE_IS_BRANDED(curzone) && ZBROP(curzone)->b_rpc_statd != NULL) {
1893 1901 ZBROP(curzone)->b_rpc_statd(SM_MON, g, host);
1894 1902 return;
1895 1903 }
1896 1904
1897 1905 /*
1898 1906 	 * Before we begin monitoring the host, register the network address
1899 1907 * associated with this hostname.
1900 1908 */
1901 1909 nlm_netbuf_to_netobj(&host->nh_addr, &family, &obj);
1902 1910 stat = nlm_nsmaddr_reg(&g->nlm_nsm, host->nh_name, family, &obj);
1903 1911 if (stat != RPC_SUCCESS) {
1904 1912 NLM_WARN("Failed to register address, stat=%d\n", stat);
1905 1913 mutex_enter(&g->lock);
1906 1914 host->nh_flags &= ~NLM_NH_MONITORED;
1907 1915 mutex_exit(&g->lock);
1908 1916
1909 1917 return;
1910 1918 }
1911 1919
1912 1920 /*
1913 1921 * Tell statd how to call us with status updates for
1914 1922 * this host. Updates arrive via nlm_do_notify1().
1915 1923 *
1916 1924 * We put our assigned system ID value in the priv field to
1917 1925 * make it simpler to find the host if we are notified of a
1918 1926 * host restart.
1919 1927 */
1920 1928 stat = nlm_nsm_mon(&g->nlm_nsm, host->nh_name, host->nh_sysid);
1921 1929 if (stat != RPC_SUCCESS) {
1922 1930 NLM_WARN("Failed to contact local NSM, stat=%d\n", stat);
1923 1931 mutex_enter(&g->lock);
1924 1932 host->nh_flags &= ~NLM_NH_MONITORED;
1925 1933 mutex_exit(&g->lock);
1926 1934
1927 1935 return;
1928 1936 }
1929 1937 }
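
Since the sysid is stashed with statd as the private data, the notify
path can recover the host directly. A hedged sketch (how sysid and the
new state are decoded from the callback is outside this hunk):

	struct nlm_host *hostp;

	hostp = nlm_host_find_by_sysid(g, sysid);
	if (hostp != NULL) {
		nlm_host_notify_server(hostp, new_state);
		nlm_host_release(g, hostp);
	}
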
1930 1938
1931 1939 int
1932 1940 nlm_host_get_state(struct nlm_host *hostp)
1933 1941 {
1934 1942
1935 1943 return (hostp->nh_state);
1936 1944 }
1937 1945
1938 1946 /*
1939 1947 * NLM client/server sleeping locks
1940 1948 */
1941 1949
1942 1950 /*
1943 1951 * Register client side sleeping lock.
1944 1952 *
1945 1953  * Our client code calls this to keep track of a
1946 1954  * sleeping lock. When it receives a grant callback
1947 1955  * from the server, or when it just needs to remove
1948 1956  * all sleeping locks from a vnode, it uses this
1949 1957  * information to remove or re-apply the lock
1950 1958  * properly.
1951 1959 */
1952 1960 struct nlm_slock *
1953 1961 nlm_slock_register(
1954 1962 struct nlm_globals *g,
1955 1963 struct nlm_host *host,
1956 1964 struct nlm4_lock *lock,
1957 1965 struct vnode *vp)
1958 1966 {
1959 1967 struct nlm_slock *nslp;
1960 1968
1961 1969 nslp = kmem_zalloc(sizeof (*nslp), KM_SLEEP);
1962 1970 cv_init(&nslp->nsl_cond, NULL, CV_DEFAULT, NULL);
1963 1971 nslp->nsl_lock = *lock;
1964 1972 nlm_copy_netobj(&nslp->nsl_fh, &nslp->nsl_lock.fh);
1965 1973 nslp->nsl_state = NLM_SL_BLOCKED;
1966 1974 nslp->nsl_host = host;
1967 1975 nslp->nsl_vp = vp;
1968 1976
1969 1977 mutex_enter(&g->lock);
1970 1978 TAILQ_INSERT_TAIL(&g->nlm_slocks, nslp, nsl_link);
1971 1979 mutex_exit(&g->lock);
1972 1980
1973 1981 return (nslp);
1974 1982 }
1975 1983
1976 1984 /*
1977 1985 * Remove this lock from the wait list and destroy it.
1978 1986 */
1979 1987 void
1980 1988 nlm_slock_unregister(struct nlm_globals *g, struct nlm_slock *nslp)
1981 1989 {
1982 1990 mutex_enter(&g->lock);
1983 1991 TAILQ_REMOVE(&g->nlm_slocks, nslp, nsl_link);
1984 1992 mutex_exit(&g->lock);
1985 1993
1986 1994 kmem_free(nslp->nsl_fh.n_bytes, nslp->nsl_fh.n_len);
1987 1995 cv_destroy(&nslp->nsl_cond);
1988 1996 kmem_free(nslp, sizeof (*nslp));
1989 1997 }
1990 1998
1991 1999 /*
1992 2000 * Wait for a granted callback or cancellation event
1993 2001 * for a sleeping lock.
1994 2002 *
1995 2003 * If a signal interrupted the wait or if the lock
1996 2004 * was cancelled, return EINTR - the caller must arrange to send
1997 2005 * a cancellation to the server.
1998 2006 *
1999 2007 * If timeout occurred, return ETIMEDOUT - the caller must
2000 2008 * resend the lock request to the server.
2001 2009 *
2002 2010 * On success return 0.
2003 2011 */
2004 2012 int
2005 2013 nlm_slock_wait(struct nlm_globals *g,
2006 2014 struct nlm_slock *nslp, uint_t timeo_secs)
2007 2015 {
2008 2016 clock_t timeo_ticks;
2009 2017 int cv_res, error;
2010 2018
2011 2019 /*
2012 2020 * If the granted message arrived before we got here,
2013 2021 * nslp->nsl_state will be NLM_SL_GRANTED - in that case don't sleep.
2014 2022 */
2015 2023 cv_res = 1;
2016 2024 timeo_ticks = ddi_get_lbolt() + SEC_TO_TICK(timeo_secs);
2017 2025
2018 2026 mutex_enter(&g->lock);
2019 2027 while (nslp->nsl_state == NLM_SL_BLOCKED && cv_res > 0) {
2020 2028 cv_res = cv_timedwait_sig(&nslp->nsl_cond,
2021 2029 &g->lock, timeo_ticks);
2022 2030 }
2023 2031
2024 2032 /*
2025 2033 * No matter why we wake up, if the lock was
2026 2034 	 * cancelled, let the caller know
2027 2035 * about it by returning EINTR.
2028 2036 */
2029 2037 if (nslp->nsl_state == NLM_SL_CANCELLED) {
2030 2038 error = EINTR;
2031 2039 goto out;
2032 2040 }
2033 2041
2034 2042 if (cv_res <= 0) {
2035 2043 /* We were woken up either by timeout or by interrupt */
2036 2044 error = (cv_res < 0) ? ETIMEDOUT : EINTR;
2037 2045
2038 2046 /*
2039 2047 * The granted message may arrive after the
2040 2048 * interrupt/timeout but before we manage to lock the
2041 2049 * mutex. Detect this by examining nslp.
2042 2050 */
2043 2051 if (nslp->nsl_state == NLM_SL_GRANTED)
2044 2052 error = 0;
2045 2053 } else { /* Awaken via cv_signal()/cv_broadcast() or didn't block */
2046 2054 error = 0;
2047 2055 VERIFY(nslp->nsl_state == NLM_SL_GRANTED);
2048 2056 }
2049 2057
2050 2058 out:
2051 2059 mutex_exit(&g->lock);
2052 2060 return (error);
2053 2061 }
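
Putting the client-side pieces together, a hedged sketch of the
register/wait/unregister flow (the lock arguments and the 30-second
timeout are placeholders):

	struct nlm_slock *nslp;
	int error;

	nslp = nlm_slock_register(g, host, &alock, vp);
	error = nlm_slock_wait(g, nslp, 30);
	switch (error) {
	case 0:		/* granted callback arrived; lock is held */
		break;
	case EINTR:	/* interrupted or cancelled: send a cancel RPC */
		break;
	case ETIMEDOUT:	/* no callback in time: resend the lock request */
		break;
	}
	nlm_slock_unregister(g, nslp);
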
2054 2062
2055 2063 /*
2056 2064 * Mark client side sleeping lock as granted
2057 2065 * and wake up a process blocked on the lock.
2058 2066 * Called from server side NLM_GRANT handler.
2059 2067 *
2060 2068  * If the sleeping lock is found, return 0; otherwise
2061 2069 * return ENOENT.
2062 2070 */
2063 2071 int
2064 2072 nlm_slock_grant(struct nlm_globals *g,
2065 2073 struct nlm_host *hostp, struct nlm4_lock *alock)
2066 2074 {
2067 2075 struct nlm_slock *nslp;
2068 2076 int error = ENOENT;
2069 2077
2070 2078 mutex_enter(&g->lock);
2071 2079 TAILQ_FOREACH(nslp, &g->nlm_slocks, nsl_link) {
2072 2080 if ((nslp->nsl_state != NLM_SL_BLOCKED) ||
2073 2081 (nslp->nsl_host != hostp))
2074 2082 continue;
2075 2083
2076 2084 if (alock->svid == nslp->nsl_lock.svid &&
2077 2085 alock->l_offset == nslp->nsl_lock.l_offset &&
2078 2086 alock->l_len == nslp->nsl_lock.l_len &&
2079 2087 alock->fh.n_len == nslp->nsl_lock.fh.n_len &&
2080 2088 bcmp(alock->fh.n_bytes, nslp->nsl_lock.fh.n_bytes,
2081 2089 nslp->nsl_lock.fh.n_len) == 0) {
2082 2090 nslp->nsl_state = NLM_SL_GRANTED;
2083 2091 cv_broadcast(&nslp->nsl_cond);
2084 2092 error = 0;
2085 2093 break;
2086 2094 }
2087 2095 }
2088 2096
2089 2097 mutex_exit(&g->lock);
2090 2098 return (error);
2091 2099 }
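
And the matching server-to-client path, sketched (alockp is a
placeholder for the nlm4_lock taken from the GRANTED arguments):

	if (nlm_slock_grant(g, hostp, alockp) == ENOENT) {
		/* no matching sleeping lock; it was likely cancelled */
	}
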
2092 2100
2093 2101 /*
2094 2102  * Register a sleeping lock request corresponding to
2095 2103  * flp on the given vhold object.
2096 2104  * On success the function returns 0; if a lock
2097 2105  * request with the same flp is already registered,
2098 2106  * it returns EEXIST.
2099 2107 */
2100 2108 int
2101 2109 nlm_slreq_register(struct nlm_host *hostp, struct nlm_vhold *nvp,
2102 2110 struct flock64 *flp)
2103 2111 {
2104 2112 struct nlm_slreq *slr, *new_slr = NULL;
2105 2113 int ret = EEXIST;
2106 2114
2107 2115 mutex_enter(&hostp->nh_lock);
2108 2116 slr = nlm_slreq_find_locked(hostp, nvp, flp);
2109 2117 if (slr != NULL)
2110 2118 goto out;
2111 2119
2112 2120 mutex_exit(&hostp->nh_lock);
2113 2121 new_slr = kmem_zalloc(sizeof (*slr), KM_SLEEP);
2114 2122 bcopy(flp, &new_slr->nsr_fl, sizeof (*flp));
2115 2123
2116 2124 mutex_enter(&hostp->nh_lock);
2117 2125 slr = nlm_slreq_find_locked(hostp, nvp, flp);
2118 2126 if (slr == NULL) {
2119 2127 slr = new_slr;
2120 2128 new_slr = NULL;
2121 2129 ret = 0;
2122 2130
2123 2131 TAILQ_INSERT_TAIL(&nvp->nv_slreqs, slr, nsr_link);
2124 2132 }
2125 2133
2126 2134 out:
2127 2135 mutex_exit(&hostp->nh_lock);
2128 2136 if (new_slr != NULL)
2129 2137 kmem_free(new_slr, sizeof (*new_slr));
2130 2138
2131 2139 return (ret);
2132 2140 }
2133 2141
2134 2142 /*
2135 2143  * Unregister the sleeping lock request corresponding
2136 2144  * to flp from the given vhold object.
2137 2145  * On success the function returns 0; if no lock
2138 2146  * request corresponding to flp is found on the
2139 2147  * given vhold, it returns ENOENT.
2140 2148 */
2141 2149 int
2142 2150 nlm_slreq_unregister(struct nlm_host *hostp, struct nlm_vhold *nvp,
2143 2151 struct flock64 *flp)
2144 2152 {
2145 2153 struct nlm_slreq *slr;
2146 2154
2147 2155 mutex_enter(&hostp->nh_lock);
2148 2156 slr = nlm_slreq_find_locked(hostp, nvp, flp);
2149 2157 if (slr == NULL) {
2150 2158 mutex_exit(&hostp->nh_lock);
2151 2159 return (ENOENT);
2152 2160 }
2153 2161
2154 2162 TAILQ_REMOVE(&nvp->nv_slreqs, slr, nsr_link);
2155 2163 mutex_exit(&hostp->nh_lock);
2156 2164
2157 2165 kmem_free(slr, sizeof (*slr));
2158 2166 return (0);
2159 2167 }
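
A hedged sketch of how the two calls above pair up around a server-side
blocking lock attempt (the flock64 values are illustrative):

	struct flock64 fl;

	bzero(&fl, sizeof (fl));
	fl.l_type = F_WRLCK;
	fl.l_start = 0;
	fl.l_len = 0;		/* to EOF */
	fl.l_pid = 1234;	/* placeholder svid */

	if (nlm_slreq_register(hostp, nvp, &fl) == EEXIST)
		return;		/* the same request is already queued */

	/* ... blocking lock attempt on nvp->nv_vp ... */

	(void) nlm_slreq_unregister(hostp, nvp, &fl);
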
2160 2168
2161 2169 /*
2162 2170 * Find sleeping lock request on the given vhold object by flp.
2163 2171 */
2164 2172 struct nlm_slreq *
2165 2173 nlm_slreq_find_locked(struct nlm_host *hostp, struct nlm_vhold *nvp,
2166 2174 struct flock64 *flp)
2167 2175 {
2168 2176 struct nlm_slreq *slr = NULL;
2169 2177
2170 2178 ASSERT(MUTEX_HELD(&hostp->nh_lock));
2171 2179 TAILQ_FOREACH(slr, &nvp->nv_slreqs, nsr_link) {
2172 2180 if (slr->nsr_fl.l_start == flp->l_start &&
2173 2181 slr->nsr_fl.l_len == flp->l_len &&
2174 2182 slr->nsr_fl.l_pid == flp->l_pid &&
2175 2183 slr->nsr_fl.l_type == flp->l_type)
2176 2184 break;
2177 2185 }
2178 2186
2179 2187 return (slr);
2180 2188 }
2181 2189
2182 2190 /*
2183 2191 * NLM tracks active share reservations made on the client side.
2184 2192  * It needs to track share reservations for two purposes:
2185 2193 * 1) to determine if nlm_host is busy (if it has active locks and/or
2186 2194 * share reservations, it is)
2187 2195 * 2) to recover active share reservations when NLM server reports
2188 2196 * that it has rebooted.
2189 2197 *
2190 2198  * Unfortunately the illumos local share reservation manager (see
2191 2199  * os/share.c) has no way to look up all reservations in the system
2192 2200  * by sysid (like the local lock manager has) or to get all
2193 2201  * reservations by sysid. It tracks reservations per vnode and can
2194 2202  * only get/look them up on a particular vnode. That's not what NLM
2195 2203  * needs, hence this ugly share reservation tracking scheme.
2196 2204 */
2197 2205
2198 2206 void
2199 2207 nlm_shres_track(struct nlm_host *hostp, vnode_t *vp, struct shrlock *shrp)
2200 2208 {
2201 2209 struct nlm_shres *nsp, *nsp_new;
2202 2210
2203 2211 /*
2204 2212 	 * NFS code must fill in s_owner, so that
2205 2213 * s_own_len is never 0.
2206 2214 */
2207 2215 ASSERT(shrp->s_own_len > 0);
2208 2216 nsp_new = nlm_shres_create_item(shrp, vp);
2209 2217
2210 2218 mutex_enter(&hostp->nh_lock);
2211 2219 for (nsp = hostp->nh_shrlist; nsp != NULL; nsp = nsp->ns_next)
2212 2220 if (nsp->ns_vp == vp && nlm_shres_equal(shrp, nsp->ns_shr))
2213 2221 break;
2214 2222
2215 2223 if (nsp != NULL) {
2216 2224 /*
2217 2225 * Found a duplicate. Do nothing.
2218 2226 */
2219 2227
2220 2228 goto out;
2221 2229 }
2222 2230
2223 2231 nsp = nsp_new;
2224 2232 nsp_new = NULL;
2225 2233 nsp->ns_next = hostp->nh_shrlist;
2226 2234 hostp->nh_shrlist = nsp;
2227 2235
2228 2236 out:
2229 2237 mutex_exit(&hostp->nh_lock);
2230 2238 if (nsp_new != NULL)
2231 2239 nlm_shres_destroy_item(nsp_new);
2232 2240 }
2233 2241
2234 2242 void
2235 2243 nlm_shres_untrack(struct nlm_host *hostp, vnode_t *vp, struct shrlock *shrp)
2236 2244 {
2237 2245 struct nlm_shres *nsp, *nsp_prev = NULL;
2238 2246
2239 2247 mutex_enter(&hostp->nh_lock);
2240 2248 nsp = hostp->nh_shrlist;
2241 2249 while (nsp != NULL) {
2242 2250 if (nsp->ns_vp == vp && nlm_shres_equal(shrp, nsp->ns_shr)) {
2243 2251 struct nlm_shres *nsp_del;
2244 2252
2245 2253 nsp_del = nsp;
2246 2254 nsp = nsp->ns_next;
2247 2255 if (nsp_prev != NULL)
2248 2256 nsp_prev->ns_next = nsp;
2249 2257 else
2250 2258 hostp->nh_shrlist = nsp;
2251 2259
2252 2260 nlm_shres_destroy_item(nsp_del);
2253 2261 continue;
2254 2262 }
2255 2263
2256 2264 nsp_prev = nsp;
2257 2265 nsp = nsp->ns_next;
2258 2266 }
2259 2267
2260 2268 mutex_exit(&hostp->nh_lock);
2261 2269 }
2262 2270
2263 2271 /*
2264 2272 * Get a _copy_ of the list of all active share reservations
2265 2273 * made by the given host.
2266 2274  * NOTE: the list this function returns _must_ be released using
2267 2275 * nlm_free_shrlist().
2268 2276 */
2269 2277 struct nlm_shres *
2270 2278 nlm_get_active_shres(struct nlm_host *hostp)
2271 2279 {
2272 2280 struct nlm_shres *nsp, *nslist = NULL;
2273 2281
2274 2282 mutex_enter(&hostp->nh_lock);
2275 2283 for (nsp = hostp->nh_shrlist; nsp != NULL; nsp = nsp->ns_next) {
2276 2284 struct nlm_shres *nsp_new;
2277 2285
2278 2286 nsp_new = nlm_shres_create_item(nsp->ns_shr, nsp->ns_vp);
2279 2287 nsp_new->ns_next = nslist;
2280 2288 nslist = nsp_new;
2281 2289 }
2282 2290
2283 2291 mutex_exit(&hostp->nh_lock);
2284 2292 return (nslist);
2285 2293 }
2286 2294
2287 2295 /*
2288 2296 * Free memory allocated for the active share reservations
2289 2297  * list created by nlm_get_active_shres().
2290 2298 */
2291 2299 void
2292 2300 nlm_free_shrlist(struct nlm_shres *nslist)
2293 2301 {
2294 2302 struct nlm_shres *nsp;
2295 2303
2296 2304 while (nslist != NULL) {
2297 2305 nsp = nslist;
2298 2306 nslist = nslist->ns_next;
2299 2307
2300 2308 nlm_shres_destroy_item(nsp);
2301 2309 }
2302 2310 }
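
A hedged sketch of the intended snapshot usage, e.g. when reclaiming
after a server reboot (the per-item action is a placeholder):

	struct nlm_shres *nsp, *nslist;

	nslist = nlm_get_active_shres(hostp);
	for (nsp = nslist; nsp != NULL; nsp = nsp->ns_next) {
		/* ... re-claim nsp->ns_shr on nsp->ns_vp ... */
	}
	nlm_free_shrlist(nslist);
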
2303 2311
2304 2312 static bool_t
2305 2313 nlm_shres_equal(struct shrlock *shrp1, struct shrlock *shrp2)
2306 2314 {
2307 2315 if (shrp1->s_sysid == shrp2->s_sysid &&
2308 2316 shrp1->s_pid == shrp2->s_pid &&
2309 2317 shrp1->s_own_len == shrp2->s_own_len &&
2310 2318 bcmp(shrp1->s_owner, shrp2->s_owner,
2311 2319 shrp1->s_own_len) == 0)
2312 2320 return (TRUE);
2313 2321
2314 2322 return (FALSE);
2315 2323 }
2316 2324
2317 2325 static struct nlm_shres *
2318 2326 nlm_shres_create_item(struct shrlock *shrp, vnode_t *vp)
2319 2327 {
2320 2328 struct nlm_shres *nsp;
2321 2329
2322 2330 nsp = kmem_alloc(sizeof (*nsp), KM_SLEEP);
2323 2331 nsp->ns_shr = kmem_alloc(sizeof (*shrp), KM_SLEEP);
2324 2332 bcopy(shrp, nsp->ns_shr, sizeof (*shrp));
2325 2333 nsp->ns_shr->s_owner = kmem_alloc(shrp->s_own_len, KM_SLEEP);
2326 2334 bcopy(shrp->s_owner, nsp->ns_shr->s_owner, shrp->s_own_len);
2327 2335 nsp->ns_vp = vp;
2328 2336
2329 2337 return (nsp);
2330 2338 }
2331 2339
2332 2340 static void
2333 2341 nlm_shres_destroy_item(struct nlm_shres *nsp)
2334 2342 {
2335 2343 kmem_free(nsp->ns_shr->s_owner,
2336 2344 nsp->ns_shr->s_own_len);
2337 2345 kmem_free(nsp->ns_shr, sizeof (struct shrlock));
2338 2346 kmem_free(nsp, sizeof (*nsp));
2339 2347 }
2340 2348
2341 2349 /*
2342 2350 * Called by klmmod.c when lockd adds a network endpoint
2343 2351 * on which we should begin RPC services.
2344 2352 */
2345 2353 int
2346 2354 nlm_svc_add_ep(struct file *fp, const char *netid, struct knetconfig *knc)
2347 2355 {
2348 2356 SVCMASTERXPRT *xprt = NULL;
2349 2357 int error;
2350 2358
2351 2359 error = svc_tli_kcreate(fp, 0, (char *)netid, NULL, &xprt,
2352 2360 &nlm_sct, NULL, NLM_SVCPOOL_ID, FALSE);
2353 2361 if (error != 0)
2354 2362 return (error);
2355 2363
2356 2364 (void) nlm_knc_to_netid(knc);
2357 2365 return (0);
2358 2366 }
2359 2367
2360 2368 /*
2361 2369 * Start NLM service.
2362 2370 */
2363 2371 int
2364 2372 nlm_svc_starting(struct nlm_globals *g, struct file *fp,
2365 2373 const char *netid, struct knetconfig *knc)
2366 2374 {
2367 2375 int error;
2368 2376 enum clnt_stat stat;
2369 2377
2370 2378 VERIFY(g->run_status == NLM_ST_STARTING);
2371 2379 VERIFY(g->nlm_gc_thread == NULL);
2372 2380
2373 2381 error = nlm_nsm_init_local(&g->nlm_nsm);
2374 2382 if (error != 0) {
2375 2383 NLM_ERR("Failed to initialize NSM handler "
2376 2384 "(error=%d)\n", error);
2377 2385 g->run_status = NLM_ST_DOWN;
2378 2386 return (error);
2379 2387 }
2380 2388
2381 2389 error = EIO;
2382 2390
2383 2391 /*
2384 2392 * Create an NLM garbage collector thread that will
2385 2393 	 * clean up stale vhold and host objects.
2386 2394 */
2387 2395 g->nlm_gc_thread = zthread_create(NULL, 0, nlm_gc,
2388 2396 g, 0, minclsyspri);
2389 2397
2390 2398 /*
2391 2399 * Send SIMU_CRASH to local statd to report that
2392 2400 	 * NLM has started, so that statd can notify other hosts
2393 2401 	 * about the NLM state change.
2394 2402 */
2395 2403
2396 2404 stat = nlm_nsm_simu_crash(&g->nlm_nsm);
2397 2405 if (stat != RPC_SUCCESS) {
2398 2406 NLM_ERR("Failed to connect to local statd "
2399 2407 "(rpcerr=%d)\n", stat);
2400 2408 goto shutdown_lm;
2401 2409 }
2402 2410
2403 2411 stat = nlm_nsm_stat(&g->nlm_nsm, &g->nsm_state);
2404 2412 if (stat != RPC_SUCCESS) {
2405 2413 NLM_ERR("Failed to get the status of local statd "
2406 2414 "(rpcerr=%d)\n", stat);
2407 2415 goto shutdown_lm;
2408 2416 }
2409 2417
2410 2418 g->grace_threshold = ddi_get_lbolt() +
2411 2419 SEC_TO_TICK(g->grace_period);
2412 2420
2413 2421 /* Register endpoint used for communications with local NLM */
2414 2422 error = nlm_svc_add_ep(fp, netid, knc);
2415 2423 if (error != 0)
2416 2424 goto shutdown_lm;
2417 2425
2418 2426 (void) svc_pool_control(NLM_SVCPOOL_ID,
2419 2427 SVCPSET_SHUTDOWN_PROC, (void *)nlm_pool_shutdown);
2420 2428 g->run_status = NLM_ST_UP;
2421 2429 return (0);
2422 2430
2423 2431 shutdown_lm:
2424 2432 mutex_enter(&g->lock);
2425 2433 g->run_status = NLM_ST_STOPPING;
2426 2434 mutex_exit(&g->lock);
2427 2435
2428 2436 nlm_svc_stopping(g);
2429 2437 return (error);
2430 2438 }
2431 2439
2432 2440 /*
2433 2441  * Called when the server pool is destroyed: at that
2434 2442  * point all transports are closed and no server
2435 2443  * threads exist.
2436 2444 *
2437 2445 * Just call lm_shutdown() to shut NLM down properly.
2438 2446 */
2439 2447 static void
2440 2448 nlm_pool_shutdown(void)
2441 2449 {
2442 2450 (void) lm_shutdown();
2443 2451 }
2444 2452
2445 2453 /*
2446 2454  * Stop the NLM service and clean up all resources
2447 2455  * NLM owns at the moment.
2448 2456  *
2449 2457  * NOTE: NFS code can call NLM while it's
2450 2458  * stopping or even after it has shut down. Any attempt
2451 2459  * to lock a file, either on the client or on the server,
2452 2460  * will fail if NLM isn't in the NLM_ST_UP state.
2453 2461 */
2454 2462 void
2455 2463 nlm_svc_stopping(struct nlm_globals *g)
2456 2464 {
2457 2465 mutex_enter(&g->lock);
2458 2466 ASSERT(g->run_status == NLM_ST_STOPPING);
2459 2467
2460 2468 /*
2461 2469 * Ask NLM GC thread to exit and wait until it dies.
2462 2470 */
2463 2471 cv_signal(&g->nlm_gc_sched_cv);
2464 2472 while (g->nlm_gc_thread != NULL)
2465 2473 cv_wait(&g->nlm_gc_finish_cv, &g->lock);
2466 2474
2467 2475 mutex_exit(&g->lock);
2468 2476
2469 2477 /*
2470 2478 	 * Clean up locks owned by NLM hosts.
2471 2479 * NOTE: New hosts won't be created while
2472 2480 * NLM is stopping.
2473 2481 */
2474 2482 while (!avl_is_empty(&g->nlm_hosts_tree)) {
2475 2483 struct nlm_host *hostp;
2476 2484 int busy_hosts = 0;
2477 2485
2478 2486 /*
2479 2487 * Iterate through all NLM hosts in the system
2480 2488 * and drop the locks they own by force.
2481 2489 */
2482 2490 hostp = avl_first(&g->nlm_hosts_tree);
2483 2491 while (hostp != NULL) {
2484 2492 /* Cleanup all client and server side locks */
2485 2493 nlm_client_cancel_all(g, hostp);
2486 2494 nlm_host_notify_server(hostp, 0);
2487 2495
2488 2496 mutex_enter(&hostp->nh_lock);
2489 2497 nlm_host_gc_vholds(hostp);
2490 2498 if (hostp->nh_refs > 0 || nlm_host_has_locks(hostp)) {
2491 2499 /*
2492 2500 				 * It seems the host is still busy; give
2493 2501 				 * it some time to be released and go to
2494 2502 				 * the next one.
2495 2503 */
2496 2504
2497 2505 mutex_exit(&hostp->nh_lock);
2498 2506 hostp = AVL_NEXT(&g->nlm_hosts_tree, hostp);
2499 2507 busy_hosts++;
2500 2508 continue;
2501 2509 }
2502 2510
2503 2511 mutex_exit(&hostp->nh_lock);
2504 2512 hostp = AVL_NEXT(&g->nlm_hosts_tree, hostp);
2505 2513 }
2506 2514
2507 2515 /*
2508 2516 		 * All hosts move to the nlm_idle_hosts list after
2509 2517 		 * the locks they own are cleaned up and their last
2510 2518 		 * references are dropped. Just destroy all hosts on the
2511 2519 		 * nlm_idle_hosts list; they cannot be removed from it
2512 2520 		 * while we're in the stopping state.
2513 2521 */
2514 2522 while ((hostp = TAILQ_FIRST(&g->nlm_idle_hosts)) != NULL) {
2515 2523 nlm_host_unregister(g, hostp);
2516 2524 nlm_host_destroy(hostp);
2517 2525 }
2518 2526
2519 2527 if (busy_hosts > 0) {
2520 2528 /*
2521 2529 			 * Some hosts weren't cleaned up; they're
2522 2530 			 * probably still in the resource cleanup
2523 2531 			 * process. Give them some time to drop
2524 2532 			 * their references.
2525 2533 */
2526 2534 delay(MSEC_TO_TICK(500));
2527 2535 }
2528 2536 }
2529 2537
2530 2538 ASSERT(TAILQ_EMPTY(&g->nlm_slocks));
2531 2539
2532 2540 nlm_nsm_fini(&g->nlm_nsm);
2533 2541 g->lockd_pid = 0;
2534 2542 g->run_status = NLM_ST_DOWN;
2535 2543 }
2536 2544
2537 2545 /*
2538 2546 * Returns TRUE if the given vnode has
2539 2547 * any active or sleeping locks.
2540 2548 */
2541 2549 int
2542 2550 nlm_vp_active(const vnode_t *vp)
2543 2551 {
2544 2552 struct nlm_globals *g;
2545 2553 struct nlm_host *hostp;
2546 2554 struct nlm_vhold *nvp;
2547 2555 int active = 0;
2548 2556
2549 2557 g = zone_getspecific(nlm_zone_key, curzone);
2550 2558
2551 2559 /*
2552 2560 	 * The server side of NLM has locks on the given vnode
2553 2561 	 * if there exists a vhold object that holds
2554 2562 	 * the given vnode "vp" in one of the NLM hosts.
2555 2563 */
2556 2564 mutex_enter(&g->lock);
2557 2565 hostp = avl_first(&g->nlm_hosts_tree);
2558 2566 while (hostp != NULL) {
2559 2567 mutex_enter(&hostp->nh_lock);
2560 2568 nvp = nlm_vhold_find_locked(hostp, vp);
2561 2569 mutex_exit(&hostp->nh_lock);
2562 2570 if (nvp != NULL) {
2563 2571 active = 1;
2564 2572 break;
2565 2573 }
2566 2574
2567 2575 hostp = AVL_NEXT(&g->nlm_hosts_tree, hostp);
2568 2576 }
2569 2577
2570 2578 mutex_exit(&g->lock);
2571 2579 return (active);
2572 2580 }
2573 2581
2574 2582 /*
2575 2583  * Called right before an NFS export is going to
2576 2584  * disappear. The function finds all vnodes
2577 2585  * belonging to the given export and cleans up
2578 2586  * all remote locks and share reservations
2579 2587  * on them.
2580 2588 */
2581 2589 void
2582 2590 nlm_unexport(struct exportinfo *exi)
2583 2591 {
2584 2592 struct nlm_globals *g;
2585 2593 struct nlm_host *hostp;
2586 2594
2587 2595 	/* This may be called on behalf of the global zone doing shutdown. */
2588 2596 ASSERT(exi->exi_zone == curzone || curzone == global_zone);
2589 2597 g = zone_getspecific(nlm_zone_key, exi->exi_zone);
2590 2598 if (g == NULL) {
2591 2599 /* Did zone cleanup get here already? */
2592 2600 return;
2593 2601 }
2594 2602
2595 2603 mutex_enter(&g->lock);
2596 2604 hostp = avl_first(&g->nlm_hosts_tree);
2597 2605 while (hostp != NULL) {
2598 2606 struct nlm_vhold *nvp;
2599 2607
2600 2608 if (hostp->nh_flags & NLM_NH_INIDLE) {
2601 2609 TAILQ_REMOVE(&g->nlm_idle_hosts, hostp, nh_link);
2602 2610 hostp->nh_flags &= ~NLM_NH_INIDLE;
2603 2611 }
2604 2612 hostp->nh_refs++;
2605 2613
2606 2614 mutex_exit(&g->lock);
2607 2615
2608 2616 mutex_enter(&hostp->nh_lock);
2609 2617 TAILQ_FOREACH(nvp, &hostp->nh_vholds_list, nv_link) {
2610 2618 vnode_t *vp;
2611 2619
2612 2620 nvp->nv_refcnt++;
2613 2621 mutex_exit(&hostp->nh_lock);
2614 2622
2615 2623 vp = nvp->nv_vp;
2616 2624
2617 2625 if (!EQFSID(&exi->exi_fsid, &vp->v_vfsp->vfs_fsid))
2618 2626 goto next_iter;
2619 2627
2620 2628 /*
2621 2629 			 * OK, we found that vnode vp is under the
2622 2630 			 * control of the exportinfo exi; now we need
2623 2631 			 * to drop all locks from this vnode, so let's
2624 2632 			 * do it.
2625 2633 */
2626 2634 nlm_vhold_clean(nvp, hostp->nh_sysid);
2627 2635
2628 2636 next_iter:
2629 2637 mutex_enter(&hostp->nh_lock);
2630 2638 nvp->nv_refcnt--;
2631 2639 }
2632 2640 mutex_exit(&hostp->nh_lock);
2633 2641
2634 2642 mutex_enter(&g->lock);
2635 2643 nlm_host_release_locked(g, hostp);
2636 2644
2637 2645 hostp = AVL_NEXT(&g->nlm_hosts_tree, hostp);
2638 2646 }
2639 2647
2640 2648 mutex_exit(&g->lock);
2641 2649 }
2642 2650
2643 2651 /*
2644 2652  * Allocate a new unique sysid.
2645 2653 * In case of failure (no available sysids)
2646 2654 * return LM_NOSYSID.
2647 2655 */
2648 2656 sysid_t
2649 2657 nlm_sysid_alloc(void)
2650 2658 {
2651 2659 sysid_t ret_sysid = LM_NOSYSID;
2652 2660
2653 2661 rw_enter(&lm_lck, RW_WRITER);
2654 2662 if (nlm_sysid_nidx > LM_SYSID_MAX)
2655 2663 nlm_sysid_nidx = LM_SYSID;
2656 2664
2657 2665 if (!BT_TEST(nlm_sysid_bmap, nlm_sysid_nidx)) {
2658 2666 BT_SET(nlm_sysid_bmap, nlm_sysid_nidx);
2659 2667 ret_sysid = nlm_sysid_nidx++;
2660 2668 } else {
2661 2669 index_t id;
2662 2670
2663 2671 id = bt_availbit(nlm_sysid_bmap, NLM_BMAP_NITEMS);
2664 2672 if (id > 0) {
2665 2673 nlm_sysid_nidx = id + 1;
2666 2674 ret_sysid = id;
2667 2675 BT_SET(nlm_sysid_bmap, id);
2668 2676 }
2669 2677 }
2670 2678
2671 2679 rw_exit(&lm_lck);
2672 2680 return (ret_sysid);
2673 2681 }
2674 2682
2675 2683 void
2676 2684 nlm_sysid_free(sysid_t sysid)
2677 2685 {
2678 2686 ASSERT(sysid >= LM_SYSID && sysid <= LM_SYSID_MAX);
2679 2687
2680 2688 rw_enter(&lm_lck, RW_WRITER);
2681 2689 ASSERT(BT_TEST(nlm_sysid_bmap, sysid));
2682 2690 BT_CLEAR(nlm_sysid_bmap, sysid);
2683 2691 rw_exit(&lm_lck);
2684 2692 }
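
A minimal usage sketch of the allocator pair above:

	sysid_t sysid;

	sysid = nlm_sysid_alloc();
	if (sysid == LM_NOSYSID)
		return;		/* bitmap exhausted */

	/* ... hostp->nh_sysid = sysid; later, on host destroy ... */
	nlm_sysid_free(sysid);
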
2685 2693
2686 2694 /*
2687 2695 * Return true if the request came from a local caller.
2688 2696 * By necessity, this "knows" the netid names invented
2689 2697 * in lm_svc() and nlm_netid_from_knetconfig().
2690 2698 */
2691 2699 bool_t
2692 2700 nlm_caller_is_local(SVCXPRT *transp)
2693 2701 {
2694 2702 char *netid;
2695 2703 struct netbuf *rtaddr;
2696 2704
2697 2705 netid = svc_getnetid(transp);
2698 2706 rtaddr = svc_getrpccaller(transp);
2699 2707
2700 2708 if (netid == NULL)
2701 2709 return (FALSE);
2702 2710
2703 2711 if (strcmp(netid, "ticlts") == 0 ||
2704 2712 strcmp(netid, "ticotsord") == 0)
2705 2713 return (TRUE);
2706 2714
2707 2715 if (strcmp(netid, "tcp") == 0 || strcmp(netid, "udp") == 0) {
2708 2716 struct sockaddr_in *sin = (void *)rtaddr->buf;
2709 2717 if (sin->sin_addr.s_addr == htonl(INADDR_LOOPBACK))
2710 2718 return (TRUE);
2711 2719 }
2712 2720 if (strcmp(netid, "tcp6") == 0 || strcmp(netid, "udp6") == 0) {
2713 2721 struct sockaddr_in6 *sin6 = (void *)rtaddr->buf;
2714 2722 if (IN6_IS_ADDR_LOOPBACK(&sin6->sin6_addr))
2715 2723 return (TRUE);
2716 2724 }
2717 2725
2718 2726 return (FALSE); /* unknown transport */
2719 2727 }
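
A hedged example of gating a local-only operation on the check above
(the surrounding handler is illustrative):

	if (!nlm_caller_is_local(transp)) {
		/* reject: this request must originate on this machine */
		return;
	}
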
2720 2728
2721 2729 /*
2722 2730  * Get the netid string corresponding to the given knetconfig.
2723 2731 * If not done already, save knc->knc_rdev in our table.
2724 2732 */
2725 2733 const char *
2726 2734 nlm_knc_to_netid(struct knetconfig *knc)
2727 2735 {
2728 2736 int i;
2729 2737 dev_t rdev;
2730 2738 struct nlm_knc *nc;
2731 2739 const char *netid = NULL;
2732 2740
2733 2741 rw_enter(&lm_lck, RW_READER);
2734 2742 for (i = 0; i < NLM_KNCS; i++) {
2735 2743 nc = &nlm_netconfigs[i];
2736 2744
2737 2745 if (nc->n_knc.knc_semantics == knc->knc_semantics &&
2738 2746 strcmp(nc->n_knc.knc_protofmly,
2739 2747 knc->knc_protofmly) == 0) {
2740 2748 netid = nc->n_netid;
2741 2749 rdev = nc->n_knc.knc_rdev;
2742 2750 break;
2743 2751 }
2744 2752 }
2745 2753 rw_exit(&lm_lck);
2746 2754
2747 2755 if (netid != NULL && rdev == NODEV) {
2748 2756 rw_enter(&lm_lck, RW_WRITER);
2749 2757 if (nc->n_knc.knc_rdev == NODEV)
2750 2758 nc->n_knc.knc_rdev = knc->knc_rdev;
2751 2759 rw_exit(&lm_lck);
2752 2760 }
2753 2761
2754 2762 return (netid);
2755 2763 }
2756 2764
2757 2765 /*
2758 2766 * Get a knetconfig corresponding to the given netid.
2759 2767 * If there's no knetconfig for this netid, ENOENT
2760 2768 * is returned.
2761 2769 */
2762 2770 int
2763 2771 nlm_knc_from_netid(const char *netid, struct knetconfig *knc)
2764 2772 {
2765 2773 int i, ret;
2766 2774
2767 2775 ret = ENOENT;
2768 2776 for (i = 0; i < NLM_KNCS; i++) {
2769 2777 struct nlm_knc *nknc;
2770 2778
2771 2779 nknc = &nlm_netconfigs[i];
2772 2780 if (strcmp(netid, nknc->n_netid) == 0 &&
2773 2781 nknc->n_knc.knc_rdev != NODEV) {
2774 2782 *knc = nknc->n_knc;
2775 2783 ret = 0;
2776 2784 break;
2777 2785 }
2778 2786 }
2779 2787
2780 2788 return (ret);
2781 2789 }
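
A hedged round-trip sketch for the two mapping functions (kncp stands
for a knetconfig obtained from the service binding):

	struct knetconfig knc;
	const char *netid;

	netid = nlm_knc_to_netid(kncp);	/* also caches kncp->knc_rdev */
	if (netid != NULL && nlm_knc_from_netid(netid, &knc) == 0) {
		/* knc now carries a valid knc_rdev for kernel RPC use */
	}
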
2782 2790
2783 2791 void
2784 2792 nlm_cprsuspend(void)
2785 2793 {
2786 2794 struct nlm_globals *g;
2787 2795
2788 2796 rw_enter(&lm_lck, RW_READER);
2789 2797 TAILQ_FOREACH(g, &nlm_zones_list, nlm_link)
2790 2798 nlm_suspend_zone(g);
2791 2799
2792 2800 rw_exit(&lm_lck);
2793 2801 }
2794 2802
2795 2803 void
2796 2804 nlm_cprresume(void)
2797 2805 {
2798 2806 struct nlm_globals *g;
2799 2807
2800 2808 rw_enter(&lm_lck, RW_READER);
2801 2809 TAILQ_FOREACH(g, &nlm_zones_list, nlm_link)
2802 2810 nlm_resume_zone(g);
2803 2811
2804 2812 rw_exit(&lm_lck);
2805 2813 }
2806 2814
2807 2815 void
2808 2816 nlm_nsm_clnt_init(CLIENT *clnt, struct nlm_nsm *nsm)
2809 2817 {
2810 2818 (void) clnt_tli_kinit(clnt, &nsm->ns_knc, &nsm->ns_addr, 0,
2811 - NLM_RPC_RETRIES, kcred);
2819 + NLM_RPC_RETRIES, zone_kcred());
2812 2820 }
2813 2821
2814 2822 void
2815 2823 nlm_netbuf_to_netobj(struct netbuf *addr, int *family, netobj *obj)
2816 2824 {
2817 2825 /* LINTED pointer alignment */
2818 2826 struct sockaddr *sa = (struct sockaddr *)addr->buf;
2819 2827
2820 2828 *family = sa->sa_family;
2821 2829
2822 2830 switch (sa->sa_family) {
2823 2831 case AF_INET: {
2824 2832 /* LINTED pointer alignment */
2825 2833 struct sockaddr_in *sin = (struct sockaddr_in *)sa;
2826 2834
2827 2835 obj->n_len = sizeof (sin->sin_addr);
2828 2836 obj->n_bytes = (char *)&sin->sin_addr;
2829 2837 break;
2830 2838 }
2831 2839
2832 2840 case AF_INET6: {
2833 2841 /* LINTED pointer alignment */
2834 2842 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa;
2835 2843
2836 2844 obj->n_len = sizeof (sin6->sin6_addr);
2837 2845 obj->n_bytes = (char *)&sin6->sin6_addr;
2838 2846 break;
2839 2847 }
2840 2848
2841 2849 default:
2842 2850 VERIFY(0);
2843 2851 break;
2844 2852 }
2845 2853 }