1 /*
   2  * Copyright (c) 2008 Isilon Inc http://www.isilon.com/
   3  * Authors: Doug Rabson <dfr@rabson.org>
   4  * Developed with Red Inc: Alfred Perlstein <alfred@freebsd.org>
   5  *
   6  * Redistribution and use in source and binary forms, with or without
   7  * modification, are permitted provided that the following conditions
   8  * are met:
   9  * 1. Redistributions of source code must retain the above copyright
  10  *    notice, this list of conditions and the following disclaimer.
  11  * 2. Redistributions in binary form must reproduce the above copyright
  12  *    notice, this list of conditions and the following disclaimer in the
  13  *    documentation and/or other materials provided with the distribution.
  14  *
  15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  25  * SUCH DAMAGE.
  26  */
  27 
  28 /*
  29  * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
  30  * Copyright (c) 2012 by Delphix. All rights reserved.
  31  * Copyright 2017 Joyent, Inc.  All rights reserved.
  32  */
  33 
  34 /*
  35  * NFS LockManager, start/stop, support functions, etc.
  36  * Most of the interesting code is here.
  37  *
  38  * Source code derived from FreeBSD nlm_prot_impl.c
  39  */
  40 
  41 #include <sys/param.h>
  42 #include <sys/systm.h>
  43 #include <sys/thread.h>
  44 #include <sys/fcntl.h>
  45 #include <sys/flock.h>
  46 #include <sys/mount.h>
  47 #include <sys/priv.h>
  48 #include <sys/proc.h>
  49 #include <sys/share.h>
  50 #include <sys/socket.h>
  51 #include <sys/syscall.h>
  52 #include <sys/syslog.h>
  53 #include <sys/systm.h>
  54 #include <sys/class.h>
  55 #include <sys/unistd.h>
  56 #include <sys/vnode.h>
  57 #include <sys/vfs.h>
  58 #include <sys/queue.h>
  59 #include <sys/bitmap.h>
  60 #include <sys/sdt.h>
  61 #include <sys/brand.h>
  62 #include <netinet/in.h>
  63 
  64 #include <rpc/rpc.h>
  65 #include <rpc/xdr.h>
  66 #include <rpc/pmap_prot.h>
  67 #include <rpc/pmap_clnt.h>
  68 #include <rpc/rpcb_prot.h>
  69 
  70 #include <rpcsvc/nlm_prot.h>
  71 #include <rpcsvc/sm_inter.h>
  72 #include <rpcsvc/nsm_addr.h>
  73 
  74 #include <nfs/nfs.h>
  75 #include <nfs/nfs_clnt.h>
  76 #include <nfs/export.h>
  77 #include <nfs/rnode.h>
  78 #include <nfs/lm.h>
  79 
  80 #include "nlm_impl.h"
  81 
/*
 * Pairing of a kernel netconfig with its netid string.
 * Entries of this type populate the nlm_netconfigs table,
 * which maps netids to knetconfigs and vice versa.
 */
struct nlm_knc {
	struct knetconfig	n_knc;		/* transport description */
	const char		*n_netid;	/* netid name, e.g. "tcp" */
};
  86 
  87 /*
  88  * Number of attempts NLM tries to obtain RPC binding
  89  * of local statd.
  90  */
  91 #define NLM_NSM_RPCBIND_RETRIES 10
  92 
  93 /*
  94  * Timeout (in seconds) NLM waits before making another
  95  * attempt to obtain RPC binding of local statd.
  96  */
  97 #define NLM_NSM_RPCBIND_TIMEOUT 5
  98 
  99 /*
 100  * Total number of sysids in NLM sysid bitmap
 101  */
 102 #define NLM_BMAP_NITEMS (LM_SYSID_MAX + 1)
 103 
 104 /*
 105  * Number of ulong_t words in bitmap that is used
 106  * for allocation of sysid numbers.
 107  */
 108 #define NLM_BMAP_WORDS  (NLM_BMAP_NITEMS / BT_NBIPUL)
 109 
 110 /*
 111  * Given an integer x, the macro returns
 112  * -1 if x is negative,
 113  *  0 if x is zero
 114  *  1 if x is positive
 115  */
 116 #define SIGN(x) (((x) > 0) - ((x) < 0))
 117 
 118 #define ARRSIZE(arr)    (sizeof (arr) / sizeof ((arr)[0]))
 119 #define NLM_KNCS        ARRSIZE(nlm_netconfigs)
 120 
 121 krwlock_t lm_lck;
 122 
 123 /*
 124  * Zero timeout for asynchronous NLM RPC operations
 125  */
 126 static const struct timeval nlm_rpctv_zero = { 0,  0 };
 127 
 128 /*
 129  * List of all Zone globals nlm_globals instences
 130  * linked together.
 131  */
 132 static struct nlm_globals_list nlm_zones_list; /* (g) */
 133 
 134 /*
 135  * NLM kmem caches
 136  */
 137 static struct kmem_cache *nlm_hosts_cache = NULL;
 138 static struct kmem_cache *nlm_vhold_cache = NULL;
 139 
 140 /*
 141  * A bitmap for allocation of new sysids.
 142  * Sysid is a unique number between LM_SYSID
 143  * and LM_SYSID_MAX. Sysid represents unique remote
 144  * host that does file locks on the given host.
 145  */
 146 static ulong_t  nlm_sysid_bmap[NLM_BMAP_WORDS]; /* (g) */
 147 static int      nlm_sysid_nidx;                 /* (g) */
 148 
 149 /*
 150  * RPC service registration for all transports
 151  */
 152 static SVC_CALLOUT nlm_svcs[] = {
 153         { NLM_PROG, 4, 4, nlm_prog_4 }, /* NLM4_VERS */
 154         { NLM_PROG, 1, 3, nlm_prog_3 }  /* NLM_VERS - NLM_VERSX */
 155 };
 156 
 157 static SVC_CALLOUT_TABLE nlm_sct = {
 158         ARRSIZE(nlm_svcs),
 159         FALSE,
 160         nlm_svcs
 161 };
 162 
 163 /*
 164  * Static table of all netid/knetconfig network
 165  * lock manager can work with. nlm_netconfigs table
 166  * is used when we need to get valid knetconfig by
 167  * netid and vice versa.
 168  *
 169  * Knetconfigs are activated either by the call from
 170  * user-space lockd daemon (server side) or by taking
 171  * knetconfig from NFS mountinfo (client side)
 172  */
 173 static struct nlm_knc nlm_netconfigs[] = { /* (g) */
 174         /* UDP */
 175         {
 176                 { NC_TPI_CLTS, NC_INET, NC_UDP, NODEV },
 177                 "udp",
 178         },
 179         /* TCP */
 180         {
 181                 { NC_TPI_COTS_ORD, NC_INET, NC_TCP, NODEV },
 182                 "tcp",
 183         },
 184         /* UDP over IPv6 */
 185         {
 186                 { NC_TPI_CLTS, NC_INET6, NC_UDP, NODEV },
 187                 "udp6",
 188         },
 189         /* TCP over IPv6 */
 190         {
 191                 { NC_TPI_COTS_ORD, NC_INET6, NC_TCP, NODEV },
 192                 "tcp6",
 193         },
 194         /* ticlts (loopback over UDP) */
 195         {
 196                 { NC_TPI_CLTS, NC_LOOPBACK, NC_NOPROTO, NODEV },
 197                 "ticlts",
 198         },
 199         /* ticotsord (loopback over TCP) */
 200         {
 201                 { NC_TPI_COTS_ORD, NC_LOOPBACK, NC_NOPROTO, NODEV },
 202                 "ticotsord",
 203         },
 204 };
 205 
 206 /*
 207  * NLM functions which can be called by a brand hook.
 208  */
 209 void nlm_netbuf_to_netobj(struct netbuf *, int *, netobj *);
 210 void nlm_nsm_clnt_init(CLIENT *, struct nlm_nsm *);
 211 
 212 /*
 213  * NLM misc. function
 214  */
 215 static void nlm_copy_netbuf(struct netbuf *, struct netbuf *);
 216 static int nlm_netbuf_addrs_cmp(struct netbuf *, struct netbuf *);
 217 static void nlm_kmem_reclaim(void *);
 218 static void nlm_pool_shutdown(void);
 219 static void nlm_suspend_zone(struct nlm_globals *);
 220 static void nlm_resume_zone(struct nlm_globals *);
 221 
 222 /*
 223  * NLM thread functions
 224  */
 225 static void nlm_gc(struct nlm_globals *);
 226 static void nlm_reclaimer(struct nlm_host *);
 227 
 228 /*
 229  * NLM NSM functions
 230  */
 231 static int nlm_init_local_knc(struct knetconfig *);
 232 static int nlm_nsm_init_local(struct nlm_nsm *);
 233 static int nlm_nsm_init(struct nlm_nsm *, struct knetconfig *, struct netbuf *);
 234 static void nlm_nsm_fini(struct nlm_nsm *);
 235 static enum clnt_stat nlm_nsm_simu_crash(struct nlm_nsm *);
 236 static enum clnt_stat nlm_nsm_stat(struct nlm_nsm *, int32_t *);
 237 static enum clnt_stat nlm_nsm_mon(struct nlm_nsm *, char *, uint16_t);
 238 static enum clnt_stat nlm_nsm_unmon(struct nlm_nsm *, char *);
 239 
 240 /*
 241  * NLM host functions
 242  */
 243 static int nlm_host_ctor(void *, void *, int);
 244 static void nlm_host_dtor(void *, void *);
 245 static void nlm_host_destroy(struct nlm_host *);
 246 static struct nlm_host *nlm_host_create(char *, const char *,
 247     struct knetconfig *, struct netbuf *);
 248 static struct nlm_host *nlm_host_find_locked(struct nlm_globals *,
 249     const char *, struct netbuf *, avl_index_t *);
 250 static void nlm_host_unregister(struct nlm_globals *, struct nlm_host *);
 251 static void nlm_host_gc_vholds(struct nlm_host *);
 252 static bool_t nlm_host_has_srv_locks(struct nlm_host *);
 253 static bool_t nlm_host_has_cli_locks(struct nlm_host *);
 254 static bool_t nlm_host_has_locks(struct nlm_host *);
 255 
 256 /*
 257  * NLM vhold functions
 258  */
 259 static int nlm_vhold_ctor(void *, void *, int);
 260 static void nlm_vhold_dtor(void *, void *);
 261 static void nlm_vhold_destroy(struct nlm_host *,
 262     struct nlm_vhold *);
 263 static bool_t nlm_vhold_busy(struct nlm_host *, struct nlm_vhold *);
 264 static void nlm_vhold_clean(struct nlm_vhold *, int);
 265 
 266 /*
 267  * NLM client/server sleeping locks/share reservation functions
 268  */
 269 struct nlm_slreq *nlm_slreq_find_locked(struct nlm_host *,
 270     struct nlm_vhold *, struct flock64 *);
 271 static struct nlm_shres *nlm_shres_create_item(struct shrlock *, vnode_t *);
 272 static void nlm_shres_destroy_item(struct nlm_shres *);
 273 static bool_t nlm_shres_equal(struct shrlock *, struct shrlock *);
 274 
 275 /*
 276  * NLM initialization functions.
 277  */
 278 void
 279 nlm_init(void)
 280 {
 281         nlm_hosts_cache = kmem_cache_create("nlm_host_cache",
 282             sizeof (struct nlm_host), 0, nlm_host_ctor, nlm_host_dtor,
 283             nlm_kmem_reclaim, NULL, NULL, 0);
 284 
 285         nlm_vhold_cache = kmem_cache_create("nlm_vhold_cache",
 286             sizeof (struct nlm_vhold), 0, nlm_vhold_ctor, nlm_vhold_dtor,
 287             NULL, NULL, NULL, 0);
 288 
 289         nlm_rpc_init();
 290         TAILQ_INIT(&nlm_zones_list);
 291 
 292         /* initialize sysids bitmap */
 293         bzero(nlm_sysid_bmap, sizeof (nlm_sysid_bmap));
 294         nlm_sysid_nidx = 1;
 295 
 296         /*
 297          * Reserv the sysid #0, because it's associated
 298          * with local locks only. Don't let to allocate
 299          * it for remote locks.
 300          */
 301         BT_SET(nlm_sysid_bmap, 0);
 302 }
 303 
/*
 * Add a zone's nlm_globals to the global nlm_zones_list
 * so nlm_kmem_reclaim() can reach the zone's GC thread.
 */
void
nlm_globals_register(struct nlm_globals *g)
{
	rw_enter(&lm_lck, RW_WRITER);
	TAILQ_INSERT_TAIL(&nlm_zones_list, g, nlm_link);
	rw_exit(&lm_lck);
}
 311 
/*
 * Remove a zone's nlm_globals from the global nlm_zones_list
 * (counterpart of nlm_globals_register()).
 */
void
nlm_globals_unregister(struct nlm_globals *g)
{
	rw_enter(&lm_lck, RW_WRITER);
	TAILQ_REMOVE(&nlm_zones_list, g, nlm_link);
	rw_exit(&lm_lck);
}
 319 
/*
 * Kmem reclaim callback registered on nlm_hosts_cache.
 * Under memory pressure, wake up the GC thread of every
 * zone so unused hosts/vholds are freed promptly.
 */
/* ARGSUSED */
static void
nlm_kmem_reclaim(void *cdrarg)
{
	struct nlm_globals *g;

	rw_enter(&lm_lck, RW_READER);
	TAILQ_FOREACH(g, &nlm_zones_list, nlm_link)
		cv_broadcast(&g->nlm_gc_sched_cv);

	rw_exit(&lm_lck);
}
 332 
 333 /*
 334  * NLM garbage collector thread (GC).
 335  *
 336  * NLM GC periodically checks whether there're any host objects
 337  * that can be cleaned up. It also releases stale vnodes that
 338  * live on the server side (under protection of vhold objects).
 339  *
 340  * NLM host objects are cleaned up from GC thread because
 341  * operations helping us to determine whether given host has
 342  * any locks can be quite expensive and it's not good to call
 343  * them every time the very last reference to the host is dropped.
 344  * Thus we use "lazy" approach for hosts cleanup.
 345  *
 346  * The work of GC is to release stale vnodes on the server side
 347  * and destroy hosts that haven't any locks and any activity for
 348  * some time (i.e. idle hosts).
 349  */
 350 static void
 351 nlm_gc(struct nlm_globals *g)
 352 {
 353         struct nlm_host *hostp;
 354         clock_t now, idle_period;
 355 
 356         idle_period = SEC_TO_TICK(g->cn_idle_tmo);
 357         mutex_enter(&g->lock);
 358         for (;;) {
 359                 /*
 360                  * GC thread can be explicitly scheduled from
 361                  * memory reclamation function.
 362                  */
 363                 (void) cv_timedwait(&g->nlm_gc_sched_cv, &g->lock,
 364                     ddi_get_lbolt() + idle_period);
 365 
 366                 /*
 367                  * NLM is shutting down, time to die.
 368                  */
 369                 if (g->run_status == NLM_ST_STOPPING)
 370                         break;
 371 
 372                 now = ddi_get_lbolt();
 373                 DTRACE_PROBE2(gc__start, struct nlm_globals *, g,
 374                     clock_t, now);
 375 
 376                 /*
 377                  * Find all obviously unused vholds and destroy them.
 378                  */
 379                 for (hostp = avl_first(&g->nlm_hosts_tree); hostp != NULL;
 380                     hostp = AVL_NEXT(&g->nlm_hosts_tree, hostp)) {
 381                         struct nlm_vhold *nvp;
 382 
 383                         mutex_enter(&hostp->nh_lock);
 384 
 385                         nvp = TAILQ_FIRST(&hostp->nh_vholds_list);
 386                         while (nvp != NULL) {
 387                                 struct nlm_vhold *new_nvp;
 388 
 389                                 new_nvp = TAILQ_NEXT(nvp, nv_link);
 390 
 391                                 /*
 392                                  * If these conditions are met, the vhold is
 393                                  * obviously unused and we will destroy it.  In
 394                                  * a case either v_filocks and/or v_shrlocks is
 395                                  * non-NULL the vhold might still be unused by
 396                                  * the host, but it is expensive to check that.
 397                                  * We defer such check until the host is idle.
 398                                  * The expensive check is done below without
 399                                  * the global lock held.
 400                                  */
 401                                 if (nvp->nv_refcnt == 0 &&
 402                                     nvp->nv_vp->v_filocks == NULL &&
 403                                     nvp->nv_vp->v_shrlocks == NULL) {
 404                                         nlm_vhold_destroy(hostp, nvp);
 405                                 }
 406 
 407                                 nvp = new_nvp;
 408                         }
 409 
 410                         mutex_exit(&hostp->nh_lock);
 411                 }
 412 
 413                 /*
 414                  * Handle all hosts that are unused at the moment
 415                  * until we meet one with idle timeout in future.
 416                  */
 417                 while ((hostp = TAILQ_FIRST(&g->nlm_idle_hosts)) != NULL) {
 418                         bool_t has_locks;
 419 
 420                         if (hostp->nh_idle_timeout > now)
 421                                 break;
 422 
 423                         /*
 424                          * Drop global lock while doing expensive work
 425                          * on this host. We'll re-check any conditions
 426                          * that might change after retaking the global
 427                          * lock.
 428                          */
 429                         mutex_exit(&g->lock);
 430                         mutex_enter(&hostp->nh_lock);
 431 
 432                         /*
 433                          * nlm_globals lock was dropped earlier because
 434                          * garbage collecting of vholds and checking whether
 435                          * host has any locks/shares are expensive operations.
 436                          */
 437                         nlm_host_gc_vholds(hostp);
 438                         has_locks = nlm_host_has_locks(hostp);
 439 
 440                         mutex_exit(&hostp->nh_lock);
 441                         mutex_enter(&g->lock);
 442 
 443                         /*
 444                          * While we were doing expensive operations
 445                          * outside of nlm_globals critical section,
 446                          * somebody could take the host and remove it
 447                          * from the idle list.  Whether its been
 448                          * reinserted or not, our information about
 449                          * the host is outdated, and we should take no
 450                          * further action.
 451                          */
 452                         if ((hostp->nh_flags & NLM_NH_INIDLE) == 0 ||
 453                             hostp->nh_idle_timeout > now)
 454                                 continue;
 455 
 456                         /*
 457                          * If the host has locks we have to renew the
 458                          * host's timeout and put it at the end of LRU
 459                          * list.
 460                          */
 461                         if (has_locks) {
 462                                 TAILQ_REMOVE(&g->nlm_idle_hosts,
 463                                     hostp, nh_link);
 464                                 hostp->nh_idle_timeout = now + idle_period;
 465                                 TAILQ_INSERT_TAIL(&g->nlm_idle_hosts,
 466                                     hostp, nh_link);
 467                                 continue;
 468                         }
 469 
 470                         /*
 471                          * We're here if all the following conditions hold:
 472                          * 1) Host hasn't any locks or share reservations
 473                          * 2) Host is unused
 474                          * 3) Host wasn't touched by anyone at least for
 475                          *    g->cn_idle_tmo seconds.
 476                          *
 477                          * So, now we can destroy it.
 478                          */
 479                         nlm_host_unregister(g, hostp);
 480                         mutex_exit(&g->lock);
 481 
 482                         nlm_host_unmonitor(g, hostp);
 483                         nlm_host_destroy(hostp);
 484                         mutex_enter(&g->lock);
 485                         if (g->run_status == NLM_ST_STOPPING)
 486                                 break;
 487 
 488                 }
 489 
 490                 DTRACE_PROBE(gc__end);
 491         }
 492 
 493         DTRACE_PROBE1(gc__exit, struct nlm_globals *, g);
 494 
 495         /* Let others know that GC has died */
 496         g->nlm_gc_thread = NULL;
 497         mutex_exit(&g->lock);
 498 
 499         cv_broadcast(&g->nlm_gc_finish_cv);
 500         zthread_exit();
 501 }
 502 
 503 /*
 504  * Thread reclaim locks/shares acquired by the client side
 505  * on the given server represented by hostp.
 506  */
 507 static void
 508 nlm_reclaimer(struct nlm_host *hostp)
 509 {
 510         struct nlm_globals *g;
 511 
 512         mutex_enter(&hostp->nh_lock);
 513         hostp->nh_reclaimer = curthread;
 514         mutex_exit(&hostp->nh_lock);
 515 
 516         g = zone_getspecific(nlm_zone_key, curzone);
 517         nlm_reclaim_client(g, hostp);
 518 
 519         mutex_enter(&hostp->nh_lock);
 520         hostp->nh_flags &= ~NLM_NH_RECLAIM;
 521         hostp->nh_reclaimer = NULL;
 522         cv_broadcast(&hostp->nh_recl_cv);
 523         mutex_exit(&hostp->nh_lock);
 524 
 525         /*
 526          * Host was explicitly referenced before
 527          * nlm_reclaim() was called, release it
 528          * here.
 529          */
 530         nlm_host_release(g, hostp);
 531         zthread_exit();
 532 }
 533 
 534 /*
 535  * Copy a struct netobj.  (see xdr.h)
 536  */
 537 void
 538 nlm_copy_netobj(struct netobj *dst, struct netobj *src)
 539 {
 540         dst->n_len = src->n_len;
 541         dst->n_bytes = kmem_alloc(src->n_len, KM_SLEEP);
 542         bcopy(src->n_bytes, dst->n_bytes, src->n_len);
 543 }
 544 
 545 /*
 546  * An NLM specificw replacement for clnt_call().
 547  * nlm_clnt_call() is used by all RPC functions generated
 548  * from nlm_prot.x specification. The function is aware
 549  * about some pitfalls of NLM RPC procedures and has a logic
 550  * that handles them properly.
 551  */
 552 enum clnt_stat
 553 nlm_clnt_call(CLIENT *clnt, rpcproc_t procnum, xdrproc_t xdr_args,
 554     caddr_t argsp, xdrproc_t xdr_result, caddr_t resultp, struct timeval wait)
 555 {
 556         k_sigset_t oldmask;
 557         enum clnt_stat stat;
 558         bool_t sig_blocked = FALSE;
 559 
 560         /*
 561          * If NLM RPC procnum is one of the NLM _RES procedures
 562          * that are used to reply to asynchronous NLM RPC
 563          * (MSG calls), explicitly set RPC timeout to zero.
 564          * Client doesn't send a reply to RES procedures, so
 565          * we don't need to wait anything.
 566          *
 567          * NOTE: we ignore NLM4_*_RES procnums because they are
 568          * equal to NLM_*_RES numbers.
 569          */
 570         if (procnum >= NLM_TEST_RES && procnum <= NLM_GRANTED_RES)
 571                 wait = nlm_rpctv_zero;
 572 
 573         /*
 574          * We need to block signals in case of NLM_CANCEL RPC
 575          * in order to prevent interruption of network RPC
 576          * calls.
 577          */
 578         if (procnum == NLM_CANCEL) {
 579                 k_sigset_t newmask;
 580 
 581                 sigfillset(&newmask);
 582                 sigreplace(&newmask, &oldmask);
 583                 sig_blocked = TRUE;
 584         }
 585 
 586         stat = clnt_call(clnt, procnum, xdr_args,
 587             argsp, xdr_result, resultp, wait);
 588 
 589         /*
 590          * Restore signal mask back if signals were blocked
 591          */
 592         if (sig_blocked)
 593                 sigreplace(&oldmask, (k_sigset_t *)NULL);
 594 
 595         return (stat);
 596 }
 597 
 598 /*
 599  * Suspend NLM client/server in the given zone.
 600  *
 601  * During suspend operation we mark those hosts
 602  * that have any locks with NLM_NH_SUSPEND flags,
 603  * so that they can be checked later, when resume
 604  * operation occurs.
 605  */
 606 static void
 607 nlm_suspend_zone(struct nlm_globals *g)
 608 {
 609         struct nlm_host *hostp;
 610         struct nlm_host_list all_hosts;
 611 
 612         /*
 613          * Note that while we're doing suspend, GC thread is active
 614          * and it can destroy some hosts while we're walking through
 615          * the hosts tree. To prevent that and make suspend logic
 616          * a bit more simple we put all hosts to local "all_hosts"
 617          * list and increment reference counter of each host.
 618          * This guaranties that no hosts will be released while
 619          * we're doing suspend.
 620          * NOTE: reference of each host must be dropped during
 621          * resume operation.
 622          */
 623         TAILQ_INIT(&all_hosts);
 624         mutex_enter(&g->lock);
 625         for (hostp = avl_first(&g->nlm_hosts_tree); hostp != NULL;
 626             hostp = AVL_NEXT(&g->nlm_hosts_tree, hostp)) {
 627                 /*
 628                  * If host is idle, remove it from idle list and
 629                  * clear idle flag. That is done to prevent GC
 630                  * from touching this host.
 631                  */
 632                 if (hostp->nh_flags & NLM_NH_INIDLE) {
 633                         TAILQ_REMOVE(&g->nlm_idle_hosts, hostp, nh_link);
 634                         hostp->nh_flags &= ~NLM_NH_INIDLE;
 635                 }
 636 
 637                 hostp->nh_refs++;
 638                 TAILQ_INSERT_TAIL(&all_hosts, hostp, nh_link);
 639         }
 640 
 641         /*
 642          * Now we can walk through all hosts on the system
 643          * with zone globals lock released. The fact the
 644          * we have taken a reference to each host guaranties
 645          * that no hosts can be destroyed during that process.
 646          */
 647         mutex_exit(&g->lock);
 648         while ((hostp = TAILQ_FIRST(&all_hosts)) != NULL) {
 649                 mutex_enter(&hostp->nh_lock);
 650                 if (nlm_host_has_locks(hostp))
 651                         hostp->nh_flags |= NLM_NH_SUSPEND;
 652 
 653                 mutex_exit(&hostp->nh_lock);
 654                 TAILQ_REMOVE(&all_hosts, hostp, nh_link);
 655         }
 656 }
 657 
 658 /*
 659  * Resume NLM hosts for the given zone.
 660  *
 661  * nlm_resume_zone() is called after hosts were suspended
 662  * (see nlm_suspend_zone) and its main purpose to check
 663  * whether remote locks owned by hosts are still in consistent
 664  * state. If they aren't, resume function tries to reclaim
 665  * locks (for client side hosts) and clean locks (for
 666  * server side hosts).
 667  */
 668 static void
 669 nlm_resume_zone(struct nlm_globals *g)
 670 {
 671         struct nlm_host *hostp, *h_next;
 672 
 673         mutex_enter(&g->lock);
 674         hostp = avl_first(&g->nlm_hosts_tree);
 675 
 676         /*
 677          * In nlm_suspend_zone() the reference counter of each
 678          * host was incremented, so we can safely iterate through
 679          * all hosts without worrying that any host we touch will
 680          * be removed at the moment.
 681          */
 682         while (hostp != NULL) {
 683                 struct nlm_nsm nsm;
 684                 enum clnt_stat stat;
 685                 int32_t sm_state;
 686                 int error;
 687                 bool_t resume_failed = FALSE;
 688 
 689                 h_next = AVL_NEXT(&g->nlm_hosts_tree, hostp);
 690                 mutex_exit(&g->lock);
 691 
 692                 DTRACE_PROBE1(resume__host, struct nlm_host *, hostp);
 693 
 694                 /*
 695                  * Suspend operation marked that the host doesn't
 696                  * have any locks. Skip it.
 697                  */
 698                 if (!(hostp->nh_flags & NLM_NH_SUSPEND))
 699                         goto cycle_end;
 700 
 701                 error = nlm_nsm_init(&nsm, &hostp->nh_knc, &hostp->nh_addr);
 702                 if (error != 0) {
 703                         NLM_ERR("Resume: Failed to contact to NSM of host %s "
 704                             "[error=%d]\n", hostp->nh_name, error);
 705                         resume_failed = TRUE;
 706                         goto cycle_end;
 707                 }
 708 
 709                 stat = nlm_nsm_stat(&nsm, &sm_state);
 710                 if (stat != RPC_SUCCESS) {
 711                         NLM_ERR("Resume: Failed to call SM_STAT operation for "
 712                             "host %s [stat=%d]\n", hostp->nh_name, stat);
 713                         resume_failed = TRUE;
 714                         nlm_nsm_fini(&nsm);
 715                         goto cycle_end;
 716                 }
 717 
 718                 if (sm_state != hostp->nh_state) {
 719                         /*
 720                          * Current SM state of the host isn't equal
 721                          * to the one host had when it was suspended.
 722                          * Probably it was rebooted. Try to reclaim
 723                          * locks if the host has any on its client side.
 724                          * Also try to clean up its server side locks
 725                          * (if the host has any).
 726                          */
 727                         nlm_host_notify_client(hostp, sm_state);
 728                         nlm_host_notify_server(hostp, sm_state);
 729                 }
 730 
 731                 nlm_nsm_fini(&nsm);
 732 
 733 cycle_end:
 734                 if (resume_failed) {
 735                         /*
 736                          * Resume failed for the given host.
 737                          * Just clean up all resources it owns.
 738                          */
 739                         nlm_host_notify_server(hostp, 0);
 740                         nlm_client_cancel_all(g, hostp);
 741                 }
 742 
 743                 hostp->nh_flags &= ~NLM_NH_SUSPEND;
 744                 nlm_host_release(g, hostp);
 745                 hostp = h_next;
 746                 mutex_enter(&g->lock);
 747         }
 748 
 749         mutex_exit(&g->lock);
 750 }
 751 
 752 /*
 753  * NLM functions responsible for operations on NSM handle.
 754  */
 755 
 756 /*
 757  * Initialize knetconfig that is used for communication
 758  * with local statd via loopback interface.
 759  */
 760 static int
 761 nlm_init_local_knc(struct knetconfig *knc)
 762 {
 763         int error;
 764         vnode_t *vp;
 765 
 766         bzero(knc, sizeof (*knc));
 767         error = lookupname("/dev/tcp", UIO_SYSSPACE,
 768             FOLLOW, NULLVPP, &vp);
 769         if (error != 0)
 770                 return (error);
 771 
 772         knc->knc_semantics = NC_TPI_COTS;
 773         knc->knc_protofmly = NC_INET;
 774         knc->knc_proto = NC_TCP;
 775         knc->knc_rdev = vp->v_rdev;
 776         VN_RELE(vp);
 777 
 778 
 779         return (0);
 780 }
 781 
 782 /*
 783  * Initialize NSM handle that will be used to talk
 784  * to local statd via loopback interface.
 785  */
 786 static int
 787 nlm_nsm_init_local(struct nlm_nsm *nsm)
 788 {
 789         int error;
 790         struct knetconfig knc;
 791         struct sockaddr_in sin;
 792         struct netbuf nb;
 793 
 794         error = nlm_init_local_knc(&knc);
 795         if (error != 0)
 796                 return (error);
 797 
 798         bzero(&sin, sizeof (sin));
 799         sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
 800         sin.sin_family = AF_INET;
 801 
 802         nb.buf = (char *)&sin;
 803         nb.len = nb.maxlen = sizeof (sin);
 804 
 805         return (nlm_nsm_init(nsm, &knc, &nb));
 806 }
 807 
 808 /*
 809  * Initialize NSM handle used for talking to statd
 810  */
 811 static int
 812 nlm_nsm_init(struct nlm_nsm *nsm, struct knetconfig *knc, struct netbuf *nb)
 813 {
 814         enum clnt_stat stat;
 815         int error, retries;
 816 
 817         bzero(nsm, sizeof (*nsm));
 818         nsm->ns_knc = *knc;
 819         nlm_copy_netbuf(&nsm->ns_addr, nb);
 820 
 821         /*
 822          * Try several times to get the port of statd service,
 823          * If rpcbind_getaddr returns  RPC_PROGNOTREGISTERED,
 824          * retry an attempt, but wait for NLM_NSM_RPCBIND_TIMEOUT
 825          * seconds berofore.
 826          */
 827         for (retries = 0; retries < NLM_NSM_RPCBIND_RETRIES; retries++) {
 828                 stat = rpcbind_getaddr(&nsm->ns_knc, SM_PROG,
 829                     SM_VERS, &nsm->ns_addr);
 830                 if (stat != RPC_SUCCESS) {
 831                         if (stat == RPC_PROGNOTREGISTERED) {
 832                                 delay(SEC_TO_TICK(NLM_NSM_RPCBIND_TIMEOUT));
 833                                 continue;
 834                         }
 835                 }
 836 
 837                 break;
 838         }
 839 
 840         if (stat != RPC_SUCCESS) {
 841                 DTRACE_PROBE2(rpcbind__error, enum clnt_stat, stat,
 842                     int, retries);
 843                 error = ENOENT;
 844                 goto error;
 845         }
 846 
 847         /*
 848          * Create an RPC handle that'll be used for communication with local
 849          * statd using the status monitor protocol.
 850          */
 851         error = clnt_tli_kcreate(&nsm->ns_knc, &nsm->ns_addr, SM_PROG, SM_VERS,
 852             0, NLM_RPC_RETRIES, zone_kcred(), &nsm->ns_handle);
 853         if (error != 0)
 854                 goto error;
 855 
 856         /*
 857          * Create an RPC handle that'll be used for communication with the
 858          * local statd using the address registration protocol.
 859          */
 860         error = clnt_tli_kcreate(&nsm->ns_knc, &nsm->ns_addr, NSM_ADDR_PROGRAM,
 861             NSM_ADDR_V1, 0, NLM_RPC_RETRIES, zone_kcred(),
 862             &nsm->ns_addr_handle);
 863         if (error != 0)
 864                 goto error;
 865 
 866         mutex_init(&nsm->ns_lock, NULL, MUTEX_DEFAULT, NULL);
 867         return (0);
 868 
 869 error:
 870         kmem_free(nsm->ns_addr.buf, nsm->ns_addr.maxlen);
 871         if (nsm->ns_handle) {
 872                 ASSERT(nsm->ns_handle->cl_auth != NULL);
 873                 auth_destroy(nsm->ns_handle->cl_auth);
 874                 CLNT_DESTROY(nsm->ns_handle);
 875         }
 876 
 877         return (error);
 878 }
 879 
 880 static void
 881 nlm_nsm_fini(struct nlm_nsm *nsm)
 882 {
 883         kmem_free(nsm->ns_addr.buf, nsm->ns_addr.maxlen);
 884         if (nsm->ns_addr_handle->cl_auth != NULL)
 885                 auth_destroy(nsm->ns_addr_handle->cl_auth);
 886         CLNT_DESTROY(nsm->ns_addr_handle);
 887         nsm->ns_addr_handle = NULL;
 888         if (nsm->ns_handle->cl_auth != NULL)
 889                 auth_destroy(nsm->ns_handle->cl_auth);
 890         CLNT_DESTROY(nsm->ns_handle);
 891         nsm->ns_handle = NULL;
 892         mutex_destroy(&nsm->ns_lock);
 893 }
 894 
 895 static enum clnt_stat
 896 nlm_nsm_simu_crash(struct nlm_nsm *nsm)
 897 {
 898         enum clnt_stat stat;
 899 
 900         mutex_enter(&nsm->ns_lock);
 901         nlm_nsm_clnt_init(nsm->ns_handle, nsm);
 902         stat = sm_simu_crash_1(NULL, NULL, nsm->ns_handle);
 903         mutex_exit(&nsm->ns_lock);
 904 
 905         return (stat);
 906 }
 907 
 908 static enum clnt_stat
 909 nlm_nsm_stat(struct nlm_nsm *nsm, int32_t *out_stat)
 910 {
 911         struct sm_name args;
 912         struct sm_stat_res res;
 913         enum clnt_stat stat;
 914 
 915         args.mon_name = uts_nodename();
 916         bzero(&res, sizeof (res));
 917 
 918         mutex_enter(&nsm->ns_lock);
 919         nlm_nsm_clnt_init(nsm->ns_handle, nsm);
 920         stat = sm_stat_1(&args, &res, nsm->ns_handle);
 921         mutex_exit(&nsm->ns_lock);
 922 
 923         if (stat == RPC_SUCCESS)
 924                 *out_stat = res.state;
 925 
 926         return (stat);
 927 }
 928 
 929 static enum clnt_stat
 930 nlm_nsm_mon(struct nlm_nsm *nsm, char *hostname, uint16_t priv)
 931 {
 932         struct mon args;
 933         struct sm_stat_res res;
 934         enum clnt_stat stat;
 935 
 936         bzero(&args, sizeof (args));
 937         bzero(&res, sizeof (res));
 938 
 939         args.mon_id.mon_name = hostname;
 940         args.mon_id.my_id.my_name = uts_nodename();
 941         args.mon_id.my_id.my_prog = NLM_PROG;
 942         args.mon_id.my_id.my_vers = NLM_SM;
 943         args.mon_id.my_id.my_proc = NLM_SM_NOTIFY1;
 944         bcopy(&priv, args.priv, sizeof (priv));
 945 
 946         mutex_enter(&nsm->ns_lock);
 947         nlm_nsm_clnt_init(nsm->ns_handle, nsm);
 948         stat = sm_mon_1(&args, &res, nsm->ns_handle);
 949         mutex_exit(&nsm->ns_lock);
 950 
 951         return (stat);
 952 }
 953 
 954 static enum clnt_stat
 955 nlm_nsm_unmon(struct nlm_nsm *nsm, char *hostname)
 956 {
 957         struct mon_id args;
 958         struct sm_stat res;
 959         enum clnt_stat stat;
 960 
 961         bzero(&args, sizeof (args));
 962         bzero(&res, sizeof (res));
 963 
 964         args.mon_name = hostname;
 965         args.my_id.my_name = uts_nodename();
 966         args.my_id.my_prog = NLM_PROG;
 967         args.my_id.my_vers = NLM_SM;
 968         args.my_id.my_proc = NLM_SM_NOTIFY1;
 969 
 970         mutex_enter(&nsm->ns_lock);
 971         nlm_nsm_clnt_init(nsm->ns_handle, nsm);
 972         stat = sm_unmon_1(&args, &res, nsm->ns_handle);
 973         mutex_exit(&nsm->ns_lock);
 974 
 975         return (stat);
 976 }
 977 
 978 static enum clnt_stat
 979 nlm_nsmaddr_reg(struct nlm_nsm *nsm, char *name, int family, netobj *address)
 980 {
 981         struct reg1args args = { 0 };
 982         struct reg1res res = { 0 };
 983         enum clnt_stat stat;
 984 
 985         args.family = family;
 986         args.name = name;
 987         args.address = *address;
 988 
 989         mutex_enter(&nsm->ns_lock);
 990         nlm_nsm_clnt_init(nsm->ns_addr_handle, nsm);
 991         stat = nsmaddrproc1_reg_1(&args, &res, nsm->ns_addr_handle);
 992         mutex_exit(&nsm->ns_lock);
 993 
 994         return (stat);
 995 }
 996 
 997 /*
 998  * Get NLM vhold object corresponding to vnode "vp".
 999  * If no such object was found, create a new one.
1000  *
1001  * The purpose of this function is to associate vhold
1002  * object with given vnode, so that:
1003  * 1) vnode is hold (VN_HOLD) while vhold object is alive.
1004  * 2) host has a track of all vnodes it touched by lock
1005  *    or share operations. These vnodes are accessible
1006  *    via collection of vhold objects.
1007  */
struct nlm_vhold *
nlm_vhold_get(struct nlm_host *hostp, vnode_t *vp)
{
        struct nlm_vhold *nvp, *new_nvp = NULL;

        mutex_enter(&hostp->nh_lock);
        nvp = nlm_vhold_find_locked(hostp, vp);
        if (nvp != NULL)
                goto out;

        /* nlm_vhold wasn't found, then create a new one */
        mutex_exit(&hostp->nh_lock);
        /* KM_SLEEP allocation must not happen under nh_lock. */
        new_nvp = kmem_cache_alloc(nlm_vhold_cache, KM_SLEEP);

        /*
         * Check if another thread has already
         * created the same nlm_vhold.
         */
        mutex_enter(&hostp->nh_lock);
        nvp = nlm_vhold_find_locked(hostp, vp);
        if (nvp == NULL) {
                /* We won the race: install the preallocated vhold. */
                nvp = new_nvp;
                new_nvp = NULL;

                /* New vhold starts with one reference and holds the vnode. */
                TAILQ_INIT(&nvp->nv_slreqs);
                nvp->nv_vp = vp;
                nvp->nv_refcnt = 1;
                VN_HOLD(nvp->nv_vp);

                /* Publish it via both the per-host hash and the list. */
                VERIFY(mod_hash_insert(hostp->nh_vholds_by_vp,
                    (mod_hash_key_t)vp, (mod_hash_val_t)nvp) == 0);
                TAILQ_INSERT_TAIL(&hostp->nh_vholds_list, nvp, nv_link);
        }

out:
        mutex_exit(&hostp->nh_lock);
        /* Lost the race (or found on first try): discard the spare. */
        if (new_nvp != NULL)
                kmem_cache_free(nlm_vhold_cache, new_nvp);

        return (nvp);
}
1049 
1050 /*
1051  * Drop a reference to vhold object nvp.
1052  */
void
nlm_vhold_release(struct nlm_host *hostp, struct nlm_vhold *nvp)
{
        /* Releasing a NULL vhold is permitted for caller convenience. */
        if (nvp == NULL)
                return;

        mutex_enter(&hostp->nh_lock);
        ASSERT(nvp->nv_refcnt > 0);
        nvp->nv_refcnt--;

        /*
         * If these conditions are met, the vhold is obviously unused and we
         * will destroy it.  In a case either v_filocks and/or v_shrlocks is
         * non-NULL the vhold might still be unused by the host, but it is
         * expensive to check that.  We defer such check until the host is
         * idle.  The expensive check is done in the NLM garbage collector.
         */
        if (nvp->nv_refcnt == 0 &&
            nvp->nv_vp->v_filocks == NULL &&
            nvp->nv_vp->v_shrlocks == NULL) {
                nlm_vhold_destroy(hostp, nvp);
        }

        mutex_exit(&hostp->nh_lock);
}
1078 
1079 /*
1080  * Clean all locks and share reservations on the
1081  * given vhold object that were acquired by the
1082  * given sysid
1083  */
1084 static void
1085 nlm_vhold_clean(struct nlm_vhold *nvp, int sysid)
1086 {
1087         cleanlocks(nvp->nv_vp, IGN_PID, sysid);
1088         cleanshares_by_sysid(nvp->nv_vp, sysid);
1089 }
1090 
static void
nlm_vhold_destroy(struct nlm_host *hostp, struct nlm_vhold *nvp)
{
        ASSERT(MUTEX_HELD(&hostp->nh_lock));

        /* Only unreferenced vholds with no sleeping requests may die. */
        ASSERT(nvp->nv_refcnt == 0);
        ASSERT(TAILQ_EMPTY(&nvp->nv_slreqs));

        /* mod_hash_remove() returns the stored value through &nvp. */
        VERIFY(mod_hash_remove(hostp->nh_vholds_by_vp,
            (mod_hash_key_t)nvp->nv_vp,
            (mod_hash_val_t)&nvp) == 0);

        TAILQ_REMOVE(&hostp->nh_vholds_list, nvp, nv_link);
        /* Drop the vnode hold taken in nlm_vhold_get(). */
        VN_RELE(nvp->nv_vp);
        nvp->nv_vp = NULL;

        kmem_cache_free(nlm_vhold_cache, nvp);
}
1109 
1110 /*
1111  * Return TRUE if the given vhold is busy.
1112  * Vhold object is considered to be "busy" when
1113  * all the following conditions hold:
1114  * 1) No one uses it at the moment;
1115  * 2) It hasn't any locks;
1116  * 3) It hasn't any share reservations;
1117  */
1118 static bool_t
1119 nlm_vhold_busy(struct nlm_host *hostp, struct nlm_vhold *nvp)
1120 {
1121         vnode_t *vp;
1122         int sysid;
1123 
1124         ASSERT(MUTEX_HELD(&hostp->nh_lock));
1125 
1126         if (nvp->nv_refcnt > 0)
1127                 return (TRUE);
1128 
1129         vp = nvp->nv_vp;
1130         sysid = hostp->nh_sysid;
1131         if (flk_has_remote_locks_for_sysid(vp, sysid) ||
1132             shr_has_remote_shares(vp, sysid))
1133                 return (TRUE);
1134 
1135         return (FALSE);
1136 }
1137 
1138 /* ARGSUSED */
1139 static int
1140 nlm_vhold_ctor(void *datap, void *cdrarg, int kmflags)
1141 {
1142         struct nlm_vhold *nvp = (struct nlm_vhold *)datap;
1143 
1144         bzero(nvp, sizeof (*nvp));
1145         return (0);
1146 }
1147 
1148 /* ARGSUSED */
1149 static void
1150 nlm_vhold_dtor(void *datap, void *cdrarg)
1151 {
1152         struct nlm_vhold *nvp = (struct nlm_vhold *)datap;
1153 
1154         ASSERT(nvp->nv_refcnt == 0);
1155         ASSERT(TAILQ_EMPTY(&nvp->nv_slreqs));
1156         ASSERT(nvp->nv_vp == NULL);
1157 }
1158 
1159 struct nlm_vhold *
1160 nlm_vhold_find_locked(struct nlm_host *hostp, const vnode_t *vp)
1161 {
1162         struct nlm_vhold *nvp = NULL;
1163 
1164         ASSERT(MUTEX_HELD(&hostp->nh_lock));
1165         (void) mod_hash_find(hostp->nh_vholds_by_vp,
1166             (mod_hash_key_t)vp,
1167             (mod_hash_val_t)&nvp);
1168 
1169         if (nvp != NULL)
1170                 nvp->nv_refcnt++;
1171 
1172         return (nvp);
1173 }
1174 
1175 /*
1176  * NLM host functions
1177  */
1178 static void
1179 nlm_copy_netbuf(struct netbuf *dst, struct netbuf *src)
1180 {
1181         ASSERT(src->len <= src->maxlen);
1182 
1183         dst->maxlen = src->maxlen;
1184         dst->len = src->len;
1185         dst->buf = kmem_zalloc(src->maxlen, KM_SLEEP);
1186         bcopy(src->buf, dst->buf, src->len);
1187 }
1188 
1189 /* ARGSUSED */
1190 static int
1191 nlm_host_ctor(void *datap, void *cdrarg, int kmflags)
1192 {
1193         struct nlm_host *hostp = (struct nlm_host *)datap;
1194 
1195         bzero(hostp, sizeof (*hostp));
1196         return (0);
1197 }
1198 
1199 /* ARGSUSED */
1200 static void
1201 nlm_host_dtor(void *datap, void *cdrarg)
1202 {
1203         struct nlm_host *hostp = (struct nlm_host *)datap;
1204         ASSERT(hostp->nh_refs == 0);
1205 }
1206 
static void
nlm_host_unregister(struct nlm_globals *g, struct nlm_host *hostp)
{
        /* Only unreferenced hosts on the idle list may be unregistered. */
        ASSERT(hostp->nh_refs == 0);
        ASSERT(hostp->nh_flags & NLM_NH_INIDLE);

        /* Remove the host from all three lookup structures in lock-step. */
        avl_remove(&g->nlm_hosts_tree, hostp);
        VERIFY(mod_hash_remove(g->nlm_hosts_hash,
            (mod_hash_key_t)(uintptr_t)hostp->nh_sysid,
            (mod_hash_val_t)&hostp) == 0);
        TAILQ_REMOVE(&g->nlm_idle_hosts, hostp, nh_link);
        hostp->nh_flags &= ~NLM_NH_INIDLE;
}
1220 
1221 /*
1222  * Free resources used by a host. This is called after the reference
1223  * count has reached zero so it doesn't need to worry about locks.
1224  */
static void
nlm_host_destroy(struct nlm_host *hostp)
{
        ASSERT(hostp->nh_name != NULL);
        ASSERT(hostp->nh_netid != NULL);
        ASSERT(TAILQ_EMPTY(&hostp->nh_vholds_list));

        /* Free the strings and address buffer owned by the host. */
        strfree(hostp->nh_name);
        strfree(hostp->nh_netid);
        kmem_free(hostp->nh_addr.buf, hostp->nh_addr.maxlen);

        /* Return the sysid, if one was ever allocated. */
        if (hostp->nh_sysid != LM_NOSYSID)
                nlm_sysid_free(hostp->nh_sysid);

        nlm_rpc_cache_destroy(hostp);

        /*
         * NOTE(review): duplicate of the assert above — presumably
         * re-checking after nlm_rpc_cache_destroy(); confirm intent.
         */
        ASSERT(TAILQ_EMPTY(&hostp->nh_vholds_list));
        mod_hash_destroy_ptrhash(hostp->nh_vholds_by_vp);

        mutex_destroy(&hostp->nh_lock);
        cv_destroy(&hostp->nh_rpcb_cv);
        cv_destroy(&hostp->nh_recl_cv);

        kmem_cache_free(nlm_hosts_cache, hostp);
}
1250 
1251 /*
1252  * Cleanup SERVER-side state after a client restarts,
1253  * or becomes unresponsive, or whatever.
1254  *
1255  * We unlock any active locks owned by the host.
1256  * When rpc.lockd is shutting down,
1257  * this function is called with newstate set to zero
1258  * which allows us to cancel any pending async locks
1259  * and clear the locking state.
1260  *
1261  * When "state" is 0, we don't update host's state,
1262  * but cleanup all remote locks on the host.
1263  * It's useful to call this function for resources
1264  * cleanup.
1265  */
void
nlm_host_notify_server(struct nlm_host *hostp, int32_t state)
{
        struct nlm_vhold *nvp;
        struct nlm_slreq *slr;
        struct nlm_slreq_list slreqs2free;

        TAILQ_INIT(&slreqs2free);
        mutex_enter(&hostp->nh_lock);
        if (state != 0)
                hostp->nh_state = state;

        TAILQ_FOREACH(nvp, &hostp->nh_vholds_list, nv_link) {

                /* cleanup sleeping requests at first */
                while ((slr = TAILQ_FIRST(&nvp->nv_slreqs)) != NULL) {
                        TAILQ_REMOVE(&nvp->nv_slreqs, slr, nsr_link);

                        /*
                         * Instead of freeing cancelled sleeping request
                         * here, we add it to the linked list created
                         * on the stack in order to do all frees outside
                         * the critical section.
                         */
                        TAILQ_INSERT_TAIL(&slreqs2free, slr, nsr_link);
                }

                /*
                 * Pin the vhold with an extra reference so it survives
                 * while nh_lock is dropped for the cleanup call; the
                 * pin keeps the list linkage valid for TAILQ_FOREACH.
                 */
                nvp->nv_refcnt++;
                mutex_exit(&hostp->nh_lock);

                nlm_vhold_clean(nvp, hostp->nh_sysid);

                mutex_enter(&hostp->nh_lock);
                nvp->nv_refcnt--;
        }

        mutex_exit(&hostp->nh_lock);
        /* Now do the deferred frees without holding nh_lock. */
        while ((slr = TAILQ_FIRST(&slreqs2free)) != NULL) {
                TAILQ_REMOVE(&slreqs2free, slr, nsr_link);
                kmem_free(slr, sizeof (*slr));
        }
}
1308 
1309 /*
1310  * Cleanup CLIENT-side state after a server restarts,
1311  * or becomes unresponsive, or whatever.
1312  *
1313  * This is called by the local NFS statd when we receive a
1314  * host state change notification.  (also nlm_svc_stopping)
1315  *
1316  * Deal with a server restart.  If we are stopping the
1317  * NLM service, we'll have newstate == 0, and will just
1318  * cancel all our client-side lock requests.  Otherwise,
1319  * start the "recovery" process to reclaim any locks
1320  * we hold on this server.
1321  */
void
nlm_host_notify_client(struct nlm_host *hostp, int32_t state)
{
        mutex_enter(&hostp->nh_lock);
        hostp->nh_state = state;
        if (hostp->nh_flags & NLM_NH_RECLAIM) {
                /*
                 * Either host's state is up to date or
                 * host is already in recovery.
                 */
                mutex_exit(&hostp->nh_lock);
                return;
        }

        hostp->nh_flags |= NLM_NH_RECLAIM;

        /*
         * Host will be released by the recovery thread,
         * thus we need to increment refcount.
         */
        hostp->nh_refs++;
        mutex_exit(&hostp->nh_lock);

        /* Spawn the per-host recovery (lock reclaim) thread. */
        (void) zthread_create(NULL, 0, nlm_reclaimer,
            hostp, 0, minclsyspri);
}
1348 
1349 /*
1350  * The function is called when NLM client detects that
1351  * server has entered in grace period and client needs
1352  * to wait until reclamation process (if any) does
1353  * its job.
1354  */
int
nlm_host_wait_grace(struct nlm_host *hostp)
{
        struct nlm_globals *g;
        int error = 0;

        g = zone_getspecific(nlm_zone_key, curzone);
        mutex_enter(&hostp->nh_lock);

        /*
         * Wait until the NLM_NH_RECLAIM flag is cleared, waking up
         * every retrans_tmo seconds to re-check.  The reclaim thread
         * is expected to signal nh_recl_cv when it finishes.
         */
        do {
                int rc;

                rc = cv_timedwait_sig(&hostp->nh_recl_cv,
                    &hostp->nh_lock, ddi_get_lbolt() +
                    SEC_TO_TICK(g->retrans_tmo));

                /* rc == 0 means the wait was interrupted by a signal. */
                if (rc == 0) {
                        error = EINTR;
                        break;
                }
        } while (hostp->nh_flags & NLM_NH_RECLAIM);

        mutex_exit(&hostp->nh_lock);
        return (error);
}
1380 
1381 /*
1382  * Create a new NLM host.
1383  *
1384  * NOTE: The in-kernel RPC (kRPC) subsystem uses TLI/XTI,
1385  * which needs both a knetconfig and an address when creating
1386  * endpoints. Thus host object stores both knetconfig and
1387  * netid.
1388  */
1389 static struct nlm_host *
1390 nlm_host_create(char *name, const char *netid,
1391     struct knetconfig *knc, struct netbuf *naddr)
1392 {
1393         struct nlm_host *host;
1394 
1395         host = kmem_cache_alloc(nlm_hosts_cache, KM_SLEEP);
1396 
1397         mutex_init(&host->nh_lock, NULL, MUTEX_DEFAULT, NULL);
1398         cv_init(&host->nh_rpcb_cv, NULL, CV_DEFAULT, NULL);
1399         cv_init(&host->nh_recl_cv, NULL, CV_DEFAULT, NULL);
1400 
1401         host->nh_sysid = LM_NOSYSID;
1402         host->nh_refs = 1;
1403         host->nh_name = strdup(name);
1404         host->nh_netid = strdup(netid);
1405         host->nh_knc = *knc;
1406         nlm_copy_netbuf(&host->nh_addr, naddr);
1407 
1408         host->nh_state = 0;
1409         host->nh_rpcb_state = NRPCB_NEED_UPDATE;
1410         host->nh_flags = 0;
1411 
1412         host->nh_vholds_by_vp = mod_hash_create_ptrhash("nlm vholds hash",
1413             32, mod_hash_null_valdtor, sizeof (vnode_t));
1414 
1415         TAILQ_INIT(&host->nh_vholds_list);
1416         TAILQ_INIT(&host->nh_rpchc);
1417 
1418         return (host);
1419 }
1420 
1421 /*
1422  * Cancel all client side sleeping locks owned by given host.
1423  */
1424 void
1425 nlm_host_cancel_slocks(struct nlm_globals *g, struct nlm_host *hostp)
1426 {
1427         struct nlm_slock *nslp;
1428 
1429         mutex_enter(&g->lock);
1430         TAILQ_FOREACH(nslp, &g->nlm_slocks, nsl_link) {
1431                 if (nslp->nsl_host == hostp) {
1432                         nslp->nsl_state = NLM_SL_CANCELLED;
1433                         cv_broadcast(&nslp->nsl_cond);
1434                 }
1435         }
1436 
1437         mutex_exit(&g->lock);
1438 }
1439 
1440 /*
1441  * Garbage collect stale vhold objects.
1442  *
1443  * In other words check whether vnodes that are
1444  * held by vhold objects still have any locks
1445  * or shares or still in use. If they aren't,
1446  * just destroy them.
1447  */
1448 static void
1449 nlm_host_gc_vholds(struct nlm_host *hostp)
1450 {
1451         struct nlm_vhold *nvp;
1452 
1453         ASSERT(MUTEX_HELD(&hostp->nh_lock));
1454 
1455         nvp = TAILQ_FIRST(&hostp->nh_vholds_list);
1456         while (nvp != NULL) {
1457                 struct nlm_vhold *nvp_tmp;
1458 
1459                 if (nlm_vhold_busy(hostp, nvp)) {
1460                         nvp = TAILQ_NEXT(nvp, nv_link);
1461                         continue;
1462                 }
1463 
1464                 nvp_tmp = TAILQ_NEXT(nvp, nv_link);
1465                 nlm_vhold_destroy(hostp, nvp);
1466                 nvp = nvp_tmp;
1467         }
1468 }
1469 
1470 /*
1471  * Check whether the given host has any
1472  * server side locks or share reservations.
1473  */
1474 static bool_t
1475 nlm_host_has_srv_locks(struct nlm_host *hostp)
1476 {
1477         /*
1478          * It's cheap and simple: if server has
1479          * any locks/shares there must be vhold
1480          * object storing the affected vnode.
1481          *
1482          * NOTE: We don't need to check sleeping
1483          * locks on the server side, because if
1484          * server side sleeping lock is alive,
1485          * there must be a vhold object corresponding
1486          * to target vnode.
1487          */
1488         ASSERT(MUTEX_HELD(&hostp->nh_lock));
1489         if (!TAILQ_EMPTY(&hostp->nh_vholds_list))
1490                 return (TRUE);
1491 
1492         return (FALSE);
1493 }
1494 
1495 /*
1496  * Check whether the given host has any client side
1497  * locks or share reservations.
1498  */
1499 static bool_t
1500 nlm_host_has_cli_locks(struct nlm_host *hostp)
1501 {
1502         ASSERT(MUTEX_HELD(&hostp->nh_lock));
1503 
1504         /*
1505          * XXX: It's not the way I'd like to do the check,
1506          * because flk_sysid_has_locks() can be very
1507          * expensive by design. Unfortunatelly it iterates
1508          * through all locks on the system, doesn't matter
1509          * were they made on remote system via NLM or
1510          * on local system via reclock. To understand the
1511          * problem, consider that there're dozens of thousands
1512          * of locks that are made on some ZFS dataset. And there's
1513          * another dataset shared by NFS where NLM client had locks
1514          * some time ago, but doesn't have them now.
1515          * In this case flk_sysid_has_locks() will iterate
1516          * thrught dozens of thousands locks until it returns us
1517          * FALSE.
1518          * Oh, I hope that in shiny future somebody will make
1519          * local lock manager (os/flock.c) better, so that
1520          * it'd be more friedly to remote locks and
1521          * flk_sysid_has_locks() wouldn't be so expensive.
1522          */
1523         if (flk_sysid_has_locks(hostp->nh_sysid |
1524             LM_SYSID_CLIENT, FLK_QUERY_ACTIVE))
1525                 return (TRUE);
1526 
1527         /*
1528          * Check whether host has any share reservations
1529          * registered on the client side.
1530          */
1531         if (hostp->nh_shrlist != NULL)
1532                 return (TRUE);
1533 
1534         return (FALSE);
1535 }
1536 
1537 /*
1538  * Determine whether the given host owns any
1539  * locks or share reservations.
1540  */
1541 static bool_t
1542 nlm_host_has_locks(struct nlm_host *hostp)
1543 {
1544         if (nlm_host_has_srv_locks(hostp))
1545                 return (TRUE);
1546 
1547         return (nlm_host_has_cli_locks(hostp));
1548 }
1549 
1550 /*
1551  * This function compares only addresses of two netbufs
1552  * that belong to NC_TCP[6] or NC_UDP[6] protofamily.
1553  * Port part of netbuf is ignored.
1554  *
1555  * Return values:
1556  *  -1: nb1's address is "smaller" than nb2's
1557  *   0: addresses are equal
1558  *   1: nb1's address is "greater" than nb2's
1559  */
static int
nlm_netbuf_addrs_cmp(struct netbuf *nb1, struct netbuf *nb2)
{
        /* Overlay view of the raw buffer as a sockaddr of any family. */
        union nlm_addr {
                struct sockaddr sa;
                struct sockaddr_in sin;
                struct sockaddr_in6 sin6;
        } *na1, *na2;
        int res;

        /* LINTED E_BAD_PTR_CAST_ALIGN */
        na1 = (union nlm_addr *)nb1->buf;
        /* LINTED E_BAD_PTR_CAST_ALIGN */
        na2 = (union nlm_addr *)nb2->buf;

        /* Different families never compare equal; order by family number. */
        if (na1->sa.sa_family < na2->sa.sa_family)
                return (-1);
        if (na1->sa.sa_family > na2->sa.sa_family)
                return (1);

        /* Same family: compare the address bytes only (port is ignored). */
        switch (na1->sa.sa_family) {
        case AF_INET:
                res = memcmp(&na1->sin.sin_addr, &na2->sin.sin_addr,
                    sizeof (na1->sin.sin_addr));
                break;
        case AF_INET6:
                res = memcmp(&na1->sin6.sin6_addr, &na2->sin6.sin6_addr,
                    sizeof (na1->sin6.sin6_addr));
                break;
        default:
                /* Only AF_INET/AF_INET6 addresses are expected here. */
                VERIFY(0);
                return (0);
        }

        return (SIGN(res));
}
1596 
1597 /*
1598  * Compare two nlm hosts.
1599  * Return values:
1600  * -1: host1 is "smaller" than host2
1601  *  0: host1 is equal to host2
1602  *  1: host1 is "greater" than host2
1603  */
1604 int
1605 nlm_host_cmp(const void *p1, const void *p2)
1606 {
1607         struct nlm_host *h1 = (struct nlm_host *)p1;
1608         struct nlm_host *h2 = (struct nlm_host *)p2;
1609         int res;
1610 
1611         res = strcmp(h1->nh_netid, h2->nh_netid);
1612         if (res != 0)
1613                 return (SIGN(res));
1614 
1615         res = nlm_netbuf_addrs_cmp(&h1->nh_addr, &h2->nh_addr);
1616         return (res);
1617 }
1618 
1619 /*
1620  * Find the host specified by...  (see below)
1621  * If found, increment the ref count.
1622  */
static struct nlm_host *
nlm_host_find_locked(struct nlm_globals *g, const char *netid,
    struct netbuf *naddr, avl_index_t *wherep)
{
        struct nlm_host *hostp, key;
        avl_index_t pos;

        ASSERT(MUTEX_HELD(&g->lock));

        /*
         * Build a stack-local search key; nlm_host_cmp() only looks
         * at nh_netid and nh_addr, so no other fields are needed.
         */
        key.nh_netid = (char *)netid;
        key.nh_addr.buf = naddr->buf;
        key.nh_addr.len = naddr->len;
        key.nh_addr.maxlen = naddr->maxlen;

        hostp = avl_find(&g->nlm_hosts_tree, &key, &pos);

        if (hostp != NULL) {
                /*
                 * Host is inuse now. Remove it from idle
                 * hosts list if needed.
                 */
                if (hostp->nh_flags & NLM_NH_INIDLE) {
                        TAILQ_REMOVE(&g->nlm_idle_hosts, hostp, nh_link);
                        hostp->nh_flags &= ~NLM_NH_INIDLE;
                }

                hostp->nh_refs++;
        }
        /* Report the AVL insertion point for callers that may insert. */
        if (wherep != NULL)
                *wherep = pos;

        return (hostp);
}
1656 
1657 /*
1658  * Find NLM host for the given name and address.
1659  */
1660 struct nlm_host *
1661 nlm_host_find(struct nlm_globals *g, const char *netid,
1662     struct netbuf *addr)
1663 {
1664         struct nlm_host *hostp = NULL;
1665 
1666         mutex_enter(&g->lock);
1667         if (g->run_status != NLM_ST_UP)
1668                 goto out;
1669 
1670         hostp = nlm_host_find_locked(g, netid, addr, NULL);
1671 
1672 out:
1673         mutex_exit(&g->lock);
1674         return (hostp);
1675 }
1676 
1677 
1678 /*
1679  * Find or create an NLM host for the given name and address.
1680  *
1681  * The remote host is determined by all of: name, netid, address.
1682  * Note that the netid is whatever nlm_svc_add_ep() gave to
1683  * svc_tli_kcreate() for the service binding.  If any of these
1684  * are different, allocate a new host (new sysid).
1685  */
struct nlm_host *
nlm_host_findcreate(struct nlm_globals *g, char *name,
    const char *netid, struct netbuf *addr)
{
        int err;
        struct nlm_host *host, *newhost = NULL;
        struct knetconfig knc;
        avl_index_t where;

        mutex_enter(&g->lock);
        if (g->run_status != NLM_ST_UP) {
                mutex_exit(&g->lock);
                return (NULL);
        }

        /* Fast path: host already known. */
        host = nlm_host_find_locked(g, netid, addr, NULL);
        mutex_exit(&g->lock);
        if (host != NULL)
                return (host);

        err = nlm_knc_from_netid(netid, &knc);
        if (err != 0)
                return (NULL);
        /*
         * Do allocations (etc.) outside of mutex,
         * and then check again before inserting.
         */
        newhost = nlm_host_create(name, netid, &knc, addr);
        newhost->nh_sysid = nlm_sysid_alloc();
        if (newhost->nh_sysid == LM_NOSYSID)
                /* sysid space exhausted; host is still NULL here. */
                goto out;

        mutex_enter(&g->lock);
        host = nlm_host_find_locked(g, netid, addr, &where);
        if (host == NULL) {
                /* We won the race; publish the new host. */
                host = newhost;
                newhost = NULL;

                /*
                 * Insert host to the hosts AVL tree that is
                 * used to lookup by <netid, address> pair.
                 */
                avl_insert(&g->nlm_hosts_tree, host, where);

                /*
                 * Insert host to the hosts hash table that is
                 * used to lookup host by sysid.
                 */
                VERIFY(mod_hash_insert(g->nlm_hosts_hash,
                    (mod_hash_key_t)(uintptr_t)host->nh_sysid,
                    (mod_hash_val_t)host) == 0);
        }

        mutex_exit(&g->lock);

out:
        if (newhost != NULL) {
                /*
                 * We do not need the preallocated nlm_host
                 * so decrement the reference counter
                 * and destroy it.
                 */
                newhost->nh_refs--;
                nlm_host_destroy(newhost);
        }

        return (host);
}
1754 
1755 /*
1756  * Find the NLM host that matches the value of 'sysid'.
1757  * If found, return it with a new ref,
1758  * else return NULL.
1759  */
1760 struct nlm_host *
1761 nlm_host_find_by_sysid(struct nlm_globals *g, sysid_t sysid)
1762 {
1763         struct nlm_host *hostp = NULL;
1764 
1765         mutex_enter(&g->lock);
1766         if (g->run_status != NLM_ST_UP)
1767                 goto out;
1768 
1769         (void) mod_hash_find(g->nlm_hosts_hash,
1770             (mod_hash_key_t)(uintptr_t)sysid,
1771             (mod_hash_val_t)&hostp);
1772 
1773         if (hostp == NULL)
1774                 goto out;
1775 
1776         /*
1777          * Host is inuse now. Remove it
1778          * from idle hosts list if needed.
1779          */
1780         if (hostp->nh_flags & NLM_NH_INIDLE) {
1781                 TAILQ_REMOVE(&g->nlm_idle_hosts, hostp, nh_link);
1782                 hostp->nh_flags &= ~NLM_NH_INIDLE;
1783         }
1784 
1785         hostp->nh_refs++;
1786 
1787 out:
1788         mutex_exit(&g->lock);
1789         return (hostp);
1790 }
1791 
1792 /*
1793  * Release the given host.
1794  * I.e. drop a reference that was taken earlier by one of
1795  * the following functions: nlm_host_findcreate(), nlm_host_find(),
1796  * nlm_host_find_by_sysid().
1797  *
1798  * When the very last reference is dropped, host is moved to
1799  * so-called "idle state". All hosts that are in idle state
1800  * have an idle timeout. If timeout is expired, GC thread
 * checks whether hosts have any locks and if they haven't
 * any, it removes them.
1803  * NOTE: only unused hosts can be in idle state.
1804  */
static void
nlm_host_release_locked(struct nlm_globals *g, struct nlm_host *hostp)
{
        /* Releasing a NULL host is permitted for caller convenience. */
        if (hostp == NULL)
                return;

        ASSERT(MUTEX_HELD(&g->lock));
        ASSERT(hostp->nh_refs > 0);

        hostp->nh_refs--;
        if (hostp->nh_refs != 0)
                return;

        /*
         * The very last reference to the host was dropped,
         * thus host is unused now. Set its idle timeout
         * and move it to the idle hosts LRU list.
         */
        hostp->nh_idle_timeout = ddi_get_lbolt() +
            SEC_TO_TICK(g->cn_idle_tmo);

        ASSERT((hostp->nh_flags & NLM_NH_INIDLE) == 0);
        TAILQ_INSERT_TAIL(&g->nlm_idle_hosts, hostp, nh_link);
        hostp->nh_flags |= NLM_NH_INIDLE;
}
1830 
1831 void
1832 nlm_host_release(struct nlm_globals *g, struct nlm_host *hostp)
1833 {
1834         if (hostp == NULL)
1835                 return;
1836 
1837         mutex_enter(&g->lock);
1838         nlm_host_release_locked(g, hostp);
1839         mutex_exit(&g->lock);
1840 }
1841 
1842 /*
1843  * Unregister this NLM host (NFS client) with the local statd
1844  * due to idleness (no locks held for a while).
1845  */
1846 void
1847 nlm_host_unmonitor(struct nlm_globals *g, struct nlm_host *host)
1848 {
1849         enum clnt_stat stat;
1850 
1851         VERIFY(host->nh_refs == 0);
1852         if (!(host->nh_flags & NLM_NH_MONITORED))
1853                 return;
1854 
1855         host->nh_flags &= ~NLM_NH_MONITORED;
1856 
1857         if (ZONE_IS_BRANDED(curzone) && ZBROP(curzone)->b_rpc_statd != NULL) {
1858                 ZBROP(curzone)->b_rpc_statd(SM_UNMON, g, host);
1859                 return;
1860         }
1861 
1862         stat = nlm_nsm_unmon(&g->nlm_nsm, host->nh_name);
1863         if (stat != RPC_SUCCESS) {
1864                 NLM_WARN("NLM: Failed to contact statd, stat=%d\n", stat);
1865                 return;
1866         }
1867 }
1868 
1869 /*
1870  * Ask the local NFS statd to begin monitoring this host.
1871  * It will call us back when that host restarts, using the
1872  * prog,vers,proc specified below, i.e. NLM_SM_NOTIFY1,
1873  * which is handled in nlm_do_notify1().
1874  */
void
nlm_host_monitor(struct nlm_globals *g, struct nlm_host *host, int state)
{
	int family;
	netobj obj;
	enum clnt_stat stat;

	if (state != 0 && host->nh_state == 0) {
		/*
		 * This is the first time we have seen an NSM state
		 * Value for this host. We record it here to help
		 * detect host reboots.
		 */
		host->nh_state = state;
	}

	/* Only one caller gets to do the actual monitor registration. */
	mutex_enter(&host->nh_lock);
	if (host->nh_flags & NLM_NH_MONITORED) {
		mutex_exit(&host->nh_lock);
		return;
	}

	host->nh_flags |= NLM_NH_MONITORED;
	mutex_exit(&host->nh_lock);

	/* A branded zone may supply its own statd interface. */
	if (ZONE_IS_BRANDED(curzone) && ZBROP(curzone)->b_rpc_statd != NULL) {
		ZBROP(curzone)->b_rpc_statd(SM_MON, g, host);
		return;
	}

	/*
	 * Before we begin monitoring the host register the network address
	 * associated with this hostname.
	 */
	nlm_netbuf_to_netobj(&host->nh_addr, &family, &obj);
	stat = nlm_nsmaddr_reg(&g->nlm_nsm, host->nh_name, family, &obj);
	if (stat != RPC_SUCCESS) {
		NLM_WARN("Failed to register address, stat=%d\n", stat);
		/*
		 * NOTE(review): NLM_NH_MONITORED was set above under
		 * host->nh_lock, but is cleared here (and below) under
		 * g->lock -- confirm which mutex protects nh_flags.
		 */
		mutex_enter(&g->lock);
		host->nh_flags &= ~NLM_NH_MONITORED;
		mutex_exit(&g->lock);

		return;
	}

	/*
	 * Tell statd how to call us with status updates for
	 * this host. Updates arrive via nlm_do_notify1().
	 *
	 * We put our assigned system ID value in the priv field to
	 * make it simpler to find the host if we are notified of a
	 * host restart.
	 */
	stat = nlm_nsm_mon(&g->nlm_nsm, host->nh_name, host->nh_sysid);
	if (stat != RPC_SUCCESS) {
		NLM_WARN("Failed to contact local NSM, stat=%d\n", stat);
		mutex_enter(&g->lock);
		host->nh_flags &= ~NLM_NH_MONITORED;
		mutex_exit(&g->lock);

		return;
	}
}
1938 
1939 int
1940 nlm_host_get_state(struct nlm_host *hostp)
1941 {
1942 
1943         return (hostp->nh_state);
1944 }
1945 
1946 /*
1947  * NLM client/server sleeping locks
1948  */
1949 
1950 /*
1951  * Register client side sleeping lock.
1952  *
1953  * Our client code calls this to keep information
1954  * about sleeping lock somewhere. When it receives
1955  * grant callback from server or when it just
1956  * needs to remove all sleeping locks from vnode,
1957  * it uses this information for remove/apply lock
1958  * properly.
1959  */
struct nlm_slock *
nlm_slock_register(
	struct nlm_globals *g,
	struct nlm_host *host,
	struct nlm4_lock *lock,
	struct vnode *vp)
{
	struct nlm_slock *nslp;

	nslp = kmem_zalloc(sizeof (*nslp), KM_SLEEP);
	cv_init(&nslp->nsl_cond, NULL, CV_DEFAULT, NULL);
	nslp->nsl_lock = *lock;
	/*
	 * nsl_fh gets its own copy of the file handle; it is
	 * freed in nlm_slock_unregister().
	 */
	nlm_copy_netobj(&nslp->nsl_fh, &nslp->nsl_lock.fh);
	nslp->nsl_state = NLM_SL_BLOCKED;
	nslp->nsl_host = host;
	nslp->nsl_vp = vp;

	/* Publish the entry on the per-zone sleeping locks list. */
	mutex_enter(&g->lock);
	TAILQ_INSERT_TAIL(&g->nlm_slocks, nslp, nsl_link);
	mutex_exit(&g->lock);

	return (nslp);
}
1983 
1984 /*
1985  * Remove this lock from the wait list and destroy it.
1986  */
1987 void
1988 nlm_slock_unregister(struct nlm_globals *g, struct nlm_slock *nslp)
1989 {
1990         mutex_enter(&g->lock);
1991         TAILQ_REMOVE(&g->nlm_slocks, nslp, nsl_link);
1992         mutex_exit(&g->lock);
1993 
1994         kmem_free(nslp->nsl_fh.n_bytes, nslp->nsl_fh.n_len);
1995         cv_destroy(&nslp->nsl_cond);
1996         kmem_free(nslp, sizeof (*nslp));
1997 }
1998 
1999 /*
2000  * Wait for a granted callback or cancellation event
2001  * for a sleeping lock.
2002  *
2003  * If a signal interrupted the wait or if the lock
2004  * was cancelled, return EINTR - the caller must arrange to send
2005  * a cancellation to the server.
2006  *
2007  * If timeout occurred, return ETIMEDOUT - the caller must
2008  * resend the lock request to the server.
2009  *
2010  * On success return 0.
2011  */
int
nlm_slock_wait(struct nlm_globals *g,
    struct nlm_slock *nslp, uint_t timeo_secs)
{
	clock_t timeo_ticks;
	int cv_res, error;

	/*
	 * If the granted message arrived before we got here,
	 * nslp->nsl_state will be NLM_SL_GRANTED - in that case don't sleep.
	 */
	cv_res = 1;
	timeo_ticks = ddi_get_lbolt() + SEC_TO_TICK(timeo_secs);

	mutex_enter(&g->lock);
	while (nslp->nsl_state == NLM_SL_BLOCKED && cv_res > 0) {
		cv_res = cv_timedwait_sig(&nslp->nsl_cond,
		    &g->lock, timeo_ticks);
	}

	/*
	 * No matter why we wake up, if the lock was
	 * cancelled, let the function caller to know
	 * about it by returning EINTR.
	 */
	if (nslp->nsl_state == NLM_SL_CANCELLED) {
		error = EINTR;
		goto out;
	}

	if (cv_res <= 0) {
		/*
		 * We were woken up either by timeout or by interrupt:
		 * cv_timedwait_sig() returns -1 on timeout and 0 when
		 * interrupted by a signal.
		 */
		error = (cv_res < 0) ? ETIMEDOUT : EINTR;

		/*
		 * The granted message may arrive after the
		 * interrupt/timeout but before we manage to lock the
		 * mutex. Detect this by examining nslp.
		 */
		if (nslp->nsl_state == NLM_SL_GRANTED)
			error = 0;
	} else { /* Awaken via cv_signal()/cv_broadcast() or didn't block */
		error = 0;
		VERIFY(nslp->nsl_state == NLM_SL_GRANTED);
	}

out:
	mutex_exit(&g->lock);
	return (error);
}
2062 
2063 /*
2064  * Mark client side sleeping lock as granted
2065  * and wake up a process blocked on the lock.
2066  * Called from server side NLM_GRANT handler.
2067  *
2068  * If sleeping lock is found return 0, otherwise
2069  * return ENOENT.
2070  */
2071 int
2072 nlm_slock_grant(struct nlm_globals *g,
2073     struct nlm_host *hostp, struct nlm4_lock *alock)
2074 {
2075         struct nlm_slock *nslp;
2076         int error = ENOENT;
2077 
2078         mutex_enter(&g->lock);
2079         TAILQ_FOREACH(nslp, &g->nlm_slocks, nsl_link) {
2080                 if ((nslp->nsl_state != NLM_SL_BLOCKED) ||
2081                     (nslp->nsl_host != hostp))
2082                         continue;
2083 
2084                 if (alock->svid              == nslp->nsl_lock.svid &&
2085                     alock->l_offset  == nslp->nsl_lock.l_offset &&
2086                     alock->l_len     == nslp->nsl_lock.l_len &&
2087                     alock->fh.n_len  == nslp->nsl_lock.fh.n_len &&
2088                     bcmp(alock->fh.n_bytes, nslp->nsl_lock.fh.n_bytes,
2089                     nslp->nsl_lock.fh.n_len) == 0) {
2090                         nslp->nsl_state = NLM_SL_GRANTED;
2091                         cv_broadcast(&nslp->nsl_cond);
2092                         error = 0;
2093                         break;
2094                 }
2095         }
2096 
2097         mutex_exit(&g->lock);
2098         return (error);
2099 }
2100 
2101 /*
2102  * Register sleeping lock request corresponding to
2103  * flp on the given vhold object.
2104  * On success function returns 0, otherwise (if
2105  * lock request with the same flp is already
2106  * registered) function returns EEXIST.
2107  */
int
nlm_slreq_register(struct nlm_host *hostp, struct nlm_vhold *nvp,
    struct flock64 *flp)
{
	struct nlm_slreq *slr, *new_slr = NULL;
	int ret = EEXIST;

	mutex_enter(&hostp->nh_lock);
	slr = nlm_slreq_find_locked(hostp, nvp, flp);
	if (slr != NULL)
		goto out;

	/*
	 * Drop the lock for the sleeping allocation, then re-check
	 * below: another thread may have registered the same request
	 * while we were not holding nh_lock.
	 */
	mutex_exit(&hostp->nh_lock);
	new_slr = kmem_zalloc(sizeof (*slr), KM_SLEEP);
	bcopy(flp, &new_slr->nsr_fl, sizeof (*flp));

	mutex_enter(&hostp->nh_lock);
	slr = nlm_slreq_find_locked(hostp, nvp, flp);
	if (slr == NULL) {
		/* Still absent: consume our pre-allocated entry. */
		slr = new_slr;
		new_slr = NULL;
		ret = 0;

		TAILQ_INSERT_TAIL(&nvp->nv_slreqs, slr, nsr_link);
	}

out:
	mutex_exit(&hostp->nh_lock);
	/* Duplicate found (or we lost the race): discard the allocation. */
	if (new_slr != NULL)
		kmem_free(new_slr, sizeof (*new_slr));

	return (ret);
}
2141 
2142 /*
2143  * Unregister sleeping lock request corresponding
2144  * to flp from the given vhold object.
2145  * On success function returns 0, otherwise (if
2146  * lock request corresponding to flp isn't found
2147  * on the given vhold) function returns ENOENT.
2148  */
2149 int
2150 nlm_slreq_unregister(struct nlm_host *hostp, struct nlm_vhold *nvp,
2151     struct flock64 *flp)
2152 {
2153         struct nlm_slreq *slr;
2154 
2155         mutex_enter(&hostp->nh_lock);
2156         slr = nlm_slreq_find_locked(hostp, nvp, flp);
2157         if (slr == NULL) {
2158                 mutex_exit(&hostp->nh_lock);
2159                 return (ENOENT);
2160         }
2161 
2162         TAILQ_REMOVE(&nvp->nv_slreqs, slr, nsr_link);
2163         mutex_exit(&hostp->nh_lock);
2164 
2165         kmem_free(slr, sizeof (*slr));
2166         return (0);
2167 }
2168 
2169 /*
2170  * Find sleeping lock request on the given vhold object by flp.
2171  */
2172 struct nlm_slreq *
2173 nlm_slreq_find_locked(struct nlm_host *hostp, struct nlm_vhold *nvp,
2174     struct flock64 *flp)
2175 {
2176         struct nlm_slreq *slr = NULL;
2177 
2178         ASSERT(MUTEX_HELD(&hostp->nh_lock));
2179         TAILQ_FOREACH(slr, &nvp->nv_slreqs, nsr_link) {
2180                 if (slr->nsr_fl.l_start              == flp->l_start      &&
2181                     slr->nsr_fl.l_len                == flp->l_len        &&
2182                     slr->nsr_fl.l_pid                == flp->l_pid        &&
2183                     slr->nsr_fl.l_type               == flp->l_type)
2184                         break;
2185         }
2186 
2187         return (slr);
2188 }
2189 
2190 /*
2191  * NLM tracks active share reservations made on the client side.
2192  * It needs to have a track of share reservations for two purposes
2193  * 1) to determine if nlm_host is busy (if it has active locks and/or
2194  *    share reservations, it is)
2195  * 2) to recover active share reservations when NLM server reports
2196  *    that it has rebooted.
2197  *
2198  * Unfortunately Illumos local share reservations manager (see os/share.c)
2199  * doesn't have an ability to lookup all reservations on the system
2200  * by sysid (like local lock manager) or get all reservations by sysid.
 * It tracks reservations per vnode and can only get/look them up
 * on a particular vnode. That is not what NLM needs, hence this
 * somewhat awkward share reservation tracking scheme.
2204  */
2205 
void
nlm_shres_track(struct nlm_host *hostp, vnode_t *vp, struct shrlock *shrp)
{
	struct nlm_shres *nsp, *nsp_new;

	/*
	 * NFS code must fill the s_owner, so that
	 * s_own_len is never 0.
	 */
	ASSERT(shrp->s_own_len > 0);

	/*
	 * Pre-allocate the item before taking nh_lock, because
	 * nlm_shres_create_item() performs sleeping allocations.
	 */
	nsp_new = nlm_shres_create_item(shrp, vp);

	mutex_enter(&hostp->nh_lock);
	for (nsp = hostp->nh_shrlist; nsp != NULL; nsp = nsp->ns_next)
		if (nsp->ns_vp == vp && nlm_shres_equal(shrp, nsp->ns_shr))
			break;

	if (nsp != NULL) {
		/*
		 * Found a duplicate. Do nothing.
		 */

		goto out;
	}

	/* Not tracked yet: link the new item at the head of the list. */
	nsp = nsp_new;
	nsp_new = NULL;
	nsp->ns_next = hostp->nh_shrlist;
	hostp->nh_shrlist = nsp;

out:
	mutex_exit(&hostp->nh_lock);
	/* Duplicate case: the pre-allocated item was not consumed. */
	if (nsp_new != NULL)
		nlm_shres_destroy_item(nsp_new);
}
2241 
2242 void
2243 nlm_shres_untrack(struct nlm_host *hostp, vnode_t *vp, struct shrlock *shrp)
2244 {
2245         struct nlm_shres *nsp, *nsp_prev = NULL;
2246 
2247         mutex_enter(&hostp->nh_lock);
2248         nsp = hostp->nh_shrlist;
2249         while (nsp != NULL) {
2250                 if (nsp->ns_vp == vp && nlm_shres_equal(shrp, nsp->ns_shr)) {
2251                         struct nlm_shres *nsp_del;
2252 
2253                         nsp_del = nsp;
2254                         nsp = nsp->ns_next;
2255                         if (nsp_prev != NULL)
2256                                 nsp_prev->ns_next = nsp;
2257                         else
2258                                 hostp->nh_shrlist = nsp;
2259 
2260                         nlm_shres_destroy_item(nsp_del);
2261                         continue;
2262                 }
2263 
2264                 nsp_prev = nsp;
2265                 nsp = nsp->ns_next;
2266         }
2267 
2268         mutex_exit(&hostp->nh_lock);
2269 }
2270 
2271 /*
2272  * Get a _copy_ of the list of all active share reservations
2273  * made by the given host.
 * NOTE: the list this function returns _must_ be released using
2275  *       nlm_free_shrlist().
2276  */
2277 struct nlm_shres *
2278 nlm_get_active_shres(struct nlm_host *hostp)
2279 {
2280         struct nlm_shres *nsp, *nslist = NULL;
2281 
2282         mutex_enter(&hostp->nh_lock);
2283         for (nsp = hostp->nh_shrlist; nsp != NULL; nsp = nsp->ns_next) {
2284                 struct nlm_shres *nsp_new;
2285 
2286                 nsp_new = nlm_shres_create_item(nsp->ns_shr, nsp->ns_vp);
2287                 nsp_new->ns_next = nslist;
2288                 nslist = nsp_new;
2289         }
2290 
2291         mutex_exit(&hostp->nh_lock);
2292         return (nslist);
2293 }
2294 
2295 /*
2296  * Free memory allocated for the active share reservations
2297  * list created by nlm_get_active_shres() function.
2298  */
2299 void
2300 nlm_free_shrlist(struct nlm_shres *nslist)
2301 {
2302         struct nlm_shres *nsp;
2303 
2304         while (nslist != NULL) {
2305                 nsp =  nslist;
2306                 nslist = nslist->ns_next;
2307 
2308                 nlm_shres_destroy_item(nsp);
2309         }
2310 }
2311 
2312 static bool_t
2313 nlm_shres_equal(struct shrlock *shrp1, struct shrlock *shrp2)
2314 {
2315         if (shrp1->s_sysid   == shrp2->s_sysid    &&
2316             shrp1->s_pid     == shrp2->s_pid              &&
2317             shrp1->s_own_len == shrp2->s_own_len  &&
2318             bcmp(shrp1->s_owner, shrp2->s_owner,
2319             shrp1->s_own_len) == 0)
2320                 return (TRUE);
2321 
2322         return (FALSE);
2323 }
2324 
static struct nlm_shres *
nlm_shres_create_item(struct shrlock *shrp, vnode_t *vp)
{
	struct nlm_shres *nsp;

	/* Deep copy: the new item owns its shrlock and the owner buffer. */
	nsp = kmem_alloc(sizeof (*nsp), KM_SLEEP);
	nsp->ns_shr = kmem_alloc(sizeof (*shrp), KM_SLEEP);
	bcopy(shrp, nsp->ns_shr, sizeof (*shrp));
	nsp->ns_shr->s_owner = kmem_alloc(shrp->s_own_len, KM_SLEEP);
	bcopy(shrp->s_owner, nsp->ns_shr->s_owner, shrp->s_own_len);
	nsp->ns_vp = vp;

	/* NOTE: ns_next is left unset; callers link the item themselves. */
	return (nsp);
}
2339 
2340 static void
2341 nlm_shres_destroy_item(struct nlm_shres *nsp)
2342 {
2343         kmem_free(nsp->ns_shr->s_owner,
2344             nsp->ns_shr->s_own_len);
2345         kmem_free(nsp->ns_shr, sizeof (struct shrlock));
2346         kmem_free(nsp, sizeof (*nsp));
2347 }
2348 
2349 /*
2350  * Called by klmmod.c when lockd adds a network endpoint
2351  * on which we should begin RPC services.
2352  */
int
nlm_svc_add_ep(struct file *fp, const char *netid, struct knetconfig *knc)
{
	SVCMASTERXPRT *xprt = NULL;
	int error;

	/* Create a kernel RPC transport for this endpoint in our pool. */
	error = svc_tli_kcreate(fp, 0, (char *)netid, NULL, &xprt,
	    &nlm_sct, NULL, NLM_SVCPOOL_ID, FALSE);
	if (error != 0)
		return (error);

	/* Called for its side effect: records knc_rdev in our table. */
	(void) nlm_knc_to_netid(knc);
	return (0);
}
2367 
2368 /*
2369  * Start NLM service.
2370  */
int
nlm_svc_starting(struct nlm_globals *g, struct file *fp,
    const char *netid, struct knetconfig *knc)
{
	int error;
	enum clnt_stat stat;

	VERIFY(g->run_status == NLM_ST_STARTING);
	VERIFY(g->nlm_gc_thread == NULL);

	/*
	 * A zone without rpcbind cannot reach a local statd.  Mark the
	 * NSM handle with a (void *)-1 sentinel so nlm_svc_stopping()
	 * knows not to call nlm_nsm_fini() on it.
	 */
	if (g->nlm_v4_only) {
		NLM_WARN("Zone %d has no rpcbind, NLM is v4 only", getzoneid());
		bzero(&g->nlm_nsm, sizeof (struct nlm_nsm));
		g->nlm_nsm.ns_addr_handle = (void *)-1;
		goto v4_only;
	}

	error = nlm_nsm_init_local(&g->nlm_nsm);
	if (error != 0) {
		NLM_ERR("Failed to initialize NSM handler "
		    "(error=%d)\n", error);
		g->run_status = NLM_ST_DOWN;
		return (error);
	}

	/* Default error for the statd failure paths below. */
	error = EIO;

	/*
	 * Create an NLM garbage collector thread that will
	 * clean up stale vholds and hosts objects.
	 */
	g->nlm_gc_thread = zthread_create(NULL, 0, nlm_gc,
	    g, 0, minclsyspri);

	/*
	 * Send SIMU_CRASH to local statd to report that
	 * NLM started, so that statd can report other hosts
	 * about NLM state change.
	 */

	stat = nlm_nsm_simu_crash(&g->nlm_nsm);
	if (stat != RPC_SUCCESS) {
		NLM_ERR("Failed to connect to local statd "
		    "(rpcerr=%d)\n", stat);
		goto shutdown_lm;
	}

	stat = nlm_nsm_stat(&g->nlm_nsm, &g->nsm_state);
	if (stat != RPC_SUCCESS) {
		NLM_ERR("Failed to get the status of local statd "
		    "(rpcerr=%d)\n", stat);
		goto shutdown_lm;
	}
v4_only:

	/* Compute when the post-start grace period ends. */
	g->grace_threshold = ddi_get_lbolt() +
	    SEC_TO_TICK(g->grace_period);

	/* Register endpoint used for communications with local NLM */
	error = nlm_svc_add_ep(fp, netid, knc);
	if (error != 0)
		goto shutdown_lm;

	/* Arrange for nlm_pool_shutdown() to run when the pool dies. */
	(void) svc_pool_control(NLM_SVCPOOL_ID,
	    SVCPSET_SHUTDOWN_PROC, (void *)nlm_pool_shutdown);
	g->run_status = NLM_ST_UP;
	return (0);

shutdown_lm:
	/*
	 * Startup failed half-way: move to NLM_ST_STOPPING and run
	 * the full teardown path to undo what was done above.
	 */
	mutex_enter(&g->lock);
	g->run_status = NLM_ST_STOPPING;
	mutex_exit(&g->lock);

	nlm_svc_stopping(g);
	return (error);
}
2447 
2448 /*
2449  * Called when the server pool is destroyed, so that
 * all transports are closed and no server threads
 * remain.
2452  *
2453  * Just call lm_shutdown() to shut NLM down properly.
2454  */
static void
nlm_pool_shutdown(void)
{
	/* lm_shutdown() drives the regular NLM shutdown sequence. */
	(void) lm_shutdown();
}
2460 
2461 /*
2462  * Stop NLM service, cleanup all resources
2463  * NLM owns at the moment.
2464  *
2465  * NOTE: NFS code can call NLM while it's
2466  * stopping or even if it's shut down. Any attempt
2467  * to lock file either on client or on the server
2468  * will fail if NLM isn't in NLM_ST_UP state.
2469  */
void
nlm_svc_stopping(struct nlm_globals *g)
{
	mutex_enter(&g->lock);
	ASSERT(g->run_status == NLM_ST_STOPPING);

	/*
	 * Ask NLM GC thread to exit and wait until it dies.
	 * (If it was never created, nlm_gc_thread is NULL and
	 * there is nothing to wait for.)
	 */
	cv_signal(&g->nlm_gc_sched_cv);
	while (g->nlm_gc_thread != NULL)
		cv_wait(&g->nlm_gc_finish_cv, &g->lock);

	mutex_exit(&g->lock);

	/*
	 * Cleanup locks owned by NLM hosts.
	 * NOTE: New hosts won't be created while
	 * NLM is stopping.
	 */
	while (!avl_is_empty(&g->nlm_hosts_tree)) {
		struct nlm_host *hostp;
		int busy_hosts = 0;

		/*
		 * Iterate through all NLM hosts in the system
		 * and drop the locks they own by force.
		 */
		hostp = avl_first(&g->nlm_hosts_tree);
		while (hostp != NULL) {
			/* Cleanup all client and server side locks */
			nlm_client_cancel_all(g, hostp);
			nlm_host_notify_server(hostp, 0);

			mutex_enter(&hostp->nh_lock);
			nlm_host_gc_vholds(hostp);
			if (hostp->nh_refs > 0 || nlm_host_has_locks(hostp)) {
				/*
				 * The host is still busy; give it some
				 * time to release its resources and go
				 * on to the next one.
				 */

				mutex_exit(&hostp->nh_lock);
				hostp = AVL_NEXT(&g->nlm_hosts_tree, hostp);
				busy_hosts++;
				continue;
			}

			mutex_exit(&hostp->nh_lock);
			hostp = AVL_NEXT(&g->nlm_hosts_tree, hostp);
		}

		/*
		 * All hosts go to the nlm_idle_hosts list after
		 * all locks they own are cleaned up and their last
		 * references were dropped. Just destroy all hosts in
		 * the nlm_idle_hosts list; they cannot be removed from
		 * there while we're in the stopping state.
		 */
		while ((hostp = TAILQ_FIRST(&g->nlm_idle_hosts)) != NULL) {
			nlm_host_unregister(g, hostp);
			nlm_host_destroy(hostp);
		}

		if (busy_hosts > 0) {
			/*
			 * Some hosts couldn't be cleaned up yet,
			 * probably because they are still releasing
			 * resources. Give them some time to drop
			 * their references before retrying.
			 */
			delay(MSEC_TO_TICK(500));
		}
	}

	ASSERT(TAILQ_EMPTY(&g->nlm_slocks));

	/* If started with rpcbind (the normal case) */
	if (g->nlm_nsm.ns_addr_handle != (void *)-1)
		nlm_nsm_fini(&g->nlm_nsm);
	g->lockd_pid = 0;
	g->run_status = NLM_ST_DOWN;
}
2554 
2555 /*
2556  * Returns TRUE if the given vnode has
2557  * any active or sleeping locks.
2558  */
2559 int
2560 nlm_vp_active(const vnode_t *vp)
2561 {
2562         struct nlm_globals *g;
2563         struct nlm_host *hostp;
2564         struct nlm_vhold *nvp;
2565         int active = 0;
2566 
2567         g = zone_getspecific(nlm_zone_key, curzone);
2568 
2569         /*
2570          * Server side NLM has locks on the given vnode
2571          * if there exist a vhold object that holds
2572          * the given vnode "vp" in one of NLM hosts.
2573          */
2574         mutex_enter(&g->lock);
2575         hostp = avl_first(&g->nlm_hosts_tree);
2576         while (hostp != NULL) {
2577                 mutex_enter(&hostp->nh_lock);
2578                 nvp = nlm_vhold_find_locked(hostp, vp);
2579                 mutex_exit(&hostp->nh_lock);
2580                 if (nvp != NULL) {
2581                         active = 1;
2582                         break;
2583                 }
2584 
2585                 hostp = AVL_NEXT(&g->nlm_hosts_tree, hostp);
2586         }
2587 
2588         mutex_exit(&g->lock);
2589         return (active);
2590 }
2591 
2592 /*
 * Called right before an NFS export is going to
 * disappear. The function finds all vnodes
2595  * belonging to the given export and cleans
2596  * all remote locks and share reservations
2597  * on them.
2598  */
void
nlm_zone_unexport(struct nlm_globals *g, struct exportinfo *exi)
{
	struct nlm_host *hostp;

	mutex_enter(&g->lock);
	if (g->run_status != NLM_ST_UP) {
		/* nothing to do */
		mutex_exit(&g->lock);
		return;
	}

	hostp = avl_first(&g->nlm_hosts_tree);
	while (hostp != NULL) {
		struct nlm_vhold *nvp;

		/*
		 * Take a reference on the host (pulling it off the
		 * idle list if necessary) so that it stays valid
		 * while g->lock is dropped below.
		 */
		if (hostp->nh_flags & NLM_NH_INIDLE) {
			TAILQ_REMOVE(&g->nlm_idle_hosts, hostp, nh_link);
			hostp->nh_flags &= ~NLM_NH_INIDLE;
		}
		hostp->nh_refs++;

		mutex_exit(&g->lock);

		mutex_enter(&hostp->nh_lock);
		TAILQ_FOREACH(nvp, &hostp->nh_vholds_list, nv_link) {
			vnode_t *vp;

			/*
			 * Hold the vhold so it stays usable while
			 * nh_lock is dropped for the cleanup call.
			 * NOTE(review): assumes a vhold with
			 * nv_refcnt > 0 cannot be unlinked from
			 * nh_vholds_list -- confirm.
			 */
			nvp->nv_refcnt++;
			mutex_exit(&hostp->nh_lock);

			vp = nvp->nv_vp;

			/* Only vnodes belonging to this export. */
			if (!EQFSID(&exi->exi_fsid, &vp->v_vfsp->vfs_fsid))
				goto next_iter;

			/*
			 * OK, we found that vnode vp is controlled by
			 * the exportinfo exi; now we need to drop all
			 * locks from this vnode, let's do it.
			 */
			nlm_vhold_clean(nvp, hostp->nh_sysid);

		next_iter:
			mutex_enter(&hostp->nh_lock);
			nvp->nv_refcnt--;
		}
		mutex_exit(&hostp->nh_lock);

		/* Re-take g->lock and drop the host reference we hold. */
		mutex_enter(&g->lock);
		nlm_host_release_locked(g, hostp);

		hostp = AVL_NEXT(&g->nlm_hosts_tree, hostp);
	}

	mutex_exit(&g->lock);
}
2657 
void
nlm_unexport(struct exportinfo *exi)
{
	struct nlm_globals *g;

	/* Find the per-zone NLM globals matching the export's zone. */
	rw_enter(&lm_lck, RW_READER);
	TAILQ_FOREACH(g, &nlm_zones_list, nlm_link) {
		if (g->nlm_zoneid == exi->exi_zoneid) {
			/*
			 * NOTE: If we want to drop lm_lock before
			 * calling nlm_zone_unexport(), we should break,
			 * and have a post-rw_exit() snippit like:
			 *	if (g != NULL)
			 *		nlm_zone_unexport(g, exi);
			 */
			nlm_zone_unexport(g, exi);
			break; /* Only going to match once! */
		}
	}
	rw_exit(&lm_lck);
}
2679 
2680 /*
2681  * Allocate new unique sysid.
2682  * In case of failure (no available sysids)
2683  * return LM_NOSYSID.
2684  */
sysid_t
nlm_sysid_alloc(void)
{
	sysid_t ret_sysid = LM_NOSYSID;

	rw_enter(&lm_lck, RW_WRITER);
	/* Wrap the next-index hint back to the start of the range. */
	if (nlm_sysid_nidx > LM_SYSID_MAX)
		nlm_sysid_nidx = LM_SYSID;

	if (!BT_TEST(nlm_sysid_bmap, nlm_sysid_nidx)) {
		/* Fast path: the hinted slot is free, take it. */
		BT_SET(nlm_sysid_bmap, nlm_sysid_nidx);
		ret_sysid = nlm_sysid_nidx++;
	} else {
		index_t id;

		/*
		 * Slow path: scan the whole bitmap for any free bit.
		 * NOTE(review): id == 0 is treated as a failure here;
		 * presumably bt_availbit() returns a negative value
		 * when no bit is free and bit 0 is never used -- confirm.
		 */
		id = bt_availbit(nlm_sysid_bmap, NLM_BMAP_NITEMS);
		if (id > 0) {
			nlm_sysid_nidx = id + 1;
			ret_sysid = id;
			BT_SET(nlm_sysid_bmap, id);
		}
	}

	rw_exit(&lm_lck);
	return (ret_sysid);
}
2711 
/*
 * Return a sysid previously allocated by nlm_sysid_alloc()
 * back to the free pool.
 */
void
nlm_sysid_free(sysid_t sysid)
{
	ASSERT(sysid >= LM_SYSID && sysid <= LM_SYSID_MAX);

	rw_enter(&lm_lck, RW_WRITER);
	/* It must currently be marked allocated in the bitmap. */
	ASSERT(BT_TEST(nlm_sysid_bmap, sysid));
	BT_CLEAR(nlm_sysid_bmap, sysid);
	rw_exit(&lm_lck);
}
2722 
2723 /*
2724  * Return true if the request came from a local caller.
2725  * By necessity, this "knows" the netid names invented
2726  * in lm_svc() and nlm_netid_from_knetconfig().
2727  */
2728 bool_t
2729 nlm_caller_is_local(SVCXPRT *transp)
2730 {
2731         char *netid;
2732         struct netbuf *rtaddr;
2733 
2734         netid = svc_getnetid(transp);
2735         rtaddr = svc_getrpccaller(transp);
2736 
2737         if (netid == NULL)
2738                 return (FALSE);
2739 
2740         if (strcmp(netid, "ticlts") == 0 ||
2741             strcmp(netid, "ticotsord") == 0)
2742                 return (TRUE);
2743 
2744         if (strcmp(netid, "tcp") == 0 || strcmp(netid, "udp") == 0) {
2745                 struct sockaddr_in *sin = (void *)rtaddr->buf;
2746                 if (sin->sin_addr.s_addr == htonl(INADDR_LOOPBACK))
2747                         return (TRUE);
2748         }
2749         if (strcmp(netid, "tcp6") == 0 || strcmp(netid, "udp6") == 0) {
2750                 struct sockaddr_in6 *sin6 = (void *)rtaddr->buf;
2751                 if (IN6_IS_ADDR_LOOPBACK(&sin6->sin6_addr))
2752                         return (TRUE);
2753         }
2754 
2755         return (FALSE); /* unknown transport */
2756 }
2757 
2758 /*
 * Get netid string corresponding to the given knetconfig.
2760  * If not done already, save knc->knc_rdev in our table.
2761  */
2762 const char *
2763 nlm_knc_to_netid(struct knetconfig *knc)
2764 {
2765         int i;
2766         dev_t rdev;
2767         struct nlm_knc *nc;
2768         const char *netid = NULL;
2769 
2770         rw_enter(&lm_lck, RW_READER);
2771         for (i = 0; i < NLM_KNCS; i++) {
2772                 nc = &nlm_netconfigs[i];
2773 
2774                 if (nc->n_knc.knc_semantics == knc->knc_semantics &&
2775                     strcmp(nc->n_knc.knc_protofmly,
2776                     knc->knc_protofmly) == 0) {
2777                         netid = nc->n_netid;
2778                         rdev = nc->n_knc.knc_rdev;
2779                         break;
2780                 }
2781         }
2782         rw_exit(&lm_lck);
2783 
2784         if (netid != NULL && rdev == NODEV) {
2785                 rw_enter(&lm_lck, RW_WRITER);
2786                 if (nc->n_knc.knc_rdev == NODEV)
2787                         nc->n_knc.knc_rdev = knc->knc_rdev;
2788                 rw_exit(&lm_lck);
2789         }
2790 
2791         return (netid);
2792 }
2793 
2794 /*
2795  * Get a knetconfig corresponding to the given netid.
2796  * If there's no knetconfig for this netid, ENOENT
2797  * is returned.
2798  */
2799 int
2800 nlm_knc_from_netid(const char *netid, struct knetconfig *knc)
2801 {
2802         int i, ret;
2803 
2804         ret = ENOENT;
2805         for (i = 0; i < NLM_KNCS; i++) {
2806                 struct nlm_knc *nknc;
2807 
2808                 nknc = &nlm_netconfigs[i];
2809                 if (strcmp(netid, nknc->n_netid) == 0 &&
2810                     nknc->n_knc.knc_rdev != NODEV) {
2811                         *knc = nknc->n_knc;
2812                         ret = 0;
2813                         break;
2814                 }
2815         }
2816 
2817         return (ret);
2818 }
2819 
2820 void
2821 nlm_cprsuspend(void)
2822 {
2823         struct nlm_globals *g;
2824 
2825         rw_enter(&lm_lck, RW_READER);
2826         TAILQ_FOREACH(g, &nlm_zones_list, nlm_link)
2827                 nlm_suspend_zone(g);
2828 
2829         rw_exit(&lm_lck);
2830 }
2831 
2832 void
2833 nlm_cprresume(void)
2834 {
2835         struct nlm_globals *g;
2836 
2837         rw_enter(&lm_lck, RW_READER);
2838         TAILQ_FOREACH(g, &nlm_zones_list, nlm_link)
2839                 nlm_resume_zone(g);
2840 
2841         rw_exit(&lm_lck);
2842 }
2843 
/*
 * (Re)initialize an existing RPC client handle for talking to the
 * NSM (statd) endpoint described by nsm, using this zone's kernel
 * credentials.  The kinit return value is intentionally ignored.
 */
void
nlm_nsm_clnt_init(CLIENT *clnt, struct nlm_nsm *nsm)
{
	(void) clnt_tli_kinit(clnt, &nsm->ns_knc, &nsm->ns_addr, 0,
	    NLM_RPC_RETRIES, zone_kcred());
}
2850 
2851 void
2852 nlm_netbuf_to_netobj(struct netbuf *addr, int *family, netobj *obj)
2853 {
2854         /* LINTED pointer alignment */
2855         struct sockaddr *sa = (struct sockaddr *)addr->buf;
2856 
2857         *family = sa->sa_family;
2858 
2859         switch (sa->sa_family) {
2860         case AF_INET: {
2861                 /* LINTED pointer alignment */
2862                 struct sockaddr_in *sin = (struct sockaddr_in *)sa;
2863 
2864                 obj->n_len = sizeof (sin->sin_addr);
2865                 obj->n_bytes = (char *)&sin->sin_addr;
2866                 break;
2867         }
2868 
2869         case AF_INET6: {
2870                 /* LINTED pointer alignment */
2871                 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa;
2872 
2873                 obj->n_len = sizeof (sin6->sin6_addr);
2874                 obj->n_bytes = (char *)&sin6->sin6_addr;
2875                 break;
2876         }
2877 
2878         default:
2879                 VERIFY(0);
2880                 break;
2881         }
2882 }