1 /*
   2  * Copyright (c) 2008 Isilon Inc http://www.isilon.com/
   3  * Authors: Doug Rabson <dfr@rabson.org>
   4  * Developed with Red Inc: Alfred Perlstein <alfred@freebsd.org>
   5  *
   6  * Redistribution and use in source and binary forms, with or without
   7  * modification, are permitted provided that the following conditions
   8  * are met:
   9  * 1. Redistributions of source code must retain the above copyright
  10  *    notice, this list of conditions and the following disclaimer.
  11  * 2. Redistributions in binary form must reproduce the above copyright
  12  *    notice, this list of conditions and the following disclaimer in the
  13  *    documentation and/or other materials provided with the distribution.
  14  *
  15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  25  * SUCH DAMAGE.
  26  */
  27 
  28 /*
  29  * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
  30  * Copyright (c) 2012 by Delphix. All rights reserved.
  31  */
  32 
  33 /*
  34  * NFS LockManager, start/stop, support functions, etc.
  35  * Most of the interesting code is here.
  36  *
  37  * Source code derived from FreeBSD nlm_prot_impl.c
  38  */
  39 
  40 #include <sys/param.h>
  41 #include <sys/systm.h>
  42 #include <sys/thread.h>
  43 #include <sys/fcntl.h>
  44 #include <sys/flock.h>
  45 #include <sys/mount.h>
  46 #include <sys/priv.h>
  47 #include <sys/proc.h>
  48 #include <sys/share.h>
  49 #include <sys/socket.h>
  50 #include <sys/syscall.h>
  51 #include <sys/syslog.h>
  52 #include <sys/systm.h>
  53 #include <sys/class.h>
  54 #include <sys/unistd.h>
  55 #include <sys/vnode.h>
  56 #include <sys/vfs.h>
  57 #include <sys/queue.h>
  58 #include <sys/bitmap.h>
  59 #include <sys/sdt.h>
  60 #include <netinet/in.h>
  61 
  62 #include <rpc/rpc.h>
  63 #include <rpc/xdr.h>
  64 #include <rpc/pmap_prot.h>
  65 #include <rpc/pmap_clnt.h>
  66 #include <rpc/rpcb_prot.h>
  67 
  68 #include <rpcsvc/nlm_prot.h>
  69 #include <rpcsvc/sm_inter.h>
  70 #include <rpcsvc/nsm_addr.h>
  71 
  72 #include <nfs/nfs.h>
  73 #include <nfs/nfs_clnt.h>
  74 #include <nfs/export.h>
  75 #include <nfs/rnode.h>
  76 #include <nfs/lm.h>
  77 
  78 #include "nlm_impl.h"
  79 
/*
 * Pairing of a kernel netconfig with its netid string
 * (e.g. "tcp", "udp6").  Used by the static nlm_netconfigs[]
 * table to map netid <-> knetconfig in both directions.
 */
struct nlm_knc {
	struct knetconfig	n_knc;
	const char		*n_netid;
};
  84 
  85 /*
  86  * Number of attempts NLM tries to obtain RPC binding
  87  * of local statd.
  88  */
  89 #define NLM_NSM_RPCBIND_RETRIES 10
  90 
  91 /*
  92  * Timeout (in seconds) NLM waits before making another
  93  * attempt to obtain RPC binding of local statd.
  94  */
  95 #define NLM_NSM_RPCBIND_TIMEOUT 5
  96 
  97 /*
  98  * Total number of sysids in NLM sysid bitmap
  99  */
 100 #define NLM_BMAP_NITEMS (LM_SYSID_MAX + 1)
 101 
 102 /*
 103  * Number of ulong_t words in bitmap that is used
 104  * for allocation of sysid numbers.
 105  */
 106 #define NLM_BMAP_WORDS  (NLM_BMAP_NITEMS / BT_NBIPUL)
 107 
 108 /*
 109  * Given an integer x, the macro returns
 110  * -1 if x is negative,
 111  *  0 if x is zero
 112  *  1 if x is positive
 113  */
 114 #define SIGN(x) (((x) > 0) - ((x) < 0))
 115 
 116 #define ARRSIZE(arr)    (sizeof (arr) / sizeof ((arr)[0]))
 117 #define NLM_KNCS        ARRSIZE(nlm_netconfigs)
 118 
/*
 * Module-wide lock; protects the data below that is marked
 * with the "(g)" annotation (e.g. the zone globals list and
 * the sysid bitmap).
 */
krwlock_t lm_lck;

/*
 * Zero timeout for asynchronous NLM RPC operations
 */
static const struct timeval nlm_rpctv_zero = { 0,  0 };

/*
 * List of all per-zone nlm_globals instances
 * linked together.
 */
static struct nlm_globals_list nlm_zones_list; /* (g) */

/*
 * NLM kmem caches (for host and vhold objects)
 */
static struct kmem_cache *nlm_hosts_cache = NULL;
static struct kmem_cache *nlm_vhold_cache = NULL;

/*
 * A bitmap for allocation of new sysids.
 * Sysid is a unique number between LM_SYSID
 * and LM_SYSID_MAX. Sysid represents unique remote
 * host that does file locks on the given host.
 * nlm_sysid_nidx is a hint at where to start the
 * next search for a free bit.
 */
static ulong_t	nlm_sysid_bmap[NLM_BMAP_WORDS];	/* (g) */
static int	nlm_sysid_nidx;			/* (g) */

/*
 * RPC service registration for all transports
 */
static SVC_CALLOUT nlm_svcs[] = {
	{ NLM_PROG, 4, 4, nlm_prog_4 },	/* NLM4_VERS */
	{ NLM_PROG, 1, 3, nlm_prog_3 }	/* NLM_VERS - NLM_VERSX */
};

static SVC_CALLOUT_TABLE nlm_sct = {
	ARRSIZE(nlm_svcs),
	FALSE,
	nlm_svcs
};
 160 
 161 /*
 162  * Static table of all netid/knetconfig network
 163  * lock manager can work with. nlm_netconfigs table
 164  * is used when we need to get valid knetconfig by
 165  * netid and vice versa.
 166  *
 167  * Knetconfigs are activated either by the call from
 168  * user-space lockd daemon (server side) or by taking
 169  * knetconfig from NFS mountinfo (client side)
 170  */
 171 static struct nlm_knc nlm_netconfigs[] = { /* (g) */
 172         /* UDP */
 173         {
 174                 { NC_TPI_CLTS, NC_INET, NC_UDP, NODEV },
 175                 "udp",
 176         },
 177         /* TCP */
 178         {
 179                 { NC_TPI_COTS_ORD, NC_INET, NC_TCP, NODEV },
 180                 "tcp",
 181         },
 182         /* UDP over IPv6 */
 183         {
 184                 { NC_TPI_CLTS, NC_INET6, NC_UDP, NODEV },
 185                 "udp6",
 186         },
 187         /* TCP over IPv6 */
 188         {
 189                 { NC_TPI_COTS_ORD, NC_INET6, NC_TCP, NODEV },
 190                 "tcp6",
 191         },
 192         /* ticlts (loopback over UDP) */
 193         {
 194                 { NC_TPI_CLTS, NC_LOOPBACK, NC_NOPROTO, NODEV },
 195                 "ticlts",
 196         },
 197         /* ticotsord (loopback over TCP) */
 198         {
 199                 { NC_TPI_COTS_ORD, NC_LOOPBACK, NC_NOPROTO, NODEV },
 200                 "ticotsord",
 201         },
 202 };
 203 
 204 /*
 205  * NLM misc. function
 206  */
 207 static void nlm_copy_netbuf(struct netbuf *, struct netbuf *);
 208 static int nlm_netbuf_addrs_cmp(struct netbuf *, struct netbuf *);
 209 static void nlm_kmem_reclaim(void *);
 210 static void nlm_pool_shutdown(void);
 211 static void nlm_suspend_zone(struct nlm_globals *);
 212 static void nlm_resume_zone(struct nlm_globals *);
 213 static void nlm_nsm_clnt_init(CLIENT *, struct nlm_nsm *);
 214 static void nlm_netbuf_to_netobj(struct netbuf *, int *, netobj *);
 215 
 216 /*
 217  * NLM thread functions
 218  */
 219 static void nlm_gc(struct nlm_globals *);
 220 static void nlm_reclaimer(struct nlm_host *);
 221 
 222 /*
 223  * NLM NSM functions
 224  */
 225 static int nlm_init_local_knc(struct knetconfig *);
 226 static int nlm_nsm_init_local(struct nlm_nsm *);
 227 static int nlm_nsm_init(struct nlm_nsm *, struct knetconfig *, struct netbuf *);
 228 static void nlm_nsm_fini(struct nlm_nsm *);
 229 static enum clnt_stat nlm_nsm_simu_crash(struct nlm_nsm *);
 230 static enum clnt_stat nlm_nsm_stat(struct nlm_nsm *, int32_t *);
 231 static enum clnt_stat nlm_nsm_mon(struct nlm_nsm *, char *, uint16_t);
 232 static enum clnt_stat nlm_nsm_unmon(struct nlm_nsm *, char *);
 233 
 234 /*
 235  * NLM host functions
 236  */
 237 static int nlm_host_ctor(void *, void *, int);
 238 static void nlm_host_dtor(void *, void *);
 239 static void nlm_host_destroy(struct nlm_host *);
 240 static struct nlm_host *nlm_host_create(char *, const char *,
 241     struct knetconfig *, struct netbuf *);
 242 static struct nlm_host *nlm_host_find_locked(struct nlm_globals *,
 243     const char *, struct netbuf *, avl_index_t *);
 244 static void nlm_host_unregister(struct nlm_globals *, struct nlm_host *);
 245 static void nlm_host_gc_vholds(struct nlm_host *);
 246 static bool_t nlm_host_has_srv_locks(struct nlm_host *);
 247 static bool_t nlm_host_has_cli_locks(struct nlm_host *);
 248 static bool_t nlm_host_has_locks(struct nlm_host *);
 249 
 250 /*
 251  * NLM vhold functions
 252  */
 253 static int nlm_vhold_ctor(void *, void *, int);
 254 static void nlm_vhold_dtor(void *, void *);
 255 static void nlm_vhold_destroy(struct nlm_host *,
 256     struct nlm_vhold *);
 257 static bool_t nlm_vhold_busy(struct nlm_host *, struct nlm_vhold *);
 258 static void nlm_vhold_clean(struct nlm_vhold *, int);
 259 
 260 /*
 261  * NLM client/server sleeping locks/share reservation functions
 262  */
 263 struct nlm_slreq *nlm_slreq_find_locked(struct nlm_host *,
 264     struct nlm_vhold *, struct flock64 *);
 265 static struct nlm_shres *nlm_shres_create_item(struct shrlock *, vnode_t *);
 266 static void nlm_shres_destroy_item(struct nlm_shres *);
 267 static bool_t nlm_shres_equal(struct shrlock *, struct shrlock *);
 268 
 269 /*
 270  * NLM initialization functions.
 271  */
 272 void
 273 nlm_init(void)
 274 {
 275         nlm_hosts_cache = kmem_cache_create("nlm_host_cache",
 276             sizeof (struct nlm_host), 0, nlm_host_ctor, nlm_host_dtor,
 277             nlm_kmem_reclaim, NULL, NULL, 0);
 278 
 279         nlm_vhold_cache = kmem_cache_create("nlm_vhold_cache",
 280             sizeof (struct nlm_vhold), 0, nlm_vhold_ctor, nlm_vhold_dtor,
 281             NULL, NULL, NULL, 0);
 282 
 283         nlm_rpc_init();
 284         TAILQ_INIT(&nlm_zones_list);
 285 
 286         /* initialize sysids bitmap */
 287         bzero(nlm_sysid_bmap, sizeof (nlm_sysid_bmap));
 288         nlm_sysid_nidx = 1;
 289 
 290         /*
 291          * Reserv the sysid #0, because it's associated
 292          * with local locks only. Don't let to allocate
 293          * it for remote locks.
 294          */
 295         BT_SET(nlm_sysid_bmap, 0);
 296 }
 297 
/*
 * Add the zone-private NLM globals to the module-wide
 * nlm_zones_list, under the writer side of lm_lck.
 */
void
nlm_globals_register(struct nlm_globals *g)
{
	rw_enter(&lm_lck, RW_WRITER);
	TAILQ_INSERT_TAIL(&nlm_zones_list, g, nlm_link);
	rw_exit(&lm_lck);
}
 305 
/*
 * Remove the zone-private NLM globals from the module-wide
 * nlm_zones_list, under the writer side of lm_lck.
 */
void
nlm_globals_unregister(struct nlm_globals *g)
{
	rw_enter(&lm_lck, RW_WRITER);
	TAILQ_REMOVE(&nlm_zones_list, g, nlm_link);
	rw_exit(&lm_lck);
}
 313 
/*
 * kmem cache reclaim callback (registered on nlm_hosts_cache):
 * when the system is short on memory, kick the GC thread in
 * every zone so idle hosts/vholds get cleaned up sooner.
 */
/* ARGSUSED */
static void
nlm_kmem_reclaim(void *cdrarg)
{
	struct nlm_globals *g;

	rw_enter(&lm_lck, RW_READER);
	TAILQ_FOREACH(g, &nlm_zones_list, nlm_link)
		cv_broadcast(&g->nlm_gc_sched_cv);

	rw_exit(&lm_lck);
}
 326 
 327 /*
 328  * NLM garbage collector thread (GC).
 329  *
 330  * NLM GC periodically checks whether there're any host objects
 331  * that can be cleaned up. It also releases stale vnodes that
 332  * live on the server side (under protection of vhold objects).
 333  *
 334  * NLM host objects are cleaned up from GC thread because
 335  * operations helping us to determine whether given host has
 336  * any locks can be quite expensive and it's not good to call
 337  * them every time the very last reference to the host is dropped.
 338  * Thus we use "lazy" approach for hosts cleanup.
 339  *
 340  * The work of GC is to release stale vnodes on the server side
 341  * and destroy hosts that haven't any locks and any activity for
 342  * some time (i.e. idle hosts).
 343  */
 344 static void
 345 nlm_gc(struct nlm_globals *g)
 346 {
 347         struct nlm_host *hostp;
 348         clock_t now, idle_period;
 349 
 350         idle_period = SEC_TO_TICK(g->cn_idle_tmo);
 351         mutex_enter(&g->lock);
 352         for (;;) {
 353                 /*
 354                  * GC thread can be explicitly scheduled from
 355                  * memory reclamation function.
 356                  */
 357                 (void) cv_timedwait(&g->nlm_gc_sched_cv, &g->lock,
 358                     ddi_get_lbolt() + idle_period);
 359 
 360                 /*
 361                  * NLM is shutting down, time to die.
 362                  */
 363                 if (g->run_status == NLM_ST_STOPPING)
 364                         break;
 365 
 366                 now = ddi_get_lbolt();
 367                 DTRACE_PROBE2(gc__start, struct nlm_globals *, g,
 368                     clock_t, now);
 369 
 370                 /*
 371                  * Find all obviously unused vholds and destroy them.
 372                  */
 373                 for (hostp = avl_first(&g->nlm_hosts_tree); hostp != NULL;
 374                     hostp = AVL_NEXT(&g->nlm_hosts_tree, hostp)) {
 375                         struct nlm_vhold *nvp;
 376 
 377                         mutex_enter(&hostp->nh_lock);
 378 
 379                         nvp = TAILQ_FIRST(&hostp->nh_vholds_list);
 380                         while (nvp != NULL) {
 381                                 struct nlm_vhold *new_nvp;
 382 
 383                                 new_nvp = TAILQ_NEXT(nvp, nv_link);
 384 
 385                                 /*
 386                                  * If these conditions are met, the vhold is
 387                                  * obviously unused and we will destroy it.  In
 388                                  * a case either v_filocks and/or v_shrlocks is
 389                                  * non-NULL the vhold might still be unused by
 390                                  * the host, but it is expensive to check that.
 391                                  * We defer such check until the host is idle.
 392                                  * The expensive check is done below without
 393                                  * the global lock held.
 394                                  */
 395                                 if (nvp->nv_refcnt == 0 &&
 396                                     nvp->nv_vp->v_filocks == NULL &&
 397                                     nvp->nv_vp->v_shrlocks == NULL) {
 398                                         nlm_vhold_destroy(hostp, nvp);
 399                                 }
 400 
 401                                 nvp = new_nvp;
 402                         }
 403 
 404                         mutex_exit(&hostp->nh_lock);
 405                 }
 406 
 407                 /*
 408                  * Handle all hosts that are unused at the moment
 409                  * until we meet one with idle timeout in future.
 410                  */
 411                 while ((hostp = TAILQ_FIRST(&g->nlm_idle_hosts)) != NULL) {
 412                         bool_t has_locks;
 413 
 414                         if (hostp->nh_idle_timeout > now)
 415                                 break;
 416 
 417                         /*
 418                          * Drop global lock while doing expensive work
 419                          * on this host. We'll re-check any conditions
 420                          * that might change after retaking the global
 421                          * lock.
 422                          */
 423                         mutex_exit(&g->lock);
 424                         mutex_enter(&hostp->nh_lock);
 425 
 426                         /*
 427                          * nlm_globals lock was dropped earlier because
 428                          * garbage collecting of vholds and checking whether
 429                          * host has any locks/shares are expensive operations.
 430                          */
 431                         nlm_host_gc_vholds(hostp);
 432                         has_locks = nlm_host_has_locks(hostp);
 433 
 434                         mutex_exit(&hostp->nh_lock);
 435                         mutex_enter(&g->lock);
 436 
 437                         /*
 438                          * While we were doing expensive operations
 439                          * outside of nlm_globals critical section,
 440                          * somebody could take the host and remove it
 441                          * from the idle list.  Whether its been
 442                          * reinserted or not, our information about
 443                          * the host is outdated, and we should take no
 444                          * further action.
 445                          */
 446                         if ((hostp->nh_flags & NLM_NH_INIDLE) == 0 ||
 447                             hostp->nh_idle_timeout > now)
 448                                 continue;
 449 
 450                         /*
 451                          * If the host has locks we have to renew the
 452                          * host's timeout and put it at the end of LRU
 453                          * list.
 454                          */
 455                         if (has_locks) {
 456                                 TAILQ_REMOVE(&g->nlm_idle_hosts,
 457                                     hostp, nh_link);
 458                                 hostp->nh_idle_timeout = now + idle_period;
 459                                 TAILQ_INSERT_TAIL(&g->nlm_idle_hosts,
 460                                     hostp, nh_link);
 461                                 continue;
 462                         }
 463 
 464                         /*
 465                          * We're here if all the following conditions hold:
 466                          * 1) Host hasn't any locks or share reservations
 467                          * 2) Host is unused
 468                          * 3) Host wasn't touched by anyone at least for
 469                          *    g->cn_idle_tmo seconds.
 470                          *
 471                          * So, now we can destroy it.
 472                          */
 473                         nlm_host_unregister(g, hostp);
 474                         mutex_exit(&g->lock);
 475 
 476                         nlm_host_unmonitor(g, hostp);
 477                         nlm_host_destroy(hostp);
 478                         mutex_enter(&g->lock);
 479                         if (g->run_status == NLM_ST_STOPPING)
 480                                 break;
 481 
 482                 }
 483 
 484                 DTRACE_PROBE(gc__end);
 485         }
 486 
 487         DTRACE_PROBE1(gc__exit, struct nlm_globals *, g);
 488 
 489         /* Let others know that GC has died */
 490         g->nlm_gc_thread = NULL;
 491         mutex_exit(&g->lock);
 492 
 493         cv_broadcast(&g->nlm_gc_finish_cv);
 494         zthread_exit();
 495 }
 496 
 497 /*
 498  * Thread reclaim locks/shares acquired by the client side
 499  * on the given server represented by hostp.
 500  */
 501 static void
 502 nlm_reclaimer(struct nlm_host *hostp)
 503 {
 504         struct nlm_globals *g;
 505 
 506         mutex_enter(&hostp->nh_lock);
 507         hostp->nh_reclaimer = curthread;
 508         mutex_exit(&hostp->nh_lock);
 509 
 510         g = zone_getspecific(nlm_zone_key, curzone);
 511         nlm_reclaim_client(g, hostp);
 512 
 513         mutex_enter(&hostp->nh_lock);
 514         hostp->nh_flags &= ~NLM_NH_RECLAIM;
 515         hostp->nh_reclaimer = NULL;
 516         cv_broadcast(&hostp->nh_recl_cv);
 517         mutex_exit(&hostp->nh_lock);
 518 
 519         /*
 520          * Host was explicitly referenced before
 521          * nlm_reclaim() was called, release it
 522          * here.
 523          */
 524         nlm_host_release(g, hostp);
 525         zthread_exit();
 526 }
 527 
 528 /*
 529  * Copy a struct netobj.  (see xdr.h)
 530  */
 531 void
 532 nlm_copy_netobj(struct netobj *dst, struct netobj *src)
 533 {
 534         dst->n_len = src->n_len;
 535         dst->n_bytes = kmem_alloc(src->n_len, KM_SLEEP);
 536         bcopy(src->n_bytes, dst->n_bytes, src->n_len);
 537 }
 538 
 539 /*
 540  * An NLM specificw replacement for clnt_call().
 541  * nlm_clnt_call() is used by all RPC functions generated
 542  * from nlm_prot.x specification. The function is aware
 543  * about some pitfalls of NLM RPC procedures and has a logic
 544  * that handles them properly.
 545  */
 546 enum clnt_stat
 547 nlm_clnt_call(CLIENT *clnt, rpcproc_t procnum, xdrproc_t xdr_args,
 548     caddr_t argsp, xdrproc_t xdr_result, caddr_t resultp, struct timeval wait)
 549 {
 550         k_sigset_t oldmask;
 551         enum clnt_stat stat;
 552         bool_t sig_blocked = FALSE;
 553 
 554         /*
 555          * If NLM RPC procnum is one of the NLM _RES procedures
 556          * that are used to reply to asynchronous NLM RPC
 557          * (MSG calls), explicitly set RPC timeout to zero.
 558          * Client doesn't send a reply to RES procedures, so
 559          * we don't need to wait anything.
 560          *
 561          * NOTE: we ignore NLM4_*_RES procnums because they are
 562          * equal to NLM_*_RES numbers.
 563          */
 564         if (procnum >= NLM_TEST_RES && procnum <= NLM_GRANTED_RES)
 565                 wait = nlm_rpctv_zero;
 566 
 567         /*
 568          * We need to block signals in case of NLM_CANCEL RPC
 569          * in order to prevent interruption of network RPC
 570          * calls.
 571          */
 572         if (procnum == NLM_CANCEL) {
 573                 k_sigset_t newmask;
 574 
 575                 sigfillset(&newmask);
 576                 sigreplace(&newmask, &oldmask);
 577                 sig_blocked = TRUE;
 578         }
 579 
 580         stat = clnt_call(clnt, procnum, xdr_args,
 581             argsp, xdr_result, resultp, wait);
 582 
 583         /*
 584          * Restore signal mask back if signals were blocked
 585          */
 586         if (sig_blocked)
 587                 sigreplace(&oldmask, (k_sigset_t *)NULL);
 588 
 589         return (stat);
 590 }
 591 
 592 /*
 593  * Suspend NLM client/server in the given zone.
 594  *
 595  * During suspend operation we mark those hosts
 596  * that have any locks with NLM_NH_SUSPEND flags,
 597  * so that they can be checked later, when resume
 598  * operation occurs.
 599  */
 600 static void
 601 nlm_suspend_zone(struct nlm_globals *g)
 602 {
 603         struct nlm_host *hostp;
 604         struct nlm_host_list all_hosts;
 605 
 606         /*
 607          * Note that while we're doing suspend, GC thread is active
 608          * and it can destroy some hosts while we're walking through
 609          * the hosts tree. To prevent that and make suspend logic
 610          * a bit more simple we put all hosts to local "all_hosts"
 611          * list and increment reference counter of each host.
 612          * This guaranties that no hosts will be released while
 613          * we're doing suspend.
 614          * NOTE: reference of each host must be dropped during
 615          * resume operation.
 616          */
 617         TAILQ_INIT(&all_hosts);
 618         mutex_enter(&g->lock);
 619         for (hostp = avl_first(&g->nlm_hosts_tree); hostp != NULL;
 620             hostp = AVL_NEXT(&g->nlm_hosts_tree, hostp)) {
 621                 /*
 622                  * If host is idle, remove it from idle list and
 623                  * clear idle flag. That is done to prevent GC
 624                  * from touching this host.
 625                  */
 626                 if (hostp->nh_flags & NLM_NH_INIDLE) {
 627                         TAILQ_REMOVE(&g->nlm_idle_hosts, hostp, nh_link);
 628                         hostp->nh_flags &= ~NLM_NH_INIDLE;
 629                 }
 630 
 631                 hostp->nh_refs++;
 632                 TAILQ_INSERT_TAIL(&all_hosts, hostp, nh_link);
 633         }
 634 
 635         /*
 636          * Now we can walk through all hosts on the system
 637          * with zone globals lock released. The fact the
 638          * we have taken a reference to each host guaranties
 639          * that no hosts can be destroyed during that process.
 640          */
 641         mutex_exit(&g->lock);
 642         while ((hostp = TAILQ_FIRST(&all_hosts)) != NULL) {
 643                 mutex_enter(&hostp->nh_lock);
 644                 if (nlm_host_has_locks(hostp))
 645                         hostp->nh_flags |= NLM_NH_SUSPEND;
 646 
 647                 mutex_exit(&hostp->nh_lock);
 648                 TAILQ_REMOVE(&all_hosts, hostp, nh_link);
 649         }
 650 }
 651 
 652 /*
 653  * Resume NLM hosts for the given zone.
 654  *
 655  * nlm_resume_zone() is called after hosts were suspended
 656  * (see nlm_suspend_zone) and its main purpose to check
 657  * whether remote locks owned by hosts are still in consistent
 658  * state. If they aren't, resume function tries to reclaim
 659  * locks (for client side hosts) and clean locks (for
 660  * server side hosts).
 661  */
 662 static void
 663 nlm_resume_zone(struct nlm_globals *g)
 664 {
 665         struct nlm_host *hostp, *h_next;
 666 
 667         mutex_enter(&g->lock);
 668         hostp = avl_first(&g->nlm_hosts_tree);
 669 
 670         /*
 671          * In nlm_suspend_zone() the reference counter of each
 672          * host was incremented, so we can safely iterate through
 673          * all hosts without worrying that any host we touch will
 674          * be removed at the moment.
 675          */
 676         while (hostp != NULL) {
 677                 struct nlm_nsm nsm;
 678                 enum clnt_stat stat;
 679                 int32_t sm_state;
 680                 int error;
 681                 bool_t resume_failed = FALSE;
 682 
 683                 h_next = AVL_NEXT(&g->nlm_hosts_tree, hostp);
 684                 mutex_exit(&g->lock);
 685 
 686                 DTRACE_PROBE1(resume__host, struct nlm_host *, hostp);
 687 
 688                 /*
 689                  * Suspend operation marked that the host doesn't
 690                  * have any locks. Skip it.
 691                  */
 692                 if (!(hostp->nh_flags & NLM_NH_SUSPEND))
 693                         goto cycle_end;
 694 
 695                 error = nlm_nsm_init(&nsm, &hostp->nh_knc, &hostp->nh_addr);
 696                 if (error != 0) {
 697                         NLM_ERR("Resume: Failed to contact to NSM of host %s "
 698                             "[error=%d]\n", hostp->nh_name, error);
 699                         resume_failed = TRUE;
 700                         goto cycle_end;
 701                 }
 702 
 703                 stat = nlm_nsm_stat(&nsm, &sm_state);
 704                 if (stat != RPC_SUCCESS) {
 705                         NLM_ERR("Resume: Failed to call SM_STAT operation for "
 706                             "host %s [stat=%d]\n", hostp->nh_name, stat);
 707                         resume_failed = TRUE;
 708                         nlm_nsm_fini(&nsm);
 709                         goto cycle_end;
 710                 }
 711 
 712                 if (sm_state != hostp->nh_state) {
 713                         /*
 714                          * Current SM state of the host isn't equal
 715                          * to the one host had when it was suspended.
 716                          * Probably it was rebooted. Try to reclaim
 717                          * locks if the host has any on its client side.
 718                          * Also try to clean up its server side locks
 719                          * (if the host has any).
 720                          */
 721                         nlm_host_notify_client(hostp, sm_state);
 722                         nlm_host_notify_server(hostp, sm_state);
 723                 }
 724 
 725                 nlm_nsm_fini(&nsm);
 726 
 727 cycle_end:
 728                 if (resume_failed) {
 729                         /*
 730                          * Resume failed for the given host.
 731                          * Just clean up all resources it owns.
 732                          */
 733                         nlm_host_notify_server(hostp, 0);
 734                         nlm_client_cancel_all(g, hostp);
 735                 }
 736 
 737                 hostp->nh_flags &= ~NLM_NH_SUSPEND;
 738                 nlm_host_release(g, hostp);
 739                 hostp = h_next;
 740                 mutex_enter(&g->lock);
 741         }
 742 
 743         mutex_exit(&g->lock);
 744 }
 745 
 746 /*
 747  * NLM functions responsible for operations on NSM handle.
 748  */
 749 
 750 /*
 751  * Initialize knetconfig that is used for communication
 752  * with local statd via loopback interface.
 753  */
 754 static int
 755 nlm_init_local_knc(struct knetconfig *knc)
 756 {
 757         int error;
 758         vnode_t *vp;
 759 
 760         bzero(knc, sizeof (*knc));
 761         error = lookupname("/dev/tcp", UIO_SYSSPACE,
 762             FOLLOW, NULLVPP, &vp);
 763         if (error != 0)
 764                 return (error);
 765 
 766         knc->knc_semantics = NC_TPI_COTS;
 767         knc->knc_protofmly = NC_INET;
 768         knc->knc_proto = NC_TCP;
 769         knc->knc_rdev = vp->v_rdev;
 770         VN_RELE(vp);
 771 
 772 
 773         return (0);
 774 }
 775 
 776 /*
 777  * Initialize NSM handle that will be used to talk
 778  * to local statd via loopback interface.
 779  */
 780 static int
 781 nlm_nsm_init_local(struct nlm_nsm *nsm)
 782 {
 783         int error;
 784         struct knetconfig knc;
 785         struct sockaddr_in sin;
 786         struct netbuf nb;
 787 
 788         error = nlm_init_local_knc(&knc);
 789         if (error != 0)
 790                 return (error);
 791 
 792         bzero(&sin, sizeof (sin));
 793         sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
 794         sin.sin_family = AF_INET;
 795 
 796         nb.buf = (char *)&sin;
 797         nb.len = nb.maxlen = sizeof (sin);
 798 
 799         return (nlm_nsm_init(nsm, &knc, &nb));
 800 }
 801 
 802 /*
 803  * Initialize NSM handle used for talking to statd
 804  */
 805 static int
 806 nlm_nsm_init(struct nlm_nsm *nsm, struct knetconfig *knc, struct netbuf *nb)
 807 {
 808         enum clnt_stat stat;
 809         int error, retries;
 810 
 811         bzero(nsm, sizeof (*nsm));
 812         nsm->ns_knc = *knc;
 813         nlm_copy_netbuf(&nsm->ns_addr, nb);
 814 
 815         /*
 816          * Try several times to get the port of statd service,
 817          * If rpcbind_getaddr returns  RPC_PROGNOTREGISTERED,
 818          * retry an attempt, but wait for NLM_NSM_RPCBIND_TIMEOUT
 819          * seconds berofore.
 820          */
 821         for (retries = 0; retries < NLM_NSM_RPCBIND_RETRIES; retries++) {
 822                 stat = rpcbind_getaddr(&nsm->ns_knc, SM_PROG,
 823                     SM_VERS, &nsm->ns_addr);
 824                 if (stat != RPC_SUCCESS) {
 825                         if (stat == RPC_PROGNOTREGISTERED) {
 826                                 delay(SEC_TO_TICK(NLM_NSM_RPCBIND_TIMEOUT));
 827                                 continue;
 828                         }
 829                 }
 830 
 831                 break;
 832         }
 833 
 834         if (stat != RPC_SUCCESS) {
 835                 DTRACE_PROBE2(rpcbind__error, enum clnt_stat, stat,
 836                     int, retries);
 837                 error = ENOENT;
 838                 goto error;
 839         }
 840 
 841         /*
 842          * Create an RPC handle that'll be used for communication with local
 843          * statd using the status monitor protocol.
 844          */
 845         error = clnt_tli_kcreate(&nsm->ns_knc, &nsm->ns_addr, SM_PROG, SM_VERS,
 846             0, NLM_RPC_RETRIES, zone_kcred(), &nsm->ns_handle);
 847         if (error != 0)
 848                 goto error;
 849 
 850         /*
 851          * Create an RPC handle that'll be used for communication with the
 852          * local statd using the address registration protocol.
 853          */
 854         error = clnt_tli_kcreate(&nsm->ns_knc, &nsm->ns_addr, NSM_ADDR_PROGRAM,
 855             NSM_ADDR_V1, 0, NLM_RPC_RETRIES, zone_kcred(),
 856             &nsm->ns_addr_handle);
 857         if (error != 0)
 858                 goto error;
 859 
 860         sema_init(&nsm->ns_sem, 1, NULL, SEMA_DEFAULT, NULL);
 861         return (0);
 862 
 863 error:
 864         kmem_free(nsm->ns_addr.buf, nsm->ns_addr.maxlen);
 865         if (nsm->ns_handle) {
 866                 ASSERT(nsm->ns_handle->cl_auth != NULL);
 867                 auth_destroy(nsm->ns_handle->cl_auth);
 868                 CLNT_DESTROY(nsm->ns_handle);
 869         }
 870 
 871         return (error);
 872 }
 873 
 874 static void
 875 nlm_nsm_fini(struct nlm_nsm *nsm)
 876 {
 877         kmem_free(nsm->ns_addr.buf, nsm->ns_addr.maxlen);
 878         if (nsm->ns_addr_handle->cl_auth != NULL)
 879                 auth_destroy(nsm->ns_addr_handle->cl_auth);
 880         CLNT_DESTROY(nsm->ns_addr_handle);
 881         nsm->ns_addr_handle = NULL;
 882         if (nsm->ns_handle->cl_auth != NULL)
 883                 auth_destroy(nsm->ns_handle->cl_auth);
 884         CLNT_DESTROY(nsm->ns_handle);
 885         nsm->ns_handle = NULL;
 886         sema_destroy(&nsm->ns_sem);
 887 }
 888 
 889 static enum clnt_stat
 890 nlm_nsm_simu_crash(struct nlm_nsm *nsm)
 891 {
 892         enum clnt_stat stat;
 893 
 894         sema_p(&nsm->ns_sem);
 895         nlm_nsm_clnt_init(nsm->ns_handle, nsm);
 896         stat = sm_simu_crash_1(NULL, NULL, nsm->ns_handle);
 897         sema_v(&nsm->ns_sem);
 898 
 899         return (stat);
 900 }
 901 
 902 static enum clnt_stat
 903 nlm_nsm_stat(struct nlm_nsm *nsm, int32_t *out_stat)
 904 {
 905         struct sm_name args;
 906         struct sm_stat_res res;
 907         enum clnt_stat stat;
 908 
 909         args.mon_name = uts_nodename();
 910         bzero(&res, sizeof (res));
 911 
 912         sema_p(&nsm->ns_sem);
 913         nlm_nsm_clnt_init(nsm->ns_handle, nsm);
 914         stat = sm_stat_1(&args, &res, nsm->ns_handle);
 915         sema_v(&nsm->ns_sem);
 916 
 917         if (stat == RPC_SUCCESS)
 918                 *out_stat = res.state;
 919 
 920         return (stat);
 921 }
 922 
 923 static enum clnt_stat
 924 nlm_nsm_mon(struct nlm_nsm *nsm, char *hostname, uint16_t priv)
 925 {
 926         struct mon args;
 927         struct sm_stat_res res;
 928         enum clnt_stat stat;
 929 
 930         bzero(&args, sizeof (args));
 931         bzero(&res, sizeof (res));
 932 
 933         args.mon_id.mon_name = hostname;
 934         args.mon_id.my_id.my_name = uts_nodename();
 935         args.mon_id.my_id.my_prog = NLM_PROG;
 936         args.mon_id.my_id.my_vers = NLM_SM;
 937         args.mon_id.my_id.my_proc = NLM_SM_NOTIFY1;
 938         bcopy(&priv, args.priv, sizeof (priv));
 939 
 940         sema_p(&nsm->ns_sem);
 941         nlm_nsm_clnt_init(nsm->ns_handle, nsm);
 942         stat = sm_mon_1(&args, &res, nsm->ns_handle);
 943         sema_v(&nsm->ns_sem);
 944 
 945         return (stat);
 946 }
 947 
 948 static enum clnt_stat
 949 nlm_nsm_unmon(struct nlm_nsm *nsm, char *hostname)
 950 {
 951         struct mon_id args;
 952         struct sm_stat res;
 953         enum clnt_stat stat;
 954 
 955         bzero(&args, sizeof (args));
 956         bzero(&res, sizeof (res));
 957 
 958         args.mon_name = hostname;
 959         args.my_id.my_name = uts_nodename();
 960         args.my_id.my_prog = NLM_PROG;
 961         args.my_id.my_vers = NLM_SM;
 962         args.my_id.my_proc = NLM_SM_NOTIFY1;
 963 
 964         sema_p(&nsm->ns_sem);
 965         nlm_nsm_clnt_init(nsm->ns_handle, nsm);
 966         stat = sm_unmon_1(&args, &res, nsm->ns_handle);
 967         sema_v(&nsm->ns_sem);
 968 
 969         return (stat);
 970 }
 971 
 972 static enum clnt_stat
 973 nlm_nsmaddr_reg(struct nlm_nsm *nsm, char *name, int family, netobj *address)
 974 {
 975         struct reg1args args = { 0 };
 976         struct reg1res res = { 0 };
 977         enum clnt_stat stat;
 978 
 979         args.family = family;
 980         args.name = name;
 981         args.address = *address;
 982 
 983         sema_p(&nsm->ns_sem);
 984         nlm_nsm_clnt_init(nsm->ns_addr_handle, nsm);
 985         stat = nsmaddrproc1_reg_1(&args, &res, nsm->ns_addr_handle);
 986         sema_v(&nsm->ns_sem);
 987 
 988         return (stat);
 989 }
 990 
 991 /*
 992  * Get NLM vhold object corresponding to vnode "vp".
 993  * If no such object was found, create a new one.
 994  *
 995  * The purpose of this function is to associate vhold
 996  * object with given vnode, so that:
 997  * 1) vnode is hold (VN_HOLD) while vhold object is alive.
 998  * 2) host has a track of all vnodes it touched by lock
 999  *    or share operations. These vnodes are accessible
1000  *    via collection of vhold objects.
1001  */
1002 struct nlm_vhold *
1003 nlm_vhold_get(struct nlm_host *hostp, vnode_t *vp)
1004 {
1005         struct nlm_vhold *nvp, *new_nvp = NULL;
1006 
1007         mutex_enter(&hostp->nh_lock);
1008         nvp = nlm_vhold_find_locked(hostp, vp);
1009         if (nvp != NULL)
1010                 goto out;
1011 
1012         /* nlm_vhold wasn't found, then create a new one */
1013         mutex_exit(&hostp->nh_lock);
1014         new_nvp = kmem_cache_alloc(nlm_vhold_cache, KM_SLEEP);
1015 
1016         /*
1017          * Check if another thread has already
1018          * created the same nlm_vhold.
1019          */
1020         mutex_enter(&hostp->nh_lock);
1021         nvp = nlm_vhold_find_locked(hostp, vp);
1022         if (nvp == NULL) {
1023                 nvp = new_nvp;
1024                 new_nvp = NULL;
1025 
1026                 TAILQ_INIT(&nvp->nv_slreqs);
1027                 nvp->nv_vp = vp;
1028                 nvp->nv_refcnt = 1;
1029                 VN_HOLD(nvp->nv_vp);
1030 
1031                 VERIFY(mod_hash_insert(hostp->nh_vholds_by_vp,
1032                     (mod_hash_key_t)vp, (mod_hash_val_t)nvp) == 0);
1033                 TAILQ_INSERT_TAIL(&hostp->nh_vholds_list, nvp, nv_link);
1034         }
1035 
1036 out:
1037         mutex_exit(&hostp->nh_lock);
1038         if (new_nvp != NULL)
1039                 kmem_cache_free(nlm_vhold_cache, new_nvp);
1040 
1041         return (nvp);
1042 }
1043 
1044 /*
1045  * Drop a reference to vhold object nvp.
1046  */
1047 void
1048 nlm_vhold_release(struct nlm_host *hostp, struct nlm_vhold *nvp)
1049 {
1050         if (nvp == NULL)
1051                 return;
1052 
1053         mutex_enter(&hostp->nh_lock);
1054         ASSERT(nvp->nv_refcnt > 0);
1055         nvp->nv_refcnt--;
1056 
1057         /*
1058          * If these conditions are met, the vhold is obviously unused and we
1059          * will destroy it.  In a case either v_filocks and/or v_shrlocks is
1060          * non-NULL the vhold might still be unused by the host, but it is
1061          * expensive to check that.  We defer such check until the host is
1062          * idle.  The expensive check is done in the NLM garbage collector.
1063          */
1064         if (nvp->nv_refcnt == 0 &&
1065             nvp->nv_vp->v_filocks == NULL &&
1066             nvp->nv_vp->v_shrlocks == NULL) {
1067                 nlm_vhold_destroy(hostp, nvp);
1068         }
1069 
1070         mutex_exit(&hostp->nh_lock);
1071 }
1072 
1073 /*
1074  * Clean all locks and share reservations on the
1075  * given vhold object that were acquired by the
1076  * given sysid
1077  */
1078 static void
1079 nlm_vhold_clean(struct nlm_vhold *nvp, int sysid)
1080 {
1081         cleanlocks(nvp->nv_vp, IGN_PID, sysid);
1082         cleanshares_by_sysid(nvp->nv_vp, sysid);
1083 }
1084 
/*
 * Destroy an unused vhold: unlink it from the host's hash table
 * and list, drop the vnode hold it kept, and return the object
 * to the kmem cache.  The caller must hold nh_lock and guarantee
 * the vhold has no references and no pending sleeping requests.
 */
static void
nlm_vhold_destroy(struct nlm_host *hostp, struct nlm_vhold *nvp)
{
	ASSERT(MUTEX_HELD(&hostp->nh_lock));

	ASSERT(nvp->nv_refcnt == 0);
	ASSERT(TAILQ_EMPTY(&nvp->nv_slreqs));

	VERIFY(mod_hash_remove(hostp->nh_vholds_by_vp,
	    (mod_hash_key_t)nvp->nv_vp,
	    (mod_hash_val_t)&nvp) == 0);

	TAILQ_REMOVE(&hostp->nh_vholds_list, nvp, nv_link);
	VN_RELE(nvp->nv_vp);
	/* Cleared so the cache destructor can verify full teardown. */
	nvp->nv_vp = NULL;

	kmem_cache_free(nlm_vhold_cache, nvp);
}
1103 
1104 /*
1105  * Return TRUE if the given vhold is busy.
1106  * Vhold object is considered to be "busy" when
1107  * all the following conditions hold:
1108  * 1) No one uses it at the moment;
1109  * 2) It hasn't any locks;
1110  * 3) It hasn't any share reservations;
1111  */
1112 static bool_t
1113 nlm_vhold_busy(struct nlm_host *hostp, struct nlm_vhold *nvp)
1114 {
1115         vnode_t *vp;
1116         int sysid;
1117 
1118         ASSERT(MUTEX_HELD(&hostp->nh_lock));
1119 
1120         if (nvp->nv_refcnt > 0)
1121                 return (TRUE);
1122 
1123         vp = nvp->nv_vp;
1124         sysid = hostp->nh_sysid;
1125         if (flk_has_remote_locks_for_sysid(vp, sysid) ||
1126             shr_has_remote_shares(vp, sysid))
1127                 return (TRUE);
1128 
1129         return (FALSE);
1130 }
1131 
1132 /* ARGSUSED */
1133 static int
1134 nlm_vhold_ctor(void *datap, void *cdrarg, int kmflags)
1135 {
1136         struct nlm_vhold *nvp = (struct nlm_vhold *)datap;
1137 
1138         bzero(nvp, sizeof (*nvp));
1139         return (0);
1140 }
1141 
/*
 * kmem cache destructor for vhold objects: verify the object
 * was fully torn down (see nlm_vhold_destroy()) before it is
 * returned to the cache.
 */
/* ARGSUSED */
static void
nlm_vhold_dtor(void *datap, void *cdrarg)
{
	struct nlm_vhold *nvp = (struct nlm_vhold *)datap;

	ASSERT(nvp->nv_refcnt == 0);
	ASSERT(TAILQ_EMPTY(&nvp->nv_slreqs));
	ASSERT(nvp->nv_vp == NULL);
}
1152 
/*
 * Look up the vhold associated with vnode "vp" on the given host.
 * Returns it with its reference count bumped, or NULL if the host
 * has no vhold for that vnode.  The caller must hold nh_lock and
 * must eventually drop the reference via nlm_vhold_release().
 */
struct nlm_vhold *
nlm_vhold_find_locked(struct nlm_host *hostp, const vnode_t *vp)
{
	struct nlm_vhold *nvp = NULL;

	ASSERT(MUTEX_HELD(&hostp->nh_lock));
	/* On lookup failure nvp simply stays NULL. */
	(void) mod_hash_find(hostp->nh_vholds_by_vp,
	    (mod_hash_key_t)vp,
	    (mod_hash_val_t)&nvp);

	if (nvp != NULL)
		nvp->nv_refcnt++;

	return (nvp);
}
1168 
1169 /*
1170  * NLM host functions
1171  */
1172 static void
1173 nlm_copy_netbuf(struct netbuf *dst, struct netbuf *src)
1174 {
1175         ASSERT(src->len <= src->maxlen);
1176 
1177         dst->maxlen = src->maxlen;
1178         dst->len = src->len;
1179         dst->buf = kmem_zalloc(src->maxlen, KM_SLEEP);
1180         bcopy(src->buf, dst->buf, src->len);
1181 }
1182 
1183 /* ARGSUSED */
1184 static int
1185 nlm_host_ctor(void *datap, void *cdrarg, int kmflags)
1186 {
1187         struct nlm_host *hostp = (struct nlm_host *)datap;
1188 
1189         bzero(hostp, sizeof (*hostp));
1190         return (0);
1191 }
1192 
1193 /* ARGSUSED */
1194 static void
1195 nlm_host_dtor(void *datap, void *cdrarg)
1196 {
1197         struct nlm_host *hostp = (struct nlm_host *)datap;
1198         ASSERT(hostp->nh_refs == 0);
1199 }
1200 
/*
 * Remove the host from all global lookup structures: the
 * <netid, address> AVL tree, the per-sysid hash table, and the
 * idle hosts list.  Called (with g->lock held) when an idle,
 * unreferenced host is being retired.
 */
static void
nlm_host_unregister(struct nlm_globals *g, struct nlm_host *hostp)
{
	ASSERT(hostp->nh_refs == 0);
	ASSERT(hostp->nh_flags & NLM_NH_INIDLE);

	avl_remove(&g->nlm_hosts_tree, hostp);
	VERIFY(mod_hash_remove(g->nlm_hosts_hash,
	    (mod_hash_key_t)(uintptr_t)hostp->nh_sysid,
	    (mod_hash_val_t)&hostp) == 0);
	TAILQ_REMOVE(&g->nlm_idle_hosts, hostp, nh_link);
	hostp->nh_flags &= ~NLM_NH_INIDLE;
}
1214 
1215 /*
1216  * Free resources used by a host. This is called after the reference
1217  * count has reached zero so it doesn't need to worry about locks.
1218  */
1219 static void
1220 nlm_host_destroy(struct nlm_host *hostp)
1221 {
1222         ASSERT(hostp->nh_name != NULL);
1223         ASSERT(hostp->nh_netid != NULL);
1224         ASSERT(TAILQ_EMPTY(&hostp->nh_vholds_list));
1225 
1226         strfree(hostp->nh_name);
1227         strfree(hostp->nh_netid);
1228         kmem_free(hostp->nh_addr.buf, hostp->nh_addr.maxlen);
1229 
1230         if (hostp->nh_sysid != LM_NOSYSID)
1231                 nlm_sysid_free(hostp->nh_sysid);
1232 
1233         nlm_rpc_cache_destroy(hostp);
1234 
1235         ASSERT(TAILQ_EMPTY(&hostp->nh_vholds_list));
1236         mod_hash_destroy_ptrhash(hostp->nh_vholds_by_vp);
1237 
1238         mutex_destroy(&hostp->nh_lock);
1239         cv_destroy(&hostp->nh_rpcb_cv);
1240         cv_destroy(&hostp->nh_recl_cv);
1241 
1242         kmem_cache_free(nlm_hosts_cache, hostp);
1243 }
1244 
1245 /*
1246  * Cleanup SERVER-side state after a client restarts,
1247  * or becomes unresponsive, or whatever.
1248  *
1249  * We unlock any active locks owned by the host.
1250  * When rpc.lockd is shutting down,
1251  * this function is called with newstate set to zero
1252  * which allows us to cancel any pending async locks
1253  * and clear the locking state.
1254  *
1255  * When "state" is 0, we don't update host's state,
1256  * but cleanup all remote locks on the host.
1257  * It's useful to call this function for resources
1258  * cleanup.
1259  */
1260 void
1261 nlm_host_notify_server(struct nlm_host *hostp, int32_t state)
1262 {
1263         struct nlm_vhold *nvp;
1264         struct nlm_slreq *slr;
1265         struct nlm_slreq_list slreqs2free;
1266 
1267         TAILQ_INIT(&slreqs2free);
1268         mutex_enter(&hostp->nh_lock);
1269         if (state != 0)
1270                 hostp->nh_state = state;
1271 
1272         TAILQ_FOREACH(nvp, &hostp->nh_vholds_list, nv_link) {
1273 
1274                 /* cleanup sleeping requests at first */
1275                 while ((slr = TAILQ_FIRST(&nvp->nv_slreqs)) != NULL) {
1276                         TAILQ_REMOVE(&nvp->nv_slreqs, slr, nsr_link);
1277 
1278                         /*
1279                          * Instead of freeing cancelled sleeping request
1280                          * here, we add it to the linked list created
1281                          * on the stack in order to do all frees outside
1282                          * the critical section.
1283                          */
1284                         TAILQ_INSERT_TAIL(&slreqs2free, slr, nsr_link);
1285                 }
1286 
1287                 nvp->nv_refcnt++;
1288                 mutex_exit(&hostp->nh_lock);
1289 
1290                 nlm_vhold_clean(nvp, hostp->nh_sysid);
1291 
1292                 mutex_enter(&hostp->nh_lock);
1293                 nvp->nv_refcnt--;
1294         }
1295 
1296         mutex_exit(&hostp->nh_lock);
1297         while ((slr = TAILQ_FIRST(&slreqs2free)) != NULL) {
1298                 TAILQ_REMOVE(&slreqs2free, slr, nsr_link);
1299                 kmem_free(slr, sizeof (*slr));
1300         }
1301 }
1302 
1303 /*
1304  * Cleanup CLIENT-side state after a server restarts,
1305  * or becomes unresponsive, or whatever.
1306  *
1307  * This is called by the local NFS statd when we receive a
1308  * host state change notification.  (also nlm_svc_stopping)
1309  *
1310  * Deal with a server restart.  If we are stopping the
1311  * NLM service, we'll have newstate == 0, and will just
1312  * cancel all our client-side lock requests.  Otherwise,
1313  * start the "recovery" process to reclaim any locks
1314  * we hold on this server.
1315  */
1316 void
1317 nlm_host_notify_client(struct nlm_host *hostp, int32_t state)
1318 {
1319         mutex_enter(&hostp->nh_lock);
1320         hostp->nh_state = state;
1321         if (hostp->nh_flags & NLM_NH_RECLAIM) {
1322                 /*
1323                  * Either host's state is up to date or
1324                  * host is already in recovery.
1325                  */
1326                 mutex_exit(&hostp->nh_lock);
1327                 return;
1328         }
1329 
1330         hostp->nh_flags |= NLM_NH_RECLAIM;
1331 
1332         /*
1333          * Host will be released by the recovery thread,
1334          * thus we need to increment refcount.
1335          */
1336         hostp->nh_refs++;
1337         mutex_exit(&hostp->nh_lock);
1338 
1339         (void) zthread_create(NULL, 0, nlm_reclaimer,
1340             hostp, 0, minclsyspri);
1341 }
1342 
1343 /*
1344  * The function is called when NLM client detects that
1345  * server has entered in grace period and client needs
1346  * to wait until reclamation process (if any) does
1347  * its job.
1348  */
1349 int
1350 nlm_host_wait_grace(struct nlm_host *hostp)
1351 {
1352         struct nlm_globals *g;
1353         int error = 0;
1354 
1355         g = zone_getspecific(nlm_zone_key, curzone);
1356         mutex_enter(&hostp->nh_lock);
1357 
1358         do {
1359                 int rc;
1360 
1361                 rc = cv_timedwait_sig(&hostp->nh_recl_cv,
1362                     &hostp->nh_lock, ddi_get_lbolt() +
1363                     SEC_TO_TICK(g->retrans_tmo));
1364 
1365                 if (rc == 0) {
1366                         error = EINTR;
1367                         break;
1368                 }
1369         } while (hostp->nh_flags & NLM_NH_RECLAIM);
1370 
1371         mutex_exit(&hostp->nh_lock);
1372         return (error);
1373 }
1374 
1375 /*
1376  * Create a new NLM host.
1377  *
1378  * NOTE: The in-kernel RPC (kRPC) subsystem uses TLI/XTI,
1379  * which needs both a knetconfig and an address when creating
1380  * endpoints. Thus host object stores both knetconfig and
1381  * netid.
1382  */
1383 static struct nlm_host *
1384 nlm_host_create(char *name, const char *netid,
1385     struct knetconfig *knc, struct netbuf *naddr)
1386 {
1387         struct nlm_host *host;
1388 
1389         host = kmem_cache_alloc(nlm_hosts_cache, KM_SLEEP);
1390 
1391         mutex_init(&host->nh_lock, NULL, MUTEX_DEFAULT, NULL);
1392         cv_init(&host->nh_rpcb_cv, NULL, CV_DEFAULT, NULL);
1393         cv_init(&host->nh_recl_cv, NULL, CV_DEFAULT, NULL);
1394 
1395         host->nh_sysid = LM_NOSYSID;
1396         host->nh_refs = 1;
1397         host->nh_name = strdup(name);
1398         host->nh_netid = strdup(netid);
1399         host->nh_knc = *knc;
1400         nlm_copy_netbuf(&host->nh_addr, naddr);
1401 
1402         host->nh_state = 0;
1403         host->nh_rpcb_state = NRPCB_NEED_UPDATE;
1404         host->nh_flags = 0;
1405 
1406         host->nh_vholds_by_vp = mod_hash_create_ptrhash("nlm vholds hash",
1407             32, mod_hash_null_valdtor, sizeof (vnode_t));
1408 
1409         TAILQ_INIT(&host->nh_vholds_list);
1410         TAILQ_INIT(&host->nh_rpchc);
1411 
1412         return (host);
1413 }
1414 
1415 /*
1416  * Cancel all client side sleeping locks owned by given host.
1417  */
1418 void
1419 nlm_host_cancel_slocks(struct nlm_globals *g, struct nlm_host *hostp)
1420 {
1421         struct nlm_slock *nslp;
1422 
1423         mutex_enter(&g->lock);
1424         TAILQ_FOREACH(nslp, &g->nlm_slocks, nsl_link) {
1425                 if (nslp->nsl_host == hostp) {
1426                         nslp->nsl_state = NLM_SL_CANCELLED;
1427                         cv_broadcast(&nslp->nsl_cond);
1428                 }
1429         }
1430 
1431         mutex_exit(&g->lock);
1432 }
1433 
1434 /*
1435  * Garbage collect stale vhold objects.
1436  *
1437  * In other words check whether vnodes that are
1438  * held by vhold objects still have any locks
1439  * or shares or still in use. If they aren't,
1440  * just destroy them.
1441  */
1442 static void
1443 nlm_host_gc_vholds(struct nlm_host *hostp)
1444 {
1445         struct nlm_vhold *nvp;
1446 
1447         ASSERT(MUTEX_HELD(&hostp->nh_lock));
1448 
1449         nvp = TAILQ_FIRST(&hostp->nh_vholds_list);
1450         while (nvp != NULL) {
1451                 struct nlm_vhold *nvp_tmp;
1452 
1453                 if (nlm_vhold_busy(hostp, nvp)) {
1454                         nvp = TAILQ_NEXT(nvp, nv_link);
1455                         continue;
1456                 }
1457 
1458                 nvp_tmp = TAILQ_NEXT(nvp, nv_link);
1459                 nlm_vhold_destroy(hostp, nvp);
1460                 nvp = nvp_tmp;
1461         }
1462 }
1463 
1464 /*
1465  * Check whether the given host has any
1466  * server side locks or share reservations.
1467  */
1468 static bool_t
1469 nlm_host_has_srv_locks(struct nlm_host *hostp)
1470 {
1471         /*
1472          * It's cheap and simple: if server has
1473          * any locks/shares there must be vhold
1474          * object storing the affected vnode.
1475          *
1476          * NOTE: We don't need to check sleeping
1477          * locks on the server side, because if
1478          * server side sleeping lock is alive,
1479          * there must be a vhold object corresponding
1480          * to target vnode.
1481          */
1482         ASSERT(MUTEX_HELD(&hostp->nh_lock));
1483         if (!TAILQ_EMPTY(&hostp->nh_vholds_list))
1484                 return (TRUE);
1485 
1486         return (FALSE);
1487 }
1488 
1489 /*
1490  * Check whether the given host has any client side
1491  * locks or share reservations.
1492  */
1493 static bool_t
1494 nlm_host_has_cli_locks(struct nlm_host *hostp)
1495 {
1496         ASSERT(MUTEX_HELD(&hostp->nh_lock));
1497 
1498         /*
1499          * XXX: It's not the way I'd like to do the check,
1500          * because flk_sysid_has_locks() can be very
1501          * expensive by design. Unfortunatelly it iterates
1502          * through all locks on the system, doesn't matter
1503          * were they made on remote system via NLM or
1504          * on local system via reclock. To understand the
1505          * problem, consider that there're dozens of thousands
1506          * of locks that are made on some ZFS dataset. And there's
1507          * another dataset shared by NFS where NLM client had locks
1508          * some time ago, but doesn't have them now.
1509          * In this case flk_sysid_has_locks() will iterate
1510          * thrught dozens of thousands locks until it returns us
1511          * FALSE.
1512          * Oh, I hope that in shiny future somebody will make
1513          * local lock manager (os/flock.c) better, so that
1514          * it'd be more friedly to remote locks and
1515          * flk_sysid_has_locks() wouldn't be so expensive.
1516          */
1517         if (flk_sysid_has_locks(hostp->nh_sysid |
1518             LM_SYSID_CLIENT, FLK_QUERY_ACTIVE))
1519                 return (TRUE);
1520 
1521         /*
1522          * Check whether host has any share reservations
1523          * registered on the client side.
1524          */
1525         if (hostp->nh_shrlist != NULL)
1526                 return (TRUE);
1527 
1528         return (FALSE);
1529 }
1530 
1531 /*
1532  * Determine whether the given host owns any
1533  * locks or share reservations.
1534  */
1535 static bool_t
1536 nlm_host_has_locks(struct nlm_host *hostp)
1537 {
1538         if (nlm_host_has_srv_locks(hostp))
1539                 return (TRUE);
1540 
1541         return (nlm_host_has_cli_locks(hostp));
1542 }
1543 
1544 /*
1545  * This function compares only addresses of two netbufs
1546  * that belong to NC_TCP[6] or NC_UDP[6] protofamily.
1547  * Port part of netbuf is ignored.
1548  *
1549  * Return values:
1550  *  -1: nb1's address is "smaller" than nb2's
1551  *   0: addresses are equal
1552  *   1: nb1's address is "greater" than nb2's
1553  */
1554 static int
1555 nlm_netbuf_addrs_cmp(struct netbuf *nb1, struct netbuf *nb2)
1556 {
1557         union nlm_addr {
1558                 struct sockaddr sa;
1559                 struct sockaddr_in sin;
1560                 struct sockaddr_in6 sin6;
1561         } *na1, *na2;
1562         int res;
1563 
1564         /* LINTED E_BAD_PTR_CAST_ALIGN */
1565         na1 = (union nlm_addr *)nb1->buf;
1566         /* LINTED E_BAD_PTR_CAST_ALIGN */
1567         na2 = (union nlm_addr *)nb2->buf;
1568 
1569         if (na1->sa.sa_family < na2->sa.sa_family)
1570                 return (-1);
1571         if (na1->sa.sa_family > na2->sa.sa_family)
1572                 return (1);
1573 
1574         switch (na1->sa.sa_family) {
1575         case AF_INET:
1576                 res = memcmp(&na1->sin.sin_addr, &na2->sin.sin_addr,
1577                     sizeof (na1->sin.sin_addr));
1578                 break;
1579         case AF_INET6:
1580                 res = memcmp(&na1->sin6.sin6_addr, &na2->sin6.sin6_addr,
1581                     sizeof (na1->sin6.sin6_addr));
1582                 break;
1583         default:
1584                 VERIFY(0);
1585                 return (0);
1586         }
1587 
1588         return (SIGN(res));
1589 }
1590 
1591 /*
1592  * Compare two nlm hosts.
1593  * Return values:
1594  * -1: host1 is "smaller" than host2
1595  *  0: host1 is equal to host2
1596  *  1: host1 is "greater" than host2
1597  */
1598 int
1599 nlm_host_cmp(const void *p1, const void *p2)
1600 {
1601         struct nlm_host *h1 = (struct nlm_host *)p1;
1602         struct nlm_host *h2 = (struct nlm_host *)p2;
1603         int res;
1604 
1605         res = strcmp(h1->nh_netid, h2->nh_netid);
1606         if (res != 0)
1607                 return (SIGN(res));
1608 
1609         res = nlm_netbuf_addrs_cmp(&h1->nh_addr, &h2->nh_addr);
1610         return (res);
1611 }
1612 
1613 /*
1614  * Find the host specified by...  (see below)
1615  * If found, increment the ref count.
1616  */
1617 static struct nlm_host *
1618 nlm_host_find_locked(struct nlm_globals *g, const char *netid,
1619     struct netbuf *naddr, avl_index_t *wherep)
1620 {
1621         struct nlm_host *hostp, key;
1622         avl_index_t pos;
1623 
1624         ASSERT(MUTEX_HELD(&g->lock));
1625 
1626         key.nh_netid = (char *)netid;
1627         key.nh_addr.buf = naddr->buf;
1628         key.nh_addr.len = naddr->len;
1629         key.nh_addr.maxlen = naddr->maxlen;
1630 
1631         hostp = avl_find(&g->nlm_hosts_tree, &key, &pos);
1632 
1633         if (hostp != NULL) {
1634                 /*
1635                  * Host is inuse now. Remove it from idle
1636                  * hosts list if needed.
1637                  */
1638                 if (hostp->nh_flags & NLM_NH_INIDLE) {
1639                         TAILQ_REMOVE(&g->nlm_idle_hosts, hostp, nh_link);
1640                         hostp->nh_flags &= ~NLM_NH_INIDLE;
1641                 }
1642 
1643                 hostp->nh_refs++;
1644         }
1645         if (wherep != NULL)
1646                 *wherep = pos;
1647 
1648         return (hostp);
1649 }
1650 
1651 /*
1652  * Find NLM host for the given name and address.
1653  */
1654 struct nlm_host *
1655 nlm_host_find(struct nlm_globals *g, const char *netid,
1656     struct netbuf *addr)
1657 {
1658         struct nlm_host *hostp = NULL;
1659 
1660         mutex_enter(&g->lock);
1661         if (g->run_status != NLM_ST_UP)
1662                 goto out;
1663 
1664         hostp = nlm_host_find_locked(g, netid, addr, NULL);
1665 
1666 out:
1667         mutex_exit(&g->lock);
1668         return (hostp);
1669 }
1670 
1671 
1672 /*
1673  * Find or create an NLM host for the given name and address.
1674  *
1675  * The remote host is determined by all of: name, netid, address.
1676  * Note that the netid is whatever nlm_svc_add_ep() gave to
1677  * svc_tli_kcreate() for the service binding.  If any of these
1678  * are different, allocate a new host (new sysid).
1679  */
1680 struct nlm_host *
1681 nlm_host_findcreate(struct nlm_globals *g, char *name,
1682     const char *netid, struct netbuf *addr)
1683 {
1684         int err;
1685         struct nlm_host *host, *newhost = NULL;
1686         struct knetconfig knc;
1687         avl_index_t where;
1688 
1689         mutex_enter(&g->lock);
1690         if (g->run_status != NLM_ST_UP) {
1691                 mutex_exit(&g->lock);
1692                 return (NULL);
1693         }
1694 
1695         host = nlm_host_find_locked(g, netid, addr, NULL);
1696         mutex_exit(&g->lock);
1697         if (host != NULL)
1698                 return (host);
1699 
1700         err = nlm_knc_from_netid(netid, &knc);
1701         if (err != 0)
1702                 return (NULL);
1703         /*
1704          * Do allocations (etc.) outside of mutex,
1705          * and then check again before inserting.
1706          */
1707         newhost = nlm_host_create(name, netid, &knc, addr);
1708         newhost->nh_sysid = nlm_sysid_alloc();
1709         if (newhost->nh_sysid == LM_NOSYSID)
1710                 goto out;
1711 
1712         mutex_enter(&g->lock);
1713         host = nlm_host_find_locked(g, netid, addr, &where);
1714         if (host == NULL) {
1715                 host = newhost;
1716                 newhost = NULL;
1717 
1718                 /*
1719                  * Insert host to the hosts AVL tree that is
1720                  * used to lookup by <netid, address> pair.
1721                  */
1722                 avl_insert(&g->nlm_hosts_tree, host, where);
1723 
1724                 /*
1725                  * Insert host to the hosts hash table that is
1726                  * used to lookup host by sysid.
1727                  */
1728                 VERIFY(mod_hash_insert(g->nlm_hosts_hash,
1729                     (mod_hash_key_t)(uintptr_t)host->nh_sysid,
1730                     (mod_hash_val_t)host) == 0);
1731         }
1732 
1733         mutex_exit(&g->lock);
1734 
1735 out:
1736         if (newhost != NULL) {
1737                 /*
1738                  * We do not need the preallocated nlm_host
1739                  * so decrement the reference counter
1740                  * and destroy it.
1741                  */
1742                 newhost->nh_refs--;
1743                 nlm_host_destroy(newhost);
1744         }
1745 
1746         return (host);
1747 }
1748 
1749 /*
1750  * Find the NLM host that matches the value of 'sysid'.
1751  * If found, return it with a new ref,
1752  * else return NULL.
1753  */
1754 struct nlm_host *
1755 nlm_host_find_by_sysid(struct nlm_globals *g, sysid_t sysid)
1756 {
1757         struct nlm_host *hostp = NULL;
1758 
1759         mutex_enter(&g->lock);
1760         if (g->run_status != NLM_ST_UP)
1761                 goto out;
1762 
1763         (void) mod_hash_find(g->nlm_hosts_hash,
1764             (mod_hash_key_t)(uintptr_t)sysid,
1765             (mod_hash_val_t)&hostp);
1766 
1767         if (hostp == NULL)
1768                 goto out;
1769 
1770         /*
1771          * Host is inuse now. Remove it
1772          * from idle hosts list if needed.
1773          */
1774         if (hostp->nh_flags & NLM_NH_INIDLE) {
1775                 TAILQ_REMOVE(&g->nlm_idle_hosts, hostp, nh_link);
1776                 hostp->nh_flags &= ~NLM_NH_INIDLE;
1777         }
1778 
1779         hostp->nh_refs++;
1780 
1781 out:
1782         mutex_exit(&g->lock);
1783         return (hostp);
1784 }
1785 
1786 /*
1787  * Release the given host.
1788  * I.e. drop a reference that was taken earlier by one of
1789  * the following functions: nlm_host_findcreate(), nlm_host_find(),
1790  * nlm_host_find_by_sysid().
1791  *
1792  * When the very last reference is dropped, host is moved to
1793  * so-called "idle state". All hosts that are in idle state
1794  * have an idle timeout. If timeout is expired, GC thread
1795  * checks whether hosts have any locks and if they heven't
1796  * any, it removes them.
1797  * NOTE: only unused hosts can be in idle state.
1798  */
1799 static void
1800 nlm_host_release_locked(struct nlm_globals *g, struct nlm_host *hostp)
1801 {
1802         if (hostp == NULL)
1803                 return;
1804 
1805         ASSERT(MUTEX_HELD(&g->lock));
1806         ASSERT(hostp->nh_refs > 0);
1807 
1808         hostp->nh_refs--;
1809         if (hostp->nh_refs != 0)
1810                 return;
1811 
1812         /*
1813          * The very last reference to the host was dropped,
1814          * thus host is unused now. Set its idle timeout
1815          * and move it to the idle hosts LRU list.
1816          */
1817         hostp->nh_idle_timeout = ddi_get_lbolt() +
1818             SEC_TO_TICK(g->cn_idle_tmo);
1819 
1820         ASSERT((hostp->nh_flags & NLM_NH_INIDLE) == 0);
1821         TAILQ_INSERT_TAIL(&g->nlm_idle_hosts, hostp, nh_link);
1822         hostp->nh_flags |= NLM_NH_INIDLE;
1823 }
1824 
1825 void
1826 nlm_host_release(struct nlm_globals *g, struct nlm_host *hostp)
1827 {
1828         if (hostp == NULL)
1829                 return;
1830 
1831         mutex_enter(&g->lock);
1832         nlm_host_release_locked(g, hostp);
1833         mutex_exit(&g->lock);
1834 }
1835 
1836 /*
1837  * Unregister this NLM host (NFS client) with the local statd
1838  * due to idleness (no locks held for a while).
1839  */
1840 void
1841 nlm_host_unmonitor(struct nlm_globals *g, struct nlm_host *host)
1842 {
1843         enum clnt_stat stat;
1844 
1845         VERIFY(host->nh_refs == 0);
1846         if (!(host->nh_flags & NLM_NH_MONITORED))
1847                 return;
1848 
1849         host->nh_flags &= ~NLM_NH_MONITORED;
1850         stat = nlm_nsm_unmon(&g->nlm_nsm, host->nh_name);
1851         if (stat != RPC_SUCCESS) {
1852                 NLM_WARN("NLM: Failed to contact statd, stat=%d\n", stat);
1853                 return;
1854         }
1855 }
1856 
1857 /*
1858  * Ask the local NFS statd to begin monitoring this host.
1859  * It will call us back when that host restarts, using the
1860  * prog,vers,proc specified below, i.e. NLM_SM_NOTIFY1,
1861  * which is handled in nlm_do_notify1().
1862  */
1863 void
1864 nlm_host_monitor(struct nlm_globals *g, struct nlm_host *host, int state)
1865 {
1866         int family;
1867         netobj obj;
1868         enum clnt_stat stat;
1869 
1870         if (state != 0 && host->nh_state == 0) {
1871                 /*
1872                  * This is the first time we have seen an NSM state
1873                  * Value for this host. We record it here to help
1874                  * detect host reboots.
1875                  */
1876                 host->nh_state = state;
1877         }
1878 
1879         mutex_enter(&host->nh_lock);
1880         if (host->nh_flags & NLM_NH_MONITORED) {
1881                 mutex_exit(&host->nh_lock);
1882                 return;
1883         }
1884 
1885         host->nh_flags |= NLM_NH_MONITORED;
1886         mutex_exit(&host->nh_lock);
1887 
1888         /*
1889          * Before we begin monitoring the host register the network address
1890          * associated with this hostname.
1891          */
1892         nlm_netbuf_to_netobj(&host->nh_addr, &family, &obj);
1893         stat = nlm_nsmaddr_reg(&g->nlm_nsm, host->nh_name, family, &obj);
1894         if (stat != RPC_SUCCESS) {
1895                 NLM_WARN("Failed to register address, stat=%d\n", stat);
1896                 mutex_enter(&g->lock);
1897                 host->nh_flags &= ~NLM_NH_MONITORED;
1898                 mutex_exit(&g->lock);
1899 
1900                 return;
1901         }
1902 
1903         /*
1904          * Tell statd how to call us with status updates for
1905          * this host. Updates arrive via nlm_do_notify1().
1906          *
1907          * We put our assigned system ID value in the priv field to
1908          * make it simpler to find the host if we are notified of a
1909          * host restart.
1910          */
1911         stat = nlm_nsm_mon(&g->nlm_nsm, host->nh_name, host->nh_sysid);
1912         if (stat != RPC_SUCCESS) {
1913                 NLM_WARN("Failed to contact local NSM, stat=%d\n", stat);
1914                 mutex_enter(&g->lock);
1915                 host->nh_flags &= ~NLM_NH_MONITORED;
1916                 mutex_exit(&g->lock);
1917 
1918                 return;
1919         }
1920 }
1921 
1922 int
1923 nlm_host_get_state(struct nlm_host *hostp)
1924 {
1925 
1926         return (hostp->nh_state);
1927 }
1928 
1929 /*
1930  * NLM client/server sleeping locks
1931  */
1932 
1933 /*
1934  * Register client side sleeping lock.
1935  *
1936  * Our client code calls this to keep information
1937  * about sleeping lock somewhere. When it receives
1938  * grant callback from server or when it just
1939  * needs to remove all sleeping locks from vnode,
1940  * it uses this information for remove/apply lock
1941  * properly.
1942  */
1943 struct nlm_slock *
1944 nlm_slock_register(
1945         struct nlm_globals *g,
1946         struct nlm_host *host,
1947         struct nlm4_lock *lock,
1948         struct vnode *vp)
1949 {
1950         struct nlm_slock *nslp;
1951 
1952         nslp = kmem_zalloc(sizeof (*nslp), KM_SLEEP);
1953         cv_init(&nslp->nsl_cond, NULL, CV_DEFAULT, NULL);
1954         nslp->nsl_lock = *lock;
1955         nlm_copy_netobj(&nslp->nsl_fh, &nslp->nsl_lock.fh);
1956         nslp->nsl_state = NLM_SL_BLOCKED;
1957         nslp->nsl_host = host;
1958         nslp->nsl_vp = vp;
1959 
1960         mutex_enter(&g->lock);
1961         TAILQ_INSERT_TAIL(&g->nlm_slocks, nslp, nsl_link);
1962         mutex_exit(&g->lock);
1963 
1964         return (nslp);
1965 }
1966 
1967 /*
1968  * Remove this lock from the wait list and destroy it.
1969  */
1970 void
1971 nlm_slock_unregister(struct nlm_globals *g, struct nlm_slock *nslp)
1972 {
1973         mutex_enter(&g->lock);
1974         TAILQ_REMOVE(&g->nlm_slocks, nslp, nsl_link);
1975         mutex_exit(&g->lock);
1976 
1977         kmem_free(nslp->nsl_fh.n_bytes, nslp->nsl_fh.n_len);
1978         cv_destroy(&nslp->nsl_cond);
1979         kmem_free(nslp, sizeof (*nslp));
1980 }
1981 
1982 /*
1983  * Wait for a granted callback or cancellation event
1984  * for a sleeping lock.
1985  *
1986  * If a signal interrupted the wait or if the lock
1987  * was cancelled, return EINTR - the caller must arrange to send
1988  * a cancellation to the server.
1989  *
1990  * If timeout occurred, return ETIMEDOUT - the caller must
1991  * resend the lock request to the server.
1992  *
1993  * On success return 0.
1994  */
1995 int
1996 nlm_slock_wait(struct nlm_globals *g,
1997     struct nlm_slock *nslp, uint_t timeo_secs)
1998 {
1999         clock_t timeo_ticks;
2000         int cv_res, error;
2001 
2002         /*
2003          * If the granted message arrived before we got here,
2004          * nslp->nsl_state will be NLM_SL_GRANTED - in that case don't sleep.
2005          */
2006         cv_res = 1;
2007         timeo_ticks = ddi_get_lbolt() + SEC_TO_TICK(timeo_secs);
2008 
2009         mutex_enter(&g->lock);
2010         while (nslp->nsl_state == NLM_SL_BLOCKED && cv_res > 0) {
2011                 cv_res = cv_timedwait_sig(&nslp->nsl_cond,
2012                     &g->lock, timeo_ticks);
2013         }
2014 
2015         /*
2016          * No matter why we wake up, if the lock was
2017          * cancelled, let the function caller to know
2018          * about it by returning EINTR.
2019          */
2020         if (nslp->nsl_state == NLM_SL_CANCELLED) {
2021                 error = EINTR;
2022                 goto out;
2023         }
2024 
2025         if (cv_res <= 0) {
2026                 /* We were woken up either by timeout or by interrupt */
2027                 error = (cv_res < 0) ? ETIMEDOUT : EINTR;
2028 
2029                 /*
2030                  * The granted message may arrive after the
2031                  * interrupt/timeout but before we manage to lock the
2032                  * mutex. Detect this by examining nslp.
2033                  */
2034                 if (nslp->nsl_state == NLM_SL_GRANTED)
2035                         error = 0;
2036         } else { /* Awaken via cv_signal()/cv_broadcast() or didn't block */
2037                 error = 0;
2038                 VERIFY(nslp->nsl_state == NLM_SL_GRANTED);
2039         }
2040 
2041 out:
2042         mutex_exit(&g->lock);
2043         return (error);
2044 }
2045 
2046 /*
2047  * Mark client side sleeping lock as granted
2048  * and wake up a process blocked on the lock.
2049  * Called from server side NLM_GRANT handler.
2050  *
2051  * If sleeping lock is found return 0, otherwise
2052  * return ENOENT.
2053  */
2054 int
2055 nlm_slock_grant(struct nlm_globals *g,
2056     struct nlm_host *hostp, struct nlm4_lock *alock)
2057 {
2058         struct nlm_slock *nslp;
2059         int error = ENOENT;
2060 
2061         mutex_enter(&g->lock);
2062         TAILQ_FOREACH(nslp, &g->nlm_slocks, nsl_link) {
2063                 if ((nslp->nsl_state != NLM_SL_BLOCKED) ||
2064                     (nslp->nsl_host != hostp))
2065                         continue;
2066 
2067                 if (alock->svid              == nslp->nsl_lock.svid &&
2068                     alock->l_offset  == nslp->nsl_lock.l_offset &&
2069                     alock->l_len     == nslp->nsl_lock.l_len &&
2070                     alock->fh.n_len  == nslp->nsl_lock.fh.n_len &&
2071                     bcmp(alock->fh.n_bytes, nslp->nsl_lock.fh.n_bytes,
2072                     nslp->nsl_lock.fh.n_len) == 0) {
2073                         nslp->nsl_state = NLM_SL_GRANTED;
2074                         cv_broadcast(&nslp->nsl_cond);
2075                         error = 0;
2076                         break;
2077                 }
2078         }
2079 
2080         mutex_exit(&g->lock);
2081         return (error);
2082 }
2083 
2084 /*
2085  * Register sleeping lock request corresponding to
2086  * flp on the given vhold object.
2087  * On success function returns 0, otherwise (if
2088  * lock request with the same flp is already
2089  * registered) function returns EEXIST.
2090  */
2091 int
2092 nlm_slreq_register(struct nlm_host *hostp, struct nlm_vhold *nvp,
2093     struct flock64 *flp)
2094 {
2095         struct nlm_slreq *slr, *new_slr = NULL;
2096         int ret = EEXIST;
2097 
2098         mutex_enter(&hostp->nh_lock);
2099         slr = nlm_slreq_find_locked(hostp, nvp, flp);
2100         if (slr != NULL)
2101                 goto out;
2102 
2103         mutex_exit(&hostp->nh_lock);
2104         new_slr = kmem_zalloc(sizeof (*slr), KM_SLEEP);
2105         bcopy(flp, &new_slr->nsr_fl, sizeof (*flp));
2106 
2107         mutex_enter(&hostp->nh_lock);
2108         slr = nlm_slreq_find_locked(hostp, nvp, flp);
2109         if (slr == NULL) {
2110                 slr = new_slr;
2111                 new_slr = NULL;
2112                 ret = 0;
2113 
2114                 TAILQ_INSERT_TAIL(&nvp->nv_slreqs, slr, nsr_link);
2115         }
2116 
2117 out:
2118         mutex_exit(&hostp->nh_lock);
2119         if (new_slr != NULL)
2120                 kmem_free(new_slr, sizeof (*new_slr));
2121 
2122         return (ret);
2123 }
2124 
2125 /*
2126  * Unregister sleeping lock request corresponding
2127  * to flp from the given vhold object.
2128  * On success function returns 0, otherwise (if
2129  * lock request corresponding to flp isn't found
2130  * on the given vhold) function returns ENOENT.
2131  */
2132 int
2133 nlm_slreq_unregister(struct nlm_host *hostp, struct nlm_vhold *nvp,
2134     struct flock64 *flp)
2135 {
2136         struct nlm_slreq *slr;
2137 
2138         mutex_enter(&hostp->nh_lock);
2139         slr = nlm_slreq_find_locked(hostp, nvp, flp);
2140         if (slr == NULL) {
2141                 mutex_exit(&hostp->nh_lock);
2142                 return (ENOENT);
2143         }
2144 
2145         TAILQ_REMOVE(&nvp->nv_slreqs, slr, nsr_link);
2146         mutex_exit(&hostp->nh_lock);
2147 
2148         kmem_free(slr, sizeof (*slr));
2149         return (0);
2150 }
2151 
2152 /*
2153  * Find sleeping lock request on the given vhold object by flp.
2154  */
2155 struct nlm_slreq *
2156 nlm_slreq_find_locked(struct nlm_host *hostp, struct nlm_vhold *nvp,
2157     struct flock64 *flp)
2158 {
2159         struct nlm_slreq *slr = NULL;
2160 
2161         ASSERT(MUTEX_HELD(&hostp->nh_lock));
2162         TAILQ_FOREACH(slr, &nvp->nv_slreqs, nsr_link) {
2163                 if (slr->nsr_fl.l_start              == flp->l_start      &&
2164                     slr->nsr_fl.l_len                == flp->l_len        &&
2165                     slr->nsr_fl.l_pid                == flp->l_pid        &&
2166                     slr->nsr_fl.l_type               == flp->l_type)
2167                         break;
2168         }
2169 
2170         return (slr);
2171 }
2172 
2173 /*
2174  * NLM tracks active share reservations made on the client side.
2175  * It needs to have a track of share reservations for two purposes
2176  * 1) to determine if nlm_host is busy (if it has active locks and/or
2177  *    share reservations, it is)
2178  * 2) to recover active share reservations when NLM server reports
2179  *    that it has rebooted.
2180  *
 * Unfortunately the illumos local share reservations manager (see os/share.c)
 * doesn't have the ability to look up all reservations on the system
 * by sysid (like the local lock manager does) or to get all reservations
 * by sysid. It tracks reservations per vnode and can only get/look them
 * up on a particular vnode. That's not what NLM needs, hence NLM keeps
 * its own (admittedly ugly) share reservation tracking scheme.
 */
2188 
2189 void
2190 nlm_shres_track(struct nlm_host *hostp, vnode_t *vp, struct shrlock *shrp)
2191 {
2192         struct nlm_shres *nsp, *nsp_new;
2193 
2194         /*
2195          * NFS code must fill the s_owner, so that
2196          * s_own_len is never 0.
2197          */
2198         ASSERT(shrp->s_own_len > 0);
2199         nsp_new = nlm_shres_create_item(shrp, vp);
2200 
2201         mutex_enter(&hostp->nh_lock);
2202         for (nsp = hostp->nh_shrlist; nsp != NULL; nsp = nsp->ns_next)
2203                 if (nsp->ns_vp == vp && nlm_shres_equal(shrp, nsp->ns_shr))
2204                         break;
2205 
2206         if (nsp != NULL) {
2207                 /*
2208                  * Found a duplicate. Do nothing.
2209                  */
2210 
2211                 goto out;
2212         }
2213 
2214         nsp = nsp_new;
2215         nsp_new = NULL;
2216         nsp->ns_next = hostp->nh_shrlist;
2217         hostp->nh_shrlist = nsp;
2218 
2219 out:
2220         mutex_exit(&hostp->nh_lock);
2221         if (nsp_new != NULL)
2222                 nlm_shres_destroy_item(nsp_new);
2223 }
2224 
2225 void
2226 nlm_shres_untrack(struct nlm_host *hostp, vnode_t *vp, struct shrlock *shrp)
2227 {
2228         struct nlm_shres *nsp, *nsp_prev = NULL;
2229 
2230         mutex_enter(&hostp->nh_lock);
2231         nsp = hostp->nh_shrlist;
2232         while (nsp != NULL) {
2233                 if (nsp->ns_vp == vp && nlm_shres_equal(shrp, nsp->ns_shr)) {
2234                         struct nlm_shres *nsp_del;
2235 
2236                         nsp_del = nsp;
2237                         nsp = nsp->ns_next;
2238                         if (nsp_prev != NULL)
2239                                 nsp_prev->ns_next = nsp;
2240                         else
2241                                 hostp->nh_shrlist = nsp;
2242 
2243                         nlm_shres_destroy_item(nsp_del);
2244                         continue;
2245                 }
2246 
2247                 nsp_prev = nsp;
2248                 nsp = nsp->ns_next;
2249         }
2250 
2251         mutex_exit(&hostp->nh_lock);
2252 }
2253 
2254 /*
2255  * Get a _copy_ of the list of all active share reservations
2256  * made by the given host.
2257  * NOTE: the list function returns _must_ be released using
2258  *       nlm_free_shrlist().
2259  */
2260 struct nlm_shres *
2261 nlm_get_active_shres(struct nlm_host *hostp)
2262 {
2263         struct nlm_shres *nsp, *nslist = NULL;
2264 
2265         mutex_enter(&hostp->nh_lock);
2266         for (nsp = hostp->nh_shrlist; nsp != NULL; nsp = nsp->ns_next) {
2267                 struct nlm_shres *nsp_new;
2268 
2269                 nsp_new = nlm_shres_create_item(nsp->ns_shr, nsp->ns_vp);
2270                 nsp_new->ns_next = nslist;
2271                 nslist = nsp_new;
2272         }
2273 
2274         mutex_exit(&hostp->nh_lock);
2275         return (nslist);
2276 }
2277 
2278 /*
2279  * Free memory allocated for the active share reservations
2280  * list created by nlm_get_active_shres() function.
2281  */
2282 void
2283 nlm_free_shrlist(struct nlm_shres *nslist)
2284 {
2285         struct nlm_shres *nsp;
2286 
2287         while (nslist != NULL) {
2288                 nsp =  nslist;
2289                 nslist = nslist->ns_next;
2290 
2291                 nlm_shres_destroy_item(nsp);
2292         }
2293 }
2294 
2295 static bool_t
2296 nlm_shres_equal(struct shrlock *shrp1, struct shrlock *shrp2)
2297 {
2298         if (shrp1->s_sysid   == shrp2->s_sysid    &&
2299             shrp1->s_pid     == shrp2->s_pid              &&
2300             shrp1->s_own_len == shrp2->s_own_len  &&
2301             bcmp(shrp1->s_owner, shrp2->s_owner,
2302             shrp1->s_own_len) == 0)
2303                 return (TRUE);
2304 
2305         return (FALSE);
2306 }
2307 
2308 static struct nlm_shres *
2309 nlm_shres_create_item(struct shrlock *shrp, vnode_t *vp)
2310 {
2311         struct nlm_shres *nsp;
2312 
2313         nsp = kmem_alloc(sizeof (*nsp), KM_SLEEP);
2314         nsp->ns_shr = kmem_alloc(sizeof (*shrp), KM_SLEEP);
2315         bcopy(shrp, nsp->ns_shr, sizeof (*shrp));
2316         nsp->ns_shr->s_owner = kmem_alloc(shrp->s_own_len, KM_SLEEP);
2317         bcopy(shrp->s_owner, nsp->ns_shr->s_owner, shrp->s_own_len);
2318         nsp->ns_vp = vp;
2319 
2320         return (nsp);
2321 }
2322 
2323 static void
2324 nlm_shres_destroy_item(struct nlm_shres *nsp)
2325 {
2326         kmem_free(nsp->ns_shr->s_owner,
2327             nsp->ns_shr->s_own_len);
2328         kmem_free(nsp->ns_shr, sizeof (struct shrlock));
2329         kmem_free(nsp, sizeof (*nsp));
2330 }
2331 
2332 /*
2333  * Called by klmmod.c when lockd adds a network endpoint
2334  * on which we should begin RPC services.
2335  */
2336 int
2337 nlm_svc_add_ep(struct file *fp, const char *netid, struct knetconfig *knc)
2338 {
2339         SVCMASTERXPRT *xprt = NULL;
2340         int error;
2341 
2342         error = svc_tli_kcreate(fp, 0, (char *)netid, NULL, &xprt,
2343             &nlm_sct, NULL, NLM_SVCPOOL_ID, FALSE);
2344         if (error != 0)
2345                 return (error);
2346 
2347         (void) nlm_knc_to_netid(knc);
2348         return (0);
2349 }
2350 
2351 /*
2352  * Start NLM service.
2353  */
2354 int
2355 nlm_svc_starting(struct nlm_globals *g, struct file *fp,
2356     const char *netid, struct knetconfig *knc)
2357 {
2358         int error;
2359         enum clnt_stat stat;
2360 
2361         VERIFY(g->run_status == NLM_ST_STARTING);
2362         VERIFY(g->nlm_gc_thread == NULL);
2363 
2364         error = nlm_nsm_init_local(&g->nlm_nsm);
2365         if (error != 0) {
2366                 NLM_ERR("Failed to initialize NSM handler "
2367                     "(error=%d)\n", error);
2368                 g->run_status = NLM_ST_DOWN;
2369                 return (error);
2370         }
2371 
2372         error = EIO;
2373 
2374         /*
2375          * Create an NLM garbage collector thread that will
2376          * clean up stale vholds and hosts objects.
2377          */
2378         g->nlm_gc_thread = zthread_create(NULL, 0, nlm_gc,
2379             g, 0, minclsyspri);
2380 
2381         /*
2382          * Send SIMU_CRASH to local statd to report that
2383          * NLM started, so that statd can report other hosts
2384          * about NLM state change.
2385          */
2386 
2387         stat = nlm_nsm_simu_crash(&g->nlm_nsm);
2388         if (stat != RPC_SUCCESS) {
2389                 NLM_ERR("Failed to connect to local statd "
2390                     "(rpcerr=%d)\n", stat);
2391                 goto shutdown_lm;
2392         }
2393 
2394         stat = nlm_nsm_stat(&g->nlm_nsm, &g->nsm_state);
2395         if (stat != RPC_SUCCESS) {
2396                 NLM_ERR("Failed to get the status of local statd "
2397                     "(rpcerr=%d)\n", stat);
2398                 goto shutdown_lm;
2399         }
2400 
2401         g->grace_threshold = ddi_get_lbolt() +
2402             SEC_TO_TICK(g->grace_period);
2403 
2404         /* Register endpoint used for communications with local NLM */
2405         error = nlm_svc_add_ep(fp, netid, knc);
2406         if (error != 0)
2407                 goto shutdown_lm;
2408 
2409         (void) svc_pool_control(NLM_SVCPOOL_ID,
2410             SVCPSET_SHUTDOWN_PROC, (void *)nlm_pool_shutdown);
2411         g->run_status = NLM_ST_UP;
2412         return (0);
2413 
2414 shutdown_lm:
2415         mutex_enter(&g->lock);
2416         g->run_status = NLM_ST_STOPPING;
2417         mutex_exit(&g->lock);
2418 
2419         nlm_svc_stopping(g);
2420         return (error);
2421 }
2422 
2423 /*
2424  * Called when the server pool is destroyed, so that
2425  * all transports are closed and no any server threads
2426  * exist.
2427  *
2428  * Just call lm_shutdown() to shut NLM down properly.
2429  */
2430 static void
2431 nlm_pool_shutdown(void)
2432 {
2433         (void) lm_shutdown();
2434 }
2435 
2436 /*
2437  * Stop NLM service, cleanup all resources
2438  * NLM owns at the moment.
2439  *
2440  * NOTE: NFS code can call NLM while it's
2441  * stopping or even if it's shut down. Any attempt
2442  * to lock file either on client or on the server
2443  * will fail if NLM isn't in NLM_ST_UP state.
2444  */
2445 void
2446 nlm_svc_stopping(struct nlm_globals *g)
2447 {
2448         mutex_enter(&g->lock);
2449         ASSERT(g->run_status == NLM_ST_STOPPING);
2450 
2451         /*
2452          * Ask NLM GC thread to exit and wait until it dies.
2453          */
2454         cv_signal(&g->nlm_gc_sched_cv);
2455         while (g->nlm_gc_thread != NULL)
2456                 cv_wait(&g->nlm_gc_finish_cv, &g->lock);
2457 
2458         mutex_exit(&g->lock);
2459 
2460         /*
2461          * Cleanup locks owned by NLM hosts.
2462          * NOTE: New hosts won't be created while
2463          * NLM is stopping.
2464          */
2465         while (!avl_is_empty(&g->nlm_hosts_tree)) {
2466                 struct nlm_host *hostp;
2467                 int busy_hosts = 0;
2468 
2469                 /*
2470                  * Iterate through all NLM hosts in the system
2471                  * and drop the locks they own by force.
2472                  */
2473                 hostp = avl_first(&g->nlm_hosts_tree);
2474                 while (hostp != NULL) {
2475                         /* Cleanup all client and server side locks */
2476                         nlm_client_cancel_all(g, hostp);
2477                         nlm_host_notify_server(hostp, 0);
2478 
2479                         mutex_enter(&hostp->nh_lock);
2480                         nlm_host_gc_vholds(hostp);
2481                         if (hostp->nh_refs > 0 || nlm_host_has_locks(hostp)) {
2482                                 /*
2483                                  * Oh, it seems the host is still busy, let
2484                                  * it some time to release and go to the
2485                                  * next one.
2486                                  */
2487 
2488                                 mutex_exit(&hostp->nh_lock);
2489                                 hostp = AVL_NEXT(&g->nlm_hosts_tree, hostp);
2490                                 busy_hosts++;
2491                                 continue;
2492                         }
2493 
2494                         mutex_exit(&hostp->nh_lock);
2495                         hostp = AVL_NEXT(&g->nlm_hosts_tree, hostp);
2496                 }
2497 
2498                 /*
2499                  * All hosts go to nlm_idle_hosts list after
2500                  * all locks they own are cleaned up and last refereces
2501                  * were dropped. Just destroy all hosts in nlm_idle_hosts
2502                  * list, they can not be removed from there while we're
2503                  * in stopping state.
2504                  */
2505                 while ((hostp = TAILQ_FIRST(&g->nlm_idle_hosts)) != NULL) {
2506                         nlm_host_unregister(g, hostp);
2507                         nlm_host_destroy(hostp);
2508                 }
2509 
2510                 if (busy_hosts > 0) {
2511                         /*
2512                          * There're some hosts that weren't cleaned
2513                          * up. Probably they're in resource cleanup
2514                          * process. Give them some time to do drop
2515                          * references.
2516                          */
2517                         delay(MSEC_TO_TICK(500));
2518                 }
2519         }
2520 
2521         ASSERT(TAILQ_EMPTY(&g->nlm_slocks));
2522 
2523         nlm_nsm_fini(&g->nlm_nsm);
2524         g->lockd_pid = 0;
2525         g->run_status = NLM_ST_DOWN;
2526 }
2527 
2528 /*
2529  * Returns TRUE if the given vnode has
2530  * any active or sleeping locks.
2531  */
2532 int
2533 nlm_vp_active(const vnode_t *vp)
2534 {
2535         struct nlm_globals *g;
2536         struct nlm_host *hostp;
2537         struct nlm_vhold *nvp;
2538         int active = 0;
2539 
2540         g = zone_getspecific(nlm_zone_key, curzone);
2541 
2542         /*
2543          * Server side NLM has locks on the given vnode
2544          * if there exist a vhold object that holds
2545          * the given vnode "vp" in one of NLM hosts.
2546          */
2547         mutex_enter(&g->lock);
2548         hostp = avl_first(&g->nlm_hosts_tree);
2549         while (hostp != NULL) {
2550                 mutex_enter(&hostp->nh_lock);
2551                 nvp = nlm_vhold_find_locked(hostp, vp);
2552                 mutex_exit(&hostp->nh_lock);
2553                 if (nvp != NULL) {
2554                         active = 1;
2555                         break;
2556                 }
2557 
2558                 hostp = AVL_NEXT(&g->nlm_hosts_tree, hostp);
2559         }
2560 
2561         mutex_exit(&g->lock);
2562         return (active);
2563 }
2564 
2565 /*
2566  * Called right before NFS export is going to
2567  * dissapear. The function finds all vnodes
2568  * belonging to the given export and cleans
2569  * all remote locks and share reservations
2570  * on them.
2571  */
2572 void
2573 nlm_unexport(struct exportinfo *exi)
2574 {
2575         struct nlm_globals *g;
2576         struct nlm_host *hostp;
2577 
2578         /* This may be called on behalf of global-zone doing shutdown. */
2579         ASSERT(exi->exi_zone == curzone || curzone == global_zone);
2580         g = zone_getspecific(nlm_zone_key, exi->exi_zone);
2581         if (g == NULL) {
2582                 /* Did zone cleanup get here already? */
2583                 return;
2584         }
2585 
2586         mutex_enter(&g->lock);
2587         hostp = avl_first(&g->nlm_hosts_tree);
2588         while (hostp != NULL) {
2589                 struct nlm_vhold *nvp;
2590 
2591                 if (hostp->nh_flags & NLM_NH_INIDLE) {
2592                         TAILQ_REMOVE(&g->nlm_idle_hosts, hostp, nh_link);
2593                         hostp->nh_flags &= ~NLM_NH_INIDLE;
2594                 }
2595                 hostp->nh_refs++;
2596 
2597                 mutex_exit(&g->lock);
2598 
2599                 mutex_enter(&hostp->nh_lock);
2600                 TAILQ_FOREACH(nvp, &hostp->nh_vholds_list, nv_link) {
2601                         vnode_t *vp;
2602 
2603                         nvp->nv_refcnt++;
2604                         mutex_exit(&hostp->nh_lock);
2605 
2606                         vp = nvp->nv_vp;
2607 
2608                         if (!EQFSID(&exi->exi_fsid, &vp->v_vfsp->vfs_fsid))
2609                                 goto next_iter;
2610 
2611                         /*
2612                          * Ok, it we found out that vnode vp is under
2613                          * control by the exportinfo exi, now we need
2614                          * to drop all locks from this vnode, let's
2615                          * do it.
2616                          */
2617                         nlm_vhold_clean(nvp, hostp->nh_sysid);
2618 
2619                 next_iter:
2620                         mutex_enter(&hostp->nh_lock);
2621                         nvp->nv_refcnt--;
2622                 }
2623                 mutex_exit(&hostp->nh_lock);
2624 
2625                 mutex_enter(&g->lock);
2626                 nlm_host_release_locked(g, hostp);
2627 
2628                 hostp = AVL_NEXT(&g->nlm_hosts_tree, hostp);
2629         }
2630 
2631         mutex_exit(&g->lock);
2632 }
2633 
2634 /*
2635  * Allocate new unique sysid.
2636  * In case of failure (no available sysids)
2637  * return LM_NOSYSID.
2638  */
2639 sysid_t
2640 nlm_sysid_alloc(void)
2641 {
2642         sysid_t ret_sysid = LM_NOSYSID;
2643 
2644         rw_enter(&lm_lck, RW_WRITER);
2645         if (nlm_sysid_nidx > LM_SYSID_MAX)
2646                 nlm_sysid_nidx = LM_SYSID;
2647 
2648         if (!BT_TEST(nlm_sysid_bmap, nlm_sysid_nidx)) {
2649                 BT_SET(nlm_sysid_bmap, nlm_sysid_nidx);
2650                 ret_sysid = nlm_sysid_nidx++;
2651         } else {
2652                 index_t id;
2653 
2654                 id = bt_availbit(nlm_sysid_bmap, NLM_BMAP_NITEMS);
2655                 if (id > 0) {
2656                         nlm_sysid_nidx = id + 1;
2657                         ret_sysid = id;
2658                         BT_SET(nlm_sysid_bmap, id);
2659                 }
2660         }
2661 
2662         rw_exit(&lm_lck);
2663         return (ret_sysid);
2664 }
2665 
/*
 * Return the given sysid to the pool so that
 * nlm_sysid_alloc() can hand it out again.
 */
void
nlm_sysid_free(sysid_t sysid)
{
        ASSERT(sysid >= LM_SYSID && sysid <= LM_SYSID_MAX);

        rw_enter(&lm_lck, RW_WRITER);
        /* Freeing a sysid that was never allocated is a bug. */
        ASSERT(BT_TEST(nlm_sysid_bmap, sysid));
        BT_CLEAR(nlm_sysid_bmap, sysid);
        rw_exit(&lm_lck);
}
2676 
2677 /*
2678  * Return true if the request came from a local caller.
2679  * By necessity, this "knows" the netid names invented
2680  * in lm_svc() and nlm_netid_from_knetconfig().
2681  */
2682 bool_t
2683 nlm_caller_is_local(SVCXPRT *transp)
2684 {
2685         char *netid;
2686         struct netbuf *rtaddr;
2687 
2688         netid = svc_getnetid(transp);
2689         rtaddr = svc_getrpccaller(transp);
2690 
2691         if (netid == NULL)
2692                 return (FALSE);
2693 
2694         if (strcmp(netid, "ticlts") == 0 ||
2695             strcmp(netid, "ticotsord") == 0)
2696                 return (TRUE);
2697 
2698         if (strcmp(netid, "tcp") == 0 || strcmp(netid, "udp") == 0) {
2699                 struct sockaddr_in *sin = (void *)rtaddr->buf;
2700                 if (sin->sin_addr.s_addr == htonl(INADDR_LOOPBACK))
2701                         return (TRUE);
2702         }
2703         if (strcmp(netid, "tcp6") == 0 || strcmp(netid, "udp6") == 0) {
2704                 struct sockaddr_in6 *sin6 = (void *)rtaddr->buf;
2705                 if (IN6_IS_ADDR_LOOPBACK(&sin6->sin6_addr))
2706                         return (TRUE);
2707         }
2708 
2709         return (FALSE); /* unknown transport */
2710 }
2711 
2712 /*
2713  * Get netid string correspondig to the given knetconfig.
2714  * If not done already, save knc->knc_rdev in our table.
2715  */
2716 const char *
2717 nlm_knc_to_netid(struct knetconfig *knc)
2718 {
2719         int i;
2720         dev_t rdev;
2721         struct nlm_knc *nc;
2722         const char *netid = NULL;
2723 
2724         rw_enter(&lm_lck, RW_READER);
2725         for (i = 0; i < NLM_KNCS; i++) {
2726                 nc = &nlm_netconfigs[i];
2727 
2728                 if (nc->n_knc.knc_semantics == knc->knc_semantics &&
2729                     strcmp(nc->n_knc.knc_protofmly,
2730                     knc->knc_protofmly) == 0) {
2731                         netid = nc->n_netid;
2732                         rdev = nc->n_knc.knc_rdev;
2733                         break;
2734                 }
2735         }
2736         rw_exit(&lm_lck);
2737 
2738         if (netid != NULL && rdev == NODEV) {
2739                 rw_enter(&lm_lck, RW_WRITER);
2740                 if (nc->n_knc.knc_rdev == NODEV)
2741                         nc->n_knc.knc_rdev = knc->knc_rdev;
2742                 rw_exit(&lm_lck);
2743         }
2744 
2745         return (netid);
2746 }
2747 
2748 /*
2749  * Get a knetconfig corresponding to the given netid.
2750  * If there's no knetconfig for this netid, ENOENT
2751  * is returned.
2752  */
2753 int
2754 nlm_knc_from_netid(const char *netid, struct knetconfig *knc)
2755 {
2756         int i, ret;
2757 
2758         ret = ENOENT;
2759         for (i = 0; i < NLM_KNCS; i++) {
2760                 struct nlm_knc *nknc;
2761 
2762                 nknc = &nlm_netconfigs[i];
2763                 if (strcmp(netid, nknc->n_netid) == 0 &&
2764                     nknc->n_knc.knc_rdev != NODEV) {
2765                         *knc = nknc->n_knc;
2766                         ret = 0;
2767                         break;
2768                 }
2769         }
2770 
2771         return (ret);
2772 }
2773 
2774 void
2775 nlm_cprsuspend(void)
2776 {
2777         struct nlm_globals *g;
2778 
2779         rw_enter(&lm_lck, RW_READER);
2780         TAILQ_FOREACH(g, &nlm_zones_list, nlm_link)
2781                 nlm_suspend_zone(g);
2782 
2783         rw_exit(&lm_lck);
2784 }
2785 
2786 void
2787 nlm_cprresume(void)
2788 {
2789         struct nlm_globals *g;
2790 
2791         rw_enter(&lm_lck, RW_READER);
2792         TAILQ_FOREACH(g, &nlm_zones_list, nlm_link)
2793                 nlm_resume_zone(g);
2794 
2795         rw_exit(&lm_lck);
2796 }
2797 
/*
 * (Re)initialize the given RPC client handle for talking to the
 * local NSM (statd), binding it to the transport and address stored
 * in the nlm_nsm, with the zone's kernel credential and
 * NLM_RPC_RETRIES retries.  The return value is deliberately
 * discarded: this is best-effort re-initialization of an existing
 * handle.  (The 0 argument presumably selects the transport's
 * default max message size -- confirm against clnt_tli_kinit().)
 */
static void
nlm_nsm_clnt_init(CLIENT *clnt, struct nlm_nsm *nsm)
{
        (void) clnt_tli_kinit(clnt, &nsm->ns_knc, &nsm->ns_addr, 0,
            NLM_RPC_RETRIES, zone_kcred());
}
2804 
2805 static void
2806 nlm_netbuf_to_netobj(struct netbuf *addr, int *family, netobj *obj)
2807 {
2808         /* LINTED pointer alignment */
2809         struct sockaddr *sa = (struct sockaddr *)addr->buf;
2810 
2811         *family = sa->sa_family;
2812 
2813         switch (sa->sa_family) {
2814         case AF_INET: {
2815                 /* LINTED pointer alignment */
2816                 struct sockaddr_in *sin = (struct sockaddr_in *)sa;
2817 
2818                 obj->n_len = sizeof (sin->sin_addr);
2819                 obj->n_bytes = (char *)&sin->sin_addr;
2820                 break;
2821         }
2822 
2823         case AF_INET6: {
2824                 /* LINTED pointer alignment */
2825                 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa;
2826 
2827                 obj->n_len = sizeof (sin6->sin6_addr);
2828                 obj->n_bytes = (char *)&sin6->sin6_addr;
2829                 break;
2830         }
2831 
2832         default:
2833                 VERIFY(0);
2834                 break;
2835         }
2836 }