Print this page
    
11083 support NFS server in zone
Portions contributed by: Dan Kruchinin <dan.kruchinin@nexenta.com>
Portions contributed by: Stepan Zastupov <stepan.zastupov@gmail.com>
Portions contributed by: Joyce McIntosh <joyce.mcintosh@nexenta.com>
Portions contributed by: Mike Zeller <mike@mikezeller.net>
Portions contributed by: Dan McDonald <danmcd@joyent.com>
Portions contributed by: Gordon Ross <gordon.w.ross@gmail.com>
Portions contributed by: Vitaliy Gusev <gusev.vitaliy@gmail.com>
Reviewed by: Rick McNeal <rick.mcneal@nexenta.com>
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Jason King <jbk@joyent.com>
Reviewed by: C Fraire <cfraire@me.com>
Change-Id: I22f289d357503f9b48a0bc2482cc4328a6d43d16
    
      
        | Split | 
	Close | 
      
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/uts/common/klm/klmmod.c
          +++ new/usr/src/uts/common/klm/klmmod.c
   1    1  /*
   2    2   * This file and its contents are supplied under the terms of the
   3    3   * Common Development and Distribution License ("CDDL"), version 1.0.
   4    4   * You may only use this file in accordance with the terms of version
   5    5   * 1.0 of the CDDL.
   6    6   *
   7    7   * A full copy of the text of the CDDL should have accompanied this
   8    8   * source.  A copy is of the CDDL is also available via the Internet
   9    9   * at http://www.illumos.org/license/CDDL.
  10   10   */
  11   11  
  12   12  /*
  13   13   * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  14   14   * Copyright (c) 2012 by Delphix. All rights reserved.
  15   15   */
  16   16  
  17   17  /*
  18   18   * NFS Lock Manager, server-side and common.
  19   19   *
  20   20   * This file contains all the external entry points of klmmod.
  21   21   * Basically, this is the "glue" to the BSD nlm code.
  22   22   */
  23   23  
  24   24  #include <sys/types.h>
  25   25  #include <sys/errno.h>
  26   26  #include <sys/modctl.h>
  27   27  #include <sys/flock.h>
  28   28  
  29   29  #include <nfs/nfs.h>
  30   30  #include <nfs/nfssys.h>
  31   31  #include <nfs/lm.h>
  32   32  #include <rpcsvc/nlm_prot.h>
  33   33  #include "nlm_impl.h"
  34   34  
  35   35  static struct modlmisc modlmisc = {
  36   36          &mod_miscops, "lock mgr common module"
  37   37  };
  38   38  
  39   39  static struct modlinkage modlinkage = {
  40   40          MODREV_1, &modlmisc, NULL
  41   41  };
  42   42  
  43   43  /*
  44   44   * Cluster node ID.  Zero unless we're part of a cluster.
  45   45   * Set by lm_set_nlmid_flk.  Pass to lm_set_nlm_status.
  46   46   * We're not yet doing "clustered" NLM stuff.
  47   47   */
  48   48  int lm_global_nlmid = 0;
  49   49  
  50   50  /*
  51   51   * Call-back hook for clusters: Set lock manager status.
  52   52   * If this hook is set, call this instead of the usual
  53   53   * flk_set_lockmgr_status(FLK_LOCKMGR_UP / DOWN);
  54   54   */
  55   55  void (*lm_set_nlm_status)(int nlm_id, flk_nlm_status_t) = NULL;
  56   56  
  57   57  /*
  58   58   * Call-back hook for clusters: Delete all locks held by sysid.
  59   59   * Call from code that drops all client locks (for which we're
  60   60   * the server) i.e. after the SM tells us a client has crashed.
  61   61   */
  62   62  void (*lm_remove_file_locks)(int) = NULL;
  63   63  
  64   64  krwlock_t               lm_lck;
  65   65  zone_key_t              nlm_zone_key;
  66   66  
  67   67  /*
  68   68   * Init/fini per-zone stuff for klm
  69   69   */
  70   70  /* ARGSUSED */
  71   71  void *
  72   72  lm_zone_init(zoneid_t zoneid)
  73   73  {
  74   74          struct nlm_globals *g;
  75   75  
  76   76          g = kmem_zalloc(sizeof (*g), KM_SLEEP);
  77   77  
  78   78          avl_create(&g->nlm_hosts_tree, nlm_host_cmp,
  79   79              sizeof (struct nlm_host),
  80   80              offsetof(struct nlm_host, nh_by_addr));
  81   81  
  82   82          g->nlm_hosts_hash = mod_hash_create_idhash("nlm_host_by_sysid",
  83   83              64, mod_hash_null_valdtor);
  84   84  
  
    | 
      ↓ open down ↓ | 
    84 lines elided | 
    
      ↑ open up ↑ | 
  
  85   85          TAILQ_INIT(&g->nlm_idle_hosts);
  86   86          TAILQ_INIT(&g->nlm_slocks);
  87   87  
  88   88          mutex_init(&g->lock, NULL, MUTEX_DEFAULT, NULL);
  89   89          cv_init(&g->nlm_gc_sched_cv, NULL, CV_DEFAULT, NULL);
  90   90          cv_init(&g->nlm_gc_finish_cv, NULL, CV_DEFAULT, NULL);
  91   91          mutex_init(&g->clean_lock, NULL, MUTEX_DEFAULT, NULL);
  92   92  
  93   93          g->lockd_pid = 0;
  94   94          g->run_status = NLM_ST_DOWN;
       95 +        g->nlm_zoneid = zoneid;
  95   96  
  96   97          nlm_globals_register(g);
  97   98          return (g);
  98   99  }
  99  100  
 100  101  /* ARGSUSED */
 101  102  void
 102  103  lm_zone_fini(zoneid_t zoneid, void *data)
 103  104  {
 104  105          struct nlm_globals *g = data;
 105  106  
      107 +        nlm_globals_unregister(g);
      108 +
 106  109          ASSERT(avl_is_empty(&g->nlm_hosts_tree));
 107  110          avl_destroy(&g->nlm_hosts_tree);
 108  111          mod_hash_destroy_idhash(g->nlm_hosts_hash);
 109  112  
 110  113          ASSERT(g->nlm_gc_thread == NULL);
 111  114          mutex_destroy(&g->lock);
 112  115          cv_destroy(&g->nlm_gc_sched_cv);
 113  116          cv_destroy(&g->nlm_gc_finish_cv);
 114  117          mutex_destroy(&g->clean_lock);
 115  118  
 116      -        nlm_globals_unregister(g);
 117  119          kmem_free(g, sizeof (*g));
 118  120  }
 119  121  
 120  122  
 121  123  
 122  124  /*
 123  125   * ****************************************************************
 124  126   * module init, fini, info
 125  127   */
 126  128  int
 127  129  _init()
 128  130  {
 129  131          int retval;
 130  132  
 131  133          rw_init(&lm_lck, NULL, RW_DEFAULT, NULL);
 132  134          nlm_init();
 133  135  
 134  136          zone_key_create(&nlm_zone_key, lm_zone_init, NULL, lm_zone_fini);
 135  137          /* Per-zone lockmgr data.  See: os/flock.c */
 136  138          zone_key_create(&flock_zone_key, flk_zone_init, NULL, flk_zone_fini);
 137  139  
 138  140          retval = mod_install(&modlinkage);
 139  141          if (retval == 0)
 140  142                  return (0);
 141  143  
 142  144          /*
 143  145           * mod_install failed! undo above, reverse order
 144  146           */
 145  147  
 146  148          (void) zone_key_delete(flock_zone_key);
 147  149          flock_zone_key = ZONE_KEY_UNINITIALIZED;
 148  150          (void) zone_key_delete(nlm_zone_key);
 149  151          rw_destroy(&lm_lck);
 150  152  
 151  153          return (retval);
 152  154  }
 153  155  
 154  156  int
 155  157  _fini()
 156  158  {
 157  159          /* Don't unload. */
 158  160          return (EBUSY);
 159  161  }
 160  162  
 161  163  int
 162  164  _info(struct modinfo *modinfop)
 163  165  {
 164  166          return (mod_info(&modlinkage, modinfop));
 165  167  }
 166  168  
 167  169  
 168  170  
 169  171  /*
 170  172   * ****************************************************************
 171  173   * Stubs listed in modstubs.s
 172  174   */
 173  175  
 174  176  /*
 175  177   * klm system calls.  Start service on some endpoint.
 176  178   * Called by nfssys() LM_SVC, from lockd.
 177  179   */
 178  180  int
 179  181  lm_svc(struct lm_svc_args *args)
 180  182  {
 181  183          struct knetconfig knc;
 182  184          const char *netid;
 183  185          struct nlm_globals *g;
 184  186          struct file *fp = NULL;
 185  187          int err = 0;
 186  188  
 187  189          /* Get our "globals" */
 188  190          g = zone_getspecific(nlm_zone_key, curzone);
 189  191  
 190  192          /*
 191  193           * Check version of lockd calling.
 192  194           */
 193  195          if (args->version != LM_SVC_CUR_VERS) {
 194  196                  NLM_ERR("lm_svc: Version mismatch "
 195  197                      "(given 0x%x, expected 0x%x)\n",
 196  198                      args->version, LM_SVC_CUR_VERS);
 197  199                  return (EINVAL);
 198  200          }
 199  201  
 200  202          /*
 201  203           * Build knetconfig, checking arg values.
 202  204           * Also come up with the "netid" string.
 203  205           * (With some knowledge of /etc/netconfig)
 204  206           */
 205  207          bzero(&knc, sizeof (knc));
 206  208          switch (args->n_proto) {
 207  209          case LM_TCP:
 208  210                  knc.knc_semantics = NC_TPI_COTS_ORD;
 209  211                  knc.knc_proto = NC_TCP;
 210  212                  break;
 211  213          case LM_UDP:
 212  214                  knc.knc_semantics = NC_TPI_CLTS;
 213  215                  knc.knc_proto = NC_UDP;
 214  216                  break;
 215  217          default:
 216  218                  NLM_ERR("nlm_build_knetconfig: Unknown "
 217  219                      "lm_proto=0x%x\n", args->n_proto);
 218  220                  return (EINVAL);
 219  221          }
 220  222  
 221  223          switch (args->n_fmly) {
 222  224          case LM_INET:
 223  225                  knc.knc_protofmly = NC_INET;
 224  226                  break;
 225  227          case LM_INET6:
 226  228                  knc.knc_protofmly = NC_INET6;
 227  229                  break;
 228  230          case LM_LOOPBACK:
 229  231                  knc.knc_protofmly = NC_LOOPBACK;
 230  232                  /* Override what we set above. */
 231  233                  knc.knc_proto = NC_NOPROTO;
 232  234                  break;
 233  235          default:
 234  236                  NLM_ERR("nlm_build_knetconfig: Unknown "
 235  237                      "lm_fmly=0x%x\n", args->n_fmly);
 236  238                  return (EINVAL);
 237  239          }
 238  240  
 239  241          knc.knc_rdev = args->n_rdev;
 240  242          netid = nlm_knc_to_netid(&knc);
 241  243          if (!netid)
 242  244                  return (EINVAL);
 243  245  
 244  246          /*
 245  247           * Setup service on the passed transport.
 246  248           * NB: must releasef(fp) after this.
 247  249           */
 248  250          if ((fp = getf(args->fd)) == NULL)
 249  251                  return (EBADF);
 250  252  
 251  253          mutex_enter(&g->lock);
 252  254          /*
 253  255           * Don't try to start while still shutting down,
 254  256           * or lots of things will fail...
 255  257           */
 256  258          if (g->run_status == NLM_ST_STOPPING) {
 257  259                  err = EAGAIN;
 258  260                  goto out;
 259  261          }
 260  262  
 261  263          /*
 262  264           * There is no separate "initialize" sub-call for nfssys,
 263  265           * and we want to do some one-time work when the first
 264  266           * binding comes in from lockd.
 265  267           */
 266  268          if (g->run_status == NLM_ST_DOWN) {
 267  269                  g->run_status = NLM_ST_STARTING;
 268  270                  g->lockd_pid = curproc->p_pid;
 269  271  
 270  272                  /* Save the options. */
 271  273                  g->cn_idle_tmo = args->timout;
 272  274                  g->grace_period = args->grace;
 273  275                  g->retrans_tmo = args->retransmittimeout;
 274  276  
 275  277                  /* See nfs_sys.c (not yet per-zone) */
 276  278                  if (INGLOBALZONE(curproc)) {
 277  279                          rfs4_grace_period = args->grace;
 278  280                          rfs4_lease_time   = args->grace;
 279  281                  }
 280  282  
 281  283                  mutex_exit(&g->lock);
 282  284                  err = nlm_svc_starting(g, fp, netid, &knc);
 283  285                  mutex_enter(&g->lock);
 284  286          } else {
 285  287                  /*
 286  288                   * If KLM is not started and the very first endpoint lockd
 287  289                   * tries to add is not a loopback device, report an error.
 288  290                   */
 289  291                  if (g->run_status != NLM_ST_UP) {
 290  292                          err = ENOTACTIVE;
 291  293                          goto out;
 292  294                  }
 293  295                  if (g->lockd_pid != curproc->p_pid) {
 294  296                          /* Check if caller has the same PID lockd does */
 295  297                          err = EPERM;
 296  298                          goto out;
 297  299                  }
 298  300  
 299  301                  err = nlm_svc_add_ep(fp, netid, &knc);
 300  302          }
 301  303  
 302  304  out:
 303  305          mutex_exit(&g->lock);
 304  306          if (fp != NULL)
 305  307                  releasef(args->fd);
 306  308  
 307  309          return (err);
 308  310  }
 309  311  
 310  312  /*
 311  313   * klm system calls.  Kill the lock manager.
 312  314   * Called by nfssys() KILL_LOCKMGR,
 313  315   * liblm:lm_shutdown() <- unused?
 314  316   */
 315  317  int
 316  318  lm_shutdown(void)
 317  319  {
 318  320          struct nlm_globals *g;
 319  321          proc_t *p;
 320  322          pid_t pid;
 321  323  
 322  324          /* Get our "globals" */
 323  325          g = zone_getspecific(nlm_zone_key, curzone);
 324  326  
 325  327          mutex_enter(&g->lock);
 326  328          if (g->run_status != NLM_ST_UP) {
 327  329                  mutex_exit(&g->lock);
 328  330                  return (EBUSY);
 329  331          }
 330  332  
 331  333          g->run_status = NLM_ST_STOPPING;
 332  334          pid = g->lockd_pid;
 333  335          mutex_exit(&g->lock);
 334  336          nlm_svc_stopping(g);
 335  337  
 336  338          mutex_enter(&pidlock);
 337  339          p = prfind(pid);
 338  340          if (p != NULL)
 339  341                  psignal(p, SIGTERM);
 340  342  
 341  343          mutex_exit(&pidlock);
 342  344          return (0);
 343  345  }
 344  346  
 345  347  /*
 346  348   * Cleanup remote locks on FS un-export.
 347  349   *
 348  350   * NOTE: called from nfs_export.c:unexport()
 349  351   * right before the share is going to
 350  352   * be unexported.
 351  353   */
 352  354  void
 353  355  lm_unexport(struct exportinfo *exi)
 354  356  {
 355  357          nlm_unexport(exi);
 356  358  }
 357  359  
 358  360  /*
 359  361   * CPR suspend/resume hooks.
 360  362   * See:cpr_suspend, cpr_resume
 361  363   *
 362  364   * Before suspend, get current state from "statd" on
 363  365   * all remote systems for which we have locks.
 364  366   *
 365  367   * After resume, check with those systems again,
 366  368   * and either reclaim locks, or do SIGLOST.
 367  369   */
 368  370  void
 369  371  lm_cprsuspend(void)
 370  372  {
 371  373          nlm_cprsuspend();
 372  374  }
 373  375  
 374  376  void
 375  377  lm_cprresume(void)
 376  378  {
 377  379          nlm_cprresume();
 378  380  }
 379  381  
 380  382  /*
 381  383   * Add the nlm_id bits to the sysid (by ref).
 382  384   */
 383  385  void
 384  386  lm_set_nlmid_flk(int *new_sysid)
 385  387  {
 386  388          if (lm_global_nlmid != 0)
 387  389                  *new_sysid |= (lm_global_nlmid << BITS_IN_SYSID);
 388  390  }
 389  391  
 390  392  /*
 391  393   * It seems that closed source klmmod used
 392  394   * this function to release knetconfig stored
 393  395   * in mntinfo structure (see mntinfo's mi_klmconfig
 394  396   * field).
 395  397   * We store knetconfigs differently, thus we don't
 396  398   * need this function.
 397  399   */
 398  400  void
 399  401  lm_free_config(struct knetconfig *knc)
 400  402  {
 401  403          _NOTE(ARGUNUSED(knc));
 402  404  }
 403  405  
 404  406  /*
 405  407   * Called by NFS4 delegation code to check if there are any
 406  408   * NFSv2/v3 locks for the file, so it should not delegate.
 407  409   *
 408  410   * NOTE: called from NFSv4 code
 409  411   * (see nfs4_srv_deleg.c:rfs4_bgrant_delegation())
 410  412   */
 411  413  int
 412  414  lm_vp_active(const vnode_t *vp)
 413  415  {
 414  416          return (nlm_vp_active(vp));
 415  417  }
 416  418  
 417  419  /*
 418  420   * Find or create a "sysid" for given knc+addr.
 419  421   * name is optional.  nc_changed is currently
 420  422   * unused: this implementation never sets it.
 421  423   * Increments the reference count.
 422  424   *
 423  425   * Called internally, and in nfs4_find_sysid()
 424  426   */
 425  427  struct lm_sysid *
 426  428  lm_get_sysid(struct knetconfig *knc, struct netbuf *addr,
 427  429      char *name, bool_t *nc_changed)
 428  430  {
 429  431          struct nlm_globals *g;
 430  432          const char *netid;
 431  433          struct nlm_host *hostp;
 432  434  
 433  435          _NOTE(ARGUNUSED(nc_changed));
 434  436          netid = nlm_knc_to_netid(knc);
 435  437          if (netid == NULL)
 436  438                  return (NULL);
 437  439  
 438  440          g = zone_getspecific(nlm_zone_key, curzone);
 439  441  
 440  442          hostp = nlm_host_findcreate(g, name, netid, addr);
 441  443          if (hostp == NULL)
 442  444                  return (NULL);
 443  445  
 444  446          return ((struct lm_sysid *)hostp);
 445  447  }
 446  448  
 447  449  /*
 448  450   * Release a reference on a "sysid".
 449  451   */
 450  452  void
 451  453  lm_rel_sysid(struct lm_sysid *sysid)
 452  454  {
 453  455          struct nlm_globals *g;
 454  456  
 455  457          g = zone_getspecific(nlm_zone_key, curzone);
 456  458          nlm_host_release(g, (struct nlm_host *)sysid);
 457  459  }
 458  460  
 459  461  /*
 460  462   * Alloc/free a sysid_t (a unique number between
 461  463   * LM_SYSID and LM_SYSID_MAX).
 462  464   *
 463  465   * Used by NFSv4 rfs4_op_lockt and smbsrv/smb_fsop_frlock,
 464  466   * both to represent non-local locks outside of klm.
 465  467   *
 466  468   * NOTE: called from NFSv4 and SMBFS to allocate unique
 467  469   * sysid.
 468  470   */
 469  471  sysid_t
 470  472  lm_alloc_sysidt(void)
 471  473  {
 472  474          return (nlm_sysid_alloc());
 473  475  }
 474  476  
 475  477  void
 476  478  lm_free_sysidt(sysid_t sysid)
 477  479  {
 478  480          nlm_sysid_free(sysid);
 479  481  }
 480  482  
 481  483  /* Access private member lms->sysid */
 482  484  sysid_t
 483  485  lm_sysidt(struct lm_sysid *lms)
 484  486  {
 485  487          return (((struct nlm_host *)lms)->nh_sysid);
 486  488  }
 487  489  
 488  490  /*
 489  491   * Called by nfs_frlock to check lock constraints.
 490  492   * Return non-zero if the lock request is "safe", i.e.
 491  493   * the range is not mapped, not MANDLOCK, etc.
 492  494   *
 493  495   * NOTE: called from NFSv3/NFSv2 frlock() functions to
 494  496   * determine whether it's safe to add new lock.
 495  497   */
 496  498  int
 497  499  lm_safelock(vnode_t *vp, const struct flock64 *fl, cred_t *cr)
 498  500  {
 499  501          return (nlm_safelock(vp, fl, cr));
 500  502  }
 501  503  
 502  504  /*
 503  505   * Called by nfs_lockcompletion to check whether it's "safe"
 504  506   * to map the file (and cache its data).  Walks the list of
 505  507   * file locks looking for any that are not "whole file".
 506  508   *
 507  509   * NOTE: called from nfs_client.c:nfs_lockcompletion()
 508  510   */
 509  511  int
 510  512  lm_safemap(const vnode_t *vp)
 511  513  {
 512  514          return (nlm_safemap(vp));
 513  515  }
 514  516  
 515  517  /*
 516  518   * Called by nfs_map() for the MANDLOCK case.
 517  519   * Return non-zero if the file has any locks with a
 518  520   * blocked request (sleep).
 519  521   *
 520  522   * NOTE: called from NFSv3/NFSv2 map() functions in
 521  523   * order to determine whether it's safe to add new
 522  524   * mapping.
 523  525   */
 524  526  int
 525  527  lm_has_sleep(const vnode_t *vp)
 526  528  {
 527  529          return (nlm_has_sleep(vp));
 528  530  }
 529  531  
 530  532  /*
 531  533   * ****************************************************************
 532  534   * Stuff needed by klmops?
 533  535   */
  
    | 
      ↓ open down ↓ | 
    407 lines elided | 
    
      ↑ open up ↑ | 
  
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX