NEX-3758 Support for remote stale lock detection
Reviewed by: Gordon Ross <gordon.ross@nexenta.com>
--- old/usr/src/uts/common/os/flock.c
+++ new/usr/src/uts/common/os/flock.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
24 24 * Use is subject to license terms.
25 25 */
26 26
27 27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
28 28 /* All Rights Reserved */
29 29
30 30 /*
31 - * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
31 + * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
32 32 * Copyright 2015 Joyent, Inc.
33 33 */
34 34
35 35 #include <sys/flock_impl.h>
36 36 #include <sys/vfs.h>
37 37 #include <sys/t_lock.h> /* for <sys/callb.h> */
38 38 #include <sys/callb.h>
39 39 #include <sys/clconf.h>
40 40 #include <sys/cladm.h>
41 41 #include <sys/nbmlock.h>
42 42 #include <sys/cred.h>
43 43 #include <sys/policy.h>
44 +#include <sys/list.h>
45 +#include <sys/sysmacros.h>
46 +#include <sys/socket.h>
47 +#include <inet/ip.h>
44 48
45 49 /*
46 50 * The following four variables are for statistics purposes and they are
47 51 * not protected by locks. They may not be accurate but will at least be
48 52 * close to the actual value.
49 53 */
50 54
51 55 int flk_lock_allocs;
52 56 int flk_lock_frees;
53 57 int edge_allocs;
54 58 int edge_frees;
55 59 int flk_proc_vertex_allocs;
56 60 int flk_proc_edge_allocs;
57 61 int flk_proc_vertex_frees;
58 62 int flk_proc_edge_frees;
59 63
60 64 static kmutex_t flock_lock;
61 65
62 66 #ifdef DEBUG
63 67 int check_debug = 0;
64 68 #define CHECK_ACTIVE_LOCKS(gp) if (check_debug) \
65 69 check_active_locks(gp);
66 70 #define CHECK_SLEEPING_LOCKS(gp) if (check_debug) \
67 71 check_sleeping_locks(gp);
68 72 #define CHECK_OWNER_LOCKS(gp, pid, sysid, vp) \
69 73 if (check_debug) \
70 74 check_owner_locks(gp, pid, sysid, vp);
71 75 #define CHECK_LOCK_TRANSITION(old_state, new_state) \
72 76 { \
73 77 if (check_lock_transition(old_state, new_state)) { \
74 78 cmn_err(CE_PANIC, "Illegal lock transition \
75 79 from %d to %d", old_state, new_state); \
76 80 } \
77 81 }
78 82 #else
79 83
80 84 #define CHECK_ACTIVE_LOCKS(gp)
81 85 #define CHECK_SLEEPING_LOCKS(gp)
82 86 #define CHECK_OWNER_LOCKS(gp, pid, sysid, vp)
83 87 #define CHECK_LOCK_TRANSITION(old_state, new_state)
84 88
85 89 #endif /* DEBUG */
86 90
87 91 struct kmem_cache *flk_edge_cache;
88 92
89 93 graph_t *lock_graph[HASH_SIZE];
90 94 proc_graph_t pgraph;
91 95
92 96 /*
93 97 * Clustering.
94 98 *
95 99 * NLM REGISTRY TYPE IMPLEMENTATION
96 100 *
97 101 * Assumptions:
98 102 * 1. Nodes in a cluster are numbered starting at 1; always non-negative
99 103 * integers; maximum node id is returned by clconf_maximum_nodeid().
100 104 * 2. We use this node id to identify the node an NLM server runs on.
101 105 */
102 106
103 107 /*
104 108 * NLM registry object keeps track of NLM servers via their
105 109 * nlmids (which are the node ids of the node in the cluster they run on)
106 110 * that have requested locks at this LLM with which this registry is
107 111 * associated.
108 112 *
109 113 * Representation of abstraction:
110 114 * rep = record[ states: array[nlm_state],
111 115 * lock: mutex]
112 116 *
113 117 * Representation invariants:
114 118 * 1. index i of rep.states is between 0 and n - 1 where n is number
115 119 * of elements in the array, which happen to be the maximum number
116 120 * of nodes in the cluster configuration + 1.
117 121 * 2. map nlmid to index i of rep.states
118 122 * 0 -> 0
119 123 * 1 -> 1
120 124 * 2 -> 2
121 125 * n-1 -> clconf_maximum_nodeid()+1
122 126 * 3. This 1-1 mapping is quite convenient and it avoids errors resulting
123 127 * from forgetting to subtract 1 from the index.
124 128 * 4. The reason we keep the 0th index is the following. A legitimate
125 129 * cluster configuration includes making a UFS file system NFS
126 130 * exportable. The code is structured so that if you're in a cluster
127 131 * you do one thing; otherwise, you do something else. The problem
128 132 * is what to do if you think you're in a cluster with PXFS loaded,
129 133 * but you're using UFS not PXFS? The upper two bytes of the sysid
130 134 * encode the node id of the node where NLM server runs; these bytes
131 135 * are zero for UFS. Since the nodeid is used to index into the
132 136 * registry, we can record the NLM server state information at index
133 137 * 0 using the same mechanism used for PXFS file locks!
134 138 */
135 139 static flk_nlm_status_t *nlm_reg_status = NULL; /* state array 0..N-1 */
 136 140 static kmutex_t nlm_reg_lock;			/* lock to protect array */
137 141 static uint_t nlm_status_size; /* size of state array */
138 142
139 143 /*
140 144 * Although we need a global lock dependency graph (and associated data
141 145 * structures), we also need a per-zone notion of whether the lock manager is
142 146 * running, and so whether to allow lock manager requests or not.
143 147 *
144 148 * Thus, on a per-zone basis we maintain a ``global'' variable
145 149 * (flk_lockmgr_status), protected by flock_lock, and set when the lock
146 150 * manager is determined to be changing state (starting or stopping).
147 151 *
148 152 * Each graph/zone pair also has a copy of this variable, which is protected by
149 153 * the graph's mutex.
150 154 *
151 155 * The per-graph copies are used to synchronize lock requests with shutdown
152 156 * requests. The global copy is used to initialize the per-graph field when a
153 157 * new graph is created.
154 158 */
155 159 struct flock_globals {
156 160 flk_lockmgr_status_t flk_lockmgr_status;
157 161 flk_lockmgr_status_t lockmgr_status[HASH_SIZE];
158 162 };
159 163
160 164 zone_key_t flock_zone_key;
161 165
 166 +/*
 167 + * Support for remote stale lock detection
 168 + *
 169 + * The sysid_to_host_translator_lock readers/writer lock protects
 170 + * sysid_to_host_translator_list.
 171 + *
 172 + * sysid_to_host_translator_list is a list of sysid-to-host-name translator
 173 + * functions. New translators are registered with the public
 174 + * flk_add_sysid_to_host_translator() call.
 175 + *
 176 + * stale_lock_timeout is in seconds; a remote lock is reported as stale once
 177 + * it is still blocking requests this long after it was first seen blocking
 178 + * one. When set to 0, remote stale lock checking is disabled.
 179 + */
180 +struct sysid_to_host_translator_entry {
181 + sysid_to_host_translator_t translator;
182 + list_node_t node;
183 +};
184 +static krwlock_t sysid_to_host_translator_lock;
185 +static list_t sysid_to_host_translator_list;
186 +volatile int stale_lock_timeout = 3600; /* one hour, in seconds */
187 +
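
The declarations above are the public surface of the new feature. A minimal sketch of how a remote file-service module might supply a translator follows; the translator signature is inferred from the call site in translate_sysid_to_host() further down (fill in a socket address and a short protocol-type string, return non-zero on success), and example_sysid_to_host(), example_host_lookup() and example_register_translator() are hypothetical names, not part of this change.

/*
 * Sketch only: a hypothetical IPv4 translator and its registration.
 * The flk_add_sysid_to_host_translator() declaration is assumed to be
 * exported to consumers through the flock headers.
 */
static int
example_sysid_to_host(zoneid_t zoneid, sysid_t sysid, struct sockaddr *sa,
    const char **type)
{
	struct sockaddr_in *sin = (struct sockaddr_in *)sa;

	/* hypothetical lookup in the module's per-zone host table */
	if (example_host_lookup(zoneid, sysid, &sin->sin_addr) != 0)
		return (0);	/* unknown sysid; let the next translator try */

	sin->sin_family = AF_INET;
	*type = "NLM";
	return (1);		/* *sa and *type are now valid */
}

static void
example_register_translator(void)
{
	/* typically called once, e.g. from the module's _init() path */
	flk_add_sysid_to_host_translator(example_sysid_to_host);
}
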
162 188 static void create_flock(lock_descriptor_t *, flock64_t *);
163 189 static lock_descriptor_t *flk_get_lock(void);
164 190 static void flk_free_lock(lock_descriptor_t *lock);
165 -static void flk_get_first_blocking_lock(lock_descriptor_t *request);
191 +static void flk_get_first_blocking_lock(lock_descriptor_t *);
166 192 static int flk_process_request(lock_descriptor_t *);
167 193 static int flk_add_edge(lock_descriptor_t *, lock_descriptor_t *, int, int);
168 194 static edge_t *flk_get_edge(void);
169 195 static int flk_wait_execute_request(lock_descriptor_t *);
170 196 static int flk_relation(lock_descriptor_t *, lock_descriptor_t *);
171 197 static void flk_insert_active_lock(lock_descriptor_t *);
172 198 static void flk_delete_active_lock(lock_descriptor_t *, int);
173 199 static void flk_insert_sleeping_lock(lock_descriptor_t *);
174 200 static void flk_graph_uncolor(graph_t *);
175 201 static void flk_wakeup(lock_descriptor_t *, int);
176 202 static void flk_free_edge(edge_t *);
177 203 static void flk_recompute_dependencies(lock_descriptor_t *,
178 204 lock_descriptor_t **, int, int);
179 205 static int flk_find_barriers(lock_descriptor_t *);
180 206 static void flk_update_barriers(lock_descriptor_t *);
181 207 static int flk_color_reachables(lock_descriptor_t *);
182 208 static int flk_canceled(lock_descriptor_t *);
183 209 static void flk_delete_locks_by_sysid(lock_descriptor_t *);
184 210 static void report_blocker(lock_descriptor_t *, lock_descriptor_t *);
185 211 static void wait_for_lock(lock_descriptor_t *);
186 212 static void unlock_lockmgr_granted(struct flock_globals *);
187 213 static void wakeup_sleeping_lockmgr_locks(struct flock_globals *);
188 214
189 215 /* Clustering hooks */
190 216 static void cl_flk_change_nlm_state_all_locks(int, flk_nlm_status_t);
191 217 static void cl_flk_wakeup_sleeping_nlm_locks(int);
192 218 static void cl_flk_unlock_nlm_granted(int);
193 219
194 220 #ifdef DEBUG
195 221 static int check_lock_transition(int, int);
196 222 static void check_sleeping_locks(graph_t *);
197 223 static void check_active_locks(graph_t *);
198 224 static int no_path(lock_descriptor_t *, lock_descriptor_t *);
199 225 static void path(lock_descriptor_t *, lock_descriptor_t *);
200 226 static void check_owner_locks(graph_t *, pid_t, int, vnode_t *);
201 227 static int level_one_path(lock_descriptor_t *, lock_descriptor_t *);
202 228 static int level_two_path(lock_descriptor_t *, lock_descriptor_t *, int);
203 229 #endif
204 230
205 231 /* proc_graph function definitions */
206 232 static int flk_check_deadlock(lock_descriptor_t *);
207 233 static void flk_proc_graph_uncolor(void);
208 234 static proc_vertex_t *flk_get_proc_vertex(lock_descriptor_t *);
209 235 static proc_edge_t *flk_get_proc_edge(void);
210 236 static void flk_proc_release(proc_vertex_t *);
211 237 static void flk_free_proc_edge(proc_edge_t *);
212 238 static void flk_update_proc_graph(edge_t *, int);
213 239
214 240 /* Non-blocking mandatory locking */
215 241 static int lock_blocks_io(nbl_op_t, u_offset_t, ssize_t, int, u_offset_t,
216 242 u_offset_t);
217 243
218 244 static struct flock_globals *
219 245 flk_get_globals(void)
220 246 {
221 247 /*
222 248 * The KLM module had better be loaded if we're attempting to handle
223 249 * lockmgr requests.
224 250 */
225 251 ASSERT(flock_zone_key != ZONE_KEY_UNINITIALIZED);
226 252 return (zone_getspecific(flock_zone_key, curproc->p_zone));
227 253 }
228 254
229 255 static flk_lockmgr_status_t
230 256 flk_get_lockmgr_status(void)
231 257 {
232 258 struct flock_globals *fg;
233 259
234 260 ASSERT(MUTEX_HELD(&flock_lock));
235 261
236 262 if (flock_zone_key == ZONE_KEY_UNINITIALIZED) {
237 263 /*
238 264 * KLM module not loaded; lock manager definitely not running.
239 265 */
240 266 return (FLK_LOCKMGR_DOWN);
241 267 }
242 268 fg = flk_get_globals();
243 269 return (fg->flk_lockmgr_status);
244 270 }
245 271
246 272 /*
247 273 * This implements Open File Description (not descriptor) style record locking.
248 274 * These locks can also be thought of as pid-less since they are not tied to a
249 275 * specific process, thus they're preserved across fork.
250 276 *
251 277 * Called directly from fcntl.
252 278 *
253 279 * See reclock() for the implementation of the traditional POSIX style record
254 280 * locking scheme (pid-ful). This function is derived from reclock() but
255 281 * simplified and modified to work for OFD style locking.
256 282 *
257 283 * The two primary advantages of OFD style of locking are:
258 284 * 1) It is per-file description, so closing a file descriptor that refers to a
259 285 * different file description for the same file will not drop the lock (i.e.
260 286 * two open's of the same file get different descriptions but a dup or fork
261 287 * will refer to the same description).
262 288 * 2) Locks are preserved across fork(2).
263 289 *
264 290 * Because these locks are per-description a lock ptr lives at the f_filocks
265 291 * member of the file_t and the lock_descriptor includes a file_t pointer
266 292 * to enable unique lock identification and management.
267 293 *
268 294 * Since these locks are pid-less we cannot do deadlock detection with the
269 295 * current process-oriented implementation. This is consistent with OFD locking
270 296 * behavior on other operating systems such as Linux. Since we don't do
271 297 * deadlock detection we never interact with the process graph that is
272 298 * maintained for deadlock detection on the traditional POSIX-style locks.
273 299 *
274 300 * Future Work:
275 301 *
276 302 * The current implementation does not support record locks. That is,
277 303 * currently the single lock must cover the entire file. This is validated in
278 304 * fcntl. To support record locks the f_filock pointer in the file_t needs to
279 305 * be changed to a list of pointers to the locks. That list needs to be
280 306 * managed independently of the lock list on the vnode itself and it needs to
281 307 * be maintained as record locks are created, split, coalesced and deleted.
282 308 *
283 309 * The current implementation does not support remote file systems (e.g.
284 310 * NFS or CIFS). This is handled in fs_frlock(). The design of how OFD locks
285 311 * interact with the NLM is not clear since the NLM protocol/implementation
286 312 * appears to be oriented around locks associated with a process. A further
287 313 * problem is that a design is needed for what nlm_send_siglost() should do and
288 314 * where it will send SIGLOST. More recent versions of Linux apparently try to
289 315 * emulate OFD locks on NFS by converting them to traditional POSIX style locks
290 316 * that work with the NLM. It is not clear that this provides the correct
291 317 * semantics in all cases.
292 318 */
293 319 int
294 320 ofdlock(file_t *fp, int fcmd, flock64_t *lckdat, int flag, u_offset_t offset)
295 321 {
296 322 int cmd = 0;
297 323 vnode_t *vp;
298 324 lock_descriptor_t stack_lock_request;
299 325 lock_descriptor_t *lock_request;
300 326 int error = 0;
301 327 graph_t *gp;
302 328 int serialize = 0;
303 329
304 330 if (fcmd != F_OFD_GETLK)
305 331 cmd = SETFLCK;
306 332
307 333 if (fcmd == F_OFD_SETLKW || fcmd == F_FLOCKW)
308 334 cmd |= SLPFLCK;
309 335
310 336 /* see block comment */
311 337 VERIFY(lckdat->l_whence == 0);
312 338 VERIFY(lckdat->l_start == 0);
313 339 VERIFY(lckdat->l_len == 0);
314 340
315 341 vp = fp->f_vnode;
316 342
317 343 /*
318 344 * For reclock fs_frlock() would normally have set these in a few
319 345 * places but for us it's cleaner to centralize it here. Note that
320 346 * IGN_PID is -1. We use 0 for our pid-less locks.
321 347 */
322 348 lckdat->l_pid = 0;
323 349 lckdat->l_sysid = 0;
324 350
325 351 /*
326 352 * Check access permissions
327 353 */
328 354 if ((fcmd == F_OFD_SETLK || fcmd == F_OFD_SETLKW) &&
329 355 ((lckdat->l_type == F_RDLCK && (flag & FREAD) == 0) ||
330 356 (lckdat->l_type == F_WRLCK && (flag & FWRITE) == 0)))
331 357 return (EBADF);
332 358
333 359 /*
334 360 * for query and unlock we use the stack_lock_request
335 361 */
336 362 if (lckdat->l_type == F_UNLCK || !(cmd & SETFLCK)) {
337 363 lock_request = &stack_lock_request;
338 364 (void) bzero((caddr_t)lock_request,
339 365 sizeof (lock_descriptor_t));
340 366
341 367 /*
342 368 * following is added to make the assertions in
343 369 * flk_execute_request() pass
344 370 */
345 371 lock_request->l_edge.edge_in_next = &lock_request->l_edge;
346 372 lock_request->l_edge.edge_in_prev = &lock_request->l_edge;
347 373 lock_request->l_edge.edge_adj_next = &lock_request->l_edge;
348 374 lock_request->l_edge.edge_adj_prev = &lock_request->l_edge;
349 375 lock_request->l_status = FLK_INITIAL_STATE;
350 376 } else {
351 377 lock_request = flk_get_lock();
352 378 fp->f_filock = (struct filock *)lock_request;
353 379 }
354 380 lock_request->l_state = 0;
355 381 lock_request->l_vnode = vp;
356 382 lock_request->l_zoneid = getzoneid();
357 383 lock_request->l_ofd = fp;
358 384
359 385 /*
360 386 * Convert the request range into the canonical start and end
361 387 * values then check the validity of the lock range.
362 388 */
363 389 error = flk_convert_lock_data(vp, lckdat, &lock_request->l_start,
364 390 &lock_request->l_end, offset);
365 391 if (error)
366 392 goto done;
367 393
368 394 error = flk_check_lock_data(lock_request->l_start, lock_request->l_end,
369 395 MAXEND);
370 396 if (error)
371 397 goto done;
372 398
373 399 ASSERT(lock_request->l_end >= lock_request->l_start);
374 400
375 401 lock_request->l_type = lckdat->l_type;
376 402 if (cmd & SLPFLCK)
377 403 lock_request->l_state |= WILLING_TO_SLEEP_LOCK;
378 404
379 405 if (!(cmd & SETFLCK)) {
380 406 if (lock_request->l_type == F_RDLCK ||
381 407 lock_request->l_type == F_WRLCK)
382 408 lock_request->l_state |= QUERY_LOCK;
383 409 }
384 410 lock_request->l_flock = (*lckdat);
385 411
386 412 /*
387 413 * We are ready for processing the request
388 414 */
389 415
390 416 if (fcmd != F_OFD_GETLK && lock_request->l_type != F_UNLCK &&
391 417 nbl_need_check(vp)) {
392 418 nbl_start_crit(vp, RW_WRITER);
393 419 serialize = 1;
394 420 }
395 421
396 422 /* Get the lock graph for a particular vnode */
397 423 gp = flk_get_lock_graph(vp, FLK_INIT_GRAPH);
398 424
399 425 mutex_enter(&gp->gp_mutex);
400 426
401 427 lock_request->l_state |= REFERENCED_LOCK;
402 428 lock_request->l_graph = gp;
403 429
404 430 switch (lock_request->l_type) {
405 431 case F_RDLCK:
406 432 case F_WRLCK:
407 433 if (IS_QUERY_LOCK(lock_request)) {
408 434 flk_get_first_blocking_lock(lock_request);
409 435 if (lock_request->l_ofd != NULL)
410 436 lock_request->l_flock.l_pid = -1;
411 437 (*lckdat) = lock_request->l_flock;
412 438 } else {
413 439 /* process the request now */
414 440 error = flk_process_request(lock_request);
415 441 }
416 442 break;
417 443
418 444 case F_UNLCK:
419 445 /* unlock request will not block so execute it immediately */
420 446 error = flk_execute_request(lock_request);
421 447 break;
422 448
423 449 default:
424 450 error = EINVAL;
425 451 break;
426 452 }
427 453
428 454 if (lock_request == &stack_lock_request) {
429 455 flk_set_state(lock_request, FLK_DEAD_STATE);
430 456 } else {
431 457 lock_request->l_state &= ~REFERENCED_LOCK;
432 458 if ((error != 0) || IS_DELETED(lock_request)) {
433 459 flk_set_state(lock_request, FLK_DEAD_STATE);
434 460 flk_free_lock(lock_request);
435 461 }
436 462 }
437 463
438 464 mutex_exit(&gp->gp_mutex);
439 465 if (serialize)
440 466 nbl_end_crit(vp);
441 467
442 468 return (error);
443 469
444 470 done:
445 471 flk_set_state(lock_request, FLK_DEAD_STATE);
446 472 if (lock_request != &stack_lock_request)
447 473 flk_free_lock(lock_request);
448 474 return (error);
449 475 }
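
For reference, a minimal user-level sketch of the OFD semantics described in the block comment above ofdlock(); the file name is arbitrary and error handling is abbreviated. ofdlock() currently requires whole-file locks (l_whence, l_start and l_len all zero), which the sketch respects; the lock belongs to the open file description, so it survives fork() and is shared by dup()ed descriptors.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	struct flock fl;
	int fd = open("/tmp/ofd-demo", O_RDWR | O_CREAT, 0644);

	if (fd < 0)
		return (1);

	(void) memset(&fl, 0, sizeof (fl));
	fl.l_type = F_WRLCK;		/* whole-file write lock */
	fl.l_whence = SEEK_SET;		/* l_start and l_len stay 0 */

	if (fcntl(fd, F_OFD_SETLK, &fl) == -1) {
		perror("F_OFD_SETLK");
		return (1);
	}

	/*
	 * A child created with fork() shares this file description and
	 * therefore this lock; an independent open() of the same file
	 * would get EAGAIN from F_OFD_SETLK while the lock is held.
	 */
	return (0);
}
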
450 476
451 477 /*
452 478 * Remove any lock on the vnode belonging to the given file_t.
453 479 * Called from closef on last close, file_t is locked.
454 480 *
455 481 * This is modeled on the cleanlocks() function but only removes the single
456 482 * lock associated with fp.
457 483 */
458 484 void
459 485 ofdcleanlock(file_t *fp)
460 486 {
461 487 lock_descriptor_t *fplock, *lock, *nlock;
462 488 vnode_t *vp;
463 489 graph_t *gp;
464 490
465 491 ASSERT(MUTEX_HELD(&fp->f_tlock));
466 492
467 493 if ((fplock = (lock_descriptor_t *)fp->f_filock) == NULL)
468 494 return;
469 495
470 496 fp->f_filock = NULL;
471 497 vp = fp->f_vnode;
472 498
473 499 gp = flk_get_lock_graph(vp, FLK_USE_GRAPH);
474 500
475 501 if (gp == NULL)
476 502 return;
477 503 mutex_enter(&gp->gp_mutex);
478 504
479 505 CHECK_SLEEPING_LOCKS(gp);
480 506 CHECK_ACTIVE_LOCKS(gp);
481 507
482 508 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
483 509
484 510 if (lock) {
485 511 do {
486 512 nlock = lock->l_next;
487 513 if (fplock == lock) {
488 514 CANCEL_WAKEUP(lock);
489 515 break;
490 516 }
491 517 lock = nlock;
492 518 } while (lock->l_vnode == vp);
493 519 }
494 520
495 521 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
496 522
497 523 if (lock) {
498 524 do {
499 525 nlock = lock->l_next;
500 526 if (fplock == lock) {
501 527 flk_delete_active_lock(lock, 0);
502 528 flk_wakeup(lock, 1);
503 529 flk_free_lock(lock);
504 530 break;
505 531 }
506 532 lock = nlock;
507 533 } while (lock->l_vnode == vp);
508 534 }
509 535
510 536 CHECK_SLEEPING_LOCKS(gp);
511 537 CHECK_ACTIVE_LOCKS(gp);
512 538 mutex_exit(&gp->gp_mutex);
513 539 }
514 540
515 541 /*
516 542 * Routine called from fs_frlock in fs/fs_subr.c
517 543 *
518 544 * This implements traditional POSIX style record locking. The two primary
519 545 * drawbacks to this style of locking are:
520 546 * 1) It is per-process, so any close of a file descriptor that refers to the
521 547 * file will drop the lock (e.g. lock /etc/passwd, call a library function
 522 548  * which opens /etc/passwd to read the file, when the library closes its
523 549 * file descriptor the application loses its lock and does not know).
524 550 * 2) Locks are not preserved across fork(2).
525 551 *
526 552 * Because these locks are only associated with a PID, they are per-process.
527 553 * This is why any close will drop the lock and is also why, once the process
528 554 * forks, the lock is no longer related to the new process. These locks can
529 555 * be considered as PID-ful.
530 556 *
531 557 * See ofdlock() for the implementation of a similar but improved locking
532 558 * scheme.
533 559 */
534 560 int
535 561 reclock(vnode_t *vp, flock64_t *lckdat, int cmd, int flag, u_offset_t offset,
536 562 flk_callback_t *flk_cbp)
537 563 {
538 564 lock_descriptor_t stack_lock_request;
539 565 lock_descriptor_t *lock_request;
540 566 int error = 0;
541 567 graph_t *gp;
542 568 int nlmid;
543 569
544 570 /*
545 571 * Check access permissions
546 572 */
547 573 if ((cmd & SETFLCK) &&
548 574 ((lckdat->l_type == F_RDLCK && (flag & FREAD) == 0) ||
|
↓ open down ↓ |
373 lines elided |
↑ open up ↑ |
549 575 (lckdat->l_type == F_WRLCK && (flag & FWRITE) == 0)))
550 576 return (EBADF);
551 577
552 578 /*
553 579 * for query and unlock we use the stack_lock_request
554 580 */
555 581
556 582 if ((lckdat->l_type == F_UNLCK) ||
557 583 !((cmd & INOFLCK) || (cmd & SETFLCK))) {
558 584 lock_request = &stack_lock_request;
559 - (void) bzero((caddr_t)lock_request,
560 - sizeof (lock_descriptor_t));
585 + bzero(lock_request, sizeof (lock_descriptor_t));
561 586
562 587 /*
563 588 * following is added to make the assertions in
564 589 * flk_execute_request() to pass through
565 590 */
566 591
567 592 lock_request->l_edge.edge_in_next = &lock_request->l_edge;
568 593 lock_request->l_edge.edge_in_prev = &lock_request->l_edge;
569 594 lock_request->l_edge.edge_adj_next = &lock_request->l_edge;
570 595 lock_request->l_edge.edge_adj_prev = &lock_request->l_edge;
571 596 lock_request->l_status = FLK_INITIAL_STATE;
572 597 } else {
573 598 lock_request = flk_get_lock();
574 599 }
575 600 lock_request->l_state = 0;
576 601 lock_request->l_vnode = vp;
577 602 lock_request->l_zoneid = getzoneid();
578 603
579 604 /*
580 605 * Convert the request range into the canonical start and end
581 606 * values. The NLM protocol supports locking over the entire
582 607 * 32-bit range, so there's no range checking for remote requests,
583 608 * but we still need to verify that local requests obey the rules.
584 609 */
585 610 /* Clustering */
586 611 if ((cmd & (RCMDLCK | PCMDLCK)) != 0) {
587 612 ASSERT(lckdat->l_whence == 0);
588 613 lock_request->l_start = lckdat->l_start;
589 614 lock_request->l_end = (lckdat->l_len == 0) ? MAX_U_OFFSET_T :
590 615 lckdat->l_start + (lckdat->l_len - 1);
591 616 } else {
592 617 /* check the validity of the lock range */
593 618 error = flk_convert_lock_data(vp, lckdat,
594 619 &lock_request->l_start, &lock_request->l_end,
595 620 offset);
596 621 if (error) {
597 622 goto done;
598 623 }
599 624 error = flk_check_lock_data(lock_request->l_start,
600 625 lock_request->l_end, MAXEND);
601 626 if (error) {
602 627 goto done;
603 628 }
604 629 }
605 630
606 631 ASSERT(lock_request->l_end >= lock_request->l_start);
607 632
608 633 lock_request->l_type = lckdat->l_type;
609 634 if (cmd & INOFLCK)
610 635 lock_request->l_state |= IO_LOCK;
611 636 if (cmd & SLPFLCK)
612 637 lock_request->l_state |= WILLING_TO_SLEEP_LOCK;
613 638 if (cmd & RCMDLCK)
614 639 lock_request->l_state |= LOCKMGR_LOCK;
615 640 if (cmd & NBMLCK)
616 641 lock_request->l_state |= NBMAND_LOCK;
617 642 /*
618 643 * Clustering: set flag for PXFS locks
619 644 * We do not _only_ check for the PCMDLCK flag because PXFS locks could
620 645 * also be of type 'RCMDLCK'.
621 646 * We do not _only_ check the GETPXFSID() macro because local PXFS
622 647 * clients use a pxfsid of zero to permit deadlock detection in the LLM.
623 648 */
624 649
625 650 if ((cmd & PCMDLCK) || (GETPXFSID(lckdat->l_sysid) != 0)) {
626 651 lock_request->l_state |= PXFS_LOCK;
627 652 }
628 653 if (!((cmd & SETFLCK) || (cmd & INOFLCK))) {
629 654 if (lock_request->l_type == F_RDLCK ||
630 655 lock_request->l_type == F_WRLCK)
631 656 lock_request->l_state |= QUERY_LOCK;
632 657 }
633 658 lock_request->l_flock = (*lckdat);
634 659 lock_request->l_callbacks = flk_cbp;
635 660
636 661 /*
637 662 * We are ready for processing the request
638 663 */
639 664 if (IS_LOCKMGR(lock_request)) {
640 665 /*
641 666 * If the lock request is an NLM server request ....
642 667 */
643 668 if (nlm_status_size == 0) { /* not booted as cluster */
644 669 mutex_enter(&flock_lock);
645 670 /*
646 671 * Bail out if this is a lock manager request and the
647 672 * lock manager is not supposed to be running.
648 673 */
649 674 if (flk_get_lockmgr_status() != FLK_LOCKMGR_UP) {
650 675 mutex_exit(&flock_lock);
651 676 error = ENOLCK;
652 677 goto done;
653 678 }
654 679 mutex_exit(&flock_lock);
655 680 } else { /* booted as a cluster */
656 681 nlmid = GETNLMID(lock_request->l_flock.l_sysid);
657 682 ASSERT(nlmid <= nlm_status_size && nlmid >= 0);
658 683
659 684 mutex_enter(&nlm_reg_lock);
660 685 /*
661 686 * If the NLM registry does not know about this
662 687 * NLM server making the request, add its nlmid
663 688 * to the registry.
664 689 */
665 690 if (FLK_REGISTRY_IS_NLM_UNKNOWN(nlm_reg_status,
666 691 nlmid)) {
667 692 FLK_REGISTRY_ADD_NLMID(nlm_reg_status, nlmid);
668 693 } else if (!FLK_REGISTRY_IS_NLM_UP(nlm_reg_status,
669 694 nlmid)) {
670 695 /*
671 696 * If the NLM server is already known (has made
672 697 * previous lock requests) and its state is
673 698 * not NLM_UP (means that NLM server is
674 699 * shutting down), then bail out with an
675 700 * error to deny the lock request.
676 701 */
677 702 mutex_exit(&nlm_reg_lock);
678 703 error = ENOLCK;
679 704 goto done;
680 705 }
681 706 mutex_exit(&nlm_reg_lock);
682 707 }
683 708 }
684 709
685 710 /* Now get the lock graph for a particular vnode */
686 711 gp = flk_get_lock_graph(vp, FLK_INIT_GRAPH);
687 712
688 713 /*
689 714 * We drop rwlock here otherwise this might end up causing a
690 715 * deadlock if this IOLOCK sleeps. (bugid # 1183392).
691 716 */
692 717
693 718 if (IS_IO_LOCK(lock_request)) {
694 719 VOP_RWUNLOCK(vp,
695 720 (lock_request->l_type == F_RDLCK) ?
696 721 V_WRITELOCK_FALSE : V_WRITELOCK_TRUE, NULL);
697 722 }
698 723 mutex_enter(&gp->gp_mutex);
699 724
700 725 lock_request->l_state |= REFERENCED_LOCK;
701 726 lock_request->l_graph = gp;
702 727
703 728 switch (lock_request->l_type) {
704 729 case F_RDLCK:
705 730 case F_WRLCK:
706 731 if (IS_QUERY_LOCK(lock_request)) {
707 732 flk_get_first_blocking_lock(lock_request);
708 733 if (lock_request->l_ofd != NULL)
709 734 lock_request->l_flock.l_pid = -1;
710 735 (*lckdat) = lock_request->l_flock;
711 736 break;
712 737 }
713 738
714 739 /* process the request now */
715 740
716 741 error = flk_process_request(lock_request);
717 742 break;
718 743
719 744 case F_UNLCK:
720 745 /* unlock request will not block so execute it immediately */
721 746
722 747 if (IS_LOCKMGR(lock_request) &&
723 748 flk_canceled(lock_request)) {
724 749 error = 0;
725 750 } else {
726 751 error = flk_execute_request(lock_request);
727 752 }
728 753 break;
729 754
730 755 case F_UNLKSYS:
731 756 /*
732 757 * Recovery mechanism to release lock manager locks when
 733 758 		 * an NFS client crashes and restarts. The NFS server will clear
734 759 * old locks and grant new locks.
735 760 */
736 761
737 762 if (lock_request->l_flock.l_sysid == 0) {
738 763 mutex_exit(&gp->gp_mutex);
739 764 return (EINVAL);
740 765 }
741 766 if (secpolicy_nfs(CRED()) != 0) {
742 767 mutex_exit(&gp->gp_mutex);
743 768 return (EPERM);
744 769 }
745 770 flk_delete_locks_by_sysid(lock_request);
746 771 lock_request->l_state &= ~REFERENCED_LOCK;
747 772 flk_set_state(lock_request, FLK_DEAD_STATE);
748 773 flk_free_lock(lock_request);
749 774 mutex_exit(&gp->gp_mutex);
750 775 return (0);
751 776
752 777 default:
753 778 error = EINVAL;
754 779 break;
755 780 }
756 781
757 782 /* Clustering: For blocked PXFS locks, return */
758 783 if (error == PXFS_LOCK_BLOCKED) {
759 784 lock_request->l_state &= ~REFERENCED_LOCK;
760 785 mutex_exit(&gp->gp_mutex);
761 786 return (error);
762 787 }
763 788
764 789 /*
765 790 * Now that we have seen the status of locks in the system for
766 791 * this vnode we acquire the rwlock if it is an IO_LOCK.
767 792 */
768 793
769 794 if (IS_IO_LOCK(lock_request)) {
770 795 (void) VOP_RWLOCK(vp,
771 796 (lock_request->l_type == F_RDLCK) ?
772 797 V_WRITELOCK_FALSE : V_WRITELOCK_TRUE, NULL);
773 798 if (!error) {
774 799 lckdat->l_type = F_UNLCK;
775 800
776 801 /*
777 802 * This wake up is needed otherwise
778 803 * if IO_LOCK has slept the dependents on this
779 804 * will not be woken up at all. (bugid # 1185482).
780 805 */
781 806
782 807 flk_wakeup(lock_request, 1);
783 808 flk_set_state(lock_request, FLK_DEAD_STATE);
784 809 flk_free_lock(lock_request);
785 810 }
786 811 /*
787 812 * else if error had occurred either flk_process_request()
788 813 * has returned EDEADLK in which case there will be no
789 814 * dependents for this lock or EINTR from flk_wait_execute_
790 815 * request() in which case flk_cancel_sleeping_lock()
791 816 * would have been done. same is true with EBADF.
792 817 */
793 818 }
794 819
795 820 if (lock_request == &stack_lock_request) {
796 821 flk_set_state(lock_request, FLK_DEAD_STATE);
797 822 } else {
798 823 lock_request->l_state &= ~REFERENCED_LOCK;
799 824 if ((error != 0) || IS_DELETED(lock_request)) {
800 825 flk_set_state(lock_request, FLK_DEAD_STATE);
801 826 flk_free_lock(lock_request);
802 827 }
803 828 }
804 829
805 830 mutex_exit(&gp->gp_mutex);
806 831 return (error);
807 832
808 833 done:
809 834 flk_set_state(lock_request, FLK_DEAD_STATE);
810 835 if (lock_request != &stack_lock_request)
811 836 flk_free_lock(lock_request);
812 837 return (error);
813 838 }
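
The first drawback listed in the comment above reclock() (any close of the file by the process drops its lock) is easy to demonstrate from user level; a sketch with an arbitrary file name and a stand-in for the offending library routine:

#include <fcntl.h>
#include <string.h>
#include <unistd.h>

static void
library_routine(const char *path)
{
	int fd = open(path, O_RDONLY);

	/* ... read something ... */
	(void) close(fd);	/* this close silently drops the caller's lock */
}

int
main(void)
{
	struct flock fl;
	int fd = open("/tmp/posix-demo", O_RDWR | O_CREAT, 0644);

	(void) memset(&fl, 0, sizeof (fl));
	fl.l_type = F_WRLCK;
	fl.l_whence = SEEK_SET;		/* whole file: l_start = l_len = 0 */

	(void) fcntl(fd, F_SETLK, &fl);

	library_routine("/tmp/posix-demo");

	/* The POSIX lock is gone here, even though fd is still open. */
	return (0);
}
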
814 839
815 840 /*
816 841 * Invoke the callbacks in the given list. If before sleeping, invoke in
817 842 * list order. If after sleeping, invoke in reverse order.
818 843 *
819 844 * CPR (suspend/resume) support: if one of the callbacks returns a
820 845 * callb_cpr_t, return it. This will be used to make the thread CPR-safe
821 846 * while it is sleeping. There should be at most one callb_cpr_t for the
822 847 * thread.
823 848 * XXX This is unnecessarily complicated. The CPR information should just
824 849 * get passed in directly through VOP_FRLOCK and reclock, rather than
825 850 * sneaking it in via a callback.
826 851 */
827 852
828 853 callb_cpr_t *
829 854 flk_invoke_callbacks(flk_callback_t *cblist, flk_cb_when_t when)
830 855 {
831 856 callb_cpr_t *cpr_callbackp = NULL;
832 857 callb_cpr_t *one_result;
833 858 flk_callback_t *cb;
834 859
835 860 if (cblist == NULL)
836 861 return (NULL);
837 862
838 863 if (when == FLK_BEFORE_SLEEP) {
839 864 cb = cblist;
840 865 do {
841 866 one_result = (*cb->cb_callback)(when, cb->cb_data);
842 867 if (one_result != NULL) {
843 868 ASSERT(cpr_callbackp == NULL);
844 869 cpr_callbackp = one_result;
845 870 }
846 871 cb = cb->cb_next;
847 872 } while (cb != cblist);
848 873 } else {
849 874 cb = cblist->cb_prev;
850 875 do {
851 876 one_result = (*cb->cb_callback)(when, cb->cb_data);
852 877 if (one_result != NULL) {
853 878 cpr_callbackp = one_result;
854 879 }
855 880 cb = cb->cb_prev;
856 881 } while (cb != cblist->cb_prev);
857 882 }
858 883
859 884 return (cpr_callbackp);
860 885 }
861 886
862 887 /*
863 888 * Initialize a flk_callback_t to hold the given callback.
864 889 */
865 890
866 891 void
867 892 flk_init_callback(flk_callback_t *flk_cb,
868 893 callb_cpr_t *(*cb_fcn)(flk_cb_when_t, void *), void *cbdata)
869 894 {
870 895 flk_cb->cb_next = flk_cb;
871 896 flk_cb->cb_prev = flk_cb;
872 897 flk_cb->cb_callback = cb_fcn;
873 898 flk_cb->cb_data = cbdata;
874 899 }
875 900
876 901 /*
877 902 * Initialize an flk_callback_t and then link it into the head of an
878 903 * existing list (which may be NULL).
879 904 */
880 905
881 906 void
882 907 flk_add_callback(flk_callback_t *newcb,
883 908 callb_cpr_t *(*cb_fcn)(flk_cb_when_t, void *),
884 909 void *cbdata, flk_callback_t *cblist)
885 910 {
886 911 flk_init_callback(newcb, cb_fcn, cbdata);
887 912
888 913 if (cblist == NULL)
889 914 return;
890 915
891 916 newcb->cb_prev = cblist->cb_prev;
892 917 newcb->cb_next = cblist;
893 918 cblist->cb_prev->cb_next = newcb;
894 919 cblist->cb_prev = newcb;
895 920 }
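
A minimal sketch of how a file system hands a callback to reclock() through this interface; the callback name and its private data are hypothetical, and returning NULL simply means the callback supplies no CPR information (returning a callb_cpr_t from the FLK_BEFORE_SLEEP invocation is what makes the sleeping thread CPR-safe, as described above flk_invoke_callbacks()). flk_add_callback() would be used instead when the request already carries a callback list.

static callb_cpr_t *
example_frlock_cb(flk_cb_when_t when, void *data)
{
	if (when == FLK_BEFORE_SLEEP) {
		/* drop resources that must not be held across the sleep */
	} else {
		/* FLK_AFTER_SLEEP: reacquire whatever was dropped above */
	}
	return (NULL);
}

	/* ... in the caller, before issuing the blocking request ... */
	flk_callback_t flk_cb;

	flk_init_callback(&flk_cb, example_frlock_cb, example_data);
	error = reclock(vp, lckdat, cmd, flag, offset, &flk_cb);
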
896 921
897 922 /*
898 923 * Remove the callback from a list.
899 924 */
900 925
901 926 void
902 927 flk_del_callback(flk_callback_t *flk_cb)
903 928 {
904 929 flk_cb->cb_next->cb_prev = flk_cb->cb_prev;
905 930 flk_cb->cb_prev->cb_next = flk_cb->cb_next;
906 931
907 932 flk_cb->cb_prev = flk_cb;
908 933 flk_cb->cb_next = flk_cb;
909 934 }
910 935
911 936 /*
912 937 * Initialize the flk_edge_cache data structure and create the
913 938 * nlm_reg_status array.
914 939 */
915 940
916 941 void
917 942 flk_init(void)
918 943 {
919 944 uint_t i;
920 945
921 946 flk_edge_cache = kmem_cache_create("flk_edges",
922 947 sizeof (struct edge), 0, NULL, NULL, NULL, NULL, NULL, 0);
923 948 if (flk_edge_cache == NULL) {
924 949 cmn_err(CE_PANIC, "Couldn't create flk_edge_cache\n");
925 950 }
926 951 /*
927 952 * Create the NLM registry object.
928 953 */
929 954
930 955 if (cluster_bootflags & CLUSTER_BOOTED) {
931 956 /*
932 957 * This routine tells you the maximum node id that will be used
933 958 * in the cluster. This number will be the size of the nlm
934 959 * registry status array. We add 1 because we will be using
935 960 * all entries indexed from 0 to maxnodeid; e.g., from 0
936 961 * to 64, for a total of 65 entries.
937 962 */
938 963 nlm_status_size = clconf_maximum_nodeid() + 1;
939 964 } else {
940 965 nlm_status_size = 0;
941 966 }
942 967
943 968 if (nlm_status_size != 0) { /* booted as a cluster */
944 969 nlm_reg_status = (flk_nlm_status_t *)
945 970 kmem_alloc(sizeof (flk_nlm_status_t) * nlm_status_size,
946 971 KM_SLEEP);
947 972
948 973 /* initialize all NLM states in array to NLM_UNKNOWN */
949 974 for (i = 0; i < nlm_status_size; i++) {
950 975 nlm_reg_status[i] = FLK_NLM_UNKNOWN;
951 976 }
952 977 }
978 +
979 + mutex_init(&flock_lock, NULL, MUTEX_DEFAULT, NULL);
980 + mutex_init(&nlm_reg_lock, NULL, MUTEX_DEFAULT, NULL);
981 +
982 + rw_init(&sysid_to_host_translator_lock, NULL, RW_DEFAULT, NULL);
983 + list_create(&sysid_to_host_translator_list,
984 + sizeof (struct sysid_to_host_translator_entry),
985 + offsetof(struct sysid_to_host_translator_entry, node));
953 986 }
954 987
955 988 /*
956 989 * Zone constructor/destructor callbacks to be executed when a zone is
957 990 * created/destroyed.
958 991 */
959 992 /* ARGSUSED */
960 993 void *
961 994 flk_zone_init(zoneid_t zoneid)
962 995 {
963 996 struct flock_globals *fg;
964 997 uint_t i;
965 998
966 999 fg = kmem_alloc(sizeof (*fg), KM_SLEEP);
967 1000 fg->flk_lockmgr_status = FLK_LOCKMGR_UP;
968 1001 for (i = 0; i < HASH_SIZE; i++)
969 1002 fg->lockmgr_status[i] = FLK_LOCKMGR_UP;
970 1003 return (fg);
971 1004 }
972 1005
973 1006 /* ARGSUSED */
974 1007 void
975 1008 flk_zone_fini(zoneid_t zoneid, void *data)
976 1009 {
977 1010 struct flock_globals *fg = data;
978 1011
979 1012 kmem_free(fg, sizeof (*fg));
980 1013 }
981 1014
982 1015 /*
983 1016 * Get a lock_descriptor structure with initialization of edge lists.
984 1017 */
985 1018
986 1019 static lock_descriptor_t *
987 1020 flk_get_lock(void)
988 1021 {
989 1022 lock_descriptor_t *l;
990 1023
991 1024 l = kmem_zalloc(sizeof (lock_descriptor_t), KM_SLEEP);
992 1025
993 1026 cv_init(&l->l_cv, NULL, CV_DRIVER, NULL);
994 1027 l->l_edge.edge_in_next = &l->l_edge;
995 1028 l->l_edge.edge_in_prev = &l->l_edge;
996 1029 l->l_edge.edge_adj_next = &l->l_edge;
997 1030 l->l_edge.edge_adj_prev = &l->l_edge;
998 1031 l->pvertex = -1;
999 1032 l->l_status = FLK_INITIAL_STATE;
1000 1033 flk_lock_allocs++;
1001 1034 return (l);
1002 1035 }
1003 1036
1004 1037 /*
1005 1038 * Free a lock_descriptor structure. Just sets the DELETED_LOCK flag
1006 1039 * when some thread has a reference to it as in reclock().
1007 1040 */
1008 1041
1009 1042 void
1010 1043 flk_free_lock(lock_descriptor_t *lock)
1011 1044 {
1012 1045 file_t *fp;
1013 1046
1047 + ASSERT(lock->l_blocker >= 0);
1014 1048 ASSERT(IS_DEAD(lock));
1015 1049
1016 1050 if ((fp = lock->l_ofd) != NULL && fp->f_filock == (struct filock *)lock)
1017 1051 fp->f_filock = NULL;
1018 1052
1019 1053 if (IS_REFERENCED(lock)) {
1020 1054 lock->l_state |= DELETED_LOCK;
1021 1055 return;
1022 1056 }
1023 1057 flk_lock_frees++;
1024 - kmem_free((void *)lock, sizeof (lock_descriptor_t));
1058 + kmem_free(lock, sizeof (lock_descriptor_t));
1025 1059 }
1026 1060
1027 1061 void
1028 1062 flk_set_state(lock_descriptor_t *lock, int new_state)
1029 1063 {
1030 1064 /*
1031 1065 * Locks in the sleeping list may be woken up in a number of ways,
1032 1066 * and more than once. If a sleeping lock is signaled awake more
1033 1067 * than once, then it may or may not change state depending on its
1034 1068 * current state.
1035 1069 * Also note that NLM locks that are sleeping could be moved to an
1036 1070 * interrupted state more than once if the unlock request is
1037 1071 * retransmitted by the NLM client - the second time around, this is
1038 1072 * just a nop.
1039 1073 * The ordering of being signaled awake is:
1040 1074 * INTERRUPTED_STATE > CANCELLED_STATE > GRANTED_STATE.
1041 1075 * The checks below implement this ordering.
1042 1076 */
1043 1077 if (IS_INTERRUPTED(lock)) {
1044 1078 if ((new_state == FLK_CANCELLED_STATE) ||
1045 1079 (new_state == FLK_GRANTED_STATE) ||
1046 1080 (new_state == FLK_INTERRUPTED_STATE)) {
1047 1081 return;
1048 1082 }
1049 1083 }
1050 1084 if (IS_CANCELLED(lock)) {
1051 1085 if ((new_state == FLK_GRANTED_STATE) ||
1052 1086 (new_state == FLK_CANCELLED_STATE)) {
1053 1087 return;
1054 1088 }
1055 1089 }
1056 1090 CHECK_LOCK_TRANSITION(lock->l_status, new_state);
1057 1091 if (IS_PXFS(lock)) {
1058 1092 cl_flk_state_transition_notify(lock, lock->l_status, new_state);
1059 1093 }
1060 1094 lock->l_status = new_state;
1061 1095 }
1062 1096
1063 1097 /*
 1098 + * Support for remote stale lock detection
1099 + */
1100 +
1101 +void
1102 +flk_add_sysid_to_host_translator(sysid_to_host_translator_t tr)
1103 +{
1104 + struct sysid_to_host_translator_entry *te;
1105 +
1106 + te = kmem_alloc(sizeof (struct sysid_to_host_translator_entry),
1107 + KM_SLEEP);
1108 +
1109 + te->translator = tr;
1110 +
1111 + rw_enter(&sysid_to_host_translator_lock, RW_WRITER);
1112 + list_insert_head(&sysid_to_host_translator_list, te);
1113 + rw_exit(&sysid_to_host_translator_lock);
1114 +}
1115 +
1116 +static void
1117 +translate_sysid_to_host(zoneid_t zoneid, sysid_t sysid, char *host, size_t hlen,
1118 + const char **type)
1119 +{
 1120 +	struct sockaddr_storage sa;
1121 + struct sysid_to_host_translator_entry *te;
1122 +
 1123 +	/* Some defaults in case the translation fails */
1124 + *type = "?";
1125 + (void) strlcpy(host, "?", hlen);
1126 +
1127 + rw_enter(&sysid_to_host_translator_lock, RW_READER);
1128 +
1129 + for (te = list_head(&sysid_to_host_translator_list); te != NULL;
1130 + te = list_next(&sysid_to_host_translator_list, te)) {
 1131 +		if (te->translator(zoneid, sysid,
 1132 +		    (struct sockaddr *)&sa, type) != 0) {
1133 + rw_exit(&sysid_to_host_translator_lock);
1134 +
 1135 +			switch (sa.ss_family) {
1136 + case AF_INET:
1137 + (void) inet_ntop(AF_INET,
1138 + &((struct sockaddr_in *)&sa)->sin_addr,
1139 + host, hlen);
1140 + break;
1141 + case AF_INET6:
1142 + (void) inet_ntop(AF_INET6,
1143 + &((struct sockaddr_in6 *)&sa)->sin6_addr,
1144 + host, hlen);
1145 + break;
1146 + default:
1147 + break;
1148 + }
1149 +
1150 + return;
1151 + }
1152 + }
1153 +
1154 + rw_exit(&sysid_to_host_translator_lock);
1155 +}
1156 +
1157 +static char *
1158 +get_vnode_path(vnode_t *vp)
1159 +{
1160 + size_t len;
1161 + char *ret;
1162 +
1163 + mutex_enter(&vp->v_lock);
1164 + if (vp->v_path == NULL) {
1165 + mutex_exit(&vp->v_lock);
1166 + return (NULL);
1167 + }
1168 + len = strlen(vp->v_path) + 1;
1169 + mutex_exit(&vp->v_lock);
1170 +
1171 + ret = kmem_alloc(len, KM_SLEEP);
1172 +
1173 + mutex_enter(&vp->v_lock);
1174 + if (vp->v_path == NULL || strlen(vp->v_path) + 1 != len) {
1175 + mutex_exit(&vp->v_lock);
1176 + kmem_free(ret, len);
1177 + return (NULL);
1178 + }
1179 + bcopy(vp->v_path, ret, len);
1180 + mutex_exit(&vp->v_lock);
1181 +
1182 + return (ret);
1183 +}
1184 +
1185 +static void
1186 +flk_stale_lock_check(lock_descriptor_t *lock)
1187 +{
1188 + char *path;
1189 +
1190 + char host[INET6_ADDRSTRLEN]; /* host name */
1191 + const char *type; /* host type */
1192 +
1193 + /* temporary variables for the cmn_err() call */
1194 + char *p, *t; /* path, lock type */
1195 + pid_t pid; /* pid */
1196 + void *v; /* vnode */
1197 + u_offset_t s, e; /* start, end */
1198 +
1199 + ASSERT(MUTEX_HELD(&lock->l_graph->gp_mutex));
1200 +
1201 + /*
 1202 +	 * Skip if this is not a remote lock, stale lock checking is
 1203 +	 * disabled, or the lock has already been reported.
1204 + */
1205 + if (IS_LOCAL(lock) || stale_lock_timeout == 0 || lock->l_blocker < 0)
1206 + return;
1207 +
1208 + /* Seen first time? */
1209 + if (lock->l_blocker == 0) {
1210 + lock->l_blocker = gethrtime();
1211 + return;
1212 + }
1213 +
1214 + /* Old enough? */
1215 + if ((gethrtime() - lock->l_blocker) / NANOSEC < stale_lock_timeout)
1216 + return;
1217 +
1218 + translate_sysid_to_host(lock->l_zoneid, lock->l_flock.l_sysid, host,
1219 + sizeof (host), &type);
1220 + path = get_vnode_path(lock->l_vnode);
1221 +
1222 + pid = lock->l_flock.l_pid;
1223 + v = (void *)lock->l_vnode;
1224 + p = path == NULL ? "?" : path;
1225 + t = lock->l_type == F_WRLCK ? "WR" : "RD";
1226 + s = lock->l_start;
1227 + e = lock->l_end;
1228 +
1229 + /* Report the blocker as stale */
1230 + cmn_err(CE_NOTE, "!Stale lock (host: %s (%s), pid: %d, vnode: %p, "
1231 + "path: %s, %sLCK: %llu:%llu)", host, type, pid, v, p, t, s, e);
1232 +
1233 + if (path != NULL)
1234 + strfree(path);
1235 +
1236 + /* Mark this blocker as reported */
1237 + lock->l_blocker = -lock->l_blocker;
1238 +}
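
To summarize the l_blocker bookkeeping used above: zero means the lock has not yet been seen blocking a non-waiting request, a positive value is the gethrtime() timestamp of the first time it was seen blocking one, and the value is negated once the lock has been reported so that each stale blocker is logged only once. Since stale_lock_timeout is a plain int global, it can presumably be tuned without a reboot, e.g. echo "stale_lock_timeout/W 0t1800" | mdb -kw should drop the threshold to 30 minutes on a live system, and set stale_lock_timeout = 1800 in /etc/system would make that persistent; setting it to 0 disables the reporting entirely.
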
1239 +
1240 +static void
1241 +flk_stale_lock_shrink(lock_descriptor_t *lock, lock_descriptor_t *new)
1242 +{
1243 + char *path;
1244 +
1245 + char host[INET6_ADDRSTRLEN]; /* host name */
1246 + const char *type; /* host type */
1247 +
1248 + /* temporary variables for the cmn_err() call */
1249 + char *p, *t; /* path, lock type */
1250 + pid_t pid; /* pid */
1251 + void *v; /* vnode */
1252 + u_offset_t s, e; /* start, end */
1253 + u_offset_t ns, ne; /* new start, new end */
1254 +
1255 + ASSERT(MUTEX_HELD(&lock->l_graph->gp_mutex));
1256 +
1257 + translate_sysid_to_host(lock->l_zoneid, lock->l_flock.l_sysid, host,
1258 + sizeof (host), &type);
1259 + path = get_vnode_path(lock->l_vnode);
1260 +
1261 + pid = lock->l_flock.l_pid;
1262 + v = (void *)lock->l_vnode;
1263 + p = path == NULL ? "?" : path;
1264 + t = lock->l_type == F_WRLCK ? "WR" : "RD";
1265 + s = lock->l_start;
1266 + e = lock->l_end;
1267 + ns = new->l_start;
1268 + ne = new->l_end;
1269 +
1270 + cmn_err(CE_NOTE, "!Stale lock SHRINK (host: %s (%s), pid: %d, "
1271 + "vnode: %p, path: %s, %sLCK: %llu:%llu -> %llu:%llu)", host, type,
1272 + pid, v, p, t, s, e, ns, ne);
1273 +
1274 + if (path != NULL)
1275 + strfree(path);
1276 +}
1277 +
1278 +static void
1279 +flk_stale_lock_split(lock_descriptor_t *lock, lock_descriptor_t *new1,
1280 + lock_descriptor_t *new2)
1281 +{
1282 + char *path;
1283 +
1284 + char host[INET6_ADDRSTRLEN]; /* host name */
1285 + const char *type; /* host type */
1286 +
1287 + /* temporary variables for the cmn_err() call */
1288 + char *p, *t; /* path, lock type */
1289 + pid_t pid; /* pid */
1290 + void *v; /* vnode */
1291 + u_offset_t s, e; /* start, end */
1292 + u_offset_t n1s, n1e; /* new1 start, new1 end */
1293 + u_offset_t n2s, n2e; /* new2 start, new2 end */
1294 +
1295 + ASSERT(MUTEX_HELD(&lock->l_graph->gp_mutex));
1296 +
1297 + translate_sysid_to_host(lock->l_zoneid, lock->l_flock.l_sysid, host,
1298 + sizeof (host), &type);
1299 + path = get_vnode_path(lock->l_vnode);
1300 +
1301 + pid = lock->l_flock.l_pid;
1302 + v = (void *)lock->l_vnode;
1303 + p = path == NULL ? "?" : path;
1304 + t = lock->l_type == F_WRLCK ? "WR" : "RD";
1305 + s = lock->l_start;
1306 + e = lock->l_end;
1307 + n1s = new1->l_start;
1308 + n1e = new1->l_end;
1309 + n2s = new2->l_start;
1310 + n2e = new2->l_end;
1311 +
1312 + cmn_err(CE_NOTE, "!Stale lock SPLIT (host: %s (%s), pid: %d, "
1313 + "vnode: %p, path: %s, %sLCK: %llu:%llu -> %llu:%llu and %llu:%llu)",
1314 + host, type, pid, v, p, t, s, e, n1s, n1e, n2s, n2e);
1315 +
1316 + if (path != NULL)
1317 + strfree(path);
1318 +}
1319 +
1320 +static void
1321 +flk_stale_lock_release(lock_descriptor_t *lock)
1322 +{
1323 + char *path;
1324 +
1325 + char host[INET6_ADDRSTRLEN]; /* host name */
1326 + const char *type; /* host type */
1327 +
1328 + /* temporary variables for the cmn_err() call */
1329 + char *p, *t; /* path, lock type */
1330 + pid_t pid; /* pid */
1331 + void *v; /* vnode */
1332 + u_offset_t s, e; /* start, end */
1333 +
1334 + ASSERT(MUTEX_HELD(&lock->l_graph->gp_mutex));
1335 +
1336 + translate_sysid_to_host(lock->l_zoneid, lock->l_flock.l_sysid, host,
1337 + sizeof (host), &type);
1338 + path = get_vnode_path(lock->l_vnode);
1339 +
1340 + pid = lock->l_flock.l_pid;
1341 + v = (void *)lock->l_vnode;
1342 + p = path == NULL ? "?" : path;
1343 + t = lock->l_type == F_WRLCK ? "WR" : "RD";
1344 + s = lock->l_start;
1345 + e = lock->l_end;
1346 +
1347 + cmn_err(CE_NOTE, "!Stale lock RELEASE (host: %s (%s), pid: %d, "
1348 + "vnode: %p, path: %s, %sLCK: %llu:%llu)", host, type, pid, v, p, t,
1349 + s, e);
1350 +
1351 + if (path != NULL)
1352 + strfree(path);
1353 +}
1354 +
1355 +/*
1064 1356 * Routine that checks whether there are any blocking locks in the system.
1065 1357 *
1066 1358 * The policy followed is if a write lock is sleeping we don't allow read
1067 1359 * locks before this write lock even though there may not be any active
1068 1360 * locks corresponding to the read locks' region.
1069 1361 *
1070 1362 * flk_add_edge() function adds an edge between l1 and l2 iff there
1071 1363 * is no path between l1 and l2. This is done to have a "minimum
1072 1364 * storage representation" of the dependency graph.
1073 1365 *
1074 1366 * Another property of the graph is since only the new request throws
1075 1367 * edges to the existing locks in the graph, the graph is always topologically
1076 1368 * ordered.
1077 1369 */
1078 1370
1079 1371 static int
1080 1372 flk_process_request(lock_descriptor_t *request)
1081 1373 {
1082 1374 graph_t *gp = request->l_graph;
1083 1375 lock_descriptor_t *lock;
1084 1376 int request_blocked_by_active = 0;
1085 1377 int request_blocked_by_granted = 0;
1086 1378 int request_blocked_by_sleeping = 0;
1087 1379 vnode_t *vp = request->l_vnode;
1088 1380 int error = 0;
1089 1381 int request_will_wait = 0;
1090 1382 int found_covering_lock = 0;
1091 1383 lock_descriptor_t *covered_by = NULL;
1092 1384
1093 1385 ASSERT(MUTEX_HELD(&gp->gp_mutex));
1094 1386 request_will_wait = IS_WILLING_TO_SLEEP(request);
1095 1387
1096 1388 /*
1097 1389 * check active locks
1098 1390 */
1099 1391
1100 1392 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
1101 1393
1102 -
1103 1394 if (lock) {
1104 1395 do {
1105 1396 if (BLOCKS(lock, request)) {
1106 - if (!request_will_wait)
1397 + if (!request_will_wait) {
1398 + flk_stale_lock_check(lock);
1107 1399 return (EAGAIN);
1400 + }
1108 1401 request_blocked_by_active = 1;
1109 1402 break;
1110 1403 }
1111 1404 /*
1112 1405 * Grant lock if it is for the same owner holding active
1113 1406 * lock that covers the request.
1114 1407 */
1115 1408
1116 1409 if (SAME_OWNER(lock, request) &&
1117 1410 COVERS(lock, request) &&
1118 1411 (request->l_type == F_RDLCK))
1119 1412 return (flk_execute_request(request));
1120 1413 lock = lock->l_next;
1121 1414 } while (lock->l_vnode == vp);
1122 1415 }
1123 1416
1124 1417 if (!request_blocked_by_active) {
1125 - lock_descriptor_t *lk[1];
1126 - lock_descriptor_t *first_glock = NULL;
1418 + lock_descriptor_t *lk[1];
1419 + lock_descriptor_t *first_glock = NULL;
1420 +
1127 1421 /*
1128 1422 * Shall we grant this?! NO!!
1129 1423 * What about those locks that were just granted and still
1130 1424 * in sleep queue. Those threads are woken up and so locks
1131 1425 * are almost active.
1132 1426 */
1133 1427 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
1134 1428 if (lock) {
1135 1429 do {
1136 1430 if (BLOCKS(lock, request)) {
1137 1431 if (IS_GRANTED(lock)) {
1138 1432 request_blocked_by_granted = 1;
1139 1433 } else {
1140 1434 request_blocked_by_sleeping = 1;
1141 1435 }
1142 1436 }
1143 1437
1144 1438 lock = lock->l_next;
1145 1439 } while ((lock->l_vnode == vp));
1146 1440 first_glock = lock->l_prev;
1147 1441 ASSERT(first_glock->l_vnode == vp);
1148 1442 }
1149 1443
1150 1444 if (request_blocked_by_granted)
1151 1445 goto block;
1152 1446
1153 1447 if (!request_blocked_by_sleeping) {
1154 1448 /*
1155 1449 * If the request isn't going to be blocked by a
1156 1450 * sleeping request, we know that it isn't going to
1157 1451 * be blocked; we can just execute the request --
1158 1452 * without performing costly deadlock detection.
1159 1453 */
1160 1454 ASSERT(!request_blocked_by_active);
1161 1455 return (flk_execute_request(request));
1162 1456 } else if (request->l_type == F_RDLCK) {
1163 1457 /*
1164 1458 * If we have a sleeping writer in the requested
1165 1459 * lock's range, block.
1166 1460 */
1167 1461 goto block;
1168 1462 }
1169 1463
1170 1464 lk[0] = request;
1171 1465 request->l_state |= RECOMPUTE_LOCK;
1172 1466 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
1173 1467 if (lock) {
1174 1468 do {
1175 1469 flk_recompute_dependencies(lock, lk, 1, 0);
1176 1470 lock = lock->l_next;
1177 1471 } while (lock->l_vnode == vp);
1178 1472 }
1179 1473 lock = first_glock;
1180 1474 if (lock) {
1181 1475 do {
1182 1476 if (IS_GRANTED(lock)) {
1183 - flk_recompute_dependencies(lock, lk, 1, 0);
1477 + flk_recompute_dependencies(lock, lk, 1,
1478 + 0);
1184 1479 }
1185 1480 lock = lock->l_prev;
1186 1481 } while ((lock->l_vnode == vp));
1187 1482 }
1188 1483 request->l_state &= ~RECOMPUTE_LOCK;
1189 1484 if (!NO_DEPENDENTS(request) && flk_check_deadlock(request))
1190 1485 return (EDEADLK);
1191 1486 return (flk_execute_request(request));
1192 1487 }
1193 1488
1194 1489 block:
1195 1490 if (request_will_wait)
1196 1491 flk_graph_uncolor(gp);
1197 1492
1198 1493 /* check sleeping locks */
1199 1494
1200 1495 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
1201 1496
1202 1497 /*
1203 1498 * If we find a sleeping write lock that is a superset of the
1204 1499 * region wanted by request we can be assured that by adding an
1205 1500 * edge to this write lock we have paths to all locks in the
1206 1501 * graph that blocks the request except in one case and that is why
1207 1502 * another check for SAME_OWNER in the loop below. The exception
1208 1503 * case is when this process that owns the sleeping write lock 'l1'
1209 1504 * has other locks l2, l3, l4 that are in the system and arrived
1210 1505 * before l1. l1 does not have path to these locks as they are from
1211 1506 * same process. We break when we find a second covering sleeping
1212 1507 * lock l5 owned by a process different from that owning l1, because
1213 1508 * there cannot be any of l2, l3, l4, etc., arrived before l5, and if
1214 1509 * it has l1 would have produced a deadlock already.
1215 1510 */
1216 1511
1217 1512 if (lock) {
1218 1513 do {
1219 1514 if (BLOCKS(lock, request)) {
1220 1515 if (!request_will_wait)
1221 1516 return (EAGAIN);
1222 1517 if (COVERS(lock, request) &&
1223 1518 lock->l_type == F_WRLCK) {
1224 1519 if (found_covering_lock &&
1225 1520 !SAME_OWNER(lock, covered_by)) {
1226 1521 found_covering_lock++;
1227 1522 break;
1228 1523 }
1229 1524 found_covering_lock = 1;
1230 1525 covered_by = lock;
1231 1526 }
1232 1527 if (found_covering_lock &&
1233 1528 !SAME_OWNER(lock, covered_by)) {
1234 1529 lock = lock->l_next;
1235 1530 continue;
1236 1531 }
1237 1532 if ((error = flk_add_edge(request, lock,
1238 1533 !found_covering_lock, 0)))
1239 1534 return (error);
1240 1535 }
1241 1536 lock = lock->l_next;
1242 1537 } while (lock->l_vnode == vp);
1243 1538 }
1244 1539
1245 -/*
1246 - * found_covering_lock == 2 iff at this point 'request' has paths
1247 - * to all locks that blocks 'request'. found_covering_lock == 1 iff at this
1248 - * point 'request' has paths to all locks that blocks 'request' whose owners
1249 - * are not same as the one that covers 'request' (covered_by above) and
1250 - * we can have locks whose owner is same as covered_by in the active list.
1251 - */
1540 + /*
1541 + * found_covering_lock == 2 iff at this point 'request' has paths to
1542 + * all locks that blocks 'request'. found_covering_lock == 1 iff at
1543 + * this point 'request' has paths to all locks that blocks 'request'
1544 + * whose owners are not same as the one that covers 'request'
1545 + * (covered_by above) and we can have locks whose owner is same as
1546 + * covered_by in the active list.
1547 + */
1252 1548
1253 1549 if (request_blocked_by_active && found_covering_lock != 2) {
1254 1550 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
1255 1551 ASSERT(lock != NULL);
1256 1552 do {
1257 1553 if (BLOCKS(lock, request)) {
1258 1554 if (found_covering_lock &&
1259 1555 !SAME_OWNER(lock, covered_by)) {
1260 1556 lock = lock->l_next;
1261 1557 continue;
1262 1558 }
1263 1559 if ((error = flk_add_edge(request, lock,
1264 1560 CHECK_CYCLE, 0)))
1265 1561 return (error);
1266 1562 }
1267 1563 lock = lock->l_next;
1268 1564 } while (lock->l_vnode == vp);
1269 1565 }
1270 1566
1271 1567 if (NOT_BLOCKED(request)) {
1272 1568 /*
1273 1569 * request not dependent on any other locks
1274 1570 * so execute this request
1275 1571 */
1276 1572 return (flk_execute_request(request));
1277 1573 } else {
1278 1574 /*
1279 1575 * check for deadlock
1280 1576 */
1281 1577 if (flk_check_deadlock(request))
1282 1578 return (EDEADLK);
1283 1579 /*
1284 1580 * this thread has to sleep
1285 1581 */
1286 1582 return (flk_wait_execute_request(request));
1287 1583 }
1288 1584 }
1289 1585
1290 1586 /*
1291 1587 * The actual execution of the request in the simple case is only to
1292 1588 * insert the 'request' in the list of active locks if it is not an
1293 1589 * UNLOCK.
1294 1590 * We have to consider the existing active locks' relation to
1295 1591 * this 'request' if they are owned by same process. flk_relation() does
1296 1592 * this job and sees to that the dependency graph information is maintained
1297 1593 * properly.
1298 1594 */
1299 1595
1300 1596 int
1301 1597 flk_execute_request(lock_descriptor_t *request)
1302 1598 {
1303 1599 graph_t *gp = request->l_graph;
1304 1600 vnode_t *vp = request->l_vnode;
1305 1601 lock_descriptor_t *lock, *lock1;
1306 1602 int done_searching = 0;
1307 1603
1308 1604 CHECK_SLEEPING_LOCKS(gp);
1309 1605 CHECK_ACTIVE_LOCKS(gp);
1310 1606
1311 1607 ASSERT(MUTEX_HELD(&gp->gp_mutex));
1312 1608
1313 1609 flk_set_state(request, FLK_START_STATE);
1314 1610
1315 1611 ASSERT(NOT_BLOCKED(request));
1316 1612
1317 1613 /* IO_LOCK requests are only to check status */
1318 1614
1319 1615 if (IS_IO_LOCK(request))
1320 1616 return (0);
1321 1617
1322 1618 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
1323 1619
1324 - if (lock == NULL && request->l_type == F_UNLCK)
1325 - return (0);
1326 - if (lock == NULL) {
1327 - flk_insert_active_lock(request);
1328 - return (0);
1620 + if (lock != NULL) {
1621 + /*
1622 + * There are some active locks so check for relations
1623 + */
1624 + do {
1625 + lock1 = lock->l_next;
1626 + if (SAME_OWNER(request, lock)) {
1627 + done_searching = flk_relation(lock, request);
1628 + }
1629 + lock = lock1;
1630 + } while (lock->l_vnode == vp && !done_searching);
1329 1631 }
1330 1632
1331 - do {
1332 - lock1 = lock->l_next;
1333 - if (SAME_OWNER(request, lock)) {
1334 - done_searching = flk_relation(lock, request);
1335 - }
1336 - lock = lock1;
1337 - } while (lock->l_vnode == vp && !done_searching);
1338 -
1339 1633 /*
1340 1634 * insert in active queue
1341 1635 */
1342 1636
1343 1637 if (request->l_type != F_UNLCK)
1344 1638 flk_insert_active_lock(request);
1345 1639
1346 1640 return (0);
1347 1641 }
1348 1642
1349 1643 /*
1350 1644  * 'request' is blocked by someone, therefore we put it into the sleep queue.
1351 1645 */
1352 1646 static int
1353 1647 flk_wait_execute_request(lock_descriptor_t *request)
1354 1648 {
1355 1649 graph_t *gp = request->l_graph;
1356 1650 callb_cpr_t *cprp; /* CPR info from callback */
1357 1651 struct flock_globals *fg;
1358 1652 int index;
1359 1653
1360 1654 ASSERT(MUTEX_HELD(&gp->gp_mutex));
1361 1655 ASSERT(IS_WILLING_TO_SLEEP(request));
1362 1656
1363 1657 flk_insert_sleeping_lock(request);
1364 1658
1365 1659 if (IS_LOCKMGR(request)) {
1366 1660 index = HASH_INDEX(request->l_vnode);
1367 1661 fg = flk_get_globals();
1368 1662
1369 1663 if (nlm_status_size == 0) { /* not booted as a cluster */
1370 1664 if (fg->lockmgr_status[index] != FLK_LOCKMGR_UP) {
1371 1665 flk_cancel_sleeping_lock(request, 1);
1372 1666 return (ENOLCK);
1373 1667 }
1374 1668 } else { /* booted as a cluster */
1375 1669 /*
1376 1670 * If the request is an NLM server lock request,
1377 1671 * and the NLM state of the lock request is not
1378 1672 * NLM_UP (because the NLM server is shutting
1379 1673 * down), then cancel the sleeping lock and
1380 1674 * return error ENOLCK that will encourage the
1381 1675 * client to retransmit.
1382 1676 */
1383 1677 if (!IS_NLM_UP(request)) {
1384 1678 flk_cancel_sleeping_lock(request, 1);
1385 1679 return (ENOLCK);
1386 1680 }
1387 1681 }
1388 1682 }
1389 1683
1390 1684 /* Clustering: For blocking PXFS locks, return */
1391 1685 if (IS_PXFS(request)) {
1392 1686 /*
1393 1687 * PXFS locks sleep on the client side.
1394 1688 * The callback argument is used to wake up the sleeper
1395 1689 * when the lock is granted.
1396 1690 * We return -1 (rather than an errno value) to indicate
1397 1691 * the client side should sleep
1398 1692 */
1399 1693 return (PXFS_LOCK_BLOCKED);
1400 1694 }
1401 1695
1402 1696 if (request->l_callbacks != NULL) {
1403 1697 /*
1404 1698 * To make sure the shutdown code works correctly, either
1405 1699 * the callback must happen after putting the lock on the
1406 1700 * sleep list, or we must check the shutdown status after
1407 1701 * returning from the callback (and before sleeping). At
1408 1702 * least for now, we'll use the first option. If a
1409 1703 * shutdown or signal or whatever happened while the graph
1410 1704 * mutex was dropped, that will be detected by
1411 1705 * wait_for_lock().
1412 1706 */
1413 1707 mutex_exit(&gp->gp_mutex);
1414 1708
1415 1709 cprp = flk_invoke_callbacks(request->l_callbacks,
1416 1710 FLK_BEFORE_SLEEP);
1417 1711
1418 1712 mutex_enter(&gp->gp_mutex);
1419 1713
1420 1714 if (cprp == NULL) {
1421 1715 wait_for_lock(request);
1422 1716 } else {
1423 1717 mutex_enter(cprp->cc_lockp);
1424 1718 CALLB_CPR_SAFE_BEGIN(cprp);
1425 1719 mutex_exit(cprp->cc_lockp);
1426 1720 wait_for_lock(request);
1427 1721 mutex_enter(cprp->cc_lockp);
1428 1722 CALLB_CPR_SAFE_END(cprp, cprp->cc_lockp);
1429 1723 mutex_exit(cprp->cc_lockp);
1430 1724 }
1431 1725
1432 1726 mutex_exit(&gp->gp_mutex);
1433 1727 (void) flk_invoke_callbacks(request->l_callbacks,
1434 1728 FLK_AFTER_SLEEP);
1435 1729 mutex_enter(&gp->gp_mutex);
1436 1730 } else {
1437 1731 wait_for_lock(request);
1438 1732 }
1439 1733
1440 1734 if (IS_LOCKMGR(request)) {
1441 1735 /*
1442 1736 * If the lock manager is shutting down, return an
1443 1737 * error that will encourage the client to retransmit.
1444 1738 */
1445 1739 if (fg->lockmgr_status[index] != FLK_LOCKMGR_UP &&
1446 1740 !IS_GRANTED(request)) {
1447 1741 flk_cancel_sleeping_lock(request, 1);
1448 1742 return (ENOLCK);
1449 1743 }
1450 1744 }
1451 1745
1452 1746 if (IS_INTERRUPTED(request)) {
1453 1747 /* we got a signal, or act like we did */
1454 1748 flk_cancel_sleeping_lock(request, 1);
1455 1749 return (EINTR);
1456 1750 }
1457 1751
1458 1752 /* Cancelled if some other thread has closed the file */
1459 1753
1460 1754 if (IS_CANCELLED(request)) {
1461 1755 flk_cancel_sleeping_lock(request, 1);
1462 1756 return (EBADF);
1463 1757 }
1464 1758
1465 1759 request->l_state &= ~GRANTED_LOCK;
1466 1760 REMOVE_SLEEP_QUEUE(request);
1467 1761 return (flk_execute_request(request));
1468 1762 }
1469 1763
1470 1764 /*
1471 1765  * This routine adds an edge between from and to because from depends
1472 1766  * on to. If asked to check for deadlock, it checks whether any lock
1473 1767  * reachable from "from_lock" is owned by the same process
1474 1768  * as "from_lock".
1475 1769 * NOTE: It is the caller's responsibility to make sure that the color
1476 1770 * of the graph is consistent between the calls to flk_add_edge as done
1477 1771 * in flk_process_request. This routine does not color and check for
1478 1772 * deadlock explicitly.
1479 1773 */
1480 1774
1481 1775 static int
1482 1776 flk_add_edge(lock_descriptor_t *from_lock, lock_descriptor_t *to_lock,
1483 1777 int check_cycle, int update_graph)
1484 1778 {
1485 1779 edge_t *edge;
1486 1780 edge_t *ep;
1487 1781 lock_descriptor_t *vertex;
1488 1782 lock_descriptor_t *vertex_stack;
1489 1783
1490 1784 STACK_INIT(vertex_stack);
1491 1785
1492 1786 /*
1493 1787  * If the to vertex already has mark_color, just return and
1494 1788  * don't add an edge, as it is already reachable from the
1495 1789  * from vertex.
1496 1790 */
1497 1791
1498 1792 if (COLORED(to_lock))
1499 1793 return (0);
1500 1794
1501 1795 edge = flk_get_edge();
1502 1796
1503 1797 /*
1504 1798 * set the from and to vertex
1505 1799 */
1506 1800
1507 1801 edge->from_vertex = from_lock;
1508 1802 edge->to_vertex = to_lock;
1509 1803
1510 1804 /*
1511 1805 * put in adjacency list of from vertex
1512 1806 */
1513 1807
1514 1808 from_lock->l_edge.edge_adj_next->edge_adj_prev = edge;
1515 1809 edge->edge_adj_next = from_lock->l_edge.edge_adj_next;
1516 1810 edge->edge_adj_prev = &from_lock->l_edge;
1517 1811 from_lock->l_edge.edge_adj_next = edge;
1518 1812
1519 1813 /*
1520 1814 * put in list of to vertex
1521 1815 */
1522 1816
1523 1817 to_lock->l_edge.edge_in_next->edge_in_prev = edge;
1524 1818 edge->edge_in_next = to_lock->l_edge.edge_in_next;
1525 1819 to_lock->l_edge.edge_in_next = edge;
1526 1820 edge->edge_in_prev = &to_lock->l_edge;
1527 1821
1528 1822
1529 1823 if (update_graph) {
1530 1824 flk_update_proc_graph(edge, 0);
1531 1825 return (0);
1532 1826 }
1533 1827 if (!check_cycle) {
1534 1828 return (0);
1535 1829 }
1536 1830
1537 1831 STACK_PUSH(vertex_stack, from_lock, l_stack);
1538 1832
1539 1833 while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
1540 1834
1541 1835 STACK_POP(vertex_stack, l_stack);
1542 1836
1543 1837 for (ep = FIRST_ADJ(vertex);
1544 1838 ep != HEAD(vertex);
1545 1839 ep = NEXT_ADJ(ep)) {
1546 1840 if (COLORED(ep->to_vertex))
1547 1841 continue;
1548 1842 COLOR(ep->to_vertex);
1549 1843 if (SAME_OWNER(ep->to_vertex, from_lock))
1550 1844 goto dead_lock;
1551 1845 STACK_PUSH(vertex_stack, ep->to_vertex, l_stack);
1552 1846 }
1553 1847 }
1554 1848 return (0);
1555 1849
1556 1850 dead_lock:
1557 1851
1558 1852 /*
1559 1853 * remove all edges
1560 1854 */
1561 1855
1562 1856 ep = FIRST_ADJ(from_lock);
1563 1857
1564 1858 while (ep != HEAD(from_lock)) {
1565 1859 IN_LIST_REMOVE(ep);
1566 1860 from_lock->l_sedge = NEXT_ADJ(ep);
1567 1861 ADJ_LIST_REMOVE(ep);
1568 1862 flk_free_edge(ep);
1569 1863 ep = from_lock->l_sedge;
1570 1864 }
1571 1865 return (EDEADLK);
1572 1866 }
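
The cycle check above is a depth-first walk over the lock graph that declares deadlock as soon as it reaches a lock owned by the same process as the requester. A minimal user-space sketch of the same idea, with simplified hypothetical types (not the kernel code, which also maintains colors across calls and unwinds the edges it added):

    #include <stdio.h>

    #define MAX_LOCKS 8

    /* Hypothetical, simplified lock vertex: owner, out-edges, visited mark. */
    struct lk {
        int owner;                      /* owning process id */
        int nadj;                       /* number of outgoing edges */
        struct lk *adj[MAX_LOCKS];      /* locks this lock waits on */
        int color;                      /* visited mark, 0 at entry */
    };

    /*
     * Return 1 if following "waits on" edges from 'from' reaches a lock
     * owned by the same process, i.e. adding the new edge would deadlock.
     */
    static int
    would_deadlock(struct lk *from)
    {
        struct lk *stack[MAX_LOCKS];
        int top = 0, i;

        stack[top++] = from;
        while (top > 0) {
            struct lk *v = stack[--top];

            for (i = 0; i < v->nadj; i++) {
                struct lk *w = v->adj[i];

                if (w->color)
                    continue;
                w->color = 1;
                if (w->owner == from->owner)
                    return (1);
                stack[top++] = w;
            }
        }
        return (0);
    }

    int
    main(void)
    {
        /* a (owner 1) waits on b (owner 2), and b waits on a: a cycle. */
        struct lk a = { 1, 0, { 0 }, 0 }, b = { 2, 0, { 0 }, 0 };

        a.adj[a.nadj++] = &b;
        b.adj[b.nadj++] = &a;
        (void) printf("deadlock: %d\n", would_deadlock(&a));
        return (0);
    }
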
1573 1867
1574 1868 /*
1575 1869 * Get an edge structure for representing the dependency between two locks.
1576 1870 */
1577 1871
1578 1872 static edge_t *
1579 1873 flk_get_edge()
1580 1874 {
1581 1875 edge_t *ep;
1582 1876
1583 1877 ASSERT(flk_edge_cache != NULL);
1584 1878
1585 1879 ep = kmem_cache_alloc(flk_edge_cache, KM_SLEEP);
1586 1880 edge_allocs++;
1587 1881 return (ep);
1588 1882 }
1589 1883
1590 1884 /*
1591 1885 * Free the edge structure.
1592 1886 */
1593 1887
1594 1888 static void
1595 1889 flk_free_edge(edge_t *ep)
1596 1890 {
1597 1891 edge_frees++;
1598 1892 kmem_cache_free(flk_edge_cache, (void *)ep);
1599 1893 }
1600 1894
1601 1895 /*
1602 - * Check the relationship of request with lock and perform the
1603 - * recomputation of dependencies, break lock if required, and return
1604 - * 1 if request cannot have any more relationship with the next
1896 + * Check the relationship of 'request' with 'lock' and perform the
1897 + * recomputation of dependencies, break 'lock' if required, and return
1898 + * 1 if 'request' cannot have any more relationship with the next
1605 1899 * active locks.
1900 + *
1606 1901 * The 'lock' and 'request' are compared and in case of overlap we
1607 1902 * delete the 'lock' and form new locks to represent the non-overlapped
1608 1903 * portion of original 'lock'. This function has side effects such as
1609 1904 * 'lock' will be freed, new locks will be added to the active list.
1610 1905 */
1611 1906
1612 1907 static int
1613 1908 flk_relation(lock_descriptor_t *lock, lock_descriptor_t *request)
1614 1909 {
1615 1910 int lock_effect;
1616 - lock_descriptor_t *lock1, *lock2;
1617 1911 lock_descriptor_t *topology[3];
1618 1912 int nvertex = 0;
1619 1913 int i;
1620 1914 edge_t *ep;
1621 - graph_t *gp = (lock->l_graph);
1915 + graph_t *gp = lock->l_graph;
1916 + boolean_t mergeable;
1622 1917
1918 + ASSERT(request->l_blocker == 0);
1623 1919
1624 1920 CHECK_SLEEPING_LOCKS(gp);
1625 1921 CHECK_ACTIVE_LOCKS(gp);
1626 1922
1627 1923 ASSERT(MUTEX_HELD(&gp->gp_mutex));
1628 1924
1629 1925 topology[0] = topology[1] = topology[2] = NULL;
1630 1926
1631 1927 if (request->l_type == F_UNLCK)
1632 1928 lock_effect = FLK_UNLOCK;
1633 1929 else if (request->l_type == F_RDLCK &&
1634 1930 lock->l_type == F_WRLCK)
1635 1931 lock_effect = FLK_DOWNGRADE;
1636 1932 else if (request->l_type == F_WRLCK &&
1637 1933 lock->l_type == F_RDLCK)
1638 1934 lock_effect = FLK_UPGRADE;
1639 1935 else
1640 1936 lock_effect = FLK_STAY_SAME;
1641 1937
1938 + /*
1939 +	 * The 'lock' and 'request' are merged only when the effect of both
1940 +	 * locks is the same (FLK_STAY_SAME) and their blocker status
1941 +	 * (l_blocker) is the same as well. We do not merge 'lock' and
1942 +	 * 'request' with different l_blocker values because such a merge
1943 +	 * might affect stale lock detection: it could cause false positives
1944 +	 * or miss some stale locks.
1945 + */
1946 + mergeable = lock_effect == FLK_STAY_SAME &&
1947 + lock->l_blocker == request->l_blocker;
1948 +
1642 1949 if (lock->l_end < request->l_start) {
1643 - if (lock->l_end == request->l_start - 1 &&
1644 - lock_effect == FLK_STAY_SAME) {
1645 - topology[0] = request;
1950 + /* If the 'lock' is just next to 'request', try to merge them */
1951 + if (lock->l_end == request->l_start - 1 && mergeable) {
1646 1952 request->l_start = lock->l_start;
1647 - nvertex = 1;
1648 1953 goto recompute;
1649 - } else {
1650 - return (0);
1651 1954 }
1955 +
1956 + /* Otherwise, they do not overlap, so return immediately */
1957 + return (0);
1652 1958 }
1653 1959
1654 - if (lock->l_start > request->l_end) {
1655 - if (request->l_end == lock->l_start - 1 &&
1656 - lock_effect == FLK_STAY_SAME) {
1657 - topology[0] = request;
1960 + if (request->l_end < lock->l_start) {
1961 + /* If the 'request' is just next to 'lock', try to merge them */
1962 + if (request->l_end == lock->l_start - 1 && mergeable) {
1658 1963 request->l_end = lock->l_end;
1659 - nvertex = 1;
1660 1964 goto recompute;
1965 + }
1966 +
1967 + /* Otherwise, they do not overlap, so return immediately */
1968 + return (1);
1969 + }
1970 +
1971 + /*
1972 +	 * Here we are sure the 'lock' and 'request' overlap, so the 'request'
1973 + * will replace the 'lock' (either fully, or at least partially).
1974 + */
1975 +
1976 + /*
1977 + * If the 'request' does not fully cover the 'lock' at the start,
1978 + * either move the start of the 'request' to cover the 'lock', or split
1979 + * the 'lock'.
1980 + */
1981 + if (lock->l_start < request->l_start) {
1982 + if (mergeable) {
1983 + request->l_start = lock->l_start;
1661 1984 } else {
1662 - return (1);
1985 + lock_descriptor_t *new_lock = flk_get_lock();
1986 +
1987 + COPY(new_lock, lock);
1988 + new_lock->l_end = request->l_start - 1;
1989 +
1990 + topology[nvertex++] = new_lock;
1663 1991 }
1664 1992 }
1665 1993
1994 + /*
1995 + * If the 'request' does not fully cover the 'lock' at the end, either
1996 + * move the end of the 'request' to cover the 'lock', or split the
1997 + * 'lock'.
1998 + */
1666 1999 if (request->l_end < lock->l_end) {
1667 - if (request->l_start > lock->l_start) {
1668 - if (lock_effect == FLK_STAY_SAME) {
1669 - request->l_start = lock->l_start;
1670 - request->l_end = lock->l_end;
1671 - topology[0] = request;
1672 - nvertex = 1;
1673 - } else {
1674 - lock1 = flk_get_lock();
1675 - lock2 = flk_get_lock();
1676 - COPY(lock1, lock);
1677 - COPY(lock2, lock);
1678 - lock1->l_start = lock->l_start;
1679 - lock1->l_end = request->l_start - 1;
1680 - lock2->l_start = request->l_end + 1;
1681 - lock2->l_end = lock->l_end;
1682 - topology[0] = lock1;
1683 - topology[1] = lock2;
1684 - topology[2] = request;
1685 - nvertex = 3;
1686 - }
1687 - } else if (request->l_start < lock->l_start) {
1688 - if (lock_effect == FLK_STAY_SAME) {
1689 - request->l_end = lock->l_end;
1690 - topology[0] = request;
1691 - nvertex = 1;
1692 - } else {
1693 - lock1 = flk_get_lock();
1694 - COPY(lock1, lock);
1695 - lock1->l_start = request->l_end + 1;
1696 - topology[0] = lock1;
1697 - topology[1] = request;
1698 - nvertex = 2;
1699 - }
1700 - } else {
1701 - if (lock_effect == FLK_STAY_SAME) {
1702 - request->l_start = lock->l_start;
1703 - request->l_end = lock->l_end;
1704 - topology[0] = request;
1705 - nvertex = 1;
1706 - } else {
1707 - lock1 = flk_get_lock();
1708 - COPY(lock1, lock);
1709 - lock1->l_start = request->l_end + 1;
1710 - topology[0] = lock1;
1711 - topology[1] = request;
1712 - nvertex = 2;
1713 - }
1714 - }
1715 - } else if (request->l_end > lock->l_end) {
1716 - if (request->l_start > lock->l_start) {
1717 - if (lock_effect == FLK_STAY_SAME) {
1718 - request->l_start = lock->l_start;
1719 - topology[0] = request;
1720 - nvertex = 1;
1721 - } else {
1722 - lock1 = flk_get_lock();
1723 - COPY(lock1, lock);
1724 - lock1->l_end = request->l_start - 1;
1725 - topology[0] = lock1;
1726 - topology[1] = request;
1727 - nvertex = 2;
1728 - }
1729 - } else if (request->l_start < lock->l_start) {
1730 - topology[0] = request;
1731 - nvertex = 1;
2000 + if (mergeable) {
2001 + request->l_end = lock->l_end;
1732 2002 } else {
1733 - topology[0] = request;
1734 - nvertex = 1;
2003 + lock_descriptor_t *new_lock = flk_get_lock();
2004 +
2005 + COPY(new_lock, lock);
2006 + new_lock->l_start = request->l_end + 1;
2007 +
2008 + topology[nvertex++] = new_lock;
1735 2009 }
1736 - } else {
1737 - if (request->l_start > lock->l_start) {
1738 - if (lock_effect == FLK_STAY_SAME) {
1739 - request->l_start = lock->l_start;
1740 - topology[0] = request;
1741 - nvertex = 1;
1742 - } else {
1743 - lock1 = flk_get_lock();
1744 - COPY(lock1, lock);
1745 - lock1->l_end = request->l_start - 1;
1746 - topology[0] = lock1;
1747 - topology[1] = request;
1748 - nvertex = 2;
1749 - }
1750 - } else if (request->l_start < lock->l_start) {
1751 - topology[0] = request;
1752 - nvertex = 1;
1753 - } else {
1754 - if (lock_effect != FLK_UNLOCK) {
1755 - topology[0] = request;
1756 - nvertex = 1;
1757 - } else {
1758 - flk_delete_active_lock(lock, 0);
1759 - flk_wakeup(lock, 1);
1760 - flk_free_lock(lock);
1761 - CHECK_SLEEPING_LOCKS(gp);
1762 - CHECK_ACTIVE_LOCKS(gp);
1763 - return (1);
1764 - }
1765 - }
1766 2010 }
1767 2011
1768 -recompute:
2012 + /*
2013 + * Log the blocker change
2014 + */
2015 + if (nvertex > 0 && lock->l_blocker < 0) {
2016 + if (nvertex == 1)
2017 + flk_stale_lock_shrink(lock, topology[0]);
2018 + if (nvertex == 2)
2019 + flk_stale_lock_split(lock, topology[0], topology[1]);
1769 2020
2021 + lock->l_blocker = 0;
2022 + }
2023 +
2024 +recompute:
1770 2025 /*
1771 2026  * For unlock we don't send the 'request' for recomputing
1772 2027 * dependencies because no lock will add an edge to this.
1773 2028 */
2029 + if (lock_effect != FLK_UNLOCK)
2030 + topology[nvertex++] = request;
1774 2031
1775 - if (lock_effect == FLK_UNLOCK) {
1776 - topology[nvertex-1] = NULL;
1777 - nvertex--;
1778 - }
1779 2032 for (i = 0; i < nvertex; i++) {
1780 2033 topology[i]->l_state |= RECOMPUTE_LOCK;
1781 2034 topology[i]->l_color = NO_COLOR;
1782 2035 }
1783 2036
1784 2037 ASSERT(FIRST_ADJ(lock) == HEAD(lock));
1785 2038
1786 2039 /*
1787 2040  * we remove each in edge of this vertex 'lock' from the adjacency
1788 2041  * list of its from vertex.
1789 2042 */
1790 -
1791 2043 ep = FIRST_IN(lock);
1792 2044 while (ep != HEAD(lock)) {
1793 2045 ADJ_LIST_REMOVE(ep);
1794 2046 ep = NEXT_IN(ep);
1795 2047 }
1796 2048
1797 2049 flk_delete_active_lock(lock, 0);
1798 2050
1799 2051 /* We are ready for recomputing the dependencies now */
1800 -
1801 2052 flk_recompute_dependencies(lock, topology, nvertex, 1);
1802 2053
1803 2054 for (i = 0; i < nvertex; i++) {
1804 2055 topology[i]->l_state &= ~RECOMPUTE_LOCK;
1805 2056 topology[i]->l_color = NO_COLOR;
1806 2057 }
1807 2058
1808 -
1809 2059 if (lock_effect == FLK_UNLOCK) {
1810 2060 nvertex++;
1811 2061 }
1812 2062 for (i = 0; i < nvertex - 1; i++) {
1813 2063 flk_insert_active_lock(topology[i]);
1814 2064 }
1815 2065
1816 -
1817 2066 if (lock_effect == FLK_DOWNGRADE || lock_effect == FLK_UNLOCK) {
1818 2067 flk_wakeup(lock, 0);
1819 2068 } else {
1820 2069 ep = FIRST_IN(lock);
1821 2070 while (ep != HEAD(lock)) {
1822 2071 lock->l_sedge = NEXT_IN(ep);
1823 2072 IN_LIST_REMOVE(ep);
1824 2073 flk_update_proc_graph(ep, 1);
1825 2074 flk_free_edge(ep);
1826 2075 ep = lock->l_sedge;
1827 2076 }
1828 2077 }
1829 2078 flk_free_lock(lock);
1830 2079
1831 2080 CHECK_SLEEPING_LOCKS(gp);
1832 2081 CHECK_ACTIVE_LOCKS(gp);
1833 2082 return (0);
1834 2083 }
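
Most of the work above is byte-range arithmetic: an adjacent or overlapping 'request' with the same lock effect and blocker status simply absorbs the 'lock', while a non-mergeable overlap leaves behind at most two fragments of the old lock. A stand-alone sketch of just that fragment computation, using plain integers instead of lock_descriptor_t (illustrative only):

    #include <stdio.h>

    /*
     * Given an existing lock [ls, le] and an overlapping request [rs, re],
     * compute the pieces of the old lock that the request does not cover.
     * Returns the number of pieces (0, 1 or 2) and fills out[][2].  This
     * mirrors the split cases in flk_relation() when the locks cannot be
     * merged.
     */
    static int
    split_lock(long long ls, long long le, long long rs, long long re,
        long long out[2][2])
    {
        int n = 0;

        if (ls < rs) {          /* piece to the left of the request */
            out[n][0] = ls;
            out[n][1] = rs - 1;
            n++;
        }
        if (re < le) {          /* piece to the right of the request */
            out[n][0] = re + 1;
            out[n][1] = le;
            n++;
        }
        return (n);
    }

    int
    main(void)
    {
        long long out[2][2];
        int i, n;

        /* Old lock covers 0-99, request covers 10-19: two fragments. */
        n = split_lock(0, 99, 10, 19, out);
        for (i = 0; i < n; i++)
            (void) printf("fragment %d: %lld-%lld\n",
                i, out[i][0], out[i][1]);
        return (0);
    }
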
1835 2084
1836 2085 /*
1837 2086 * Insert a lock into the active queue.
1838 2087 */
1839 2088
1840 2089 static void
1841 2090 flk_insert_active_lock(lock_descriptor_t *new_lock)
1842 2091 {
1843 2092 graph_t *gp = new_lock->l_graph;
1844 2093 vnode_t *vp = new_lock->l_vnode;
1845 2094 lock_descriptor_t *first_lock, *lock;
1846 2095
1847 2096 ASSERT(MUTEX_HELD(&gp->gp_mutex));
1848 2097
1849 2098 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
1850 2099 first_lock = lock;
1851 2100
1852 2101 if (first_lock != NULL) {
1853 2102 for (; (lock->l_vnode == vp &&
1854 2103 lock->l_start < new_lock->l_start); lock = lock->l_next)
1855 2104 ;
1856 2105 } else {
1857 2106 lock = ACTIVE_HEAD(gp);
1858 2107 }
1859 2108
1860 2109 lock->l_prev->l_next = new_lock;
1861 2110 new_lock->l_next = lock;
1862 2111 new_lock->l_prev = lock->l_prev;
1863 2112 lock->l_prev = new_lock;
1864 2113
1865 2114 if (first_lock == NULL || (new_lock->l_start <= first_lock->l_start)) {
1866 2115 vp->v_filocks = (struct filock *)new_lock;
1867 2116 }
1868 2117 flk_set_state(new_lock, FLK_ACTIVE_STATE);
1869 2118 new_lock->l_state |= ACTIVE_LOCK;
1870 2119
1871 2120 CHECK_ACTIVE_LOCKS(gp);
1872 2121 CHECK_SLEEPING_LOCKS(gp);
1873 2122 }
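
The per-vnode active list is a circular doubly linked list headed by a sentinel and kept sorted by starting offset, so insertion is a short walk followed by a four-pointer splice. A small stand-alone sketch of that splice with a generic node type (not the kernel structures, and without the v_filocks bookkeeping):

    #include <stdio.h>

    /* Hypothetical node in a circular doubly linked list with a sentinel. */
    struct node {
        long long start;
        struct node *next;
        struct node *prev;
    };

    static void
    insert_sorted(struct node *head, struct node *nn)
    {
        struct node *p = head->next;

        /* Walk past every node that starts before the new one. */
        while (p != head && p->start < nn->start)
            p = p->next;

        /* Splice the new node in just before 'p' (possibly the head). */
        nn->next = p;
        nn->prev = p->prev;
        p->prev->next = nn;
        p->prev = nn;
    }

    int
    main(void)
    {
        struct node head = { 0, &head, &head };
        struct node a = { 30 }, b = { 10 }, c = { 20 };
        struct node *p;

        insert_sorted(&head, &a);
        insert_sorted(&head, &b);
        insert_sorted(&head, &c);
        for (p = head.next; p != &head; p = p->next)
            (void) printf("%lld\n", p->start);  /* prints 10, 20, 30 */
        return (0);
    }
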
1874 2123
1875 2124 /*
1876 2125  * Delete the active lock: performs one of two functions depending on
1877 2126  * the value of the second parameter. One is to remove from the active
1878 2127  * lists only, and the other is to both remove and free the lock.
1879 2128 */
1880 2129
1881 2130 static void
1882 2131 flk_delete_active_lock(lock_descriptor_t *lock, int free_lock)
1883 2132 {
1884 2133 vnode_t *vp = lock->l_vnode;
1885 2134 graph_t *gp = lock->l_graph;
1886 2135
1887 2136 ASSERT(MUTEX_HELD(&gp->gp_mutex));
1888 2137 if (free_lock)
1889 2138 ASSERT(NO_DEPENDENTS(lock));
1890 2139 ASSERT(NOT_BLOCKED(lock));
1891 2140 ASSERT(IS_ACTIVE(lock));
1892 2141
1893 2142 ASSERT((vp->v_filocks != NULL));
1894 2143
2144 + if (lock->l_blocker < 0) {
2145 + /* Log the blocker release */
2146 + flk_stale_lock_release(lock);
2147 + lock->l_blocker = 0;
2148 + }
2149 +
1895 2150 if (vp->v_filocks == (struct filock *)lock) {
1896 2151 vp->v_filocks = (struct filock *)
1897 2152 ((lock->l_next->l_vnode == vp) ? lock->l_next :
1898 2153 NULL);
1899 2154 }
1900 2155 lock->l_next->l_prev = lock->l_prev;
1901 2156 lock->l_prev->l_next = lock->l_next;
1902 2157 lock->l_next = lock->l_prev = NULL;
1903 2158 flk_set_state(lock, FLK_DEAD_STATE);
1904 2159 lock->l_state &= ~ACTIVE_LOCK;
1905 2160
1906 2161 if (free_lock)
1907 2162 flk_free_lock(lock);
1908 2163 CHECK_ACTIVE_LOCKS(gp);
1909 2164 CHECK_SLEEPING_LOCKS(gp);
1910 2165 }
1911 2166
1912 2167 /*
1913 2168 * Insert into the sleep queue.
1914 2169 */
1915 2170
1916 2171 static void
1917 2172 flk_insert_sleeping_lock(lock_descriptor_t *request)
1918 2173 {
1919 2174 graph_t *gp = request->l_graph;
1920 2175 vnode_t *vp = request->l_vnode;
1921 2176 lock_descriptor_t *lock;
1922 2177
1923 2178 ASSERT(MUTEX_HELD(&gp->gp_mutex));
1924 2179 ASSERT(IS_INITIAL(request));
1925 2180
1926 2181 for (lock = gp->sleeping_locks.l_next; (lock != &gp->sleeping_locks &&
1927 2182 lock->l_vnode < vp); lock = lock->l_next)
1928 2183 ;
1929 2184
1930 2185 lock->l_prev->l_next = request;
1931 2186 request->l_prev = lock->l_prev;
1932 2187 lock->l_prev = request;
1933 2188 request->l_next = lock;
1934 2189 flk_set_state(request, FLK_SLEEPING_STATE);
1935 2190 request->l_state |= SLEEPING_LOCK;
1936 2191 }
1937 2192
1938 2193 /*
1939 2194 * Cancelling a sleeping lock implies removing a vertex from the
1940 2195 * dependency graph and therefore we should recompute the dependencies
1941 - * of all vertices that have a path to this vertex, w.r.t. all
2196 + * of all vertices that have a path to this vertex, w.r.t. all
1942 2197 * vertices reachable from this vertex.
1943 2198 */
1944 2199
1945 2200 void
1946 2201 flk_cancel_sleeping_lock(lock_descriptor_t *request, int remove_from_queue)
1947 2202 {
1948 2203 graph_t *gp = request->l_graph;
1949 2204 vnode_t *vp = request->l_vnode;
1950 2205 lock_descriptor_t **topology = NULL;
1951 2206 edge_t *ep;
1952 2207 lock_descriptor_t *vertex, *lock;
1953 2208 int nvertex = 0;
1954 2209 int i;
1955 2210 lock_descriptor_t *vertex_stack;
1956 2211
1957 2212 STACK_INIT(vertex_stack);
1958 2213
1959 2214 ASSERT(MUTEX_HELD(&gp->gp_mutex));
1960 2215 /*
1961 2216  * count the number of vertex pointers that have to be allocated:
1962 2217  * all vertices that are reachable from request.
1963 2218 */
1964 2219
1965 2220 STACK_PUSH(vertex_stack, request, l_stack);
1966 2221
1967 2222 while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
1968 2223 STACK_POP(vertex_stack, l_stack);
1969 2224 for (ep = FIRST_ADJ(vertex); ep != HEAD(vertex);
1970 2225 ep = NEXT_ADJ(ep)) {
1971 2226 if (IS_RECOMPUTE(ep->to_vertex))
1972 2227 continue;
1973 2228 ep->to_vertex->l_state |= RECOMPUTE_LOCK;
1974 2229 STACK_PUSH(vertex_stack, ep->to_vertex, l_stack);
1975 2230 nvertex++;
1976 2231 }
1977 2232 }
1978 2233
1979 2234 /*
1980 2235 * allocate memory for holding the vertex pointers
1981 2236 */
1982 2237
1983 2238 if (nvertex) {
1984 2239 topology = kmem_zalloc(nvertex * sizeof (lock_descriptor_t *),
1985 2240 KM_SLEEP);
1986 2241 }
1987 2242
1988 2243 /*
1989 2244 * one more pass to actually store the vertices in the
1990 2245 * allocated array.
1991 2246 * We first check sleeping locks and then active locks
1992 2247 * so that topology array will be in a topological
1993 2248 * order.
1994 2249 */
1995 2250
1996 2251 nvertex = 0;
1997 2252 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
1998 2253
1999 2254 if (lock) {
2000 2255 do {
2001 2256 if (IS_RECOMPUTE(lock)) {
2002 2257 lock->l_index = nvertex;
2003 2258 topology[nvertex++] = lock;
2004 2259 }
2005 2260 lock->l_color = NO_COLOR;
2006 2261 lock = lock->l_next;
2007 2262 } while (lock->l_vnode == vp);
2008 2263 }
2009 2264
2010 2265 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
2011 2266
2012 2267 if (lock) {
2013 2268 do {
2014 2269 if (IS_RECOMPUTE(lock)) {
2015 2270 lock->l_index = nvertex;
2016 2271 topology[nvertex++] = lock;
2017 2272 }
2018 2273 lock->l_color = NO_COLOR;
2019 2274 lock = lock->l_next;
2020 2275 } while (lock->l_vnode == vp);
2021 2276 }
2022 2277
2023 2278 /*
2024 2279 * remove in and out edges of request
2025 2280 * They are freed after updating proc_graph below.
2026 2281 */
2027 2282
2028 2283 for (ep = FIRST_IN(request); ep != HEAD(request); ep = NEXT_IN(ep)) {
2029 2284 ADJ_LIST_REMOVE(ep);
2030 2285 }
2031 2286
2032 2287
2033 2288 if (remove_from_queue)
2034 2289 REMOVE_SLEEP_QUEUE(request);
2035 2290
2036 2291 /* we are ready to recompute */
2037 2292
2038 2293 flk_recompute_dependencies(request, topology, nvertex, 1);
2039 2294
2040 2295 ep = FIRST_ADJ(request);
2041 2296 while (ep != HEAD(request)) {
2042 2297 IN_LIST_REMOVE(ep);
2043 2298 request->l_sedge = NEXT_ADJ(ep);
2044 2299 ADJ_LIST_REMOVE(ep);
2045 2300 flk_update_proc_graph(ep, 1);
2046 2301 flk_free_edge(ep);
2047 2302 ep = request->l_sedge;
2048 2303 }
2049 2304
2050 2305
2051 2306 /*
2052 2307 * unset the RECOMPUTE flag in those vertices
2053 2308 */
2054 2309
2055 2310 for (i = 0; i < nvertex; i++) {
2056 2311 topology[i]->l_state &= ~RECOMPUTE_LOCK;
2057 2312 }
2058 2313
2059 2314 /*
2060 2315 * free the topology
2061 2316 */
2062 2317 if (nvertex)
2063 - kmem_free((void *)topology,
2318 + kmem_free(topology,
2064 2319 (nvertex * sizeof (lock_descriptor_t *)));
2065 2320 /*
2066 2321 * Possibility of some locks unblocked now
2067 2322 */
2068 2323
2069 2324 flk_wakeup(request, 0);
2070 2325
2071 2326 /*
2072 2327 * we expect to have a correctly recomputed graph now.
2073 2328 */
2074 2329 flk_set_state(request, FLK_DEAD_STATE);
2075 2330 flk_free_lock(request);
2076 2331 CHECK_SLEEPING_LOCKS(gp);
2077 2332 CHECK_ACTIVE_LOCKS(gp);
2078 2333
2079 2334 }
2080 2335
2081 2336 /*
2082 2337  * Uncoloring the graph simply increments the mark value of the graph,
2083 2338  * and only when wrap-around takes place do we explicitly reset the
2084 2339  * color of all vertices in the graph.
2085 2340 */
2086 2341
2087 2342 static void
2088 2343 flk_graph_uncolor(graph_t *gp)
2089 2344 {
2090 2345 lock_descriptor_t *lock;
2091 2346
2092 2347 if (gp->mark == UINT_MAX) {
2093 2348 gp->mark = 1;
2094 2349 for (lock = ACTIVE_HEAD(gp)->l_next; lock != ACTIVE_HEAD(gp);
2095 2350 lock = lock->l_next)
2096 2351 lock->l_color = 0;
2097 2352
2098 2353 for (lock = SLEEPING_HEAD(gp)->l_next; lock != SLEEPING_HEAD(gp);
2099 2354 lock = lock->l_next)
2100 2355 lock->l_color = 0;
2101 2356 } else {
2102 2357 gp->mark++;
2103 2358 }
2104 2359 }
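
The mark acts as a generation counter: a vertex counts as colored only while its color equals the graph's current mark, so bumping the mark uncolors every vertex at once and only a wrap of the counter forces the explicit sweep above. A tiny illustration of the same trick, with hypothetical names:

    #include <stdio.h>
    #include <limits.h>

    /* Generation-counter coloring: one bump of 'mark' uncolors everything. */
    static unsigned int mark = 1;

    struct gnode {
        unsigned int color;
    };

    #define GCOLORED(n)  ((n)->color == mark)
    #define GCOLOR(n)    ((n)->color = mark)

    static void
    uncolor_all(struct gnode *nodes, int n)
    {
        int i;

        if (mark == UINT_MAX) {         /* rare wrap: explicit sweep */
            mark = 1;
            for (i = 0; i < n; i++)
                nodes[i].color = 0;
        } else {
            mark++;                     /* common case: O(1) */
        }
    }

    int
    main(void)
    {
        struct gnode nodes[2] = { { 0 }, { 0 } };

        GCOLOR(&nodes[0]);
        (void) printf("%d %d\n", GCOLORED(&nodes[0]), GCOLORED(&nodes[1]));
        uncolor_all(nodes, 2);
        (void) printf("%d %d\n", GCOLORED(&nodes[0]), GCOLORED(&nodes[1]));
        return (0);
    }
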
2105 2360
2106 2361 /*
2107 2362 * Wake up locks that are blocked on the given lock.
2108 2363 */
2109 2364
2110 2365 static void
2111 2366 flk_wakeup(lock_descriptor_t *lock, int adj_list_remove)
2112 2367 {
2113 2368 edge_t *ep;
2114 2369 graph_t *gp = lock->l_graph;
2115 2370 lock_descriptor_t *lck;
2116 2371
2117 2372 ASSERT(MUTEX_HELD(&gp->gp_mutex));
2118 2373 if (NO_DEPENDENTS(lock))
2119 2374 return;
2120 2375 ep = FIRST_IN(lock);
2121 2376 do {
2122 2377 /*
2123 2378 * delete the edge from the adjacency list
2124 2379  * of the from vertex. If there are no more
2125 2380  * adjacent edges for this vertex, wake this process.
2126 2381 */
2127 2382 lck = ep->from_vertex;
2128 2383 if (adj_list_remove)
2129 2384 ADJ_LIST_REMOVE(ep);
2130 2385 flk_update_proc_graph(ep, 1);
2131 2386 if (NOT_BLOCKED(lck)) {
2132 2387 GRANT_WAKEUP(lck);
2133 2388 }
2134 2389 lock->l_sedge = NEXT_IN(ep);
2135 2390 IN_LIST_REMOVE(ep);
2136 2391 flk_free_edge(ep);
2137 2392 ep = lock->l_sedge;
2138 2393 } while (ep != HEAD(lock));
2139 2394 ASSERT(NO_DEPENDENTS(lock));
2140 2395 }
2141 2396
2142 2397 /*
2143 2398  * The dependents of request are checked for their dependency against the
2144 2399 * locks in topology (called topology because the array is and should be in
2145 2400 * topological order for this algorithm, if not in topological order the
2146 2401 * inner loop below might add more edges than necessary. Topological ordering
2147 2402 * of vertices satisfies the property that all edges will be from left to
2148 2403 * right i.e., topology[i] can have an edge to topology[j], iff i<j)
2149 2404 * If lock l1 in the dependent set of request is dependent (blocked by)
2150 2405 * on lock l2 in topology but does not have a path to it, we add an edge
2151 2406 * in the inner loop below.
2152 2407 *
2153 2408 * We don't want to add an edge between l1 and l2 if there exists
2154 2409 * already a path from l1 to l2, so care has to be taken for those vertices
2155 2410 * that have two paths to 'request'. These vertices are referred to here
2156 2411 * as barrier locks.
2157 2412 *
2158 2413  * The barriers have to be found (those vertices that originally had two paths
2159 2414 * to request) because otherwise we may end up adding edges unnecessarily
2160 2415 * to vertices in topology, and thus barrier vertices can have an edge
2161 2416  * to a vertex in topology as well as a path to it.
2162 2417 */
2163 2418
2164 2419 static void
2165 2420 flk_recompute_dependencies(lock_descriptor_t *request,
2166 2421 lock_descriptor_t **topology, int nvertex, int update_graph)
2167 2422 {
2168 2423 lock_descriptor_t *vertex, *lock;
2169 2424 graph_t *gp = request->l_graph;
2170 2425 int i, count;
2171 2426 int barrier_found = 0;
2172 2427 edge_t *ep;
2173 2428 lock_descriptor_t *vertex_stack;
2174 2429
2175 2430 STACK_INIT(vertex_stack);
2176 2431
2177 2432 ASSERT(MUTEX_HELD(&gp->gp_mutex));
2178 2433 if (nvertex == 0)
2179 2434 return;
2180 2435 flk_graph_uncolor(request->l_graph);
2181 2436 barrier_found = flk_find_barriers(request);
2182 2437 request->l_state |= RECOMPUTE_DONE;
2183 2438
2184 2439 STACK_PUSH(vertex_stack, request, l_stack);
2185 2440 request->l_sedge = FIRST_IN(request);
2186 2441
2187 2442
2188 2443 while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
2189 2444 if (vertex->l_state & RECOMPUTE_DONE) {
2190 2445 count = 0;
2191 2446 goto next_in_edge;
2192 2447 }
2193 2448 if (IS_BARRIER(vertex)) {
2194 2449 /* decrement the barrier count */
2195 2450 if (vertex->l_index) {
2196 2451 vertex->l_index--;
2197 2452 /* this guy will be pushed again anyway ? */
2198 2453 STACK_POP(vertex_stack, l_stack);
2199 2454 if (vertex->l_index == 0) {
2200 2455 /*
2201 2456 * barrier is over we can recompute
2202 2457 * dependencies for this lock in the
2203 2458 * next stack pop
2204 2459 */
2205 2460 vertex->l_state &= ~BARRIER_LOCK;
2206 2461 }
2207 2462 continue;
2208 2463 }
2209 2464 }
2210 2465 vertex->l_state |= RECOMPUTE_DONE;
2211 2466 flk_graph_uncolor(gp);
2212 2467 count = flk_color_reachables(vertex);
2213 2468 for (i = 0; i < nvertex; i++) {
2214 2469 lock = topology[i];
2215 2470 if (COLORED(lock))
2216 2471 continue;
2217 2472 if (BLOCKS(lock, vertex)) {
2218 2473 (void) flk_add_edge(vertex, lock,
2219 2474 NO_CHECK_CYCLE, update_graph);
2220 2475 COLOR(lock);
2221 2476 count++;
2222 2477 count += flk_color_reachables(lock);
2223 2478 }
2224 2479
2225 2480 }
2226 2481
2227 2482 next_in_edge:
2228 2483 if (count == nvertex ||
2229 2484 vertex->l_sedge == HEAD(vertex)) {
2230 2485 /* prune the tree below this */
2231 2486 STACK_POP(vertex_stack, l_stack);
2232 2487 vertex->l_state &= ~RECOMPUTE_DONE;
2233 2488 /* update the barrier locks below this! */
2234 2489 if (vertex->l_sedge != HEAD(vertex) && barrier_found) {
2235 2490 flk_graph_uncolor(gp);
2236 2491 flk_update_barriers(vertex);
2237 2492 }
2238 2493 continue;
2239 2494 }
2240 2495
2241 2496 ep = vertex->l_sedge;
2242 2497 lock = ep->from_vertex;
2243 2498 STACK_PUSH(vertex_stack, lock, l_stack);
2244 2499 lock->l_sedge = FIRST_IN(lock);
2245 2500 vertex->l_sedge = NEXT_IN(ep);
2246 2501 }
2247 2502
2248 2503 }
2249 2504
2250 2505 /*
2251 2506  * Color all yet-uncolored vertices reachable from vertex, counting those
2252 2507  * that belong to topology (those that have RECOMPUTE_LOCK set in their state).
2253 2508 *
2254 2509 * Note: we need to use a different stack_link l_stack1 because this is
2255 2510 * called from flk_recompute_dependencies() that already uses a stack with
2256 2511 * l_stack as stack_link.
2257 2512 */
2258 2513
2259 2514 static int
2260 2515 flk_color_reachables(lock_descriptor_t *vertex)
2261 2516 {
2262 2517 lock_descriptor_t *ver, *lock;
2263 2518 int count;
2264 2519 edge_t *ep;
2265 2520 lock_descriptor_t *vertex_stack;
2266 2521
2267 2522 STACK_INIT(vertex_stack);
2268 2523
2269 2524 STACK_PUSH(vertex_stack, vertex, l_stack1);
2270 2525 count = 0;
2271 2526 while ((ver = STACK_TOP(vertex_stack)) != NULL) {
2272 2527
2273 2528 STACK_POP(vertex_stack, l_stack1);
2274 2529 for (ep = FIRST_ADJ(ver); ep != HEAD(ver);
2275 2530 ep = NEXT_ADJ(ep)) {
2276 2531 lock = ep->to_vertex;
2277 2532 if (COLORED(lock))
2278 2533 continue;
2279 2534 COLOR(lock);
2280 2535 if (IS_RECOMPUTE(lock))
2281 2536 count++;
2282 2537 STACK_PUSH(vertex_stack, lock, l_stack1);
2283 2538 }
2284 2539
2285 2540 }
2286 2541 return (count);
2287 2542 }
2288 2543
2289 2544 /*
2290 2545 * Called from flk_recompute_dependencies() this routine decrements
2291 2546 * the barrier count of barrier vertices that are reachable from lock.
2292 2547 */
2293 2548
2294 2549 static void
2295 2550 flk_update_barriers(lock_descriptor_t *lock)
2296 2551 {
2297 2552 lock_descriptor_t *vertex, *lck;
2298 2553 edge_t *ep;
2299 2554 lock_descriptor_t *vertex_stack;
2300 2555
2301 2556 STACK_INIT(vertex_stack);
2302 2557
2303 2558 STACK_PUSH(vertex_stack, lock, l_stack1);
2304 2559
2305 2560 while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
2306 2561 STACK_POP(vertex_stack, l_stack1);
2307 2562 for (ep = FIRST_IN(vertex); ep != HEAD(vertex);
2308 2563 ep = NEXT_IN(ep)) {
2309 2564 lck = ep->from_vertex;
2310 2565 if (COLORED(lck)) {
2311 2566 if (IS_BARRIER(lck)) {
2312 2567 ASSERT(lck->l_index > 0);
2313 2568 lck->l_index--;
2314 2569 if (lck->l_index == 0)
2315 2570 lck->l_state &= ~BARRIER_LOCK;
2316 2571 }
2317 2572 continue;
2318 2573 }
2319 2574 COLOR(lck);
2320 2575 if (IS_BARRIER(lck)) {
2321 2576 ASSERT(lck->l_index > 0);
2322 2577 lck->l_index--;
2323 2578 if (lck->l_index == 0)
2324 2579 lck->l_state &= ~BARRIER_LOCK;
2325 2580 }
2326 2581 STACK_PUSH(vertex_stack, lck, l_stack1);
2327 2582 }
2328 2583 }
2329 2584 }
2330 2585
2331 2586 /*
2332 2587 * Finds all vertices that are reachable from 'lock' more than once and
2333 2588  * marks them as barrier vertices, incrementing their barrier count.
2334 2589 * The barrier count is one minus the total number of paths from lock
2335 2590 * to that vertex.
2336 2591 */
2337 2592
2338 2593 static int
2339 2594 flk_find_barriers(lock_descriptor_t *lock)
2340 2595 {
2341 2596 lock_descriptor_t *vertex, *lck;
2342 2597 int found = 0;
2343 2598 edge_t *ep;
2344 2599 lock_descriptor_t *vertex_stack;
2345 2600
2346 2601 STACK_INIT(vertex_stack);
2347 2602
2348 2603 STACK_PUSH(vertex_stack, lock, l_stack1);
2349 2604
2350 2605 while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
2351 2606 STACK_POP(vertex_stack, l_stack1);
2352 2607 for (ep = FIRST_IN(vertex); ep != HEAD(vertex);
2353 2608 ep = NEXT_IN(ep)) {
2354 2609 lck = ep->from_vertex;
2355 2610 if (COLORED(lck)) {
2356 2611 /* this is a barrier */
2357 2612 lck->l_state |= BARRIER_LOCK;
2358 2613 /* index will have barrier count */
2359 2614 lck->l_index++;
2360 2615 if (!found)
2361 2616 found = 1;
2362 2617 continue;
2363 2618 }
2364 2619 COLOR(lck);
2365 2620 lck->l_index = 0;
2366 2621 STACK_PUSH(vertex_stack, lck, l_stack1);
2367 2622 }
2368 2623 }
2369 2624 return (found);
2370 2625 }
2371 2626
2372 2627 /*
2373 2628 * Finds the first lock that is mainly responsible for blocking this
2374 2629 * request. If there is no such lock, request->l_flock.l_type is set to
2375 2630 * F_UNLCK. Otherwise, request->l_flock is filled in with the particulars
2376 2631 * of the blocking lock.
2377 2632 *
2378 2633 * Note: It is possible a request is blocked by a sleeping lock because
2379 2634 * of the fairness policy used in flk_process_request() to construct the
2380 2635 * dependencies. (see comments before flk_process_request()).
2381 2636 */
2382 2637
2383 2638 static void
2384 2639 flk_get_first_blocking_lock(lock_descriptor_t *request)
2385 2640 {
2386 2641 graph_t *gp = request->l_graph;
2387 2642 vnode_t *vp = request->l_vnode;
2388 2643 lock_descriptor_t *lock, *blocker;
2389 2644
2390 2645 ASSERT(MUTEX_HELD(&gp->gp_mutex));
2391 2646 blocker = NULL;
2392 2647 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
2393 2648
2394 2649 if (lock) {
2395 2650 do {
2396 2651 if (BLOCKS(lock, request)) {
2397 2652 blocker = lock;
2398 2653 break;
2399 2654 }
2400 2655 lock = lock->l_next;
2401 2656 } while (lock->l_vnode == vp);
2402 2657 }
2403 2658
2404 2659 if (blocker == NULL && request->l_flock.l_type == F_RDLCK) {
2405 2660 /*
2406 2661 * No active lock is blocking this request, but if a read
2407 2662 * lock is requested, it may also get blocked by a waiting
2408 2663 * writer. So search all sleeping locks and see if there is
2409 2664 * a writer waiting.
2410 2665 */
2411 2666 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
2412 2667 if (lock) {
2413 2668 do {
2414 2669 if (BLOCKS(lock, request)) {
2415 2670 blocker = lock;
2416 2671 break;
2417 2672 }
2418 2673 lock = lock->l_next;
2419 2674 } while (lock->l_vnode == vp);
2420 2675 }
2421 2676 }
2422 2677
2423 2678 if (blocker) {
2424 2679 report_blocker(blocker, request);
2425 2680 } else
2426 2681 request->l_flock.l_type = F_UNLCK;
2427 2682 }
2428 2683
2429 2684 /*
2430 2685 * Get the graph_t structure associated with a vnode.
2431 2686 * If 'initialize' is non-zero, and the graph_t structure for this vnode has
2432 2687 * not yet been initialized, then a new element is allocated and returned.
2433 2688 */
2434 2689 graph_t *
2435 2690 flk_get_lock_graph(vnode_t *vp, int initialize)
2436 2691 {
2437 2692 graph_t *gp;
2438 2693 graph_t *gp_alloc = NULL;
2439 2694 int index = HASH_INDEX(vp);
2440 2695
2441 2696 if (initialize == FLK_USE_GRAPH) {
2442 2697 mutex_enter(&flock_lock);
2443 2698 gp = lock_graph[index];
2444 2699 mutex_exit(&flock_lock);
2445 2700 return (gp);
2446 2701 }
2447 2702
2448 2703 ASSERT(initialize == FLK_INIT_GRAPH);
2449 2704
2450 2705 if (lock_graph[index] == NULL) {
2451 2706
2452 2707 gp_alloc = kmem_zalloc(sizeof (graph_t), KM_SLEEP);
2453 2708
2454 2709 /* Initialize the graph */
2455 2710
2456 2711 gp_alloc->active_locks.l_next =
2457 2712 gp_alloc->active_locks.l_prev =
2458 2713 (lock_descriptor_t *)ACTIVE_HEAD(gp_alloc);
2459 2714 gp_alloc->sleeping_locks.l_next =
2460 2715 gp_alloc->sleeping_locks.l_prev =
2461 2716 (lock_descriptor_t *)SLEEPING_HEAD(gp_alloc);
2462 2717 gp_alloc->index = index;
2463 2718 mutex_init(&gp_alloc->gp_mutex, NULL, MUTEX_DEFAULT, NULL);
2464 2719 }
2465 2720
2466 2721 mutex_enter(&flock_lock);
2467 2722
2468 2723 gp = lock_graph[index];
2469 2724
2470 2725 /* Recheck the value within flock_lock */
2471 2726 if (gp == NULL) {
2472 2727 struct flock_globals *fg;
2473 2728
2474 2729 /* We must have previously allocated the graph_t structure */
2475 2730 ASSERT(gp_alloc != NULL);
2476 2731 lock_graph[index] = gp = gp_alloc;
2477 2732 /*
2478 2733 * The lockmgr status is only needed if KLM is loaded.
2479 2734 */
2480 2735 if (flock_zone_key != ZONE_KEY_UNINITIALIZED) {
2481 2736 fg = flk_get_globals();
2482 2737 fg->lockmgr_status[index] = fg->flk_lockmgr_status;
2483 2738 }
2484 2739 }
2485 2740
2486 2741 mutex_exit(&flock_lock);
2487 2742
2488 2743 if ((gp_alloc != NULL) && (gp != gp_alloc)) {
2489 2744 /* There was a race to allocate the graph_t and we lost */
2490 2745 mutex_destroy(&gp_alloc->gp_mutex);
2491 2746 kmem_free(gp_alloc, sizeof (graph_t));
2492 2747 }
2493 2748
2494 2749 return (gp);
2495 2750 }
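
The allocation pattern above is the usual one: build the candidate graph_t without holding flock_lock, recheck the hash slot while holding it, and throw the candidate away if another thread won the race. A user-space sketch of the same pattern with pthreads, using simplified hypothetical types (error handling omitted):

    #include <stdlib.h>
    #include <pthread.h>

    /* Simplified stand-in for a lock graph. */
    struct graph {
        pthread_mutex_t g_mutex;
    };

    static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;
    static struct graph *table[64];

    /* Return the graph for 'index', allocating it on first use. */
    static struct graph *
    get_graph(int index)
    {
        struct graph *gp, *gp_alloc = NULL;

        /* Allocate and initialize outside the table lock. */
        if (table[index] == NULL) {
            gp_alloc = calloc(1, sizeof (*gp_alloc));
            (void) pthread_mutex_init(&gp_alloc->g_mutex, NULL);
        }

        (void) pthread_mutex_lock(&table_lock);
        gp = table[index];
        if (gp == NULL)                 /* recheck under the lock */
            table[index] = gp = gp_alloc;
        (void) pthread_mutex_unlock(&table_lock);

        /* We lost the race: discard our candidate. */
        if (gp_alloc != NULL && gp != gp_alloc) {
            (void) pthread_mutex_destroy(&gp_alloc->g_mutex);
            free(gp_alloc);
        }
        return (gp);
    }

    int
    main(void)
    {
        return (get_graph(3) == get_graph(3) ? 0 : 1);
    }
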
2496 2751
2497 2752 /*
2498 2753 * PSARC case 1997/292
2499 2754 */
2500 2755 int
2501 2756 cl_flk_has_remote_locks_for_nlmid(vnode_t *vp, int nlmid)
2502 2757 {
2503 2758 lock_descriptor_t *lock;
2504 2759 int result = 0;
2505 2760 graph_t *gp;
2506 2761 int lock_nlmid;
2507 2762
2508 2763 /*
2509 2764 * Check to see if node is booted as a cluster. If not, return.
2510 2765 */
2511 2766 if ((cluster_bootflags & CLUSTER_BOOTED) == 0) {
2512 2767 return (0);
2513 2768 }
2514 2769
2515 2770 gp = flk_get_lock_graph(vp, FLK_USE_GRAPH);
2516 2771 if (gp == NULL) {
2517 2772 return (0);
2518 2773 }
2519 2774
2520 2775 mutex_enter(&gp->gp_mutex);
2521 2776
2522 2777 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
2523 2778
2524 2779 if (lock) {
2525 2780 while (lock->l_vnode == vp) {
2526 2781 /* get NLM id from sysid */
2527 2782 lock_nlmid = GETNLMID(lock->l_flock.l_sysid);
2528 2783
2529 2784 /*
2530 2785 * If NLM server request _and_ nlmid of lock matches
2531 2786 * nlmid of argument, then we've found a remote lock.
2532 2787 */
2533 2788 if (IS_LOCKMGR(lock) && nlmid == lock_nlmid) {
2534 2789 result = 1;
2535 2790 goto done;
2536 2791 }
2537 2792 lock = lock->l_next;
2538 2793 }
2539 2794 }
2540 2795
2541 2796 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
2542 2797
2543 2798 if (lock) {
2544 2799 while (lock->l_vnode == vp) {
2545 2800 /* get NLM id from sysid */
2546 2801 lock_nlmid = GETNLMID(lock->l_flock.l_sysid);
2547 2802
2548 2803 /*
2549 2804 * If NLM server request _and_ nlmid of lock matches
2550 2805 * nlmid of argument, then we've found a remote lock.
2551 2806 */
2552 2807 if (IS_LOCKMGR(lock) && nlmid == lock_nlmid) {
2553 2808 result = 1;
2554 2809 goto done;
2555 2810 }
2556 2811 lock = lock->l_next;
2557 2812 }
2558 2813 }
2559 2814
2560 2815 done:
2561 2816 mutex_exit(&gp->gp_mutex);
2562 2817 return (result);
2563 2818 }
2564 2819
2565 2820 /*
2566 2821 * Determine whether there are any locks for the given vnode with a remote
2567 2822 * sysid. Returns zero if not, non-zero if there are.
2568 2823 *
2569 2824 * Note that the return value from this function is potentially invalid
2570 2825 * once it has been returned. The caller is responsible for providing its
2571 2826 * own synchronization mechanism to ensure that the return value is useful
2572 2827 * (e.g., see nfs_lockcompletion()).
2573 2828 */
2574 2829 int
2575 2830 flk_has_remote_locks(vnode_t *vp)
2576 2831 {
2577 2832 lock_descriptor_t *lock;
2578 2833 int result = 0;
2579 2834 graph_t *gp;
2580 2835
2581 2836 gp = flk_get_lock_graph(vp, FLK_USE_GRAPH);
2582 2837 if (gp == NULL) {
2583 2838 return (0);
2584 2839 }
2585 2840
2586 2841 mutex_enter(&gp->gp_mutex);
2587 2842
2588 2843 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
2589 2844
2590 2845 if (lock) {
2591 2846 while (lock->l_vnode == vp) {
2592 2847 if (IS_REMOTE(lock)) {
2593 2848 result = 1;
2594 2849 goto done;
2595 2850 }
2596 2851 lock = lock->l_next;
2597 2852 }
2598 2853 }
2599 2854
2600 2855 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
2601 2856
2602 2857 if (lock) {
2603 2858 while (lock->l_vnode == vp) {
2604 2859 if (IS_REMOTE(lock)) {
2605 2860 result = 1;
2606 2861 goto done;
2607 2862 }
2608 2863 lock = lock->l_next;
2609 2864 }
2610 2865 }
2611 2866
2612 2867 done:
2613 2868 mutex_exit(&gp->gp_mutex);
2614 2869 return (result);
2615 2870 }
2616 2871
2617 2872 /*
2618 2873 * Determine whether there are any locks for the given vnode with a remote
2619 2874  * sysid matching the given sysid.
2620 2875 * Used by the new (open source) NFS Lock Manager (NLM)
2621 2876 */
2622 2877 int
2623 2878 flk_has_remote_locks_for_sysid(vnode_t *vp, int sysid)
2624 2879 {
2625 2880 lock_descriptor_t *lock;
2626 2881 int result = 0;
2627 2882 graph_t *gp;
2628 2883
2629 2884 if (sysid == 0)
2630 2885 return (0);
2631 2886
2632 2887 gp = flk_get_lock_graph(vp, FLK_USE_GRAPH);
2633 2888 if (gp == NULL) {
2634 2889 return (0);
2635 2890 }
2636 2891
2637 2892 mutex_enter(&gp->gp_mutex);
2638 2893
2639 2894 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
2640 2895
2641 2896 if (lock) {
2642 2897 while (lock->l_vnode == vp) {
2643 2898 if (lock->l_flock.l_sysid == sysid) {
2644 2899 result = 1;
2645 2900 goto done;
2646 2901 }
2647 2902 lock = lock->l_next;
2648 2903 }
2649 2904 }
2650 2905
2651 2906 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
2652 2907
2653 2908 if (lock) {
2654 2909 while (lock->l_vnode == vp) {
2655 2910 if (lock->l_flock.l_sysid == sysid) {
2656 2911 result = 1;
2657 2912 goto done;
2658 2913 }
2659 2914 lock = lock->l_next;
2660 2915 }
2661 2916 }
2662 2917
2663 2918 done:
2664 2919 mutex_exit(&gp->gp_mutex);
2665 2920 return (result);
2666 2921 }
2667 2922
2668 2923 /*
2669 2924 * Determine if there are any locks owned by the given sysid.
2670 2925 * Returns zero if not, non-zero if there are. Note that this return code
2671 2926 * could be derived from flk_get_{sleeping,active}_locks, but this routine
2672 2927 * avoids all the memory allocations of those routines.
2673 2928 *
2674 2929 * This routine has the same synchronization issues as
2675 2930 * flk_has_remote_locks.
2676 2931 */
2677 2932
2678 2933 int
2679 2934 flk_sysid_has_locks(int sysid, int lck_type)
2680 2935 {
2681 2936 int has_locks = 0;
2682 2937 lock_descriptor_t *lock;
2683 2938 graph_t *gp;
2684 2939 int i;
2685 2940
2686 2941 for (i = 0; i < HASH_SIZE && !has_locks; i++) {
2687 2942 mutex_enter(&flock_lock);
2688 2943 gp = lock_graph[i];
2689 2944 mutex_exit(&flock_lock);
2690 2945 if (gp == NULL) {
2691 2946 continue;
2692 2947 }
2693 2948
2694 2949 mutex_enter(&gp->gp_mutex);
2695 2950
2696 2951 if (lck_type & FLK_QUERY_ACTIVE) {
2697 2952 for (lock = ACTIVE_HEAD(gp)->l_next;
2698 2953 lock != ACTIVE_HEAD(gp) && !has_locks;
2699 2954 lock = lock->l_next) {
2700 2955 if (lock->l_flock.l_sysid == sysid)
2701 2956 has_locks = 1;
2702 2957 }
2703 2958 }
2704 2959
2705 2960 if (lck_type & FLK_QUERY_SLEEPING) {
2706 2961 for (lock = SLEEPING_HEAD(gp)->l_next;
2707 2962 lock != SLEEPING_HEAD(gp) && !has_locks;
2708 2963 lock = lock->l_next) {
2709 2964 if (lock->l_flock.l_sysid == sysid)
2710 2965 has_locks = 1;
2711 2966 }
2712 2967 }
2713 2968 mutex_exit(&gp->gp_mutex);
2714 2969 }
2715 2970
2716 2971 return (has_locks);
2717 2972 }
2718 2973
2719 2974
2720 2975 /*
2721 2976 * PSARC case 1997/292
2722 2977 *
2723 2978  * Requires: "sysid" is a pair [nlmid, sysid]. The lower half is a 16-bit
2724 2979 * quantity, the real sysid generated by the NLM server; the upper half
2725 2980 * identifies the node of the cluster where the NLM server ran.
2726 2981 * This routine is only called by an NLM server running in a cluster.
2727 2982 * Effects: Remove all locks held on behalf of the client identified
2728 2983 * by "sysid."
2729 2984 */
2730 2985 void
2731 2986 cl_flk_remove_locks_by_sysid(int sysid)
2732 2987 {
2733 2988 graph_t *gp;
2734 2989 int i;
2735 2990 lock_descriptor_t *lock, *nlock;
2736 2991
2737 2992 /*
2738 2993 * Check to see if node is booted as a cluster. If not, return.
2739 2994 */
2740 2995 if ((cluster_bootflags & CLUSTER_BOOTED) == 0) {
2741 2996 return;
2742 2997 }
2743 2998
2744 2999 ASSERT(sysid != 0);
2745 3000 for (i = 0; i < HASH_SIZE; i++) {
2746 3001 mutex_enter(&flock_lock);
2747 3002 gp = lock_graph[i];
2748 3003 mutex_exit(&flock_lock);
2749 3004
2750 3005 if (gp == NULL)
2751 3006 continue;
2752 3007
2753 3008 mutex_enter(&gp->gp_mutex); /* get mutex on lock graph */
2754 3009
2755 3010 /* signal sleeping requests so that they bail out */
2756 3011 lock = SLEEPING_HEAD(gp)->l_next;
2757 3012 while (lock != SLEEPING_HEAD(gp)) {
2758 3013 nlock = lock->l_next;
2759 3014 if (lock->l_flock.l_sysid == sysid) {
2760 3015 INTERRUPT_WAKEUP(lock);
2761 3016 }
2762 3017 lock = nlock;
2763 3018 }
2764 3019
2765 3020 /* delete active locks */
2766 3021 lock = ACTIVE_HEAD(gp)->l_next;
2767 3022 while (lock != ACTIVE_HEAD(gp)) {
2768 3023 nlock = lock->l_next;
2769 3024 if (lock->l_flock.l_sysid == sysid) {
2770 3025 flk_delete_active_lock(lock, 0);
2771 3026 flk_wakeup(lock, 1);
2772 3027 flk_free_lock(lock);
2773 3028 }
2774 3029 lock = nlock;
2775 3030 }
2776 3031 mutex_exit(&gp->gp_mutex); /* release mutex on lock graph */
2777 3032 }
2778 3033 }
2779 3034
2780 3035 /*
2781 3036  * Delete all locks in the system that belong to the sysid of the request.
2782 3037 */
2783 3038
2784 3039 static void
2785 3040 flk_delete_locks_by_sysid(lock_descriptor_t *request)
2786 3041 {
2787 3042 int sysid = request->l_flock.l_sysid;
2788 3043 lock_descriptor_t *lock, *nlock;
2789 3044 graph_t *gp;
2790 3045 int i;
2791 3046
2792 3047 ASSERT(MUTEX_HELD(&request->l_graph->gp_mutex));
2793 3048 ASSERT(sysid != 0);
2794 3049
2795 3050 mutex_exit(&request->l_graph->gp_mutex);
2796 3051
2797 3052 for (i = 0; i < HASH_SIZE; i++) {
2798 3053 mutex_enter(&flock_lock);
2799 3054 gp = lock_graph[i];
2800 3055 mutex_exit(&flock_lock);
2801 3056
2802 3057 if (gp == NULL)
2803 3058 continue;
2804 3059
2805 3060 mutex_enter(&gp->gp_mutex);
2806 3061
2807 3062 /* signal sleeping requests so that they bail out */
2808 3063 lock = SLEEPING_HEAD(gp)->l_next;
2809 3064 while (lock != SLEEPING_HEAD(gp)) {
2810 3065 nlock = lock->l_next;
2811 3066 if (lock->l_flock.l_sysid == sysid) {
2812 3067 INTERRUPT_WAKEUP(lock);
2813 3068 }
2814 3069 lock = nlock;
2815 3070 }
2816 3071
2817 3072 /* delete active locks */
2818 3073 lock = ACTIVE_HEAD(gp)->l_next;
2819 3074 while (lock != ACTIVE_HEAD(gp)) {
2820 3075 nlock = lock->l_next;
2821 3076 if (lock->l_flock.l_sysid == sysid) {
2822 3077 flk_delete_active_lock(lock, 0);
2823 3078 flk_wakeup(lock, 1);
2824 3079 flk_free_lock(lock);
2825 3080 }
2826 3081 lock = nlock;
2827 3082 }
2828 3083 mutex_exit(&gp->gp_mutex);
2829 3084 }
2830 3085
2831 3086 mutex_enter(&request->l_graph->gp_mutex);
2832 3087 }
2833 3088
2834 3089 /*
2835 3090 * Clustering: Deletes PXFS locks
2836 3091 * Effects: Delete all locks on files in the given file system and with the
2837 3092 * given PXFS id.
2838 3093 */
2839 3094 void
2840 3095 cl_flk_delete_pxfs_locks(struct vfs *vfsp, int pxfsid)
2841 3096 {
2842 3097 lock_descriptor_t *lock, *nlock;
2843 3098 graph_t *gp;
2844 3099 int i;
2845 3100
2846 3101 for (i = 0; i < HASH_SIZE; i++) {
2847 3102 mutex_enter(&flock_lock);
2848 3103 gp = lock_graph[i];
2849 3104 mutex_exit(&flock_lock);
2850 3105
2851 3106 if (gp == NULL)
2852 3107 continue;
2853 3108
2854 3109 mutex_enter(&gp->gp_mutex);
2855 3110
2856 3111 /* signal sleeping requests so that they bail out */
2857 3112 lock = SLEEPING_HEAD(gp)->l_next;
2858 3113 while (lock != SLEEPING_HEAD(gp)) {
2859 3114 nlock = lock->l_next;
2860 3115 if (lock->l_vnode->v_vfsp == vfsp) {
2861 3116 ASSERT(IS_PXFS(lock));
2862 3117 if (GETPXFSID(lock->l_flock.l_sysid) ==
2863 3118 pxfsid) {
2864 3119 flk_set_state(lock,
2865 3120 FLK_CANCELLED_STATE);
2866 3121 flk_cancel_sleeping_lock(lock, 1);
2867 3122 }
2868 3123 }
2869 3124 lock = nlock;
2870 3125 }
2871 3126
2872 3127 /* delete active locks */
2873 3128 lock = ACTIVE_HEAD(gp)->l_next;
2874 3129 while (lock != ACTIVE_HEAD(gp)) {
2875 3130 nlock = lock->l_next;
2876 3131 if (lock->l_vnode->v_vfsp == vfsp) {
2877 3132 ASSERT(IS_PXFS(lock));
2878 3133 if (GETPXFSID(lock->l_flock.l_sysid) ==
2879 3134 pxfsid) {
2880 3135 flk_delete_active_lock(lock, 0);
2881 3136 flk_wakeup(lock, 1);
2882 3137 flk_free_lock(lock);
2883 3138 }
2884 3139 }
2885 3140 lock = nlock;
2886 3141 }
2887 3142 mutex_exit(&gp->gp_mutex);
2888 3143 }
2889 3144 }
2890 3145
2891 3146 /*
2892 3147 * Search for a sleeping lock manager lock which matches exactly this lock
2893 3148 * request; if one is found, fake a signal to cancel it.
2894 3149 *
2895 3150 * Return 1 if a matching lock was found, 0 otherwise.
2896 3151 */
2897 3152
2898 3153 static int
2899 3154 flk_canceled(lock_descriptor_t *request)
2900 3155 {
2901 3156 lock_descriptor_t *lock, *nlock;
2902 3157 graph_t *gp = request->l_graph;
2903 3158 vnode_t *vp = request->l_vnode;
2904 3159
2905 3160 ASSERT(MUTEX_HELD(&gp->gp_mutex));
2906 3161 ASSERT(IS_LOCKMGR(request));
2907 3162 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
2908 3163
2909 3164 if (lock) {
2910 3165 while (lock->l_vnode == vp) {
2911 3166 nlock = lock->l_next;
2912 3167 if (SAME_OWNER(lock, request) &&
2913 3168 lock->l_start == request->l_start &&
2914 3169 lock->l_end == request->l_end) {
2915 3170 INTERRUPT_WAKEUP(lock);
2916 3171 return (1);
2917 3172 }
2918 3173 lock = nlock;
2919 3174 }
2920 3175 }
2921 3176 return (0);
2922 3177 }
2923 3178
2924 3179 /*
2925 3180 * Remove all non-OFD locks for the vnode belonging to the given pid and sysid.
2926 3181 * That is, since OFD locks are pid-less we'll never match on the incoming
2927 3182 * pid. OFD locks are removed earlier in the close() path via closef() and
2928 3183 * ofdcleanlock().
2929 3184 */
2930 3185 void
2931 3186 cleanlocks(vnode_t *vp, pid_t pid, int sysid)
2932 3187 {
2933 3188 graph_t *gp;
2934 3189 lock_descriptor_t *lock, *nlock;
2935 3190 lock_descriptor_t *link_stack;
2936 3191
2937 3192 STACK_INIT(link_stack);
2938 3193
2939 3194 gp = flk_get_lock_graph(vp, FLK_USE_GRAPH);
2940 3195
2941 3196 if (gp == NULL)
2942 3197 return;
2943 3198 mutex_enter(&gp->gp_mutex);
2944 3199
2945 3200 CHECK_SLEEPING_LOCKS(gp);
2946 3201 CHECK_ACTIVE_LOCKS(gp);
2947 3202
2948 3203 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
2949 3204
2950 3205 if (lock) {
2951 3206 do {
2952 3207 nlock = lock->l_next;
2953 3208 if ((lock->l_flock.l_pid == pid ||
2954 3209 pid == IGN_PID) &&
2955 3210 lock->l_flock.l_sysid == sysid) {
2956 3211 CANCEL_WAKEUP(lock);
2957 3212 }
2958 3213 lock = nlock;
2959 3214 } while (lock->l_vnode == vp);
2960 3215 }
2961 3216
2962 3217 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
2963 3218
2964 3219 if (lock) {
2965 3220 do {
2966 3221 nlock = lock->l_next;
2967 3222 if ((lock->l_flock.l_pid == pid ||
2968 3223 pid == IGN_PID) &&
2969 3224 lock->l_flock.l_sysid == sysid) {
2970 3225 flk_delete_active_lock(lock, 0);
2971 3226 STACK_PUSH(link_stack, lock, l_stack);
2972 3227 }
2973 3228 lock = nlock;
2974 3229 } while (lock->l_vnode == vp);
2975 3230 }
2976 3231
2977 3232 while ((lock = STACK_TOP(link_stack)) != NULL) {
2978 3233 STACK_POP(link_stack, l_stack);
2979 3234 flk_wakeup(lock, 1);
2980 3235 flk_free_lock(lock);
2981 3236 }
2982 3237
2983 3238 CHECK_SLEEPING_LOCKS(gp);
2984 3239 CHECK_ACTIVE_LOCKS(gp);
2985 3240 CHECK_OWNER_LOCKS(gp, pid, sysid, vp);
2986 3241 mutex_exit(&gp->gp_mutex);
2987 3242 }
2988 3243
2989 3244
2990 3245 /*
2991 3246 * Called from 'fs' read and write routines for files that have mandatory
2992 3247 * locking enabled.
2993 3248 */
2994 3249
2995 3250 int
2996 3251 chklock(struct vnode *vp, int iomode, u_offset_t offset, ssize_t len, int fmode,
2997 3252 caller_context_t *ct)
2998 3253 {
2999 - register int i;
3254 + int i;
3000 3255 struct flock64 bf;
3001 3256 int error = 0;
3002 3257
3003 3258 bf.l_type = (iomode & FWRITE) ? F_WRLCK : F_RDLCK;
3004 3259 bf.l_whence = 0;
3005 3260 bf.l_start = offset;
3006 3261 bf.l_len = len;
3007 3262 if (ct == NULL) {
3008 3263 bf.l_pid = curproc->p_pid;
3009 3264 bf.l_sysid = 0;
3010 3265 } else {
3011 3266 bf.l_pid = ct->cc_pid;
3012 3267 bf.l_sysid = ct->cc_sysid;
3013 3268 }
3014 3269 i = (fmode & (FNDELAY|FNONBLOCK)) ? INOFLCK : INOFLCK|SLPFLCK;
3015 3270 if ((i = reclock(vp, &bf, i, 0, offset, NULL)) != 0 ||
3016 3271 bf.l_type != F_UNLCK)
3017 3272 error = i ? i : EAGAIN;
3018 3273 return (error);
3019 3274 }
3020 3275
3021 3276 /*
3022 3277 * convoff - converts the given data (start, whence) to the
3023 3278 * given whence.
3024 3279 */
3025 3280 int
3026 3281 convoff(struct vnode *vp, struct flock64 *lckdat, int whence, offset_t offset)
3027 3282 {
3028 3283 int error;
3029 3284 struct vattr vattr;
3030 3285
3031 3286 if ((lckdat->l_whence == 2) || (whence == 2)) {
3032 3287 vattr.va_mask = AT_SIZE;
3033 3288 if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
3034 3289 return (error);
3035 3290 }
3036 3291
3037 3292 switch (lckdat->l_whence) {
3038 3293 case 1:
3039 3294 lckdat->l_start += offset;
3040 3295 break;
3041 3296 case 2:
3042 3297 lckdat->l_start += vattr.va_size;
3043 3298 /* FALLTHRU */
3044 3299 case 0:
3045 3300 break;
3046 3301 default:
3047 3302 return (EINVAL);
3048 3303 }
3049 3304
3050 3305 if (lckdat->l_start < 0)
3051 3306 return (EINVAL);
3052 3307
3053 3308 switch (whence) {
3054 3309 case 1:
3055 3310 lckdat->l_start -= offset;
3056 3311 break;
3057 3312 case 2:
3058 3313 lckdat->l_start -= vattr.va_size;
3059 3314 /* FALLTHRU */
3060 3315 case 0:
3061 3316 break;
3062 3317 default:
3063 3318 return (EINVAL);
3064 3319 }
3065 3320
3066 3321 lckdat->l_whence = (short)whence;
3067 3322 return (0);
3068 3323 }
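A short sketch of the usual caller: fcntl()-style code normalizes an flock64 to an absolute, SEEK_SET-based range before handing it to the lock code. The locals bf and fileoff are hypothetical and assumed to be filled in by the caller.

    struct flock64 bf;      /* copied in from the fcntl() argument */
    offset_t fileoff;       /* current offset of the open file */
    int error;

    /* Rewrite bf so that l_whence == 0 and l_start is absolute. */
    if ((error = convoff(vp, &bf, 0, fileoff)) != 0)
            return (error);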
3069 3324
3070 3325
3071 3326 /* proc_graph function definitions */
3072 3327
3073 3328 /*
3074 3329  * Checks for a deadlock caused by the new 'lock'. If a deadlock is found,
3075 3330  * the edges of this lock are freed and 1 is returned; otherwise 0.
3076 3331 */
3077 3332
3078 3333 static int
3079 3334 flk_check_deadlock(lock_descriptor_t *lock)
3080 3335 {
3081 3336 proc_vertex_t *start_vertex, *pvertex;
3082 3337 proc_vertex_t *dvertex;
3083 3338 proc_edge_t *pep, *ppep;
3084 3339 edge_t *ep, *nep;
3085 3340 proc_vertex_t *process_stack;
3086 3341
3087 3342 /*
3088 3343 * OFD style locks are not associated with any process so there is
3089 3344 * no proc graph for these. Thus we cannot, and do not, do deadlock
3090 3345 * detection.
3091 3346 */
3092 3347 if (lock->l_ofd != NULL)
3093 3348 return (0);
3094 3349
3095 3350 STACK_INIT(process_stack);
3096 3351
3097 3352 mutex_enter(&flock_lock);
3098 3353 start_vertex = flk_get_proc_vertex(lock);
3099 3354 ASSERT(start_vertex != NULL);
3100 3355
3101 3356 /* construct the edges from this process to other processes */
3102 3357
3103 3358 ep = FIRST_ADJ(lock);
3104 3359 while (ep != HEAD(lock)) {
3105 3360 proc_vertex_t *adj_proc;
3106 3361
3107 3362 adj_proc = flk_get_proc_vertex(ep->to_vertex);
3108 3363 for (pep = start_vertex->edge; pep != NULL; pep = pep->next) {
3109 3364 if (pep->to_proc == adj_proc) {
3110 3365 ASSERT(pep->refcount);
3111 3366 pep->refcount++;
3112 3367 break;
3113 3368 }
3114 3369 }
3115 3370 if (pep == NULL) {
3116 3371 pep = flk_get_proc_edge();
3117 3372 pep->to_proc = adj_proc;
3118 3373 pep->refcount = 1;
3119 3374 adj_proc->incount++;
3120 3375 pep->next = start_vertex->edge;
3121 3376 start_vertex->edge = pep;
3122 3377 }
3123 3378 ep = NEXT_ADJ(ep);
3124 3379 }
3125 3380
3126 3381 ep = FIRST_IN(lock);
3127 3382
3128 3383 while (ep != HEAD(lock)) {
3129 3384 proc_vertex_t *in_proc;
3130 3385
3131 3386 in_proc = flk_get_proc_vertex(ep->from_vertex);
3132 3387
3133 3388 for (pep = in_proc->edge; pep != NULL; pep = pep->next) {
3134 3389 if (pep->to_proc == start_vertex) {
3135 3390 ASSERT(pep->refcount);
3136 3391 pep->refcount++;
3137 3392 break;
3138 3393 }
3139 3394 }
3140 3395 if (pep == NULL) {
3141 3396 pep = flk_get_proc_edge();
3142 3397 pep->to_proc = start_vertex;
3143 3398 pep->refcount = 1;
3144 3399 start_vertex->incount++;
3145 3400 pep->next = in_proc->edge;
3146 3401 in_proc->edge = pep;
3147 3402 }
3148 3403 ep = NEXT_IN(ep);
3149 3404 }
3150 3405
3151 3406 if (start_vertex->incount == 0) {
3152 3407 mutex_exit(&flock_lock);
3153 3408 return (0);
3154 3409 }
3155 3410
3156 3411 flk_proc_graph_uncolor();
3157 3412
3158 3413 start_vertex->p_sedge = start_vertex->edge;
3159 3414
3160 3415 STACK_PUSH(process_stack, start_vertex, p_stack);
3161 3416
3162 3417 while ((pvertex = STACK_TOP(process_stack)) != NULL) {
3163 3418 for (pep = pvertex->p_sedge; pep != NULL; pep = pep->next) {
3164 3419 dvertex = pep->to_proc;
3165 3420 if (!PROC_ARRIVED(dvertex)) {
3166 3421 STACK_PUSH(process_stack, dvertex, p_stack);
3167 3422 dvertex->p_sedge = dvertex->edge;
3168 3423 PROC_ARRIVE(pvertex);
3169 3424 pvertex->p_sedge = pep->next;
3170 3425 break;
3171 3426 }
3172 3427 if (!PROC_DEPARTED(dvertex))
3173 3428 goto deadlock;
3174 3429 }
3175 3430 if (pep == NULL) {
3176 3431 PROC_DEPART(pvertex);
3177 3432 STACK_POP(process_stack, p_stack);
3178 3433 }
3179 3434 }
3180 3435 mutex_exit(&flock_lock);
3181 3436 return (0);
3182 3437
3183 3438 deadlock:
3184 3439
3185 3440 /* we remove all lock edges and proc edges */
3186 3441
3187 3442 ep = FIRST_ADJ(lock);
3188 3443 while (ep != HEAD(lock)) {
3189 3444 proc_vertex_t *adj_proc;
3190 3445 adj_proc = flk_get_proc_vertex(ep->to_vertex);
3191 3446 nep = NEXT_ADJ(ep);
3192 3447 IN_LIST_REMOVE(ep);
3193 3448 ADJ_LIST_REMOVE(ep);
3194 3449 flk_free_edge(ep);
3195 3450 ppep = start_vertex->edge;
3196 3451 for (pep = start_vertex->edge; pep != NULL; ppep = pep,
3197 3452 pep = ppep->next) {
3198 3453 if (pep->to_proc == adj_proc) {
3199 3454 pep->refcount--;
3200 3455 if (pep->refcount == 0) {
3201 3456 if (pep == ppep) {
3202 3457 start_vertex->edge = pep->next;
3203 3458 } else {
3204 3459 ppep->next = pep->next;
3205 3460 }
3206 3461 adj_proc->incount--;
3207 3462 flk_proc_release(adj_proc);
3208 3463 flk_free_proc_edge(pep);
3209 3464 }
3210 3465 break;
3211 3466 }
3212 3467 }
3213 3468 ep = nep;
3214 3469 }
3215 3470 ep = FIRST_IN(lock);
3216 3471 while (ep != HEAD(lock)) {
3217 3472 proc_vertex_t *in_proc;
3218 3473 in_proc = flk_get_proc_vertex(ep->from_vertex);
3219 3474 nep = NEXT_IN(ep);
3220 3475 IN_LIST_REMOVE(ep);
3221 3476 ADJ_LIST_REMOVE(ep);
3222 3477 flk_free_edge(ep);
3223 3478 ppep = in_proc->edge;
3224 3479 for (pep = in_proc->edge; pep != NULL; ppep = pep,
3225 3480 pep = ppep->next) {
3226 3481 if (pep->to_proc == start_vertex) {
3227 3482 pep->refcount--;
3228 3483 if (pep->refcount == 0) {
3229 3484 if (pep == ppep) {
3230 3485 in_proc->edge = pep->next;
3231 3486 } else {
3232 3487 ppep->next = pep->next;
3233 3488 }
3234 3489 start_vertex->incount--;
3235 3490 flk_proc_release(in_proc);
3236 3491 flk_free_proc_edge(pep);
3237 3492 }
3238 3493 break;
3239 3494 }
3240 3495 }
3241 3496 ep = nep;
3242 3497 }
3243 3498 flk_proc_release(start_vertex);
3244 3499 mutex_exit(&flock_lock);
3245 3500 return (1);
3246 3501 }
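For illustration only, a user-level sketch (not from this source) of the classic two-process cycle that this check turns into EDEADLK; fd and the byte ranges are hypothetical.

    struct flock fl;

    fl.l_type = F_WRLCK;
    fl.l_whence = SEEK_SET;

    /* process A */
    fl.l_start = 0;   fl.l_len = 100; (void) fcntl(fd, F_SETLK, &fl);
    fl.l_start = 100; fl.l_len = 100; (void) fcntl(fd, F_SETLKW, &fl); /* blocks on B */

    /* process B */
    fl.l_start = 100; fl.l_len = 100; (void) fcntl(fd, F_SETLK, &fl);
    fl.l_start = 0;   fl.l_len = 100; (void) fcntl(fd, F_SETLKW, &fl); /* fails, EDEADLK */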
3247 3502
3248 3503 /*
3249 3504  * Get a proc vertex. If the lock's pvertex value refers to the correct proc
3250 3505  * vertex in the list, we return that; otherwise we allocate one. If
3251 3506  * necessary, we also grow the list of vertices.
3252 3507 */
3253 3508
3254 3509 static proc_vertex_t *
3255 3510 flk_get_proc_vertex(lock_descriptor_t *lock)
3256 3511 {
3257 3512 int i;
3258 3513 proc_vertex_t *pv;
3259 3514 proc_vertex_t **palloc;
3260 3515
3261 3516 ASSERT(MUTEX_HELD(&flock_lock));
3262 3517 if (lock->pvertex != -1) {
3263 3518 ASSERT(lock->pvertex >= 0);
3264 3519 pv = pgraph.proc[lock->pvertex];
3265 3520 if (pv != NULL && PROC_SAME_OWNER(lock, pv)) {
3266 3521 return (pv);
3267 3522 }
3268 3523 }
3269 3524 for (i = 0; i < pgraph.gcount; i++) {
3270 3525 pv = pgraph.proc[i];
3271 3526 if (pv != NULL && PROC_SAME_OWNER(lock, pv)) {
3272 3527 lock->pvertex = pv->index = i;
3273 3528 return (pv);
3274 3529 }
3275 3530 }
3276 3531 pv = kmem_zalloc(sizeof (struct proc_vertex), KM_SLEEP);
3277 3532 pv->pid = lock->l_flock.l_pid;
3278 3533 pv->sysid = lock->l_flock.l_sysid;
3279 3534 flk_proc_vertex_allocs++;
3280 3535 if (pgraph.free != 0) {
3281 3536 for (i = 0; i < pgraph.gcount; i++) {
3282 3537 if (pgraph.proc[i] == NULL) {
3283 3538 pgraph.proc[i] = pv;
3284 3539 lock->pvertex = pv->index = i;
3285 3540 pgraph.free--;
3286 3541 return (pv);
3287 3542 }
3288 3543 }
3289 3544 }
3290 3545 palloc = kmem_zalloc((pgraph.gcount + PROC_CHUNK) *
3291 3546 sizeof (proc_vertex_t *), KM_SLEEP);
3292 3547
3293 3548 if (pgraph.proc) {
3294 3549 bcopy(pgraph.proc, palloc,
3295 3550 pgraph.gcount * sizeof (proc_vertex_t *));
3296 3551
3297 3552 kmem_free(pgraph.proc,
3298 3553 pgraph.gcount * sizeof (proc_vertex_t *));
3299 3554 }
3300 3555 pgraph.proc = palloc;
3301 3556 pgraph.free += (PROC_CHUNK - 1);
3302 3557 pv->index = lock->pvertex = pgraph.gcount;
3303 3558 pgraph.gcount += PROC_CHUNK;
3304 3559 pgraph.proc[pv->index] = pv;
3305 3560 return (pv);
3306 3561 }
3307 3562
3308 3563 /*
3309 3564 * Allocate a proc edge.
3310 3565 */
3311 3566
3312 3567 static proc_edge_t *
3313 3568 flk_get_proc_edge()
3314 3569 {
3315 3570 proc_edge_t *pep;
3316 3571
3317 3572 pep = kmem_zalloc(sizeof (proc_edge_t), KM_SLEEP);
3318 3573 flk_proc_edge_allocs++;
3319 3574 return (pep);
3320 3575 }
3321 3576
3322 3577 /*
3323 3578 * Free the proc edge. Called whenever its reference count goes to zero.
3324 3579 */
3325 3580
3326 3581 static void
3327 3582 flk_free_proc_edge(proc_edge_t *pep)
3328 3583 {
3329 3584 ASSERT(pep->refcount == 0);
3330 - kmem_free((void *)pep, sizeof (proc_edge_t));
3585 + kmem_free(pep, sizeof (proc_edge_t));
3331 3586 flk_proc_edge_frees++;
3332 3587 }
3333 3588
3334 3589 /*
3335 3590 * Color the graph explicitly done only when the mark value hits max value.
3336 3591 */
3337 3592
3338 3593 static void
3339 3594 flk_proc_graph_uncolor()
3340 3595 {
3341 3596 int i;
3342 3597
3343 3598 if (pgraph.mark == UINT_MAX) {
3344 3599 for (i = 0; i < pgraph.gcount; i++)
3345 3600 if (pgraph.proc[i] != NULL) {
3346 3601 pgraph.proc[i]->atime = 0;
3347 3602 pgraph.proc[i]->dtime = 0;
3348 3603 }
3349 3604 pgraph.mark = 1;
3350 3605 } else {
3351 3606 pgraph.mark++;
3352 3607 }
3353 3608 }
3354 3609
3355 3610 /*
3356 3611  * Release the proc vertex iff it has neither in edges nor out edges.
3357 3612 */
3358 3613
3359 3614 static void
3360 3615 flk_proc_release(proc_vertex_t *proc)
3361 3616 {
3362 3617 ASSERT(MUTEX_HELD(&flock_lock));
3363 3618 if (proc->edge == NULL && proc->incount == 0) {
3364 3619 pgraph.proc[proc->index] = NULL;
3365 3620 pgraph.free++;
3366 3621 kmem_free(proc, sizeof (proc_vertex_t));
3367 3622 flk_proc_vertex_frees++;
3368 3623 }
3369 3624 }
3370 3625
3371 3626 /*
3372 3627 * Updates process graph to reflect change in a lock_graph.
3373 3628 * Note: We should call this function only after we have a correctly
3374 3629 * recomputed lock graph. Otherwise we might miss a deadlock detection.
3375 3630  * e.g. in flk_relation() we call this function after
3376 3631  * flk_recompute_dependencies(); otherwise, if a process tries to lock a
3377 3632  * vnode hashed into another graph, it might sleep forever.
3378 3633 */
3379 3634
3380 3635 static void
3381 3636 flk_update_proc_graph(edge_t *ep, int delete)
3382 3637 {
3383 3638 proc_vertex_t *toproc, *fromproc;
3384 3639 proc_edge_t *pep, *prevpep;
3385 3640
3386 3641 mutex_enter(&flock_lock);
3387 3642
3388 3643 /*
3389 3644 * OFD style locks are not associated with any process so there is
3390 3645 * no proc graph for these.
3391 3646 */
3392 3647 if (ep->from_vertex->l_ofd != NULL) {
3393 3648 mutex_exit(&flock_lock);
3394 3649 return;
3395 3650 }
3396 3651
3397 3652 toproc = flk_get_proc_vertex(ep->to_vertex);
3398 3653 fromproc = flk_get_proc_vertex(ep->from_vertex);
3399 3654
3400 3655 if (!delete)
3401 3656 goto add;
3402 3657 pep = prevpep = fromproc->edge;
3403 3658
3404 3659 ASSERT(pep != NULL);
3405 3660 while (pep != NULL) {
3406 3661 if (pep->to_proc == toproc) {
3407 3662 ASSERT(pep->refcount > 0);
3408 3663 pep->refcount--;
3409 3664 if (pep->refcount == 0) {
3410 3665 if (pep == prevpep) {
3411 3666 fromproc->edge = pep->next;
3412 3667 } else {
3413 3668 prevpep->next = pep->next;
3414 3669 }
3415 3670 toproc->incount--;
3416 3671 flk_proc_release(toproc);
3417 3672 flk_free_proc_edge(pep);
3418 3673 }
3419 3674 break;
3420 3675 }
3421 3676 prevpep = pep;
3422 3677 pep = pep->next;
3423 3678 }
3424 3679 flk_proc_release(fromproc);
3425 3680 mutex_exit(&flock_lock);
3426 3681 return;
3427 3682 add:
3428 3683
3429 3684 pep = fromproc->edge;
3430 3685
3431 3686 while (pep != NULL) {
3432 3687 if (pep->to_proc == toproc) {
3433 3688 ASSERT(pep->refcount > 0);
3434 3689 pep->refcount++;
3435 3690 break;
3436 3691 }
3437 3692 pep = pep->next;
3438 3693 }
3439 3694 if (pep == NULL) {
3440 3695 pep = flk_get_proc_edge();
3441 3696 pep->to_proc = toproc;
3442 3697 pep->refcount = 1;
3443 3698 toproc->incount++;
3444 3699 pep->next = fromproc->edge;
3445 3700 fromproc->edge = pep;
3446 3701 }
3447 3702 mutex_exit(&flock_lock);
3448 3703 }
3449 3704
3450 3705 /*
3451 3706 * Set the control status for lock manager requests.
3452 3707 *
3453 3708 */
3454 3709
3455 3710 /*
3456 3711 * PSARC case 1997/292
3457 3712 *
3458 3713 * Requires: "nlmid" must be >= 1 and <= clconf_maximum_nodeid().
3459 3714 * Effects: Set the state of the NLM server identified by "nlmid"
3460 3715 * in the NLM registry to state "nlm_state."
3461 3716 * Raises exception no_such_nlm if "nlmid" doesn't identify a known
3462 3717 * NLM server to this LLM.
3463 3718 * Note that when this routine is called with NLM_SHUTTING_DOWN there
3464 3719  * may be lock requests that have gotten started but not finished. In
3465 3720 * particular, there may be blocking requests that are in the callback code
3466 3721 * before sleeping (so they're not holding the lock for the graph). If
3467 3722 * such a thread reacquires the graph's lock (to go to sleep) after
3468 3723 * NLM state in the NLM registry is set to a non-up value,
3469 3724 * it will notice the status and bail out. If the request gets
3470 3725 * granted before the thread can check the NLM registry, let it
3471 3726 * continue normally. It will get flushed when we are called with NLM_DOWN.
3472 3727 *
3473 3728 * Modifies: nlm_reg_obj (global)
3474 3729 * Arguments:
3475 3730 * nlmid (IN): id uniquely identifying an NLM server
3476 3731 * nlm_state (IN): NLM server state to change "nlmid" to
3477 3732 */
3478 3733 void
3479 3734 cl_flk_set_nlm_status(int nlmid, flk_nlm_status_t nlm_state)
3480 3735 {
3481 3736 /*
3482 3737 * Check to see if node is booted as a cluster. If not, return.
3483 3738 */
3484 3739 if ((cluster_bootflags & CLUSTER_BOOTED) == 0) {
3485 3740 return;
3486 3741 }
3487 3742
3488 3743 /*
3489 3744 * Check for development/debugging. It is possible to boot a node
3490 3745 * in non-cluster mode, and then run a special script, currently
3491 3746 * available only to developers, to bring up the node as part of a
3492 3747 * cluster. The problem is that running such a script does not
3493 3748 * result in the routine flk_init() being called and hence global array
3494 3749 * nlm_reg_status is NULL. The NLM thinks it's in cluster mode,
3495 3750 * but the LLM needs to do an additional check to see if the global
3496 3751 * array has been created or not. If nlm_reg_status is NULL, then
3497 3752 * return, else continue.
3498 3753 */
3499 3754 if (nlm_reg_status == NULL) {
3500 3755 return;
3501 3756 }
3502 3757
3503 3758 ASSERT(nlmid <= nlm_status_size && nlmid >= 0);
3504 3759 mutex_enter(&nlm_reg_lock);
3505 3760
3506 3761 if (FLK_REGISTRY_IS_NLM_UNKNOWN(nlm_reg_status, nlmid)) {
3507 3762 /*
3508 3763 * If the NLM server "nlmid" is unknown in the NLM registry,
3509 3764 * add it to the registry in the nlm shutting down state.
3510 3765 */
3511 3766 FLK_REGISTRY_CHANGE_NLM_STATE(nlm_reg_status, nlmid,
3512 3767 FLK_NLM_SHUTTING_DOWN);
3513 3768 } else {
3514 3769 /*
3515 3770 * Change the state of the NLM server identified by "nlmid"
3516 3771 * in the NLM registry to the argument "nlm_state."
3517 3772 */
3518 3773 FLK_REGISTRY_CHANGE_NLM_STATE(nlm_reg_status, nlmid,
3519 3774 nlm_state);
3520 3775 }
3521 3776
3522 3777 /*
3523 3778 * The reason we must register the NLM server that is shutting down
3524 3779 * with an LLM that doesn't already know about it (never sent a lock
3525 3780 * request) is to handle correctly a race between shutdown and a new
3526 3781 * lock request. Suppose that a shutdown request from the NLM server
3527 3782 * invokes this routine at the LLM, and a thread is spawned to
3528 3783 * service the request. Now suppose a new lock request is in
3529 3784 * progress and has already passed the first line of defense in
3530 3785  * reclock(), which denies new lock requests from NLM servers
3531 3786 * that are not in the NLM_UP state. After the current routine
3532 3787 * is invoked for both phases of shutdown, the routine will return,
3533 3788 * having done nothing, and the lock request will proceed and
3534 3789 * probably be granted. The problem is that the shutdown was ignored
3535 3790 * by the lock request because there was no record of that NLM server
3536 3791 * shutting down. We will be in the peculiar position of thinking
3537 3792 * that we've shutdown the NLM server and all locks at all LLMs have
3538 3793 * been discarded, but in fact there's still one lock held.
3539 3794 * The solution is to record the existence of NLM server and change
3540 3795 * its state immediately to NLM_SHUTTING_DOWN. The lock request in
3541 3796 * progress may proceed because the next phase NLM_DOWN will catch
3542 3797 * this lock and discard it.
3543 3798 */
3544 3799 mutex_exit(&nlm_reg_lock);
3545 3800
3546 3801 switch (nlm_state) {
3547 3802 case FLK_NLM_UP:
3548 3803 /*
3549 3804 * Change the NLM state of all locks still held on behalf of
3550 3805 * the NLM server identified by "nlmid" to NLM_UP.
3551 3806 */
3552 3807 cl_flk_change_nlm_state_all_locks(nlmid, FLK_NLM_UP);
3553 3808 break;
3554 3809
3555 3810 case FLK_NLM_SHUTTING_DOWN:
3556 3811 /*
3557 3812 * Wake up all sleeping locks for the NLM server identified
3558 3813 * by "nlmid." Note that eventually all woken threads will
3559 3814 * have their lock requests cancelled and descriptors
3560 3815 * removed from the sleeping lock list. Note that the NLM
3561 3816 * server state associated with each lock descriptor is
3562 3817 * changed to FLK_NLM_SHUTTING_DOWN.
3563 3818 */
3564 3819 cl_flk_wakeup_sleeping_nlm_locks(nlmid);
3565 3820 break;
3566 3821
3567 3822 case FLK_NLM_DOWN:
3568 3823 /*
3569 3824 * Discard all active, granted locks for this NLM server
3570 3825 * identified by "nlmid."
3571 3826 */
3572 3827 cl_flk_unlock_nlm_granted(nlmid);
3573 3828 break;
3574 3829
3575 3830 default:
3576 3831 panic("cl_set_nlm_status: bad status (%d)", nlm_state);
3577 3832 }
3578 3833 }
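A sketch of the per-NLM-server state sequence implied by the switch above; the calling context shown is hypothetical.

    /* NLM server "nlmid" is going away: wake sleepers, then flush its locks. */
    cl_flk_set_nlm_status(nlmid, FLK_NLM_SHUTTING_DOWN);
    cl_flk_set_nlm_status(nlmid, FLK_NLM_DOWN);

    /* ... later, once that NLM server is serving locks again ... */
    cl_flk_set_nlm_status(nlmid, FLK_NLM_UP);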
3579 3834
3580 3835 /*
3581 3836 * Set the control status for lock manager requests.
3582 3837 *
3583 3838 * Note that when this routine is called with FLK_WAKEUP_SLEEPERS, there
3584 3839  * may be lock requests that have gotten started but not finished. In
3585 3840 * particular, there may be blocking requests that are in the callback code
3586 3841 * before sleeping (so they're not holding the lock for the graph). If
3587 3842 * such a thread reacquires the graph's lock (to go to sleep) after
3588 3843 * flk_lockmgr_status is set to a non-up value, it will notice the status
3589 3844 * and bail out. If the request gets granted before the thread can check
3590 3845 * flk_lockmgr_status, let it continue normally. It will get flushed when
3591 3846 * we are called with FLK_LOCKMGR_DOWN.
3592 3847 */
3593 3848
3594 3849 void
3595 3850 flk_set_lockmgr_status(flk_lockmgr_status_t status)
3596 3851 {
3597 3852 int i;
3598 3853 graph_t *gp;
3599 3854 struct flock_globals *fg;
3600 3855
3601 3856 fg = flk_get_globals();
3602 3857 ASSERT(fg != NULL);
3603 3858
3604 3859 mutex_enter(&flock_lock);
3605 3860 fg->flk_lockmgr_status = status;
3606 3861 mutex_exit(&flock_lock);
3607 3862
3608 3863 /*
3609 3864 * If the lock manager is coming back up, all that's needed is to
3610 3865 * propagate this information to the graphs. If the lock manager
3611 3866 * is going down, additional action is required, and each graph's
3612 3867 * copy of the state is updated atomically with this other action.
3613 3868 */
3614 3869 switch (status) {
3615 3870 case FLK_LOCKMGR_UP:
3616 3871 for (i = 0; i < HASH_SIZE; i++) {
3617 3872 mutex_enter(&flock_lock);
3618 3873 gp = lock_graph[i];
3619 3874 mutex_exit(&flock_lock);
3620 3875 if (gp == NULL)
3621 3876 continue;
3622 3877 mutex_enter(&gp->gp_mutex);
3623 3878 fg->lockmgr_status[i] = status;
3624 3879 mutex_exit(&gp->gp_mutex);
3625 3880 }
3626 3881 break;
3627 3882 case FLK_WAKEUP_SLEEPERS:
3628 3883 wakeup_sleeping_lockmgr_locks(fg);
3629 3884 break;
3630 3885 case FLK_LOCKMGR_DOWN:
3631 3886 unlock_lockmgr_granted(fg);
3632 3887 break;
3633 3888 default:
3634 3889 panic("flk_set_lockmgr_status: bad status (%d)", status);
3635 3890 break;
3636 3891 }
3637 3892 }
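A sketch of the local lock-manager shutdown/restart ordering implied by the comment above; the caller shown is hypothetical.

    /* Lock manager going down: poke blocked requests, then discard granted locks. */
    flk_set_lockmgr_status(FLK_WAKEUP_SLEEPERS);
    flk_set_lockmgr_status(FLK_LOCKMGR_DOWN);

    /* Lock manager back up: just propagate the new status to the graphs. */
    flk_set_lockmgr_status(FLK_LOCKMGR_UP);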
3638 3893
3639 3894 /*
3640 3895 * This routine returns all the locks that are active or sleeping and are
3641 3896 * associated with a particular set of identifiers. If lock_state != 0, then
3642 3897 * only locks that match the lock_state are returned. If lock_state == 0, then
3643 3898 * all locks are returned. If pid == NOPID, the pid is ignored. If
3644 3899 * use_sysid is FALSE, then the sysid is ignored. If vp is NULL, then the
3645 3900 * vnode pointer is ignored.
3646 3901 *
3647 3902 * A list containing the vnode pointer and an flock structure
3648 3903 * describing the lock is returned. Each element in the list is
3649 3904 * dynamically allocated and must be freed by the caller. The
3650 3905 * last item in the list is denoted by a NULL value in the ll_next
3651 3906 * field.
3652 3907 *
3653 3908 * The vnode pointers returned are held. The caller is responsible
3654 3909 * for releasing these. Note that the returned list is only a snapshot of
3655 3910 * the current lock information, and that it is a snapshot of a moving
3656 3911 * target (only one graph is locked at a time).
3657 3912 */
3658 3913
3659 3914 locklist_t *
3660 3915 get_lock_list(int list_type, int lock_state, int sysid, boolean_t use_sysid,
3661 3916 pid_t pid, const vnode_t *vp, zoneid_t zoneid)
3662 3917 {
3663 3918 lock_descriptor_t *lock;
3664 3919 lock_descriptor_t *graph_head;
3665 3920 locklist_t listhead;
3666 3921 locklist_t *llheadp;
3667 3922 locklist_t *llp;
3668 3923 locklist_t *lltp;
3669 3924 graph_t *gp;
3670 3925 int i;
3671 3926 int first_index; /* graph index */
3672 3927 int num_indexes; /* graph index */
3673 3928
3674 3929 ASSERT((list_type == FLK_ACTIVE_STATE) ||
3675 3930 (list_type == FLK_SLEEPING_STATE));
3676 3931
3677 3932 /*
3678 3933 * Get a pointer to something to use as a list head while building
3679 3934 * the rest of the list.
3680 3935 */
3681 3936 llheadp = &listhead;
3682 3937 lltp = llheadp;
3683 3938 llheadp->ll_next = (locklist_t *)NULL;
3684 3939
3685 3940 /* Figure out which graphs we want to look at. */
3686 3941 if (vp == NULL) {
3687 3942 first_index = 0;
3688 3943 num_indexes = HASH_SIZE;
3689 3944 } else {
3690 3945 first_index = HASH_INDEX(vp);
3691 3946 num_indexes = 1;
3692 3947 }
3693 3948
3694 3949 for (i = first_index; i < first_index + num_indexes; i++) {
3695 3950 mutex_enter(&flock_lock);
3696 3951 gp = lock_graph[i];
3697 3952 mutex_exit(&flock_lock);
3698 3953 if (gp == NULL) {
3699 3954 continue;
3700 3955 }
3701 3956
3702 3957 mutex_enter(&gp->gp_mutex);
3703 3958 graph_head = (list_type == FLK_ACTIVE_STATE) ?
3704 3959 ACTIVE_HEAD(gp) : SLEEPING_HEAD(gp);
3705 3960 for (lock = graph_head->l_next;
3706 3961 lock != graph_head;
3707 3962 lock = lock->l_next) {
3708 3963 if (use_sysid && lock->l_flock.l_sysid != sysid)
3709 3964 continue;
3710 3965 if (pid != NOPID && lock->l_flock.l_pid != pid)
3711 3966 continue;
3712 3967 if (vp != NULL && lock->l_vnode != vp)
3713 3968 continue;
3714 3969 if (lock_state && !(lock_state & lock->l_state))
3715 3970 continue;
3716 3971 if (zoneid != lock->l_zoneid && zoneid != ALL_ZONES)
3717 3972 continue;
3718 3973 /*
3719 3974 * A matching lock was found. Allocate
3720 3975 * space for a new locklist entry and fill
3721 3976 * it in.
3722 3977 */
3723 3978 llp = kmem_alloc(sizeof (locklist_t), KM_SLEEP);
3724 3979 lltp->ll_next = llp;
3725 3980 VN_HOLD(lock->l_vnode);
3726 3981 llp->ll_vp = lock->l_vnode;
3727 3982 create_flock(lock, &(llp->ll_flock));
3728 3983 llp->ll_next = (locklist_t *)NULL;
3729 3984 lltp = llp;
3730 3985 }
3731 3986 mutex_exit(&gp->gp_mutex);
3732 3987 }
3733 3988
3734 3989 llp = llheadp->ll_next;
3735 3990 return (llp);
3736 3991 }
3737 3992
3738 3993 /*
3739 3994 * These two functions are simply interfaces to get_lock_list. They return
3740 3995 * a list of sleeping or active locks for the given sysid and pid. See
3741 3996 * get_lock_list for details.
3742 3997 *
3743 3998 * In either case we don't particularly care to specify the zone of interest;
3744 3999 * the sysid-space is global across zones, so the sysid will map to exactly one
3745 4000 * zone, and we'll return information for that zone.
3746 4001 */
3747 4002
3748 4003 locklist_t *
3749 4004 flk_get_sleeping_locks(int sysid, pid_t pid)
3750 4005 {
3751 4006 return (get_lock_list(FLK_SLEEPING_STATE, 0, sysid, B_TRUE, pid, NULL,
3752 4007 ALL_ZONES));
3753 4008 }
3754 4009
3755 4010 locklist_t *
3756 4011 flk_get_active_locks(int sysid, pid_t pid)
3757 4012 {
3758 4013 return (get_lock_list(FLK_ACTIVE_STATE, 0, sysid, B_TRUE, pid, NULL,
3759 4014 ALL_ZONES));
3760 4015 }
3761 4016
3762 4017 /*
3763 4018 * Another interface to get_lock_list. This one returns all the active
3764 4019 * locks for a given vnode. Again, see get_lock_list for details.
3765 4020 *
3766 4021 * We don't need to specify which zone's locks we're interested in. The matter
3767 4022 * would only be interesting if the vnode belonged to NFS, and NFS vnodes can't
3768 4023 * be used by multiple zones, so the list of locks will all be from the right
3769 4024 * zone.
3770 4025 */
3771 4026
3772 4027 locklist_t *
3773 4028 flk_active_locks_for_vp(const vnode_t *vp)
3774 4029 {
3775 4030 return (get_lock_list(FLK_ACTIVE_STATE, 0, 0, B_FALSE, NOPID, vp,
3776 4031 ALL_ZONES));
3777 4032 }
3778 4033
3779 4034 /*
3780 4035 * Another interface to get_lock_list. This one returns all the active
3781 4036 * nbmand locks for a given vnode. Again, see get_lock_list for details.
3782 4037 *
3783 4038 * See the comment for flk_active_locks_for_vp() for why we don't care to
3784 4039 * specify the particular zone of interest.
3785 4040 */
3786 4041 locklist_t *
3787 4042 flk_active_nbmand_locks_for_vp(const vnode_t *vp)
3788 4043 {
3789 4044 return (get_lock_list(FLK_ACTIVE_STATE, NBMAND_LOCK, 0, B_FALSE,
3790 4045 NOPID, vp, ALL_ZONES));
3791 4046 }
3792 4047
3793 4048 /*
3794 4049 * Another interface to get_lock_list. This one returns all the active
3795 4050 * nbmand locks for a given pid. Again, see get_lock_list for details.
3796 4051 *
3797 4052 * The zone doesn't need to be specified here; the locks held by a
3798 4053 * particular process will either be local (ie, non-NFS) or from the zone
3799 4054 * the process is executing in. This is because other parts of the system
3800 4055 * ensure that an NFS vnode can't be used in a zone other than that in
3801 4056 * which it was opened.
3802 4057 */
3803 4058 locklist_t *
3804 4059 flk_active_nbmand_locks(pid_t pid)
3805 4060 {
3806 4061 return (get_lock_list(FLK_ACTIVE_STATE, NBMAND_LOCK, 0, B_FALSE,
3807 4062 pid, NULL, ALL_ZONES));
3808 4063 }
3809 4064
3810 4065 /*
3811 4066 * Free up all entries in the locklist.
3812 4067 */
3813 4068 void
3814 4069 flk_free_locklist(locklist_t *llp)
3815 4070 {
3816 4071 locklist_t *next_llp;
3817 4072
3818 4073 while (llp) {
3819 4074 next_llp = llp->ll_next;
3820 4075 VN_RELE(llp->ll_vp);
3821 4076 kmem_free(llp, sizeof (*llp));
3822 4077 llp = next_llp;
3823 4078 }
3824 4079 }
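A minimal sketch of consuming the snapshot these interfaces return; vp is assumed from the surrounding context and the cmn_err() output is illustrative only.

    locklist_t *llp, *ll;

    llp = flk_active_locks_for_vp(vp);
    for (ll = llp; ll != NULL; ll = ll->ll_next) {
            cmn_err(CE_CONT, "pid %d sysid %d start %lld len %lld\n",
                ll->ll_flock.l_pid, ll->ll_flock.l_sysid,
                (longlong_t)ll->ll_flock.l_start,
                (longlong_t)ll->ll_flock.l_len);
    }
    /* Frees every entry and releases the vnode holds taken by get_lock_list(). */
    flk_free_locklist(llp);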
3825 4080
3826 4081 static void
3827 4082 cl_flk_change_nlm_state_all_locks(int nlmid, flk_nlm_status_t nlm_state)
3828 4083 {
3829 4084 /*
3830 4085 * For each graph "lg" in the hash table lock_graph do
3831 4086 * a. Get the list of sleeping locks
3832 4087 * b. For each lock descriptor in the list do
3833 4088 * i. If the requested lock is an NLM server request AND
3834 4089 * the nlmid is the same as the routine argument then
3835 4090 * change the lock descriptor's state field to
3836 4091 * "nlm_state."
3837 4092 * c. Get the list of active locks
3838 4093 * d. For each lock descriptor in the list do
3839 4094 * i. If the requested lock is an NLM server request AND
3840 4095 * the nlmid is the same as the routine argument then
3841 4096 * change the lock descriptor's state field to
3842 4097 * "nlm_state."
3843 4098 */
3844 4099
3845 4100 int i;
3846 4101 graph_t *gp; /* lock graph */
3847 4102 lock_descriptor_t *lock; /* lock */
3848 4103 lock_descriptor_t *nlock = NULL; /* next lock */
3849 4104 int lock_nlmid;
3850 4105
3851 4106 for (i = 0; i < HASH_SIZE; i++) {
3852 4107 mutex_enter(&flock_lock);
3853 4108 gp = lock_graph[i];
3854 4109 mutex_exit(&flock_lock);
3855 4110 if (gp == NULL) {
3856 4111 continue;
3857 4112 }
3858 4113
3859 4114 /* Get list of sleeping locks in current lock graph. */
3860 4115 mutex_enter(&gp->gp_mutex);
3861 4116 for (lock = SLEEPING_HEAD(gp)->l_next;
3862 4117 lock != SLEEPING_HEAD(gp);
3863 4118 lock = nlock) {
3864 4119 nlock = lock->l_next;
3865 4120 /* get NLM id */
3866 4121 lock_nlmid = GETNLMID(lock->l_flock.l_sysid);
3867 4122
3868 4123 /*
3869 4124 * If NLM server request AND nlmid of lock matches
3870 4125 * nlmid of argument, then set the NLM state of the
3871 4126 * lock to "nlm_state."
3872 4127 */
3873 4128 if (IS_LOCKMGR(lock) && nlmid == lock_nlmid) {
3874 4129 SET_NLM_STATE(lock, nlm_state);
3875 4130 }
3876 4131 }
3877 4132
3878 4133 /* Get list of active locks in current lock graph. */
3879 4134 for (lock = ACTIVE_HEAD(gp)->l_next;
3880 4135 lock != ACTIVE_HEAD(gp);
3881 4136 lock = nlock) {
3882 4137 nlock = lock->l_next;
3883 4138 /* get NLM id */
3884 4139 lock_nlmid = GETNLMID(lock->l_flock.l_sysid);
3885 4140
3886 4141 /*
3887 4142 * If NLM server request AND nlmid of lock matches
3888 4143 * nlmid of argument, then set the NLM state of the
3889 4144 * lock to "nlm_state."
3890 4145 */
3891 4146 if (IS_LOCKMGR(lock) && nlmid == lock_nlmid) {
3892 4147 ASSERT(IS_ACTIVE(lock));
3893 4148 SET_NLM_STATE(lock, nlm_state);
3894 4149 }
3895 4150 }
3896 4151 mutex_exit(&gp->gp_mutex);
3897 4152 }
3898 4153 }
3899 4154
3900 4155 /*
3901 4156 * Requires: "nlmid" >= 1 and <= clconf_maximum_nodeid().
3902 4157 * Effects: Find all sleeping lock manager requests _only_ for the NLM server
3903 4158 * identified by "nlmid." Poke those lock requests.
3904 4159 */
3905 4160 static void
3906 4161 cl_flk_wakeup_sleeping_nlm_locks(int nlmid)
3907 4162 {
3908 4163 lock_descriptor_t *lock;
3909 4164 lock_descriptor_t *nlock = NULL; /* next lock */
3910 4165 int i;
3911 4166 graph_t *gp;
3912 4167 int lock_nlmid;
3913 4168
3914 4169 for (i = 0; i < HASH_SIZE; i++) {
3915 4170 mutex_enter(&flock_lock);
3916 4171 gp = lock_graph[i];
3917 4172 mutex_exit(&flock_lock);
3918 4173 if (gp == NULL) {
3919 4174 continue;
3920 4175 }
3921 4176
3922 4177 mutex_enter(&gp->gp_mutex);
3923 4178 for (lock = SLEEPING_HEAD(gp)->l_next;
3924 4179 lock != SLEEPING_HEAD(gp);
3925 4180 lock = nlock) {
3926 4181 nlock = lock->l_next;
3927 4182 /*
3928 4183 * If NLM server request _and_ nlmid of lock matches
3929 4184 * nlmid of argument, then set the NLM state of the
3930 4185 * lock to NLM_SHUTTING_DOWN, and wake up sleeping
3931 4186 * request.
3932 4187 */
3933 4188 if (IS_LOCKMGR(lock)) {
3934 4189 /* get NLM id */
3935 4190 lock_nlmid =
3936 4191 GETNLMID(lock->l_flock.l_sysid);
3937 4192 if (nlmid == lock_nlmid) {
3938 4193 SET_NLM_STATE(lock,
3939 4194 FLK_NLM_SHUTTING_DOWN);
3940 4195 INTERRUPT_WAKEUP(lock);
3941 4196 }
3942 4197 }
3943 4198 }
3944 4199 mutex_exit(&gp->gp_mutex);
3945 4200 }
3946 4201 }
3947 4202
3948 4203 /*
3949 4204 * Requires: "nlmid" >= 1 and <= clconf_maximum_nodeid()
3950 4205 * Effects: Find all active (granted) lock manager locks _only_ for the
3951 4206 * NLM server identified by "nlmid" and release them.
3952 4207 */
3953 4208 static void
3954 4209 cl_flk_unlock_nlm_granted(int nlmid)
3955 4210 {
3956 4211 lock_descriptor_t *lock;
3957 4212 lock_descriptor_t *nlock = NULL; /* next lock */
3958 4213 int i;
3959 4214 graph_t *gp;
3960 4215 int lock_nlmid;
3961 4216
3962 4217 for (i = 0; i < HASH_SIZE; i++) {
3963 4218 mutex_enter(&flock_lock);
3964 4219 gp = lock_graph[i];
3965 4220 mutex_exit(&flock_lock);
3966 4221 if (gp == NULL) {
3967 4222 continue;
3968 4223 }
3969 4224
3970 4225 mutex_enter(&gp->gp_mutex);
3971 4226 for (lock = ACTIVE_HEAD(gp)->l_next;
3972 4227 lock != ACTIVE_HEAD(gp);
3973 4228 lock = nlock) {
3974 4229 nlock = lock->l_next;
3975 4230 ASSERT(IS_ACTIVE(lock));
3976 4231
3977 4232 /*
3978 4233 * If it's an NLM server request _and_ nlmid of
3979 4234 * the lock matches nlmid of argument, then
3980 4235  * remove the active lock from the list, wake up blocked
3981 4236 * threads, and free the storage for the lock.
3982 4237 * Note that there's no need to mark the NLM state
3983 4238 * of this lock to NLM_DOWN because the lock will
3984 4239 * be deleted anyway and its storage freed.
3985 4240 */
3986 4241 if (IS_LOCKMGR(lock)) {
3987 4242 /* get NLM id */
3988 4243 lock_nlmid = GETNLMID(lock->l_flock.l_sysid);
3989 4244 if (nlmid == lock_nlmid) {
3990 4245 flk_delete_active_lock(lock, 0);
3991 4246 flk_wakeup(lock, 1);
3992 4247 flk_free_lock(lock);
3993 4248 }
3994 4249 }
3995 4250 }
3996 4251 mutex_exit(&gp->gp_mutex);
3997 4252 }
3998 4253 }
3999 4254
4000 4255 /*
4001 4256 * Find all sleeping lock manager requests and poke them.
4002 4257 */
4003 4258 static void
4004 4259 wakeup_sleeping_lockmgr_locks(struct flock_globals *fg)
4005 4260 {
4006 4261 lock_descriptor_t *lock;
4007 4262 lock_descriptor_t *nlock = NULL; /* next lock */
4008 4263 int i;
4009 4264 graph_t *gp;
4010 4265 zoneid_t zoneid = getzoneid();
4011 4266
4012 4267 for (i = 0; i < HASH_SIZE; i++) {
4013 4268 mutex_enter(&flock_lock);
4014 4269 gp = lock_graph[i];
4015 4270 mutex_exit(&flock_lock);
4016 4271 if (gp == NULL) {
4017 4272 continue;
4018 4273 }
4019 4274
4020 4275 mutex_enter(&gp->gp_mutex);
4021 4276 fg->lockmgr_status[i] = FLK_WAKEUP_SLEEPERS;
4022 4277 for (lock = SLEEPING_HEAD(gp)->l_next;
4023 4278 lock != SLEEPING_HEAD(gp);
4024 4279 lock = nlock) {
4025 4280 nlock = lock->l_next;
4026 4281 if (IS_LOCKMGR(lock) && lock->l_zoneid == zoneid) {
4027 4282 INTERRUPT_WAKEUP(lock);
4028 4283 }
4029 4284 }
4030 4285 mutex_exit(&gp->gp_mutex);
4031 4286 }
4032 4287 }
4033 4288
4034 4289
4035 4290 /*
4036 4291 * Find all active (granted) lock manager locks and release them.
4037 4292 */
4038 4293 static void
4039 4294 unlock_lockmgr_granted(struct flock_globals *fg)
4040 4295 {
4041 4296 lock_descriptor_t *lock;
4042 4297 lock_descriptor_t *nlock = NULL; /* next lock */
4043 4298 int i;
4044 4299 graph_t *gp;
4045 4300 zoneid_t zoneid = getzoneid();
4046 4301
4047 4302 for (i = 0; i < HASH_SIZE; i++) {
4048 4303 mutex_enter(&flock_lock);
4049 4304 gp = lock_graph[i];
4050 4305 mutex_exit(&flock_lock);
4051 4306 if (gp == NULL) {
4052 4307 continue;
4053 4308 }
4054 4309
4055 4310 mutex_enter(&gp->gp_mutex);
4056 4311 fg->lockmgr_status[i] = FLK_LOCKMGR_DOWN;
4057 4312 for (lock = ACTIVE_HEAD(gp)->l_next;
4058 4313 lock != ACTIVE_HEAD(gp);
4059 4314 lock = nlock) {
4060 4315 nlock = lock->l_next;
4061 4316 if (IS_LOCKMGR(lock) && lock->l_zoneid == zoneid) {
4062 4317 ASSERT(IS_ACTIVE(lock));
4063 4318 flk_delete_active_lock(lock, 0);
4064 4319 flk_wakeup(lock, 1);
4065 4320 flk_free_lock(lock);
4066 4321 }
4067 4322 }
4068 4323 mutex_exit(&gp->gp_mutex);
4069 4324 }
4070 4325 }
4071 4326
4072 -
4073 4327 /*
4074 4328 * Wait until a lock is granted, cancelled, or interrupted.
4075 4329 */
4076 4330
4077 4331 static void
4078 4332 wait_for_lock(lock_descriptor_t *request)
4079 4333 {
4080 4334 graph_t *gp = request->l_graph;
4335 + vnode_t *vp = request->l_vnode;
4081 4336
4082 4337 ASSERT(MUTEX_HELD(&gp->gp_mutex));
4083 4338
4084 4339 while (!(IS_GRANTED(request)) && !(IS_CANCELLED(request)) &&
4085 4340 !(IS_INTERRUPTED(request))) {
4086 - if (!cv_wait_sig(&request->l_cv, &gp->gp_mutex)) {
4341 + lock_descriptor_t *lock;
4342 +
4343 + if (stale_lock_timeout == 0) {
4344 + /* The stale lock detection is disabled */
4345 + if (cv_wait_sig(&request->l_cv, &gp->gp_mutex) == 0) {
4346 + flk_set_state(request, FLK_INTERRUPTED_STATE);
4347 + request->l_state |= INTERRUPTED_LOCK;
4348 + }
4349 +
4350 + continue;
4351 + }
4352 +
4353 + SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
4354 +
4355 + if (lock != NULL) {
4356 + do {
4357 + if (BLOCKS(lock, request)) {
4358 + flk_stale_lock_check(lock);
4359 + break;
4360 + }
4361 + lock = lock->l_next;
4362 + } while (lock->l_vnode == vp);
4363 + }
4364 +
4365 + if (cv_timedwait_sig(&request->l_cv, &gp->gp_mutex,
4366 + ddi_get_lbolt() + SEC_TO_TICK(stale_lock_timeout)) == 0) {
4087 4367 flk_set_state(request, FLK_INTERRUPTED_STATE);
4088 4368 request->l_state |= INTERRUPTED_LOCK;
4089 4369 }
4090 4370 }
4091 4371 }
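For reference, a sketch of the cv_timedwait_sig(9F) return convention the loop above relies on; stale_lock_timeout and flk_stale_lock_check() are introduced elsewhere in this change.

    /*
     * cv_timedwait_sig() return values, as used above:
     *   -1  timeout expired   -> loop, re-run the stale-lock check on
     *                            whatever active lock still blocks the request
     *    0  signal received   -> mark the request INTERRUPTED and return
     *   >0  cv_signal()/cv_broadcast() -> loop, re-test GRANTED/CANCELLED
     */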
4092 4372
4093 4373 /*
4094 4374 * Create an flock structure from the existing lock information
4095 4375 *
4096 4376 * This routine is used to create flock structures for the lock manager
4097 4377 * to use in a reclaim request. Since the lock was originated on this
4098 4378 * host, it must be conforming to UNIX semantics, so no checking is
4099 4379 * done to make sure it falls within the lower half of the 32-bit range.
4100 4380 */
4101 4381
4102 4382 static void
4103 4383 create_flock(lock_descriptor_t *lp, flock64_t *flp)
4104 4384 {
4105 4385 ASSERT(lp->l_end == MAX_U_OFFSET_T || lp->l_end <= MAXEND);
4106 4386 ASSERT(lp->l_end >= lp->l_start);
4107 4387
4108 4388 flp->l_type = lp->l_type;
4109 4389 flp->l_whence = 0;
4110 4390 flp->l_start = lp->l_start;
4111 4391 flp->l_len = (lp->l_end == MAX_U_OFFSET_T) ? 0 :
4112 4392 (lp->l_end - lp->l_start + 1);
4113 4393 flp->l_sysid = lp->l_flock.l_sysid;
4114 4394 flp->l_pid = lp->l_flock.l_pid;
4115 4395 }
4116 4396
4117 4397 /*
4118 4398 * Convert flock_t data describing a lock range into unsigned long starting
4119 4399 * and ending points, which are put into lock_request. Returns 0 or an
4120 4400 * errno value.
4121 4401 */
4122 4402
4123 4403 int
4124 4404 flk_convert_lock_data(vnode_t *vp, flock64_t *flp,
4125 4405 u_offset_t *start, u_offset_t *end, offset_t offset)
4126 4406 {
4127 4407 struct vattr vattr;
4128 4408 int error;
4129 4409
4130 4410 /*
4131 4411 * Determine the starting point of the request
4132 4412 */
4133 4413 switch (flp->l_whence) {
4134 4414 case 0: /* SEEK_SET */
4135 4415 *start = (u_offset_t)flp->l_start;
4136 4416 break;
4137 4417 case 1: /* SEEK_CUR */
4138 4418 *start = (u_offset_t)(flp->l_start + offset);
4139 4419 break;
4140 4420 case 2: /* SEEK_END */
4141 4421 vattr.va_mask = AT_SIZE;
4142 - if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
4422 + if ((error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL)) != 0)
4143 4423 return (error);
4144 4424 *start = (u_offset_t)(flp->l_start + vattr.va_size);
4145 4425 break;
4146 4426 default:
4147 4427 return (EINVAL);
4148 4428 }
4149 4429
4150 4430 /*
4151 4431 * Determine the range covered by the request.
4152 4432 */
4153 4433 if (flp->l_len == 0)
4154 4434 *end = MAX_U_OFFSET_T;
4155 4435 else if ((offset_t)flp->l_len > 0) {
4156 4436 *end = (u_offset_t)(*start + (flp->l_len - 1));
4157 4437 } else {
4158 4438 /*
4159 4439 * Negative length; why do we even allow this ?
4160 4440 * Because this allows easy specification of
4161 4441 * the last n bytes of the file.
4162 4442 */
4163 4443 *end = *start;
4164 4444 *start += (u_offset_t)flp->l_len;
4165 4445 (*start)++;
4166 4446 }
4167 4447 return (0);
4168 4448 }
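A worked example of the negative-length case handled just above, with hypothetical values.

    /*
     * With l_whence = 0, l_start = 100 and l_len = -10:
     *      end   = 100
     *      start = 100 + (-10) + 1 = 91
     * i.e. the ten bytes ending at offset 100, the range [91, 100].
     */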
4169 4449
4170 4450 /*
4171 4451  * Check the validity of lock data. This can be used by the NFS
4172 4452 * frlock routines to check data before contacting the server. The
4173 4453 * server must support semantics that aren't as restrictive as
4174 4454 * the UNIX API, so the NFS client is required to check.
4175 - * The maximum is now passed in by the caller.
4455 + * The maximum is passed in by the caller.
4176 4456 */
4177 4457
4178 4458 int
4179 4459 flk_check_lock_data(u_offset_t start, u_offset_t end, offset_t max)
4180 4460 {
4181 4461 /*
4182 4462 * The end (length) for local locking should never be greater
4183 - * than MAXEND. However, the representation for
4463 + * than max. However, the representation for
4184 4464 * the entire file is MAX_U_OFFSET_T.
4185 4465 */
4186 4466 if ((start > max) ||
4187 4467 ((end > max) && (end != MAX_U_OFFSET_T))) {
4188 4468 return (EINVAL);
4189 4469 }
4190 4470 if (start > end) {
4191 4471 return (EINVAL);
4192 4472 }
4193 4473 return (0);
4194 4474 }
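A sketch of the usual calling pattern (reclock() follows this shape): convert the caller's flock64 first, then validate the resulting range. MAXEND here stands in for whatever maximum the caller supports.

    u_offset_t start, end;
    int error;

    if ((error = flk_convert_lock_data(vp, flp, &start, &end, offset)) != 0)
            return (error);
    if ((error = flk_check_lock_data(start, end, MAXEND)) != 0)
            return (error);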
4195 4475
4196 4476 /*
4197 4477 * Fill in request->l_flock with information about the lock blocking the
4198 4478 * request. The complexity here is that lock manager requests are allowed
4199 4479 * to see into the upper part of the 32-bit address range, whereas local
4200 4480 * requests are only allowed to see signed values.
4201 4481 *
4202 4482 * What should be done when "blocker" is a lock manager lock that uses the
4203 4483 * upper portion of the 32-bit range, but "request" is local? Since the
4204 4484 * request has already been determined to have been blocked by the blocker,
4205 4485 * at least some portion of "blocker" must be in the range of the request,
4206 4486 * or the request extends to the end of file. For the first case, the
4207 4487 * portion in the lower range is returned with the indication that it goes
4208 4488 * "to EOF." For the second case, the last byte of the lower range is
4209 4489 * returned with the indication that it goes "to EOF."
4210 4490 */
4211 4491
4212 4492 static void
4213 4493 report_blocker(lock_descriptor_t *blocker, lock_descriptor_t *request)
4214 4494 {
4215 4495 flock64_t *flrp; /* l_flock portion of request */
4216 4496
4217 4497 ASSERT(blocker != NULL);
4218 4498
4219 4499 flrp = &request->l_flock;
4220 4500 flrp->l_whence = 0;
4221 4501 flrp->l_type = blocker->l_type;
4222 4502 flrp->l_pid = blocker->l_flock.l_pid;
4223 4503 flrp->l_sysid = blocker->l_flock.l_sysid;
4224 4504 request->l_ofd = blocker->l_ofd;
4225 4505
4226 4506 if (IS_LOCKMGR(request)) {
4227 4507 flrp->l_start = blocker->l_start;
4228 4508 if (blocker->l_end == MAX_U_OFFSET_T)
4229 4509 flrp->l_len = 0;
4230 4510 else
4231 4511 flrp->l_len = blocker->l_end - blocker->l_start + 1;
4232 4512 } else {
4233 4513 if (blocker->l_start > MAXEND) {
4234 4514 flrp->l_start = MAXEND;
4235 4515 flrp->l_len = 0;
4236 4516 } else {
4237 4517 flrp->l_start = blocker->l_start;
4238 4518 if (blocker->l_end == MAX_U_OFFSET_T)
4239 4519 flrp->l_len = 0;
4240 4520 else
4241 4521 flrp->l_len = blocker->l_end -
4242 4522 blocker->l_start + 1;
4243 4523 }
4244 4524 }
4245 4525 }
4246 4526
4247 4527 /*
4248 4528 * PSARC case 1997/292
4249 4529 */
4250 4530 /*
4251 4531 * This is the public routine exported by flock.h.
4252 4532 */
4253 4533 void
4254 4534 cl_flk_change_nlm_state_to_unknown(int nlmid)
4255 4535 {
4256 4536 /*
4257 4537 * Check to see if node is booted as a cluster. If not, return.
4258 4538 */
4259 4539 if ((cluster_bootflags & CLUSTER_BOOTED) == 0) {
4260 4540 return;
4261 4541 }
4262 4542
4263 4543 /*
4264 4544 * See comment in cl_flk_set_nlm_status().
4265 4545 */
4266 4546 if (nlm_reg_status == NULL) {
4267 4547 return;
4268 4548 }
4269 4549
4270 4550 /*
4271 4551 * protect NLM registry state with a mutex.
4272 4552 */
4273 4553 ASSERT(nlmid <= nlm_status_size && nlmid >= 0);
4274 4554 mutex_enter(&nlm_reg_lock);
4275 4555 FLK_REGISTRY_CHANGE_NLM_STATE(nlm_reg_status, nlmid, FLK_NLM_UNKNOWN);
4276 4556 mutex_exit(&nlm_reg_lock);
4277 4557 }
4278 4558
4279 4559 /*
4280 4560 * Return non-zero if the given I/O request conflicts with an active NBMAND
4281 4561 * lock.
4282 4562 * If svmand is non-zero, it means look at all active locks, not just NBMAND
4283 4563 * locks.
4284 4564 */
4285 4565
4286 4566 int
4287 4567 nbl_lock_conflict(vnode_t *vp, nbl_op_t op, u_offset_t offset,
4288 4568 ssize_t length, int svmand, caller_context_t *ct)
4289 4569 {
4290 4570 int conflict = 0;
4291 4571 graph_t *gp;
4292 4572 lock_descriptor_t *lock;
4293 4573 pid_t pid;
4294 4574 int sysid;
4295 4575
4296 4576 if (ct == NULL) {
4297 4577 pid = curproc->p_pid;
4298 4578 sysid = 0;
4299 4579 } else {
4300 4580 pid = ct->cc_pid;
4301 4581 sysid = ct->cc_sysid;
4302 4582 }
4303 4583
4304 4584 mutex_enter(&flock_lock);
4305 4585 gp = lock_graph[HASH_INDEX(vp)];
4306 4586 mutex_exit(&flock_lock);
4307 4587 if (gp == NULL)
4308 4588 return (0);
4309 4589
4310 4590 mutex_enter(&gp->gp_mutex);
4311 4591 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
4312 4592
4313 4593 for (; lock && lock->l_vnode == vp; lock = lock->l_next) {
4314 4594 if ((svmand || (lock->l_state & NBMAND_LOCK)) &&
4315 4595 (lock->l_flock.l_sysid != sysid ||
4316 4596 lock->l_flock.l_pid != pid) &&
4317 4597 lock_blocks_io(op, offset, length,
4318 4598 lock->l_type, lock->l_start, lock->l_end)) {
4319 4599 conflict = 1;
4320 4600 break;
4321 4601 }
4322 4602 }
4323 4603 mutex_exit(&gp->gp_mutex);
4324 4604
4325 4605 return (conflict);
4326 4606 }
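A hedged sketch of the caller's side: the non-blocking-mandatory check a filesystem write path typically performs through the public nbl_conflict() wrapper; vp, uiop, cr and ct are assumed from the enclosing write routine.

    int svmand, error = 0;

    if (nbl_need_check(vp)) {
            nbl_start_crit(vp, RW_READER);
            error = nbl_svmand(vp, cr, &svmand);
            if (error == 0 &&
                nbl_conflict(vp, NBL_WRITE, uiop->uio_loffset,
                uiop->uio_resid, svmand, ct))
                    error = EACCES;
            nbl_end_crit(vp);
            if (error != 0)
                    return (error);
    }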
4327 4607
4328 4608 /*
4329 4609 * Return non-zero if the given I/O request conflicts with the given lock.
4330 4610 */
4331 4611
4332 4612 static int
4333 4613 lock_blocks_io(nbl_op_t op, u_offset_t offset, ssize_t length,
4334 4614 int lock_type, u_offset_t lock_start, u_offset_t lock_end)
4335 4615 {
4336 4616 ASSERT(op == NBL_READ || op == NBL_WRITE || op == NBL_READWRITE);
4337 4617 ASSERT(lock_type == F_RDLCK || lock_type == F_WRLCK);
4338 4618
4339 4619 if (op == NBL_READ && lock_type == F_RDLCK)
4340 4620 return (0);
4341 4621
4342 4622 if (offset <= lock_start && lock_start < offset + length)
4343 4623 return (1);
4344 4624 if (lock_start <= offset && offset <= lock_end)
4345 4625 return (1);
4346 4626
4347 4627 return (0);
4348 4628 }
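Two worked cases for the check above, with hypothetical ranges.

    /*
     * A read of [100, 149] against an F_WRLCK covering [120, 130]:
     *      lock_blocks_io(NBL_READ, 100, 50, F_WRLCK, 120, 130) == 1
     * Two read locks never conflict, regardless of overlap:
     *      lock_blocks_io(NBL_READ, 100, 50, F_RDLCK, 120, 130) == 0
     */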
4349 4629
4350 4630 #ifdef DEBUG
4351 4631 static void
4352 4632 check_active_locks(graph_t *gp)
4353 4633 {
4354 4634 lock_descriptor_t *lock, *lock1;
4355 4635 edge_t *ep;
4356 4636
4357 4637 for (lock = ACTIVE_HEAD(gp)->l_next; lock != ACTIVE_HEAD(gp);
4358 4638 lock = lock->l_next) {
4359 4639 ASSERT(IS_ACTIVE(lock));
4360 4640 ASSERT(NOT_BLOCKED(lock));
4361 4641 ASSERT(!IS_BARRIER(lock));
4362 4642
4363 4643 ep = FIRST_IN(lock);
4364 4644
4365 4645 while (ep != HEAD(lock)) {
4366 4646 ASSERT(IS_SLEEPING(ep->from_vertex));
4367 4647 ASSERT(!NOT_BLOCKED(ep->from_vertex));
4368 4648 ep = NEXT_IN(ep);
4369 4649 }
4370 4650
4371 4651 for (lock1 = lock->l_next; lock1 != ACTIVE_HEAD(gp);
4372 4652 lock1 = lock1->l_next) {
4373 4653 if (lock1->l_vnode == lock->l_vnode) {
4374 4654 if (BLOCKS(lock1, lock)) {
4375 4655 cmn_err(CE_PANIC,
4376 4656 "active lock %p blocks %p",
4377 4657 (void *)lock1, (void *)lock);
4378 4658 } else if (BLOCKS(lock, lock1)) {
4379 4659 cmn_err(CE_PANIC,
4380 4660 "active lock %p blocks %p",
4381 4661 (void *)lock, (void *)lock1);
4382 4662 }
4383 4663 }
4384 4664 }
4385 4665 }
4386 4666 }
4387 4667
4388 4668 /*
4389 4669  * Effect: This function checks to see if the transition from 'old_state' to
4390 4670 * 'new_state' is a valid one. It returns 0 if the transition is valid
4391 4671 * and 1 if it is not.
4392 4672 * For a map of valid transitions, see sys/flock_impl.h
4393 4673 */
4394 4674 static int
4395 4675 check_lock_transition(int old_state, int new_state)
4396 4676 {
4397 4677 switch (old_state) {
4398 4678 case FLK_INITIAL_STATE:
4399 4679 if ((new_state == FLK_START_STATE) ||
4400 4680 (new_state == FLK_SLEEPING_STATE) ||
4401 4681 (new_state == FLK_ACTIVE_STATE) ||
4402 4682 (new_state == FLK_DEAD_STATE)) {
4403 4683 return (0);
4404 4684 } else {
4405 4685 return (1);
4406 4686 }
4407 4687 case FLK_START_STATE:
4408 4688 if ((new_state == FLK_ACTIVE_STATE) ||
4409 4689 (new_state == FLK_DEAD_STATE)) {
4410 4690 return (0);
4411 4691 } else {
4412 4692 return (1);
4413 4693 }
4414 4694 case FLK_ACTIVE_STATE:
4415 4695 if (new_state == FLK_DEAD_STATE) {
4416 4696 return (0);
4417 4697 } else {
4418 4698 return (1);
4419 4699 }
4420 4700 case FLK_SLEEPING_STATE:
4421 4701 if ((new_state == FLK_GRANTED_STATE) ||
4422 4702 (new_state == FLK_INTERRUPTED_STATE) ||
4423 4703 (new_state == FLK_CANCELLED_STATE)) {
4424 4704 return (0);
4425 4705 } else {
4426 4706 return (1);
4427 4707 }
4428 4708 case FLK_GRANTED_STATE:
4429 4709 if ((new_state == FLK_START_STATE) ||
4430 4710 (new_state == FLK_INTERRUPTED_STATE) ||
4431 4711 (new_state == FLK_CANCELLED_STATE)) {
4432 4712 return (0);
4433 4713 } else {
4434 4714 return (1);
4435 4715 }
4436 4716 case FLK_CANCELLED_STATE:
4437 4717 if ((new_state == FLK_INTERRUPTED_STATE) ||
4438 4718 (new_state == FLK_DEAD_STATE)) {
4439 4719 return (0);
4440 4720 } else {
4441 4721 return (1);
4442 4722 }
4443 4723 case FLK_INTERRUPTED_STATE:
4444 4724 if (new_state == FLK_DEAD_STATE) {
4445 4725 return (0);
4446 4726 } else {
4447 4727 return (1);
4448 4728 }
4449 4729 case FLK_DEAD_STATE:
4450 4730 /* May be set more than once */
4451 4731 if (new_state == FLK_DEAD_STATE) {
4452 4732 return (0);
4453 4733 } else {
4454 4734 return (1);
4455 4735 }
4456 4736 default:
4457 4737 return (1);
4458 4738 }
4459 4739 }
4460 4740
4461 4741 static void
4462 4742 check_sleeping_locks(graph_t *gp)
4463 4743 {
4464 4744 lock_descriptor_t *lock1, *lock2;
4465 4745 edge_t *ep;
4466 4746 for (lock1 = SLEEPING_HEAD(gp)->l_next; lock1 != SLEEPING_HEAD(gp);
4467 4747 lock1 = lock1->l_next) {
4468 4748 ASSERT(!IS_BARRIER(lock1));
4469 4749 for (lock2 = lock1->l_next; lock2 != SLEEPING_HEAD(gp);
4470 4750 lock2 = lock2->l_next) {
4471 4751 if (lock1->l_vnode == lock2->l_vnode) {
4472 4752 if (BLOCKS(lock2, lock1)) {
4473 4753 ASSERT(!IS_GRANTED(lock1));
4474 4754 ASSERT(!NOT_BLOCKED(lock1));
4475 4755 path(lock1, lock2);
4476 4756 }
4477 4757 }
4478 4758 }
4479 4759
4480 4760 for (lock2 = ACTIVE_HEAD(gp)->l_next; lock2 != ACTIVE_HEAD(gp);
4481 4761 lock2 = lock2->l_next) {
4482 4762 ASSERT(!IS_BARRIER(lock1));
4483 4763 if (lock1->l_vnode == lock2->l_vnode) {
4484 4764 if (BLOCKS(lock2, lock1)) {
4485 4765 ASSERT(!IS_GRANTED(lock1));
4486 4766 ASSERT(!NOT_BLOCKED(lock1));
4487 4767 path(lock1, lock2);
4488 4768 }
4489 4769 }
4490 4770 }
4491 4771 ep = FIRST_ADJ(lock1);
4492 4772 while (ep != HEAD(lock1)) {
4493 4773 ASSERT(BLOCKS(ep->to_vertex, lock1));
4494 4774 ep = NEXT_ADJ(ep);
4495 4775 }
4496 4776 }
4497 4777 }
4498 4778
4499 4779 static int
4500 4780 level_two_path(lock_descriptor_t *lock1, lock_descriptor_t *lock2, int no_path)
4501 4781 {
4502 4782 edge_t *ep;
4503 4783 lock_descriptor_t *vertex;
4504 4784 lock_descriptor_t *vertex_stack;
4505 4785
4506 4786 STACK_INIT(vertex_stack);
4507 4787
4508 4788 flk_graph_uncolor(lock1->l_graph);
4509 4789 ep = FIRST_ADJ(lock1);
4510 4790 ASSERT(ep != HEAD(lock1));
4511 4791 while (ep != HEAD(lock1)) {
4512 4792 if (no_path)
4513 4793 ASSERT(ep->to_vertex != lock2);
4514 4794 STACK_PUSH(vertex_stack, ep->to_vertex, l_dstack);
4515 4795 COLOR(ep->to_vertex);
4516 4796 ep = NEXT_ADJ(ep);
4517 4797 }
4518 4798
4519 4799 while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
4520 4800 STACK_POP(vertex_stack, l_dstack);
4521 4801 for (ep = FIRST_ADJ(vertex); ep != HEAD(vertex);
4522 4802 ep = NEXT_ADJ(ep)) {
4523 4803 if (COLORED(ep->to_vertex))
4524 4804 continue;
4525 4805 COLOR(ep->to_vertex);
4526 4806 if (ep->to_vertex == lock2)
4527 4807 return (1);
4528 4808
4529 4809 STACK_PUSH(vertex_stack, ep->to_vertex, l_dstack);
4530 4810 }
4531 4811 }
4532 4812 return (0);
4533 4813 }
4534 4814
4535 4815 static void
4536 4816 check_owner_locks(graph_t *gp, pid_t pid, int sysid, vnode_t *vp)
4537 4817 {
4538 4818 lock_descriptor_t *lock;
4539 4819
4540 4820 /* Ignore OFD style locks since they're not process-wide. */
4541 4821 if (pid == 0)
4542 4822 return;
4543 4823
4544 4824 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
4545 4825
4546 4826 if (lock) {
4547 4827 while (lock != ACTIVE_HEAD(gp) && (lock->l_vnode == vp)) {
4548 4828 if (lock->l_flock.l_pid == pid &&
4549 4829 lock->l_flock.l_sysid == sysid)
4550 4830 cmn_err(CE_PANIC,
4551 4831 "owner pid %d's lock %p in active queue",
4552 4832 pid, (void *)lock);
4553 4833 lock = lock->l_next;
4554 4834 }
4555 4835 }
4556 4836 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
4557 4837
4558 4838 if (lock) {
4559 4839 while (lock != SLEEPING_HEAD(gp) && (lock->l_vnode == vp)) {
4560 4840 if (lock->l_flock.l_pid == pid &&
4561 4841 lock->l_flock.l_sysid == sysid)
4562 4842 cmn_err(CE_PANIC,
4563 4843 "owner pid %d's lock %p in sleep queue",
4564 4844 pid, (void *)lock);
4565 4845 lock = lock->l_next;
4566 4846 }
4567 4847 }
4568 4848 }
4569 4849
4570 4850 static int
4571 4851 level_one_path(lock_descriptor_t *lock1, lock_descriptor_t *lock2)
4572 4852 {
4573 4853 edge_t *ep = FIRST_ADJ(lock1);
4574 4854
4575 4855 while (ep != HEAD(lock1)) {
4576 4856 if (ep->to_vertex == lock2)
4577 4857 return (1);
4578 4858 else
4579 4859 ep = NEXT_ADJ(ep);
4580 4860 }
4581 4861 return (0);
4582 4862 }
4583 4863
4584 4864 static int
4585 4865 no_path(lock_descriptor_t *lock1, lock_descriptor_t *lock2)
4586 4866 {
4587 4867 return (!level_two_path(lock1, lock2, 1));
4588 4868 }
4589 4869
4590 4870 static void
4591 4871 path(lock_descriptor_t *lock1, lock_descriptor_t *lock2)
4592 4872 {
4593 4873 if (level_one_path(lock1, lock2)) {
4594 4874 if (level_two_path(lock1, lock2, 0) != 0) {
4595 4875 cmn_err(CE_WARN,
4596 4876 "one edge one path from lock1 %p lock2 %p",
4597 4877 (void *)lock1, (void *)lock2);
4598 4878 }
4599 4879 } else if (no_path(lock1, lock2)) {
4600 4880 cmn_err(CE_PANIC,
4601 4881 "No path from lock1 %p to lock2 %p",
4602 4882 (void *)lock1, (void *)lock2);
4603 4883 }
4604 4884 }
4605 4885 #endif /* DEBUG */