NEX-3758 Support for remote stale lock detection
Reviewed by: Gordon Ross <gordon.ross@nexenta.com>
@@ -26,11 +26,11 @@
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
/*
- * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
* Copyright 2015 Joyent, Inc.
*/
#include <sys/flock_impl.h>
#include <sys/vfs.h>
@@ -39,10 +39,14 @@
#include <sys/clconf.h>
#include <sys/cladm.h>
#include <sys/nbmlock.h>
#include <sys/cred.h>
#include <sys/policy.h>
+#include <sys/list.h>
+#include <sys/sysmacros.h>
+#include <sys/socket.h>
+#include <inet/ip.h>
/*
* The following four variables are for statistics purposes and they are
* not protected by locks. They may not be accurate but will at least be
* close to the actual value.
@@ -157,14 +161,36 @@
flk_lockmgr_status_t lockmgr_status[HASH_SIZE];
};
zone_key_t flock_zone_key;
+/*
+ * Support for remote stale lock detection
+ *
+ * The sysid_to_host_translator_lock readers/writer lock protects
+ * sysid_to_host_translator_list.
+ *
+ * The sysid_to_host_translator_list is a list of sysid-to-host-name translator
+ * functions. New translators are added using the public
+ * flk_add_sysid_to_host_translator() call.
+ *
+ * The stale_lock_timeout is in seconds and determines the interval for
+ * remote stale lock checking. When set to 0, remote stale lock checking
+ * is disabled.
+ */
+struct sysid_to_host_translator_entry {
+ sysid_to_host_translator_t translator;
+ list_node_t node;
+};
+static krwlock_t sysid_to_host_translator_lock;
+static list_t sysid_to_host_translator_list;
+volatile int stale_lock_timeout = 3600; /* one hour, in seconds */
+
static void create_flock(lock_descriptor_t *, flock64_t *);
static lock_descriptor_t *flk_get_lock(void);
static void flk_free_lock(lock_descriptor_t *lock);
-static void flk_get_first_blocking_lock(lock_descriptor_t *request);
+static void flk_get_first_blocking_lock(lock_descriptor_t *);
static int flk_process_request(lock_descriptor_t *);
static int flk_add_edge(lock_descriptor_t *, lock_descriptor_t *, int, int);
static edge_t *flk_get_edge(void);
static int flk_wait_execute_request(lock_descriptor_t *);
static int flk_relation(lock_descriptor_t *, lock_descriptor_t *);
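Not part of the patch itself: since stale_lock_timeout is an ordinary global in
genunix, the usual illumos tunable mechanisms should apply to it. A minimal
sketch, assuming those mechanisms and using example values only:

    * /etc/system -- disable remote stale lock detection at boot:
    set stale_lock_timeout = 0

    * On a live system, via mdb -kw -- set the check interval to 600 seconds:
    > stale_lock_timeout/W 0t600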
@@ -554,12 +580,11 @@
*/
if ((lckdat->l_type == F_UNLCK) ||
!((cmd & INOFLCK) || (cmd & SETFLCK))) {
lock_request = &stack_lock_request;
- (void) bzero((caddr_t)lock_request,
- sizeof (lock_descriptor_t));
+ bzero(lock_request, sizeof (lock_descriptor_t));
/*
* following is added to make the assertions in
* flk_execute_request() to pass through
*/
@@ -948,10 +973,18 @@
/* initialize all NLM states in array to NLM_UNKNOWN */
for (i = 0; i < nlm_status_size; i++) {
nlm_reg_status[i] = FLK_NLM_UNKNOWN;
}
}
+
+ mutex_init(&flock_lock, NULL, MUTEX_DEFAULT, NULL);
+ mutex_init(&nlm_reg_lock, NULL, MUTEX_DEFAULT, NULL);
+
+ rw_init(&sysid_to_host_translator_lock, NULL, RW_DEFAULT, NULL);
+ list_create(&sysid_to_host_translator_list,
+ sizeof (struct sysid_to_host_translator_entry),
+ offsetof(struct sysid_to_host_translator_entry, node));
}
/*
* Zone constructor/destructor callbacks to be executed when a zone is
* created/destroyed.
@@ -1009,10 +1042,11 @@
void
flk_free_lock(lock_descriptor_t *lock)
{
file_t *fp;
+ ASSERT(lock->l_blocker >= 0);
ASSERT(IS_DEAD(lock));
if ((fp = lock->l_ofd) != NULL && fp->f_filock == (struct filock *)lock)
fp->f_filock = NULL;
@@ -1019,11 +1053,11 @@
if (IS_REFERENCED(lock)) {
lock->l_state |= DELETED_LOCK;
return;
}
flk_lock_frees++;
- kmem_free((void *)lock, sizeof (lock_descriptor_t));
+ kmem_free(lock, sizeof (lock_descriptor_t));
}
void
flk_set_state(lock_descriptor_t *lock, int new_state)
{
@@ -1059,10 +1093,268 @@
}
lock->l_status = new_state;
}
/*
+ * Support for remote stale lock detection
+ */
+
+void
+flk_add_sysid_to_host_translator(sysid_to_host_translator_t tr)
+{
+ struct sysid_to_host_translator_entry *te;
+
+ te = kmem_alloc(sizeof (struct sysid_to_host_translator_entry),
+ KM_SLEEP);
+
+ te->translator = tr;
+
+ rw_enter(&sysid_to_host_translator_lock, RW_WRITER);
+ list_insert_head(&sysid_to_host_translator_list, te);
+ rw_exit(&sysid_to_host_translator_lock);
+}
+
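For illustration only (not part of this change): a consumer such as the NLM
module would register a translator roughly as sketched below. The
sysid_to_host_translator_t typedef lives in sys/flock_impl.h and is not shown
in this diff, so its signature is assumed here from the call site in
translate_sysid_to_host(), where a nonzero return means the sockaddr and the
type string were filled in; my_sysid_lookup() is a made-up helper.

    static int
    my_sysid_to_host(zoneid_t zoneid, sysid_t sysid, struct sockaddr *sa,
        const char **type)
    {
    	struct sockaddr_in *sin = (struct sockaddr_in *)sa;

    	/* Hypothetical lookup: map (zoneid, sysid) to the peer's IPv4 address */
    	if (my_sysid_lookup(zoneid, sysid, &sin->sin_addr) != 0)
    		return (0);	/* unknown here; let another translator try */

    	sin->sin_family = AF_INET;
    	*type = "NLM";		/* reported as the host "type" in the log */
    	return (1);		/* translation succeeded */
    }

    /* Registration, typically from the module's _init() path: */
    flk_add_sysid_to_host_translator(my_sysid_to_host);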
+static void
+translate_sysid_to_host(zoneid_t zoneid, sysid_t sysid, char *host, size_t hlen,
+ const char **type)
+{
+ struct sockaddr_storage ss;	/* large enough for an AF_INET6 address */
+ struct sysid_to_host_translator_entry *te;
+
+ /* Defaults, in case the translation fails */
+ *type = "?";
+ (void) strlcpy(host, "?", hlen);
+
+ rw_enter(&sysid_to_host_translator_lock, RW_READER);
+
+ for (te = list_head(&sysid_to_host_translator_list); te != NULL;
+ te = list_next(&sysid_to_host_translator_list, te)) {
+
+ if (te->translator(zoneid, sysid, (struct sockaddr *)&ss, type) != 0) {
+ rw_exit(&sysid_to_host_translator_lock);
+
+ switch (ss.ss_family) {
+ case AF_INET:
+ (void) inet_ntop(AF_INET,
+ &((struct sockaddr_in *)&ss)->sin_addr,
+ host, hlen);
+ break;
+ case AF_INET6:
+ (void) inet_ntop(AF_INET6,
+ &((struct sockaddr_in6 *)&ss)->sin6_addr,
+ host, hlen);
+ break;
+ default:
+ break;
+ }
+
+ return;
+ }
+ }
+
+ rw_exit(&sysid_to_host_translator_lock);
+}
+
+static char *
+get_vnode_path(vnode_t *vp)
+{
+ size_t len;
+ char *ret;
+
+ mutex_enter(&vp->v_lock);
+ if (vp->v_path == NULL) {
+ mutex_exit(&vp->v_lock);
+ return (NULL);
+ }
+ len = strlen(vp->v_path) + 1;
+ mutex_exit(&vp->v_lock);
+
+ ret = kmem_alloc(len, KM_SLEEP);
+
+ mutex_enter(&vp->v_lock);
+ if (vp->v_path == NULL || strlen(vp->v_path) + 1 != len) {
+ mutex_exit(&vp->v_lock);
+ kmem_free(ret, len);
+ return (NULL);
+ }
+ bcopy(vp->v_path, ret, len);
+ mutex_exit(&vp->v_lock);
+
+ return (ret);
+}
+
+static void
+flk_stale_lock_check(lock_descriptor_t *lock)
+{
+ char *path;
+
+ char host[INET6_ADDRSTRLEN]; /* host name */
+ const char *type; /* host type */
+
+ /* temporary variables for the cmn_err() call */
+ char *p, *t; /* path, lock type */
+ pid_t pid; /* pid */
+ void *v; /* vnode */
+ u_offset_t s, e; /* start, end */
+
+ ASSERT(MUTEX_HELD(&lock->l_graph->gp_mutex));
+
+ /*
+ * Either this is not a remote lock, or the stale lock checking is
+ * disabled, or the lock has already been reported.
+ */
+ if (IS_LOCAL(lock) || stale_lock_timeout == 0 || lock->l_blocker < 0)
+ return;
+
+ /* Seen first time? */
+ if (lock->l_blocker == 0) {
+ lock->l_blocker = gethrtime();
+ return;
+ }
+
+ /* Old enough? */
+ if ((gethrtime() - lock->l_blocker) / NANOSEC < stale_lock_timeout)
+ return;
+
+ translate_sysid_to_host(lock->l_zoneid, lock->l_flock.l_sysid, host,
+ sizeof (host), &type);
+ path = get_vnode_path(lock->l_vnode);
+
+ pid = lock->l_flock.l_pid;
+ v = (void *)lock->l_vnode;
+ p = path == NULL ? "?" : path;
+ t = lock->l_type == F_WRLCK ? "WR" : "RD";
+ s = lock->l_start;
+ e = lock->l_end;
+
+ /* Report the blocker as stale */
+ cmn_err(CE_NOTE, "!Stale lock (host: %s (%s), pid: %d, vnode: %p, "
+ "path: %s, %sLCK: %llu:%llu)", host, type, pid, v, p, t, s, e);
+
+ if (path != NULL)
+ strfree(path);
+
+ /* Mark this blocker as reported */
+ lock->l_blocker = -lock->l_blocker;
+}
+
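The l_blocker field used above is a new member of lock_descriptor_t added in
sys/flock_impl.h, which is outside this diff. Judging only from its use here,
it is presumably declared along these lines (an assumption, not quoted from
the header):

    hrtime_t	l_blocker;	/* 0  - never observed blocking a request     */
    				/* >0 - gethrtime() when first seen blocking  */
    				/* <0 - negated timestamp, reported as stale  */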
+static void
+flk_stale_lock_shrink(lock_descriptor_t *lock, lock_descriptor_t *new)
+{
+ char *path;
+
+ char host[INET6_ADDRSTRLEN]; /* host name */
+ const char *type; /* host type */
+
+ /* temporary variables for the cmn_err() call */
+ char *p, *t; /* path, lock type */
+ pid_t pid; /* pid */
+ void *v; /* vnode */
+ u_offset_t s, e; /* start, end */
+ u_offset_t ns, ne; /* new start, new end */
+
+ ASSERT(MUTEX_HELD(&lock->l_graph->gp_mutex));
+
+ translate_sysid_to_host(lock->l_zoneid, lock->l_flock.l_sysid, host,
+ sizeof (host), &type);
+ path = get_vnode_path(lock->l_vnode);
+
+ pid = lock->l_flock.l_pid;
+ v = (void *)lock->l_vnode;
+ p = path == NULL ? "?" : path;
+ t = lock->l_type == F_WRLCK ? "WR" : "RD";
+ s = lock->l_start;
+ e = lock->l_end;
+ ns = new->l_start;
+ ne = new->l_end;
+
+ cmn_err(CE_NOTE, "!Stale lock SHRINK (host: %s (%s), pid: %d, "
+ "vnode: %p, path: %s, %sLCK: %llu:%llu -> %llu:%llu)", host, type,
+ pid, v, p, t, s, e, ns, ne);
+
+ if (path != NULL)
+ strfree(path);
+}
+
+static void
+flk_stale_lock_split(lock_descriptor_t *lock, lock_descriptor_t *new1,
+ lock_descriptor_t *new2)
+{
+ char *path;
+
+ char host[INET6_ADDRSTRLEN]; /* host name */
+ const char *type; /* host type */
+
+ /* temporary variables for the cmn_err() call */
+ char *p, *t; /* path, lock type */
+ pid_t pid; /* pid */
+ void *v; /* vnode */
+ u_offset_t s, e; /* start, end */
+ u_offset_t n1s, n1e; /* new1 start, new1 end */
+ u_offset_t n2s, n2e; /* new2 start, new2 end */
+
+ ASSERT(MUTEX_HELD(&lock->l_graph->gp_mutex));
+
+ translate_sysid_to_host(lock->l_zoneid, lock->l_flock.l_sysid, host,
+ sizeof (host), &type);
+ path = get_vnode_path(lock->l_vnode);
+
+ pid = lock->l_flock.l_pid;
+ v = (void *)lock->l_vnode;
+ p = path == NULL ? "?" : path;
+ t = lock->l_type == F_WRLCK ? "WR" : "RD";
+ s = lock->l_start;
+ e = lock->l_end;
+ n1s = new1->l_start;
+ n1e = new1->l_end;
+ n2s = new2->l_start;
+ n2e = new2->l_end;
+
+ cmn_err(CE_NOTE, "!Stale lock SPLIT (host: %s (%s), pid: %d, "
+ "vnode: %p, path: %s, %sLCK: %llu:%llu -> %llu:%llu and %llu:%llu)",
+ host, type, pid, v, p, t, s, e, n1s, n1e, n2s, n2e);
+
+ if (path != NULL)
+ strfree(path);
+}
+
+static void
+flk_stale_lock_release(lock_descriptor_t *lock)
+{
+ char *path;
+
+ char host[INET6_ADDRSTRLEN]; /* host name */
+ const char *type; /* host type */
+
+ /* temporary variables for the cmn_err() call */
+ char *p, *t; /* path, lock type */
+ pid_t pid; /* pid */
+ void *v; /* vnode */
+ u_offset_t s, e; /* start, end */
+
+ ASSERT(MUTEX_HELD(&lock->l_graph->gp_mutex));
+
+ translate_sysid_to_host(lock->l_zoneid, lock->l_flock.l_sysid, host,
+ sizeof (host), &type);
+ path = get_vnode_path(lock->l_vnode);
+
+ pid = lock->l_flock.l_pid;
+ v = (void *)lock->l_vnode;
+ p = path == NULL ? "?" : path;
+ t = lock->l_type == F_WRLCK ? "WR" : "RD";
+ s = lock->l_start;
+ e = lock->l_end;
+
+ cmn_err(CE_NOTE, "!Stale lock RELEASE (host: %s (%s), pid: %d, "
+ "vnode: %p, path: %s, %sLCK: %llu:%llu)", host, type, pid, v, p, t,
+ s, e);
+
+ if (path != NULL)
+ strfree(path);
+}
+
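Each of the helpers above logs with a leading '!' in the format string, so the
message goes to the system log only, not the console. With the
flk_stale_lock_check() format, an entry would look roughly like this (host,
type, pid, vnode, path, and offsets are illustrative values, not real output):

    NOTICE: Stale lock (host: 10.0.0.25 (NLM), pid: 4321, vnode: ffffff01d2b3c040,
        path: /export/data/file.db, WRLCK: 0:1023)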
+/*
* Routine that checks whether there are any blocking locks in the system.
*
* The policy followed is if a write lock is sleeping we don't allow read
* locks before this write lock even though there may not be any active
* locks corresponding to the read locks' region.
@@ -1097,16 +1389,17 @@
* check active locks
*/
SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
-
if (lock) {
do {
if (BLOCKS(lock, request)) {
- if (!request_will_wait)
+ if (!request_will_wait) {
+ flk_stale_lock_check(lock);
return (EAGAIN);
+ }
request_blocked_by_active = 1;
break;
}
/*
* Grant lock if it is for the same owner holding active
@@ -1122,10 +1415,11 @@
}
if (!request_blocked_by_active) {
lock_descriptor_t *lk[1];
lock_descriptor_t *first_glock = NULL;
+
/*
* Shall we grant this?! NO!!
* What about those locks that were just granted and still
* in sleep queue. Those threads are woken up and so locks
* are almost active.
@@ -1178,11 +1472,12 @@
}
lock = first_glock;
if (lock) {
do {
if (IS_GRANTED(lock)) {
- flk_recompute_dependencies(lock, lk, 1, 0);
+ flk_recompute_dependencies(lock, lk, 1,
+ 0);
}
lock = lock->l_prev;
} while ((lock->l_vnode == vp));
}
request->l_state &= ~RECOMPUTE_LOCK;
@@ -1240,16 +1535,17 @@
}
lock = lock->l_next;
} while (lock->l_vnode == vp);
}
-/*
- * found_covering_lock == 2 iff at this point 'request' has paths
- * to all locks that blocks 'request'. found_covering_lock == 1 iff at this
- * point 'request' has paths to all locks that blocks 'request' whose owners
- * are not same as the one that covers 'request' (covered_by above) and
- * we can have locks whose owner is same as covered_by in the active list.
+ /*
+ * found_covering_lock == 2 iff at this point 'request' has paths to
+ * all locks that blocks 'request'. found_covering_lock == 1 iff at
+ * this point 'request' has paths to all locks that blocks 'request'
+ * whose owners are not same as the one that covers 'request'
+ * (covered_by above) and we can have locks whose owner is same as
+ * covered_by in the active list.
*/
if (request_blocked_by_active && found_covering_lock != 2) {
SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
ASSERT(lock != NULL);
@@ -1319,24 +1615,22 @@
if (IS_IO_LOCK(request))
return (0);
SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
- if (lock == NULL && request->l_type == F_UNLCK)
- return (0);
- if (lock == NULL) {
- flk_insert_active_lock(request);
- return (0);
- }
-
+ if (lock != NULL) {
+ /*
+ * There are some active locks so check for relations
+ */
do {
lock1 = lock->l_next;
if (SAME_OWNER(request, lock)) {
done_searching = flk_relation(lock, request);
}
lock = lock1;
} while (lock->l_vnode == vp && !done_searching);
+ }
/*
* insert in active queue
*/
@@ -1597,14 +1891,15 @@
edge_frees++;
kmem_cache_free(flk_edge_cache, (void *)ep);
}
/*
- * Check the relationship of request with lock and perform the
- * recomputation of dependencies, break lock if required, and return
- * 1 if request cannot have any more relationship with the next
+ * Check the relationship of 'request' with 'lock' and perform the
+ * recomputation of dependencies, break 'lock' if required, and return
+ * 1 if 'request' cannot have any more relationship with the next
* active locks.
+ *
* The 'lock' and 'request' are compared and in case of overlap we
* delete the 'lock' and form new locks to represent the non-overlapped
* portion of original 'lock'. This function has side effects such as
* 'lock' will be freed, new locks will be added to the active list.
*/
@@ -1611,17 +1906,18 @@
static int
flk_relation(lock_descriptor_t *lock, lock_descriptor_t *request)
{
int lock_effect;
- lock_descriptor_t *lock1, *lock2;
lock_descriptor_t *topology[3];
int nvertex = 0;
int i;
edge_t *ep;
- graph_t *gp = (lock->l_graph);
+ graph_t *gp = lock->l_graph;
+ boolean_t mergeable;
+ ASSERT(request->l_blocker == 0);
CHECK_SLEEPING_LOCKS(gp);
CHECK_ACTIVE_LOCKS(gp);
ASSERT(MUTEX_HELD(&gp->gp_mutex));
@@ -1637,147 +1933,104 @@
lock->l_type == F_RDLCK)
lock_effect = FLK_UPGRADE;
else
lock_effect = FLK_STAY_SAME;
+ /*
+ * The 'lock' and 'request' are merged only when the effect of both
+ * locks is the same (FLK_STAY_SAME) and their blocker status
+ * (l_blocker) is the same as well. We do not merge 'lock' and 'request'
+ * with different l_blocker values because such a merge might affect the
+ * stale lock detection, causing either false positives or missed stale
+ * locks.
+ */
+ mergeable = lock_effect == FLK_STAY_SAME &&
+ lock->l_blocker == request->l_blocker;
+
if (lock->l_end < request->l_start) {
- if (lock->l_end == request->l_start - 1 &&
- lock_effect == FLK_STAY_SAME) {
- topology[0] = request;
+ /* If the 'lock' is just next to 'request', try to merge them */
+ if (lock->l_end == request->l_start - 1 && mergeable) {
request->l_start = lock->l_start;
- nvertex = 1;
goto recompute;
- } else {
+ }
+
+ /* Otherwise, they do not overlap, so return immediately */
return (0);
}
- }
- if (lock->l_start > request->l_end) {
- if (request->l_end == lock->l_start - 1 &&
- lock_effect == FLK_STAY_SAME) {
- topology[0] = request;
+ if (request->l_end < lock->l_start) {
+ /* If the 'request' is just next to 'lock', try to merge them */
+ if (request->l_end == lock->l_start - 1 && mergeable) {
request->l_end = lock->l_end;
- nvertex = 1;
goto recompute;
- } else {
+ }
+
+ /* Otherwise, they do not overlap, so return immediately */
return (1);
}
- }
- if (request->l_end < lock->l_end) {
- if (request->l_start > lock->l_start) {
- if (lock_effect == FLK_STAY_SAME) {
+ /*
+ * Here we are sure the 'lock' and 'request' overlap, so the 'request'
+ * will replace the 'lock' (either fully, or at least partially).
+ */
+
+ /*
+ * If the 'request' does not fully cover the 'lock' at the start,
+ * either move the start of the 'request' to cover the 'lock', or split
+ * the 'lock'.
+ */
+ if (lock->l_start < request->l_start) {
+ if (mergeable) {
request->l_start = lock->l_start;
- request->l_end = lock->l_end;
- topology[0] = request;
- nvertex = 1;
} else {
- lock1 = flk_get_lock();
- lock2 = flk_get_lock();
- COPY(lock1, lock);
- COPY(lock2, lock);
- lock1->l_start = lock->l_start;
- lock1->l_end = request->l_start - 1;
- lock2->l_start = request->l_end + 1;
- lock2->l_end = lock->l_end;
- topology[0] = lock1;
- topology[1] = lock2;
- topology[2] = request;
- nvertex = 3;
+ lock_descriptor_t *new_lock = flk_get_lock();
+
+ COPY(new_lock, lock);
+ new_lock->l_end = request->l_start - 1;
+
+ topology[nvertex++] = new_lock;
}
- } else if (request->l_start < lock->l_start) {
- if (lock_effect == FLK_STAY_SAME) {
- request->l_end = lock->l_end;
- topology[0] = request;
- nvertex = 1;
- } else {
- lock1 = flk_get_lock();
- COPY(lock1, lock);
- lock1->l_start = request->l_end + 1;
- topology[0] = lock1;
- topology[1] = request;
- nvertex = 2;
}
- } else {
- if (lock_effect == FLK_STAY_SAME) {
- request->l_start = lock->l_start;
+
+ /*
+ * If the 'request' does not fully cover the 'lock' at the end, either
+ * move the end of the 'request' to cover the 'lock', or split the
+ * 'lock'.
+ */
+ if (request->l_end < lock->l_end) {
+ if (mergeable) {
request->l_end = lock->l_end;
- topology[0] = request;
- nvertex = 1;
} else {
- lock1 = flk_get_lock();
- COPY(lock1, lock);
- lock1->l_start = request->l_end + 1;
- topology[0] = lock1;
- topology[1] = request;
- nvertex = 2;
+ lock_descriptor_t *new_lock = flk_get_lock();
+
+ COPY(new_lock, lock);
+ new_lock->l_start = request->l_end + 1;
+
+ topology[nvertex++] = new_lock;
}
}
- } else if (request->l_end > lock->l_end) {
- if (request->l_start > lock->l_start) {
- if (lock_effect == FLK_STAY_SAME) {
- request->l_start = lock->l_start;
- topology[0] = request;
- nvertex = 1;
- } else {
- lock1 = flk_get_lock();
- COPY(lock1, lock);
- lock1->l_end = request->l_start - 1;
- topology[0] = lock1;
- topology[1] = request;
- nvertex = 2;
+
+ /*
+ * Log the blocker change
+ */
+ if (nvertex > 0 && lock->l_blocker < 0) {
+ if (nvertex == 1)
+ flk_stale_lock_shrink(lock, topology[0]);
+ if (nvertex == 2)
+ flk_stale_lock_split(lock, topology[0], topology[1]);
+
+ lock->l_blocker = 0;
}
- } else if (request->l_start < lock->l_start) {
- topology[0] = request;
- nvertex = 1;
- } else {
- topology[0] = request;
- nvertex = 1;
- }
- } else {
- if (request->l_start > lock->l_start) {
- if (lock_effect == FLK_STAY_SAME) {
- request->l_start = lock->l_start;
- topology[0] = request;
- nvertex = 1;
- } else {
- lock1 = flk_get_lock();
- COPY(lock1, lock);
- lock1->l_end = request->l_start - 1;
- topology[0] = lock1;
- topology[1] = request;
- nvertex = 2;
- }
- } else if (request->l_start < lock->l_start) {
- topology[0] = request;
- nvertex = 1;
- } else {
- if (lock_effect != FLK_UNLOCK) {
- topology[0] = request;
- nvertex = 1;
- } else {
- flk_delete_active_lock(lock, 0);
- flk_wakeup(lock, 1);
- flk_free_lock(lock);
- CHECK_SLEEPING_LOCKS(gp);
- CHECK_ACTIVE_LOCKS(gp);
- return (1);
- }
- }
- }
recompute:
-
/*
* For unlock we don't send the 'request' to for recomputing
* dependencies because no lock will add an edge to this.
*/
+ if (lock_effect != FLK_UNLOCK)
+ topology[nvertex++] = request;
- if (lock_effect == FLK_UNLOCK) {
- topology[nvertex-1] = NULL;
- nvertex--;
- }
for (i = 0; i < nvertex; i++) {
topology[i]->l_state |= RECOMPUTE_LOCK;
topology[i]->l_color = NO_COLOR;
}
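To make the l_blocker handling in flk_relation() concrete, a hypothetical
trace with invented offsets:

    active lock:  100:199, remote, l_blocker < 0 (already reported as stale)
    request:      150:249, same owner, same type, so lock_effect == FLK_STAY_SAME
    mergeable:    B_FALSE, because request->l_blocker == 0 differs from the lock's
    result:       one new lock 100:149 is created (nvertex == 1),
                  flk_stale_lock_shrink() logs "100:199 -> 100:149", and the old
                  lock's l_blocker is reset to 0 before that lock is deleted

Had the blocker status been ignored, the two ranges would simply have merged
into 100:249 and the earlier stale report would silently cover a freshly
granted range.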
@@ -1785,37 +2038,33 @@
/*
* we remove the adjacent edges for all vertices' to this vertex
* 'lock'.
*/
-
ep = FIRST_IN(lock);
while (ep != HEAD(lock)) {
ADJ_LIST_REMOVE(ep);
ep = NEXT_IN(ep);
}
flk_delete_active_lock(lock, 0);
/* We are ready for recomputing the dependencies now */
-
flk_recompute_dependencies(lock, topology, nvertex, 1);
for (i = 0; i < nvertex; i++) {
topology[i]->l_state &= ~RECOMPUTE_LOCK;
topology[i]->l_color = NO_COLOR;
}
-
if (lock_effect == FLK_UNLOCK) {
nvertex++;
}
for (i = 0; i < nvertex - 1; i++) {
flk_insert_active_lock(topology[i]);
}
-
if (lock_effect == FLK_DOWNGRADE || lock_effect == FLK_UNLOCK) {
flk_wakeup(lock, 0);
} else {
ep = FIRST_IN(lock);
while (ep != HEAD(lock)) {
@@ -1890,10 +2139,16 @@
ASSERT(NOT_BLOCKED(lock));
ASSERT(IS_ACTIVE(lock));
ASSERT((vp->v_filocks != NULL));
+ if (lock->l_blocker < 0) {
+ /* Log the blocker release */
+ flk_stale_lock_release(lock);
+ lock->l_blocker = 0;
+ }
+
if (vp->v_filocks == (struct filock *)lock) {
vp->v_filocks = (struct filock *)
((lock->l_next->l_vnode == vp) ? lock->l_next :
NULL);
}
@@ -2058,11 +2313,11 @@
/*
* free the topology
*/
if (nvertex)
- kmem_free((void *)topology,
+ kmem_free(topology,
(nvertex * sizeof (lock_descriptor_t *)));
/*
* Possibility of some locks unblocked now
*/
@@ -2994,11 +3249,11 @@
int
chklock(struct vnode *vp, int iomode, u_offset_t offset, ssize_t len, int fmode,
caller_context_t *ct)
{
- register int i;
+ int i;
struct flock64 bf;
int error = 0;
bf.l_type = (iomode & FWRITE) ? F_WRLCK : F_RDLCK;
bf.l_whence = 0;
@@ -3325,11 +3580,11 @@
static void
flk_free_proc_edge(proc_edge_t *pep)
{
ASSERT(pep->refcount == 0);
- kmem_free((void *)pep, sizeof (proc_edge_t));
+ kmem_free(pep, sizeof (proc_edge_t));
flk_proc_edge_frees++;
}
/*
* Color the graph explicitly done only when the mark value hits max value.
@@ -4067,29 +4322,54 @@
}
mutex_exit(&gp->gp_mutex);
}
}
-
/*
* Wait until a lock is granted, cancelled, or interrupted.
*/
static void
wait_for_lock(lock_descriptor_t *request)
{
graph_t *gp = request->l_graph;
+ vnode_t *vp = request->l_vnode;
ASSERT(MUTEX_HELD(&gp->gp_mutex));
while (!(IS_GRANTED(request)) && !(IS_CANCELLED(request)) &&
!(IS_INTERRUPTED(request))) {
- if (!cv_wait_sig(&request->l_cv, &gp->gp_mutex)) {
+ lock_descriptor_t *lock;
+
+ if (stale_lock_timeout == 0) {
+ /* The stale lock detection is disabled */
+ if (cv_wait_sig(&request->l_cv, &gp->gp_mutex) == 0) {
flk_set_state(request, FLK_INTERRUPTED_STATE);
request->l_state |= INTERRUPTED_LOCK;
}
+
+ continue;
}
+
+ SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
+
+ if (lock != NULL) {
+ do {
+ if (BLOCKS(lock, request)) {
+ flk_stale_lock_check(lock);
+ break;
+ }
+ lock = lock->l_next;
+ } while (lock->l_vnode == vp);
+ }
+
+ if (cv_timedwait_sig(&request->l_cv, &gp->gp_mutex,
+ ddi_get_lbolt() + SEC_TO_TICK(stale_lock_timeout)) == 0) {
+ flk_set_state(request, FLK_INTERRUPTED_STATE);
+ request->l_state |= INTERRUPTED_LOCK;
+ }
+ }
}
/*
* Create an flock structure from the existing lock information
*
@@ -4137,11 +4417,11 @@
case 1: /* SEEK_CUR */
*start = (u_offset_t)(flp->l_start + offset);
break;
case 2: /* SEEK_END */
vattr.va_mask = AT_SIZE;
- if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
+ if ((error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL)) != 0)
return (error);
*start = (u_offset_t)(flp->l_start + vattr.va_size);
break;
default:
return (EINVAL);
@@ -4170,19 +4450,19 @@
/*
* Check the validity of lock data. This can be used by the NFS
* frlock routines to check data before contacting the server. The
* server must support semantics that aren't as restrictive as
* the UNIX API, so the NFS client is required to check.
- * The maximum is now passed in by the caller.
+ * The maximum is passed in by the caller.
*/
int
flk_check_lock_data(u_offset_t start, u_offset_t end, offset_t max)
{
/*
* The end (length) for local locking should never be greater
- * than MAXEND. However, the representation for
+ * than max. However, the representation for
* the entire file is MAX_U_OFFSET_T.
*/
if ((start > max) ||
((end > max) && (end != MAX_U_OFFSET_T))) {
return (EINVAL);