NEX-3758 Support for remote stale lock detection
Reviewed by: Gordon Ross <gordon.ross@nexenta.com>
--- old/usr/src/uts/common/os/flock.c
+++ new/usr/src/uts/common/os/flock.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
24 24 * Use is subject to license terms.
25 25 */
26 26
27 27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
28 28 /* All Rights Reserved */
29 29
30 30 /*
31 - * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
31 + * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
32 32 * Copyright 2015 Joyent, Inc.
33 33 */
34 34
35 35 #include <sys/flock_impl.h>
36 36 #include <sys/vfs.h>
37 37 #include <sys/t_lock.h> /* for <sys/callb.h> */
38 38 #include <sys/callb.h>
39 39 #include <sys/clconf.h>
40 40 #include <sys/cladm.h>
41 41 #include <sys/nbmlock.h>
42 42 #include <sys/cred.h>
43 43 #include <sys/policy.h>
44 +#include <sys/list.h>
45 +#include <sys/sysmacros.h>
46 +#include <sys/socket.h>
47 +#include <inet/ip.h>
44 48
45 49 /*
46 50 * The following four variables are for statistics purposes and they are
47 51 * not protected by locks. They may not be accurate but will at least be
48 52 * close to the actual value.
49 53 */
50 54
51 55 int flk_lock_allocs;
52 56 int flk_lock_frees;
53 57 int edge_allocs;
54 58 int edge_frees;
55 59 int flk_proc_vertex_allocs;
56 60 int flk_proc_edge_allocs;
57 61 int flk_proc_vertex_frees;
58 62 int flk_proc_edge_frees;
59 63
60 64 static kmutex_t flock_lock;
61 65
62 66 #ifdef DEBUG
63 67 int check_debug = 0;
64 68 #define CHECK_ACTIVE_LOCKS(gp) if (check_debug) \
65 69 check_active_locks(gp);
66 70 #define CHECK_SLEEPING_LOCKS(gp) if (check_debug) \
67 71 check_sleeping_locks(gp);
68 72 #define CHECK_OWNER_LOCKS(gp, pid, sysid, vp) \
69 73 if (check_debug) \
70 74 check_owner_locks(gp, pid, sysid, vp);
71 75 #define CHECK_LOCK_TRANSITION(old_state, new_state) \
72 76 { \
73 77 if (check_lock_transition(old_state, new_state)) { \
74 78 cmn_err(CE_PANIC, "Illegal lock transition \
75 79 from %d to %d", old_state, new_state); \
76 80 } \
77 81 }
78 82 #else
79 83
80 84 #define CHECK_ACTIVE_LOCKS(gp)
81 85 #define CHECK_SLEEPING_LOCKS(gp)
82 86 #define CHECK_OWNER_LOCKS(gp, pid, sysid, vp)
83 87 #define CHECK_LOCK_TRANSITION(old_state, new_state)
84 88
85 89 #endif /* DEBUG */
86 90
87 91 struct kmem_cache *flk_edge_cache;
88 92
89 93 graph_t *lock_graph[HASH_SIZE];
90 94 proc_graph_t pgraph;
91 95
92 96 /*
93 97 * Clustering.
94 98 *
95 99 * NLM REGISTRY TYPE IMPLEMENTATION
96 100 *
97 101 * Assumptions:
98 102 * 1. Nodes in a cluster are numbered starting at 1; always non-negative
99 103 * integers; maximum node id is returned by clconf_maximum_nodeid().
100 104 * 2. We use this node id to identify the node an NLM server runs on.
101 105 */
102 106
103 107 /*
104 108 * NLM registry object keeps track of NLM servers via their
105 109 * nlmids (which are the node ids of the node in the cluster they run on)
106 110 * that have requested locks at this LLM with which this registry is
107 111 * associated.
108 112 *
109 113 * Representation of abstraction:
110 114 * rep = record[ states: array[nlm_state],
111 115 * lock: mutex]
112 116 *
113 117 * Representation invariants:
114 118 * 1. index i of rep.states is between 0 and n - 1 where n is number
115 119 * of elements in the array, which happen to be the maximum number
116 120 * of nodes in the cluster configuration + 1.
117 121 * 2. map nlmid to index i of rep.states
118 122 * 0 -> 0
119 123 * 1 -> 1
120 124 * 2 -> 2
121 125 * n-1 -> clconf_maximum_nodeid()+1
122 126 * 3. This 1-1 mapping is quite convenient and it avoids errors resulting
123 127 * from forgetting to subtract 1 from the index.
124 128 * 4. The reason we keep the 0th index is the following. A legitimate
125 129 * cluster configuration includes making a UFS file system NFS
126 130 * exportable. The code is structured so that if you're in a cluster
127 131 * you do one thing; otherwise, you do something else. The problem
128 132 * is what to do if you think you're in a cluster with PXFS loaded,
129 133 * but you're using UFS not PXFS? The upper two bytes of the sysid
130 134 * encode the node id of the node where NLM server runs; these bytes
131 135 * are zero for UFS. Since the nodeid is used to index into the
132 136 * registry, we can record the NLM server state information at index
133 137 * 0 using the same mechanism used for PXFS file locks!
134 138 */
135 139 static flk_nlm_status_t *nlm_reg_status = NULL; /* state array 0..N-1 */
 136 140 static kmutex_t nlm_reg_lock;			/* lock to protect array */
137 141 static uint_t nlm_status_size; /* size of state array */
138 142
139 143 /*
140 144 * Although we need a global lock dependency graph (and associated data
141 145 * structures), we also need a per-zone notion of whether the lock manager is
142 146 * running, and so whether to allow lock manager requests or not.
143 147 *
144 148 * Thus, on a per-zone basis we maintain a ``global'' variable
145 149 * (flk_lockmgr_status), protected by flock_lock, and set when the lock
146 150 * manager is determined to be changing state (starting or stopping).
147 151 *
148 152 * Each graph/zone pair also has a copy of this variable, which is protected by
149 153 * the graph's mutex.
150 154 *
151 155 * The per-graph copies are used to synchronize lock requests with shutdown
152 156 * requests. The global copy is used to initialize the per-graph field when a
153 157 * new graph is created.
154 158 */
155 159 struct flock_globals {
156 160 flk_lockmgr_status_t flk_lockmgr_status;
157 161 flk_lockmgr_status_t lockmgr_status[HASH_SIZE];
158 162 };
159 163
160 164 zone_key_t flock_zone_key;
161 165
 166 +/*
 167 + * Support for remote stale lock detection
 168 + *
 169 + * The sysid_to_host_translator_lock readers/writer lock protects
 170 + * sysid_to_host_translator_list.
 171 + *
 172 + * sysid_to_host_translator_list is a list of sysid-to-host-name translator
 173 + * functions. New translators are registered with the public
 174 + * flk_add_sysid_to_host_translator() call.
 175 + *
 176 + * stale_lock_timeout is in seconds; a remote lock is reported as stale once
 177 + * it is still blocking requests this long after it was first seen blocking
 178 + * one. When set to 0, remote stale lock checking is disabled.
 179 + */
180 +struct sysid_to_host_translator_entry {
181 + sysid_to_host_translator_t translator;
182 + list_node_t node;
183 +};
184 +static krwlock_t sysid_to_host_translator_lock;
185 +static list_t sysid_to_host_translator_list;
186 +volatile int stale_lock_timeout = 3600; /* one hour, in seconds */
187 +
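
The declarations above are the public surface of the new feature. A minimal sketch of how a remote file-service module might supply a translator follows; the translator signature is inferred from the call site in translate_sysid_to_host() further down (fill in a socket address and a short protocol-type string, return non-zero on success), and example_sysid_to_host(), example_host_lookup() and example_register_translator() are hypothetical names, not part of this change.

/*
 * Sketch only: a hypothetical IPv4 translator and its registration.
 * The flk_add_sysid_to_host_translator() declaration is assumed to be
 * exported to consumers through the flock headers.
 */
static int
example_sysid_to_host(zoneid_t zoneid, sysid_t sysid, struct sockaddr *sa,
    const char **type)
{
	struct sockaddr_in *sin = (struct sockaddr_in *)sa;

	/* hypothetical lookup in the module's per-zone host table */
	if (example_host_lookup(zoneid, sysid, &sin->sin_addr) != 0)
		return (0);	/* unknown sysid; let the next translator try */

	sin->sin_family = AF_INET;
	*type = "NLM";
	return (1);		/* *sa and *type are now valid */
}

static void
example_register_translator(void)
{
	/* typically called once, e.g. from the module's _init() path */
	flk_add_sysid_to_host_translator(example_sysid_to_host);
}
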
162 188 static void create_flock(lock_descriptor_t *, flock64_t *);
163 189 static lock_descriptor_t *flk_get_lock(void);
164 190 static void flk_free_lock(lock_descriptor_t *lock);
165 -static void flk_get_first_blocking_lock(lock_descriptor_t *request);
191 +static void flk_get_first_blocking_lock(lock_descriptor_t *);
166 192 static int flk_process_request(lock_descriptor_t *);
167 193 static int flk_add_edge(lock_descriptor_t *, lock_descriptor_t *, int, int);
168 194 static edge_t *flk_get_edge(void);
169 195 static int flk_wait_execute_request(lock_descriptor_t *);
170 196 static int flk_relation(lock_descriptor_t *, lock_descriptor_t *);
171 197 static void flk_insert_active_lock(lock_descriptor_t *);
172 198 static void flk_delete_active_lock(lock_descriptor_t *, int);
173 199 static void flk_insert_sleeping_lock(lock_descriptor_t *);
174 200 static void flk_graph_uncolor(graph_t *);
175 201 static void flk_wakeup(lock_descriptor_t *, int);
176 202 static void flk_free_edge(edge_t *);
177 203 static void flk_recompute_dependencies(lock_descriptor_t *,
178 204 lock_descriptor_t **, int, int);
179 205 static int flk_find_barriers(lock_descriptor_t *);
180 206 static void flk_update_barriers(lock_descriptor_t *);
181 207 static int flk_color_reachables(lock_descriptor_t *);
182 208 static int flk_canceled(lock_descriptor_t *);
183 209 static void flk_delete_locks_by_sysid(lock_descriptor_t *);
184 210 static void report_blocker(lock_descriptor_t *, lock_descriptor_t *);
185 211 static void wait_for_lock(lock_descriptor_t *);
186 212 static void unlock_lockmgr_granted(struct flock_globals *);
187 213 static void wakeup_sleeping_lockmgr_locks(struct flock_globals *);
188 214
189 215 /* Clustering hooks */
190 216 static void cl_flk_change_nlm_state_all_locks(int, flk_nlm_status_t);
191 217 static void cl_flk_wakeup_sleeping_nlm_locks(int);
192 218 static void cl_flk_unlock_nlm_granted(int);
193 219
194 220 #ifdef DEBUG
195 221 static int check_lock_transition(int, int);
196 222 static void check_sleeping_locks(graph_t *);
197 223 static void check_active_locks(graph_t *);
198 224 static int no_path(lock_descriptor_t *, lock_descriptor_t *);
199 225 static void path(lock_descriptor_t *, lock_descriptor_t *);
200 226 static void check_owner_locks(graph_t *, pid_t, int, vnode_t *);
201 227 static int level_one_path(lock_descriptor_t *, lock_descriptor_t *);
202 228 static int level_two_path(lock_descriptor_t *, lock_descriptor_t *, int);
203 229 #endif
204 230
205 231 /* proc_graph function definitions */
206 232 static int flk_check_deadlock(lock_descriptor_t *);
207 233 static void flk_proc_graph_uncolor(void);
208 234 static proc_vertex_t *flk_get_proc_vertex(lock_descriptor_t *);
209 235 static proc_edge_t *flk_get_proc_edge(void);
210 236 static void flk_proc_release(proc_vertex_t *);
211 237 static void flk_free_proc_edge(proc_edge_t *);
212 238 static void flk_update_proc_graph(edge_t *, int);
213 239
214 240 /* Non-blocking mandatory locking */
215 241 static int lock_blocks_io(nbl_op_t, u_offset_t, ssize_t, int, u_offset_t,
216 242 u_offset_t);
217 243
218 244 static struct flock_globals *
219 245 flk_get_globals(void)
220 246 {
221 247 /*
222 248 * The KLM module had better be loaded if we're attempting to handle
223 249 * lockmgr requests.
224 250 */
225 251 ASSERT(flock_zone_key != ZONE_KEY_UNINITIALIZED);
226 252 return (zone_getspecific(flock_zone_key, curproc->p_zone));
227 253 }
228 254
229 255 static flk_lockmgr_status_t
230 256 flk_get_lockmgr_status(void)
231 257 {
232 258 struct flock_globals *fg;
233 259
234 260 ASSERT(MUTEX_HELD(&flock_lock));
235 261
236 262 if (flock_zone_key == ZONE_KEY_UNINITIALIZED) {
237 263 /*
238 264 * KLM module not loaded; lock manager definitely not running.
239 265 */
240 266 return (FLK_LOCKMGR_DOWN);
241 267 }
242 268 fg = flk_get_globals();
243 269 return (fg->flk_lockmgr_status);
244 270 }
245 271
246 272 /*
247 273 * This implements Open File Description (not descriptor) style record locking.
248 274 * These locks can also be thought of as pid-less since they are not tied to a
249 275 * specific process, thus they're preserved across fork.
250 276 *
251 277 * Called directly from fcntl.
252 278 *
253 279 * See reclock() for the implementation of the traditional POSIX style record
254 280 * locking scheme (pid-ful). This function is derived from reclock() but
255 281 * simplified and modified to work for OFD style locking.
256 282 *
257 283 * The two primary advantages of OFD style of locking are:
258 284 * 1) It is per-file description, so closing a file descriptor that refers to a
259 285 * different file description for the same file will not drop the lock (i.e.
260 286 * two open's of the same file get different descriptions but a dup or fork
261 287 * will refer to the same description).
262 288 * 2) Locks are preserved across fork(2).
263 289 *
264 290 * Because these locks are per-description a lock ptr lives at the f_filocks
265 291 * member of the file_t and the lock_descriptor includes a file_t pointer
266 292 * to enable unique lock identification and management.
267 293 *
268 294 * Since these locks are pid-less we cannot do deadlock detection with the
269 295 * current process-oriented implementation. This is consistent with OFD locking
270 296 * behavior on other operating systems such as Linux. Since we don't do
271 297 * deadlock detection we never interact with the process graph that is
272 298 * maintained for deadlock detection on the traditional POSIX-style locks.
273 299 *
274 300 * Future Work:
275 301 *
276 302 * The current implementation does not support record locks. That is,
277 303 * currently the single lock must cover the entire file. This is validated in
278 304 * fcntl. To support record locks the f_filock pointer in the file_t needs to
279 305 * be changed to a list of pointers to the locks. That list needs to be
280 306 * managed independently of the lock list on the vnode itself and it needs to
281 307 * be maintained as record locks are created, split, coalesced and deleted.
282 308 *
283 309 * The current implementation does not support remote file systems (e.g.
284 310 * NFS or CIFS). This is handled in fs_frlock(). The design of how OFD locks
285 311 * interact with the NLM is not clear since the NLM protocol/implementation
286 312 * appears to be oriented around locks associated with a process. A further
287 313 * problem is that a design is needed for what nlm_send_siglost() should do and
288 314 * where it will send SIGLOST. More recent versions of Linux apparently try to
289 315 * emulate OFD locks on NFS by converting them to traditional POSIX style locks
290 316 * that work with the NLM. It is not clear that this provides the correct
291 317 * semantics in all cases.
292 318 */
293 319 int
294 320 ofdlock(file_t *fp, int fcmd, flock64_t *lckdat, int flag, u_offset_t offset)
295 321 {
296 322 int cmd = 0;
297 323 vnode_t *vp;
298 324 lock_descriptor_t stack_lock_request;
299 325 lock_descriptor_t *lock_request;
300 326 int error = 0;
301 327 graph_t *gp;
302 328 int serialize = 0;
303 329
304 330 if (fcmd != F_OFD_GETLK)
305 331 cmd = SETFLCK;
306 332
307 333 if (fcmd == F_OFD_SETLKW || fcmd == F_FLOCKW)
308 334 cmd |= SLPFLCK;
309 335
310 336 /* see block comment */
311 337 VERIFY(lckdat->l_whence == 0);
312 338 VERIFY(lckdat->l_start == 0);
313 339 VERIFY(lckdat->l_len == 0);
314 340
315 341 vp = fp->f_vnode;
316 342
317 343 /*
318 344 * For reclock fs_frlock() would normally have set these in a few
319 345 * places but for us it's cleaner to centralize it here. Note that
320 346 * IGN_PID is -1. We use 0 for our pid-less locks.
321 347 */
322 348 lckdat->l_pid = 0;
323 349 lckdat->l_sysid = 0;
324 350
325 351 /*
326 352 * Check access permissions
327 353 */
328 354 if ((fcmd == F_OFD_SETLK || fcmd == F_OFD_SETLKW) &&
329 355 ((lckdat->l_type == F_RDLCK && (flag & FREAD) == 0) ||
330 356 (lckdat->l_type == F_WRLCK && (flag & FWRITE) == 0)))
331 357 return (EBADF);
332 358
333 359 /*
334 360 * for query and unlock we use the stack_lock_request
335 361 */
336 362 if (lckdat->l_type == F_UNLCK || !(cmd & SETFLCK)) {
337 363 lock_request = &stack_lock_request;
338 364 (void) bzero((caddr_t)lock_request,
339 365 sizeof (lock_descriptor_t));
340 366
341 367 /*
342 368 * following is added to make the assertions in
343 369 * flk_execute_request() pass
344 370 */
345 371 lock_request->l_edge.edge_in_next = &lock_request->l_edge;
346 372 lock_request->l_edge.edge_in_prev = &lock_request->l_edge;
347 373 lock_request->l_edge.edge_adj_next = &lock_request->l_edge;
348 374 lock_request->l_edge.edge_adj_prev = &lock_request->l_edge;
349 375 lock_request->l_status = FLK_INITIAL_STATE;
350 376 } else {
351 377 lock_request = flk_get_lock();
352 378 fp->f_filock = (struct filock *)lock_request;
353 379 }
354 380 lock_request->l_state = 0;
355 381 lock_request->l_vnode = vp;
356 382 lock_request->l_zoneid = getzoneid();
357 383 lock_request->l_ofd = fp;
358 384
359 385 /*
360 386 * Convert the request range into the canonical start and end
361 387 * values then check the validity of the lock range.
362 388 */
363 389 error = flk_convert_lock_data(vp, lckdat, &lock_request->l_start,
364 390 &lock_request->l_end, offset);
365 391 if (error)
366 392 goto done;
367 393
368 394 error = flk_check_lock_data(lock_request->l_start, lock_request->l_end,
369 395 MAXEND);
370 396 if (error)
371 397 goto done;
372 398
373 399 ASSERT(lock_request->l_end >= lock_request->l_start);
374 400
375 401 lock_request->l_type = lckdat->l_type;
376 402 if (cmd & SLPFLCK)
377 403 lock_request->l_state |= WILLING_TO_SLEEP_LOCK;
378 404
379 405 if (!(cmd & SETFLCK)) {
380 406 if (lock_request->l_type == F_RDLCK ||
381 407 lock_request->l_type == F_WRLCK)
382 408 lock_request->l_state |= QUERY_LOCK;
383 409 }
384 410 lock_request->l_flock = (*lckdat);
385 411
386 412 /*
387 413 * We are ready for processing the request
388 414 */
389 415
390 416 if (fcmd != F_OFD_GETLK && lock_request->l_type != F_UNLCK &&
391 417 nbl_need_check(vp)) {
392 418 nbl_start_crit(vp, RW_WRITER);
393 419 serialize = 1;
394 420 }
395 421
396 422 /* Get the lock graph for a particular vnode */
397 423 gp = flk_get_lock_graph(vp, FLK_INIT_GRAPH);
398 424
399 425 mutex_enter(&gp->gp_mutex);
400 426
401 427 lock_request->l_state |= REFERENCED_LOCK;
402 428 lock_request->l_graph = gp;
403 429
404 430 switch (lock_request->l_type) {
405 431 case F_RDLCK:
406 432 case F_WRLCK:
407 433 if (IS_QUERY_LOCK(lock_request)) {
408 434 flk_get_first_blocking_lock(lock_request);
409 435 if (lock_request->l_ofd != NULL)
410 436 lock_request->l_flock.l_pid = -1;
411 437 (*lckdat) = lock_request->l_flock;
412 438 } else {
413 439 /* process the request now */
414 440 error = flk_process_request(lock_request);
415 441 }
416 442 break;
417 443
418 444 case F_UNLCK:
419 445 /* unlock request will not block so execute it immediately */
420 446 error = flk_execute_request(lock_request);
421 447 break;
422 448
423 449 default:
424 450 error = EINVAL;
425 451 break;
426 452 }
427 453
428 454 if (lock_request == &stack_lock_request) {
429 455 flk_set_state(lock_request, FLK_DEAD_STATE);
430 456 } else {
431 457 lock_request->l_state &= ~REFERENCED_LOCK;
432 458 if ((error != 0) || IS_DELETED(lock_request)) {
433 459 flk_set_state(lock_request, FLK_DEAD_STATE);
434 460 flk_free_lock(lock_request);
435 461 }
436 462 }
437 463
438 464 mutex_exit(&gp->gp_mutex);
439 465 if (serialize)
440 466 nbl_end_crit(vp);
441 467
442 468 return (error);
443 469
444 470 done:
445 471 flk_set_state(lock_request, FLK_DEAD_STATE);
446 472 if (lock_request != &stack_lock_request)
447 473 flk_free_lock(lock_request);
448 474 return (error);
449 475 }
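
For reference, a minimal user-level sketch of the OFD semantics described in the block comment above ofdlock(); the file name is arbitrary and error handling is abbreviated. ofdlock() currently requires whole-file locks (l_whence, l_start and l_len all zero), which the sketch respects; the lock belongs to the open file description, so it survives fork() and is shared by dup()ed descriptors.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	struct flock fl;
	int fd = open("/tmp/ofd-demo", O_RDWR | O_CREAT, 0644);

	if (fd < 0)
		return (1);

	(void) memset(&fl, 0, sizeof (fl));
	fl.l_type = F_WRLCK;		/* whole-file write lock */
	fl.l_whence = SEEK_SET;		/* l_start and l_len stay 0 */

	if (fcntl(fd, F_OFD_SETLK, &fl) == -1) {
		perror("F_OFD_SETLK");
		return (1);
	}

	/*
	 * A child created with fork() shares this file description and
	 * therefore this lock; an independent open() of the same file
	 * would get EAGAIN from F_OFD_SETLK while the lock is held.
	 */
	return (0);
}
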
450 476
451 477 /*
452 478 * Remove any lock on the vnode belonging to the given file_t.
453 479 * Called from closef on last close, file_t is locked.
454 480 *
455 481 * This is modeled on the cleanlocks() function but only removes the single
456 482 * lock associated with fp.
457 483 */
458 484 void
459 485 ofdcleanlock(file_t *fp)
460 486 {
461 487 lock_descriptor_t *fplock, *lock, *nlock;
462 488 vnode_t *vp;
463 489 graph_t *gp;
464 490
465 491 ASSERT(MUTEX_HELD(&fp->f_tlock));
466 492
467 493 if ((fplock = (lock_descriptor_t *)fp->f_filock) == NULL)
468 494 return;
469 495
470 496 fp->f_filock = NULL;
471 497 vp = fp->f_vnode;
472 498
473 499 gp = flk_get_lock_graph(vp, FLK_USE_GRAPH);
474 500
475 501 if (gp == NULL)
476 502 return;
477 503 mutex_enter(&gp->gp_mutex);
478 504
479 505 CHECK_SLEEPING_LOCKS(gp);
480 506 CHECK_ACTIVE_LOCKS(gp);
481 507
482 508 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
483 509
484 510 if (lock) {
485 511 do {
486 512 nlock = lock->l_next;
487 513 if (fplock == lock) {
488 514 CANCEL_WAKEUP(lock);
489 515 break;
490 516 }
491 517 lock = nlock;
492 518 } while (lock->l_vnode == vp);
493 519 }
494 520
495 521 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
496 522
497 523 if (lock) {
498 524 do {
499 525 nlock = lock->l_next;
500 526 if (fplock == lock) {
501 527 flk_delete_active_lock(lock, 0);
502 528 flk_wakeup(lock, 1);
503 529 flk_free_lock(lock);
504 530 break;
505 531 }
506 532 lock = nlock;
507 533 } while (lock->l_vnode == vp);
508 534 }
509 535
510 536 CHECK_SLEEPING_LOCKS(gp);
511 537 CHECK_ACTIVE_LOCKS(gp);
512 538 mutex_exit(&gp->gp_mutex);
513 539 }
514 540
515 541 /*
516 542 * Routine called from fs_frlock in fs/fs_subr.c
517 543 *
518 544 * This implements traditional POSIX style record locking. The two primary
519 545 * drawbacks to this style of locking are:
520 546 * 1) It is per-process, so any close of a file descriptor that refers to the
521 547 * file will drop the lock (e.g. lock /etc/passwd, call a library function
 522 548  * which opens /etc/passwd to read the file, when the library closes its
523 549 * file descriptor the application loses its lock and does not know).
524 550 * 2) Locks are not preserved across fork(2).
525 551 *
526 552 * Because these locks are only associated with a PID, they are per-process.
527 553 * This is why any close will drop the lock and is also why, once the process
528 554 * forks, the lock is no longer related to the new process. These locks can
529 555 * be considered as PID-ful.
530 556 *
531 557 * See ofdlock() for the implementation of a similar but improved locking
532 558 * scheme.
533 559 */
534 560 int
535 561 reclock(vnode_t *vp, flock64_t *lckdat, int cmd, int flag, u_offset_t offset,
536 562 flk_callback_t *flk_cbp)
537 563 {
538 564 lock_descriptor_t stack_lock_request;
539 565 lock_descriptor_t *lock_request;
540 566 int error = 0;
541 567 graph_t *gp;
542 568 int nlmid;
543 569
544 570 /*
545 571 * Check access permissions
546 572 */
547 573 if ((cmd & SETFLCK) &&
548 574 ((lckdat->l_type == F_RDLCK && (flag & FREAD) == 0) ||
|
↓ open down ↓ |
373 lines elided |
↑ open up ↑ |
549 575 (lckdat->l_type == F_WRLCK && (flag & FWRITE) == 0)))
550 576 return (EBADF);
551 577
552 578 /*
553 579 * for query and unlock we use the stack_lock_request
554 580 */
555 581
556 582 if ((lckdat->l_type == F_UNLCK) ||
557 583 !((cmd & INOFLCK) || (cmd & SETFLCK))) {
558 584 lock_request = &stack_lock_request;
559 - (void) bzero((caddr_t)lock_request,
560 - sizeof (lock_descriptor_t));
585 + bzero(lock_request, sizeof (lock_descriptor_t));
561 586
562 587 /*
563 588 * following is added to make the assertions in
564 589 * flk_execute_request() to pass through
565 590 */
566 591
567 592 lock_request->l_edge.edge_in_next = &lock_request->l_edge;
568 593 lock_request->l_edge.edge_in_prev = &lock_request->l_edge;
569 594 lock_request->l_edge.edge_adj_next = &lock_request->l_edge;
570 595 lock_request->l_edge.edge_adj_prev = &lock_request->l_edge;
571 596 lock_request->l_status = FLK_INITIAL_STATE;
572 597 } else {
573 598 lock_request = flk_get_lock();
574 599 }
575 600 lock_request->l_state = 0;
576 601 lock_request->l_vnode = vp;
577 602 lock_request->l_zoneid = getzoneid();
578 603
579 604 /*
580 605 * Convert the request range into the canonical start and end
581 606 * values. The NLM protocol supports locking over the entire
582 607 * 32-bit range, so there's no range checking for remote requests,
583 608 * but we still need to verify that local requests obey the rules.
584 609 */
585 610 /* Clustering */
586 611 if ((cmd & (RCMDLCK | PCMDLCK)) != 0) {
587 612 ASSERT(lckdat->l_whence == 0);
588 613 lock_request->l_start = lckdat->l_start;
589 614 lock_request->l_end = (lckdat->l_len == 0) ? MAX_U_OFFSET_T :
590 615 lckdat->l_start + (lckdat->l_len - 1);
591 616 } else {
592 617 /* check the validity of the lock range */
593 618 error = flk_convert_lock_data(vp, lckdat,
594 619 &lock_request->l_start, &lock_request->l_end,
595 620 offset);
596 621 if (error) {
597 622 goto done;
598 623 }
599 624 error = flk_check_lock_data(lock_request->l_start,
600 625 lock_request->l_end, MAXEND);
601 626 if (error) {
602 627 goto done;
603 628 }
604 629 }
605 630
606 631 ASSERT(lock_request->l_end >= lock_request->l_start);
607 632
608 633 lock_request->l_type = lckdat->l_type;
609 634 if (cmd & INOFLCK)
610 635 lock_request->l_state |= IO_LOCK;
611 636 if (cmd & SLPFLCK)
612 637 lock_request->l_state |= WILLING_TO_SLEEP_LOCK;
613 638 if (cmd & RCMDLCK)
614 639 lock_request->l_state |= LOCKMGR_LOCK;
615 640 if (cmd & NBMLCK)
616 641 lock_request->l_state |= NBMAND_LOCK;
617 642 /*
618 643 * Clustering: set flag for PXFS locks
619 644 * We do not _only_ check for the PCMDLCK flag because PXFS locks could
620 645 * also be of type 'RCMDLCK'.
621 646 * We do not _only_ check the GETPXFSID() macro because local PXFS
622 647 * clients use a pxfsid of zero to permit deadlock detection in the LLM.
623 648 */
624 649
625 650 if ((cmd & PCMDLCK) || (GETPXFSID(lckdat->l_sysid) != 0)) {
626 651 lock_request->l_state |= PXFS_LOCK;
627 652 }
628 653 if (!((cmd & SETFLCK) || (cmd & INOFLCK))) {
629 654 if (lock_request->l_type == F_RDLCK ||
630 655 lock_request->l_type == F_WRLCK)
631 656 lock_request->l_state |= QUERY_LOCK;
632 657 }
633 658 lock_request->l_flock = (*lckdat);
634 659 lock_request->l_callbacks = flk_cbp;
635 660
636 661 /*
637 662 * We are ready for processing the request
638 663 */
639 664 if (IS_LOCKMGR(lock_request)) {
640 665 /*
641 666 * If the lock request is an NLM server request ....
642 667 */
643 668 if (nlm_status_size == 0) { /* not booted as cluster */
644 669 mutex_enter(&flock_lock);
645 670 /*
646 671 * Bail out if this is a lock manager request and the
647 672 * lock manager is not supposed to be running.
648 673 */
649 674 if (flk_get_lockmgr_status() != FLK_LOCKMGR_UP) {
650 675 mutex_exit(&flock_lock);
651 676 error = ENOLCK;
652 677 goto done;
653 678 }
654 679 mutex_exit(&flock_lock);
655 680 } else { /* booted as a cluster */
656 681 nlmid = GETNLMID(lock_request->l_flock.l_sysid);
657 682 ASSERT(nlmid <= nlm_status_size && nlmid >= 0);
658 683
659 684 mutex_enter(&nlm_reg_lock);
660 685 /*
661 686 * If the NLM registry does not know about this
662 687 * NLM server making the request, add its nlmid
663 688 * to the registry.
664 689 */
665 690 if (FLK_REGISTRY_IS_NLM_UNKNOWN(nlm_reg_status,
666 691 nlmid)) {
667 692 FLK_REGISTRY_ADD_NLMID(nlm_reg_status, nlmid);
668 693 } else if (!FLK_REGISTRY_IS_NLM_UP(nlm_reg_status,
669 694 nlmid)) {
670 695 /*
671 696 * If the NLM server is already known (has made
672 697 * previous lock requests) and its state is
673 698 * not NLM_UP (means that NLM server is
674 699 * shutting down), then bail out with an
675 700 * error to deny the lock request.
676 701 */
677 702 mutex_exit(&nlm_reg_lock);
678 703 error = ENOLCK;
679 704 goto done;
680 705 }
681 706 mutex_exit(&nlm_reg_lock);
682 707 }
683 708 }
684 709
685 710 /* Now get the lock graph for a particular vnode */
686 711 gp = flk_get_lock_graph(vp, FLK_INIT_GRAPH);
687 712
688 713 /*
689 714 * We drop rwlock here otherwise this might end up causing a
690 715 * deadlock if this IOLOCK sleeps. (bugid # 1183392).
691 716 */
692 717
693 718 if (IS_IO_LOCK(lock_request)) {
694 719 VOP_RWUNLOCK(vp,
695 720 (lock_request->l_type == F_RDLCK) ?
696 721 V_WRITELOCK_FALSE : V_WRITELOCK_TRUE, NULL);
697 722 }
698 723 mutex_enter(&gp->gp_mutex);
699 724
700 725 lock_request->l_state |= REFERENCED_LOCK;
701 726 lock_request->l_graph = gp;
702 727
703 728 switch (lock_request->l_type) {
704 729 case F_RDLCK:
705 730 case F_WRLCK:
706 731 if (IS_QUERY_LOCK(lock_request)) {
707 732 flk_get_first_blocking_lock(lock_request);
708 733 if (lock_request->l_ofd != NULL)
709 734 lock_request->l_flock.l_pid = -1;
710 735 (*lckdat) = lock_request->l_flock;
711 736 break;
712 737 }
713 738
714 739 /* process the request now */
715 740
716 741 error = flk_process_request(lock_request);
717 742 break;
718 743
719 744 case F_UNLCK:
720 745 /* unlock request will not block so execute it immediately */
721 746
722 747 if (IS_LOCKMGR(lock_request) &&
723 748 flk_canceled(lock_request)) {
724 749 error = 0;
725 750 } else {
726 751 error = flk_execute_request(lock_request);
727 752 }
728 753 break;
729 754
730 755 case F_UNLKSYS:
731 756 /*
732 757 * Recovery mechanism to release lock manager locks when
 733 758 		 * an NFS client crashes and restarts. The NFS server will clear
734 759 * old locks and grant new locks.
735 760 */
736 761
737 762 if (lock_request->l_flock.l_sysid == 0) {
738 763 mutex_exit(&gp->gp_mutex);
739 764 return (EINVAL);
740 765 }
741 766 if (secpolicy_nfs(CRED()) != 0) {
742 767 mutex_exit(&gp->gp_mutex);
743 768 return (EPERM);
744 769 }
745 770 flk_delete_locks_by_sysid(lock_request);
746 771 lock_request->l_state &= ~REFERENCED_LOCK;
747 772 flk_set_state(lock_request, FLK_DEAD_STATE);
748 773 flk_free_lock(lock_request);
749 774 mutex_exit(&gp->gp_mutex);
750 775 return (0);
751 776
752 777 default:
753 778 error = EINVAL;
754 779 break;
755 780 }
756 781
757 782 /* Clustering: For blocked PXFS locks, return */
758 783 if (error == PXFS_LOCK_BLOCKED) {
759 784 lock_request->l_state &= ~REFERENCED_LOCK;
760 785 mutex_exit(&gp->gp_mutex);
761 786 return (error);
762 787 }
763 788
764 789 /*
765 790 * Now that we have seen the status of locks in the system for
766 791 * this vnode we acquire the rwlock if it is an IO_LOCK.
767 792 */
768 793
769 794 if (IS_IO_LOCK(lock_request)) {
770 795 (void) VOP_RWLOCK(vp,
771 796 (lock_request->l_type == F_RDLCK) ?
772 797 V_WRITELOCK_FALSE : V_WRITELOCK_TRUE, NULL);
773 798 if (!error) {
774 799 lckdat->l_type = F_UNLCK;
775 800
776 801 /*
777 802 * This wake up is needed otherwise
778 803 * if IO_LOCK has slept the dependents on this
779 804 * will not be woken up at all. (bugid # 1185482).
780 805 */
781 806
782 807 flk_wakeup(lock_request, 1);
783 808 flk_set_state(lock_request, FLK_DEAD_STATE);
784 809 flk_free_lock(lock_request);
785 810 }
786 811 /*
787 812 * else if error had occurred either flk_process_request()
788 813 * has returned EDEADLK in which case there will be no
789 814 * dependents for this lock or EINTR from flk_wait_execute_
790 815 * request() in which case flk_cancel_sleeping_lock()
791 816 * would have been done. same is true with EBADF.
792 817 */
793 818 }
794 819
795 820 if (lock_request == &stack_lock_request) {
796 821 flk_set_state(lock_request, FLK_DEAD_STATE);
797 822 } else {
798 823 lock_request->l_state &= ~REFERENCED_LOCK;
799 824 if ((error != 0) || IS_DELETED(lock_request)) {
800 825 flk_set_state(lock_request, FLK_DEAD_STATE);
801 826 flk_free_lock(lock_request);
802 827 }
803 828 }
804 829
805 830 mutex_exit(&gp->gp_mutex);
806 831 return (error);
807 832
808 833 done:
809 834 flk_set_state(lock_request, FLK_DEAD_STATE);
810 835 if (lock_request != &stack_lock_request)
811 836 flk_free_lock(lock_request);
812 837 return (error);
813 838 }
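
The first drawback listed in the comment above reclock() (any close of the file by the process drops its lock) is easy to demonstrate from user level; a sketch with an arbitrary file name and a stand-in for the offending library routine:

#include <fcntl.h>
#include <string.h>
#include <unistd.h>

static void
library_routine(const char *path)
{
	int fd = open(path, O_RDONLY);

	/* ... read something ... */
	(void) close(fd);	/* this close silently drops the caller's lock */
}

int
main(void)
{
	struct flock fl;
	int fd = open("/tmp/posix-demo", O_RDWR | O_CREAT, 0644);

	(void) memset(&fl, 0, sizeof (fl));
	fl.l_type = F_WRLCK;
	fl.l_whence = SEEK_SET;		/* whole file: l_start = l_len = 0 */

	(void) fcntl(fd, F_SETLK, &fl);

	library_routine("/tmp/posix-demo");

	/* The POSIX lock is gone here, even though fd is still open. */
	return (0);
}
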
814 839
815 840 /*
816 841 * Invoke the callbacks in the given list. If before sleeping, invoke in
817 842 * list order. If after sleeping, invoke in reverse order.
818 843 *
819 844 * CPR (suspend/resume) support: if one of the callbacks returns a
820 845 * callb_cpr_t, return it. This will be used to make the thread CPR-safe
821 846 * while it is sleeping. There should be at most one callb_cpr_t for the
822 847 * thread.
823 848 * XXX This is unnecessarily complicated. The CPR information should just
824 849 * get passed in directly through VOP_FRLOCK and reclock, rather than
825 850 * sneaking it in via a callback.
826 851 */
827 852
828 853 callb_cpr_t *
829 854 flk_invoke_callbacks(flk_callback_t *cblist, flk_cb_when_t when)
830 855 {
831 856 callb_cpr_t *cpr_callbackp = NULL;
832 857 callb_cpr_t *one_result;
833 858 flk_callback_t *cb;
834 859
835 860 if (cblist == NULL)
836 861 return (NULL);
837 862
838 863 if (when == FLK_BEFORE_SLEEP) {
839 864 cb = cblist;
840 865 do {
841 866 one_result = (*cb->cb_callback)(when, cb->cb_data);
842 867 if (one_result != NULL) {
843 868 ASSERT(cpr_callbackp == NULL);
844 869 cpr_callbackp = one_result;
845 870 }
846 871 cb = cb->cb_next;
847 872 } while (cb != cblist);
848 873 } else {
849 874 cb = cblist->cb_prev;
850 875 do {
851 876 one_result = (*cb->cb_callback)(when, cb->cb_data);
852 877 if (one_result != NULL) {
853 878 cpr_callbackp = one_result;
854 879 }
855 880 cb = cb->cb_prev;
856 881 } while (cb != cblist->cb_prev);
857 882 }
858 883
859 884 return (cpr_callbackp);
860 885 }
861 886
862 887 /*
863 888 * Initialize a flk_callback_t to hold the given callback.
864 889 */
865 890
866 891 void
867 892 flk_init_callback(flk_callback_t *flk_cb,
868 893 callb_cpr_t *(*cb_fcn)(flk_cb_when_t, void *), void *cbdata)
869 894 {
870 895 flk_cb->cb_next = flk_cb;
871 896 flk_cb->cb_prev = flk_cb;
872 897 flk_cb->cb_callback = cb_fcn;
873 898 flk_cb->cb_data = cbdata;
874 899 }
875 900
876 901 /*
877 902 * Initialize an flk_callback_t and then link it into the head of an
878 903 * existing list (which may be NULL).
879 904 */
880 905
881 906 void
882 907 flk_add_callback(flk_callback_t *newcb,
883 908 callb_cpr_t *(*cb_fcn)(flk_cb_when_t, void *),
884 909 void *cbdata, flk_callback_t *cblist)
885 910 {
886 911 flk_init_callback(newcb, cb_fcn, cbdata);
887 912
888 913 if (cblist == NULL)
889 914 return;
890 915
891 916 newcb->cb_prev = cblist->cb_prev;
892 917 newcb->cb_next = cblist;
893 918 cblist->cb_prev->cb_next = newcb;
894 919 cblist->cb_prev = newcb;
895 920 }
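
A minimal sketch of how a file system hands a callback to reclock() through this interface; the callback name and its private data are hypothetical, and returning NULL simply means the callback supplies no CPR information (returning a callb_cpr_t from the FLK_BEFORE_SLEEP invocation is what makes the sleeping thread CPR-safe, as described above flk_invoke_callbacks()). flk_add_callback() would be used instead when the request already carries a callback list.

static callb_cpr_t *
example_frlock_cb(flk_cb_when_t when, void *data)
{
	if (when == FLK_BEFORE_SLEEP) {
		/* drop resources that must not be held across the sleep */
	} else {
		/* FLK_AFTER_SLEEP: reacquire whatever was dropped above */
	}
	return (NULL);
}

	/* ... in the caller, before issuing the blocking request ... */
	flk_callback_t flk_cb;

	flk_init_callback(&flk_cb, example_frlock_cb, example_data);
	error = reclock(vp, lckdat, cmd, flag, offset, &flk_cb);
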
896 921
897 922 /*
898 923 * Remove the callback from a list.
899 924 */
900 925
901 926 void
902 927 flk_del_callback(flk_callback_t *flk_cb)
903 928 {
904 929 flk_cb->cb_next->cb_prev = flk_cb->cb_prev;
905 930 flk_cb->cb_prev->cb_next = flk_cb->cb_next;
906 931
907 932 flk_cb->cb_prev = flk_cb;
908 933 flk_cb->cb_next = flk_cb;
909 934 }
910 935
911 936 /*
912 937 * Initialize the flk_edge_cache data structure and create the
913 938 * nlm_reg_status array.
914 939 */
915 940
916 941 void
917 942 flk_init(void)
918 943 {
919 944 uint_t i;
920 945
921 946 flk_edge_cache = kmem_cache_create("flk_edges",
922 947 sizeof (struct edge), 0, NULL, NULL, NULL, NULL, NULL, 0);
923 948 if (flk_edge_cache == NULL) {
924 949 cmn_err(CE_PANIC, "Couldn't create flk_edge_cache\n");
925 950 }
926 951 /*
927 952 * Create the NLM registry object.
928 953 */
929 954
930 955 if (cluster_bootflags & CLUSTER_BOOTED) {
931 956 /*
932 957 * This routine tells you the maximum node id that will be used
933 958 * in the cluster. This number will be the size of the nlm
934 959 * registry status array. We add 1 because we will be using
935 960 * all entries indexed from 0 to maxnodeid; e.g., from 0
936 961 * to 64, for a total of 65 entries.
937 962 */
938 963 nlm_status_size = clconf_maximum_nodeid() + 1;
939 964 } else {
940 965 nlm_status_size = 0;
941 966 }
942 967
943 968 if (nlm_status_size != 0) { /* booted as a cluster */
944 969 nlm_reg_status = (flk_nlm_status_t *)
945 970 kmem_alloc(sizeof (flk_nlm_status_t) * nlm_status_size,
946 971 KM_SLEEP);
947 972
948 973 /* initialize all NLM states in array to NLM_UNKNOWN */
949 974 for (i = 0; i < nlm_status_size; i++) {
950 975 nlm_reg_status[i] = FLK_NLM_UNKNOWN;
951 976 }
952 977 }
978 +
979 + mutex_init(&flock_lock, NULL, MUTEX_DEFAULT, NULL);
980 + mutex_init(&nlm_reg_lock, NULL, MUTEX_DEFAULT, NULL);
981 +
982 + rw_init(&sysid_to_host_translator_lock, NULL, RW_DEFAULT, NULL);
983 + list_create(&sysid_to_host_translator_list,
984 + sizeof (struct sysid_to_host_translator_entry),
985 + offsetof(struct sysid_to_host_translator_entry, node));
953 986 }
954 987
955 988 /*
956 989 * Zone constructor/destructor callbacks to be executed when a zone is
957 990 * created/destroyed.
958 991 */
959 992 /* ARGSUSED */
960 993 void *
961 994 flk_zone_init(zoneid_t zoneid)
962 995 {
963 996 struct flock_globals *fg;
964 997 uint_t i;
965 998
966 999 fg = kmem_alloc(sizeof (*fg), KM_SLEEP);
967 1000 fg->flk_lockmgr_status = FLK_LOCKMGR_UP;
968 1001 for (i = 0; i < HASH_SIZE; i++)
969 1002 fg->lockmgr_status[i] = FLK_LOCKMGR_UP;
970 1003 return (fg);
971 1004 }
972 1005
973 1006 /* ARGSUSED */
974 1007 void
975 1008 flk_zone_fini(zoneid_t zoneid, void *data)
976 1009 {
977 1010 struct flock_globals *fg = data;
978 1011
979 1012 kmem_free(fg, sizeof (*fg));
980 1013 }
981 1014
982 1015 /*
983 1016 * Get a lock_descriptor structure with initialization of edge lists.
984 1017 */
985 1018
986 1019 static lock_descriptor_t *
987 1020 flk_get_lock(void)
988 1021 {
989 1022 lock_descriptor_t *l;
990 1023
991 1024 l = kmem_zalloc(sizeof (lock_descriptor_t), KM_SLEEP);
992 1025
993 1026 cv_init(&l->l_cv, NULL, CV_DRIVER, NULL);
994 1027 l->l_edge.edge_in_next = &l->l_edge;
995 1028 l->l_edge.edge_in_prev = &l->l_edge;
996 1029 l->l_edge.edge_adj_next = &l->l_edge;
997 1030 l->l_edge.edge_adj_prev = &l->l_edge;
998 1031 l->pvertex = -1;
999 1032 l->l_status = FLK_INITIAL_STATE;
1000 1033 flk_lock_allocs++;
1001 1034 return (l);
1002 1035 }
1003 1036
1004 1037 /*
1005 1038 * Free a lock_descriptor structure. Just sets the DELETED_LOCK flag
1006 1039 * when some thread has a reference to it as in reclock().
1007 1040 */
1008 1041
1009 1042 void
1010 1043 flk_free_lock(lock_descriptor_t *lock)
1011 1044 {
1012 1045 file_t *fp;
1013 1046
1047 + ASSERT(lock->l_blocker >= 0);
1014 1048 ASSERT(IS_DEAD(lock));
1015 1049
1016 1050 if ((fp = lock->l_ofd) != NULL && fp->f_filock == (struct filock *)lock)
1017 1051 fp->f_filock = NULL;
1018 1052
1019 1053 if (IS_REFERENCED(lock)) {
1020 1054 lock->l_state |= DELETED_LOCK;
1021 1055 return;
1022 1056 }
1023 1057 flk_lock_frees++;
1024 - kmem_free((void *)lock, sizeof (lock_descriptor_t));
1058 + kmem_free(lock, sizeof (lock_descriptor_t));
1025 1059 }
1026 1060
1027 1061 void
1028 1062 flk_set_state(lock_descriptor_t *lock, int new_state)
1029 1063 {
1030 1064 /*
1031 1065 * Locks in the sleeping list may be woken up in a number of ways,
1032 1066 * and more than once. If a sleeping lock is signaled awake more
1033 1067 * than once, then it may or may not change state depending on its
1034 1068 * current state.
1035 1069 * Also note that NLM locks that are sleeping could be moved to an
1036 1070 * interrupted state more than once if the unlock request is
1037 1071 * retransmitted by the NLM client - the second time around, this is
1038 1072 * just a nop.
1039 1073 * The ordering of being signaled awake is:
1040 1074 * INTERRUPTED_STATE > CANCELLED_STATE > GRANTED_STATE.
1041 1075 * The checks below implement this ordering.
1042 1076 */
1043 1077 if (IS_INTERRUPTED(lock)) {
1044 1078 if ((new_state == FLK_CANCELLED_STATE) ||
1045 1079 (new_state == FLK_GRANTED_STATE) ||
1046 1080 (new_state == FLK_INTERRUPTED_STATE)) {
1047 1081 return;
1048 1082 }
1049 1083 }
1050 1084 if (IS_CANCELLED(lock)) {
1051 1085 if ((new_state == FLK_GRANTED_STATE) ||
1052 1086 (new_state == FLK_CANCELLED_STATE)) {
1053 1087 return;
1054 1088 }
1055 1089 }
1056 1090 CHECK_LOCK_TRANSITION(lock->l_status, new_state);
1057 1091 if (IS_PXFS(lock)) {
1058 1092 cl_flk_state_transition_notify(lock, lock->l_status, new_state);
1059 1093 }
1060 1094 lock->l_status = new_state;
1061 1095 }
1062 1096
1063 1097 /*
 1098 + * Support for remote stale lock detection
1099 + */
1100 +
1101 +void
1102 +flk_add_sysid_to_host_translator(sysid_to_host_translator_t tr)
1103 +{
1104 + struct sysid_to_host_translator_entry *te;
1105 +
1106 + te = kmem_alloc(sizeof (struct sysid_to_host_translator_entry),
1107 + KM_SLEEP);
1108 +
1109 + te->translator = tr;
1110 +
1111 + rw_enter(&sysid_to_host_translator_lock, RW_WRITER);
1112 + list_insert_head(&sysid_to_host_translator_list, te);
1113 + rw_exit(&sysid_to_host_translator_lock);
1114 +}
1115 +
1116 +static void
1117 +translate_sysid_to_host(zoneid_t zoneid, sysid_t sysid, char *host, size_t hlen,
1118 + const char **type)
1119 +{
 1120 +	struct sockaddr_storage sa;
1121 + struct sysid_to_host_translator_entry *te;
1122 +
 1123 +	/* Some defaults in case the translation fails */
1124 + *type = "?";
1125 + (void) strlcpy(host, "?", hlen);
1126 +
1127 + rw_enter(&sysid_to_host_translator_lock, RW_READER);
1128 +
1129 + for (te = list_head(&sysid_to_host_translator_list); te != NULL;
1130 + te = list_next(&sysid_to_host_translator_list, te)) {
 1131 +		if (te->translator(zoneid, sysid,
 1132 +		    (struct sockaddr *)&sa, type) != 0) {
1133 + rw_exit(&sysid_to_host_translator_lock);
1134 +
 1135 +			switch (sa.ss_family) {
1136 + case AF_INET:
1137 + (void) inet_ntop(AF_INET,
1138 + &((struct sockaddr_in *)&sa)->sin_addr,
1139 + host, hlen);
1140 + break;
1141 + case AF_INET6:
1142 + (void) inet_ntop(AF_INET6,
1143 + &((struct sockaddr_in6 *)&sa)->sin6_addr,
1144 + host, hlen);
1145 + break;
1146 + default:
1147 + break;
1148 + }
1149 +
1150 + return;
1151 + }
1152 + }
1153 +
1154 + rw_exit(&sysid_to_host_translator_lock);
1155 +}
1156 +
1157 +static char *
1158 +get_vnode_path(vnode_t *vp)
1159 +{
1160 + size_t len;
1161 + char *ret;
1162 +
1163 + mutex_enter(&vp->v_lock);
1164 + if (vp->v_path == NULL) {
1165 + mutex_exit(&vp->v_lock);
1166 + return (NULL);
1167 + }
1168 + len = strlen(vp->v_path) + 1;
1169 + mutex_exit(&vp->v_lock);
1170 +
1171 + ret = kmem_alloc(len, KM_SLEEP);
1172 +
1173 + mutex_enter(&vp->v_lock);
1174 + if (vp->v_path == NULL || strlen(vp->v_path) + 1 != len) {
1175 + mutex_exit(&vp->v_lock);
1176 + kmem_free(ret, len);
1177 + return (NULL);
1178 + }
1179 + bcopy(vp->v_path, ret, len);
1180 + mutex_exit(&vp->v_lock);
1181 +
1182 + return (ret);
1183 +}
1184 +
1185 +static void
1186 +flk_stale_lock_check(lock_descriptor_t *lock)
1187 +{
1188 + char *path;
1189 +
1190 + char host[INET6_ADDRSTRLEN]; /* host name */
1191 + const char *type; /* host type */
1192 +
1193 + /* temporary variables for the cmn_err() call */
1194 + char *p, *t; /* path, lock type */
1195 + pid_t pid; /* pid */
1196 + void *v; /* vnode */
1197 + u_offset_t s, e; /* start, end */
1198 +
1199 + ASSERT(MUTEX_HELD(&lock->l_graph->gp_mutex));
1200 +
1201 + /*
 1202 +	 * Skip if this is not a remote lock, stale lock checking is
 1203 +	 * disabled, or the lock has already been reported.
1204 + */
1205 + if (IS_LOCAL(lock) || stale_lock_timeout == 0 || lock->l_blocker < 0)
1206 + return;
1207 +
1208 + /* Seen first time? */
1209 + if (lock->l_blocker == 0) {
1210 + lock->l_blocker = gethrtime();
1211 + return;
1212 + }
1213 +
1214 + /* Old enough? */
1215 + if ((gethrtime() - lock->l_blocker) / NANOSEC < stale_lock_timeout)
1216 + return;
1217 +
1218 + translate_sysid_to_host(lock->l_zoneid, lock->l_flock.l_sysid, host,
1219 + sizeof (host), &type);
1220 + path = get_vnode_path(lock->l_vnode);
1221 +
1222 + pid = lock->l_flock.l_pid;
1223 + v = (void *)lock->l_vnode;
1224 + p = path == NULL ? "?" : path;
1225 + t = lock->l_type == F_WRLCK ? "WR" : "RD";
1226 + s = lock->l_start;
1227 + e = lock->l_end;
1228 +
1229 + /* Report the blocker as stale */
1230 + cmn_err(CE_NOTE, "!Stale lock (host: %s (%s), pid: %d, vnode: %p, "
1231 + "path: %s, %sLCK: %llu:%llu)", host, type, pid, v, p, t, s, e);
1232 +
1233 + if (path != NULL)
1234 + strfree(path);
1235 +
1236 + /* Mark this blocker as reported */
1237 + lock->l_blocker = -lock->l_blocker;
1238 +}
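
To summarize the l_blocker bookkeeping used above: zero means the lock has not yet been seen blocking a non-waiting request, a positive value is the gethrtime() timestamp of the first time it was seen blocking one, and the value is negated once the lock has been reported so that each stale blocker is logged only once. Since stale_lock_timeout is a plain int global, it can presumably be tuned without a reboot, e.g. echo "stale_lock_timeout/W 0t1800" | mdb -kw should drop the threshold to 30 minutes on a live system, and set stale_lock_timeout = 1800 in /etc/system would make that persistent; setting it to 0 disables the reporting entirely.
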
1239 +
1240 +static void
1241 +flk_stale_lock_shrink(lock_descriptor_t *lock, lock_descriptor_t *new)
1242 +{
1243 + char *path;
1244 +
1245 + char host[INET6_ADDRSTRLEN]; /* host name */
1246 + const char *type; /* host type */
1247 +
1248 + /* temporary variables for the cmn_err() call */
1249 + char *p, *t; /* path, lock type */
1250 + pid_t pid; /* pid */
1251 + void *v; /* vnode */
1252 + u_offset_t s, e; /* start, end */
1253 + u_offset_t ns, ne; /* new start, new end */
1254 +
1255 + ASSERT(MUTEX_HELD(&lock->l_graph->gp_mutex));
1256 +
1257 + translate_sysid_to_host(lock->l_zoneid, lock->l_flock.l_sysid, host,
1258 + sizeof (host), &type);
1259 + path = get_vnode_path(lock->l_vnode);
1260 +
1261 + pid = lock->l_flock.l_pid;
1262 + v = (void *)lock->l_vnode;
1263 + p = path == NULL ? "?" : path;
1264 + t = lock->l_type == F_WRLCK ? "WR" : "RD";
1265 + s = lock->l_start;
1266 + e = lock->l_end;
1267 + ns = new->l_start;
1268 + ne = new->l_end;
1269 +
1270 + cmn_err(CE_NOTE, "!Stale lock SHRINK (host: %s (%s), pid: %d, "
1271 + "vnode: %p, path: %s, %sLCK: %llu:%llu -> %llu:%llu)", host, type,
1272 + pid, v, p, t, s, e, ns, ne);
1273 +
1274 + if (path != NULL)
1275 + strfree(path);
1276 +}
1277 +
1278 +static void
1279 +flk_stale_lock_split(lock_descriptor_t *lock, lock_descriptor_t *new1,
1280 + lock_descriptor_t *new2)
1281 +{
1282 + char *path;
1283 +
1284 + char host[INET6_ADDRSTRLEN]; /* host name */
1285 + const char *type; /* host type */
1286 +
1287 + /* temporary variables for the cmn_err() call */
1288 + char *p, *t; /* path, lock type */
1289 + pid_t pid; /* pid */
1290 + void *v; /* vnode */
1291 + u_offset_t s, e; /* start, end */
1292 + u_offset_t n1s, n1e; /* new1 start, new1 end */
1293 + u_offset_t n2s, n2e; /* new2 start, new2 end */
1294 +
1295 + ASSERT(MUTEX_HELD(&lock->l_graph->gp_mutex));
1296 +
1297 + translate_sysid_to_host(lock->l_zoneid, lock->l_flock.l_sysid, host,
1298 + sizeof (host), &type);
1299 + path = get_vnode_path(lock->l_vnode);
1300 +
1301 + pid = lock->l_flock.l_pid;
1302 + v = (void *)lock->l_vnode;
1303 + p = path == NULL ? "?" : path;
1304 + t = lock->l_type == F_WRLCK ? "WR" : "RD";
1305 + s = lock->l_start;
1306 + e = lock->l_end;
1307 + n1s = new1->l_start;
1308 + n1e = new1->l_end;
1309 + n2s = new2->l_start;
1310 + n2e = new2->l_end;
1311 +
1312 + cmn_err(CE_NOTE, "!Stale lock SPLIT (host: %s (%s), pid: %d, "
1313 + "vnode: %p, path: %s, %sLCK: %llu:%llu -> %llu:%llu and %llu:%llu)",
1314 + host, type, pid, v, p, t, s, e, n1s, n1e, n2s, n2e);
1315 +
1316 + if (path != NULL)
1317 + strfree(path);
1318 +}
1319 +
1320 +static void
1321 +flk_stale_lock_release(lock_descriptor_t *lock)
1322 +{
1323 + char *path;
1324 +
1325 + char host[INET6_ADDRSTRLEN]; /* host name */
1326 + const char *type; /* host type */
1327 +
1328 + /* temporary variables for the cmn_err() call */
1329 + char *p, *t; /* path, lock type */
1330 + pid_t pid; /* pid */
1331 + void *v; /* vnode */
1332 + u_offset_t s, e; /* start, end */
1333 +
1334 + ASSERT(MUTEX_HELD(&lock->l_graph->gp_mutex));
1335 +
1336 + translate_sysid_to_host(lock->l_zoneid, lock->l_flock.l_sysid, host,
1337 + sizeof (host), &type);
1338 + path = get_vnode_path(lock->l_vnode);
1339 +
1340 + pid = lock->l_flock.l_pid;
1341 + v = (void *)lock->l_vnode;
1342 + p = path == NULL ? "?" : path;
1343 + t = lock->l_type == F_WRLCK ? "WR" : "RD";
1344 + s = lock->l_start;
1345 + e = lock->l_end;
1346 +
1347 + cmn_err(CE_NOTE, "!Stale lock RELEASE (host: %s (%s), pid: %d, "
1348 + "vnode: %p, path: %s, %sLCK: %llu:%llu)", host, type, pid, v, p, t,
1349 + s, e);
1350 +
1351 + if (path != NULL)
1352 + strfree(path);
1353 +}
1354 +
1355 +/*
1064 1356 * Routine that checks whether there are any blocking locks in the system.
1065 1357 *
1066 1358 * The policy followed is if a write lock is sleeping we don't allow read
1067 1359 * locks before this write lock even though there may not be any active
1068 1360 * locks corresponding to the read locks' region.
1069 1361 *
1070 1362 * flk_add_edge() function adds an edge between l1 and l2 iff there
1071 1363 * is no path between l1 and l2. This is done to have a "minimum
1072 1364 * storage representation" of the dependency graph.
1073 1365 *
1074 1366 * Another property of the graph is since only the new request throws
1075 1367 * edges to the existing locks in the graph, the graph is always topologically
1076 1368 * ordered.
1077 1369 */
1078 1370
1079 1371 static int
1080 1372 flk_process_request(lock_descriptor_t *request)
1081 1373 {
1082 1374 graph_t *gp = request->l_graph;
1083 1375 lock_descriptor_t *lock;
1084 1376 int request_blocked_by_active = 0;
1085 1377 int request_blocked_by_granted = 0;
1086 1378 int request_blocked_by_sleeping = 0;
1087 1379 vnode_t *vp = request->l_vnode;
1088 1380 int error = 0;
1089 1381 int request_will_wait = 0;
1090 1382 int found_covering_lock = 0;
1091 1383 lock_descriptor_t *covered_by = NULL;
1092 1384
1093 1385 ASSERT(MUTEX_HELD(&gp->gp_mutex));
1094 1386 request_will_wait = IS_WILLING_TO_SLEEP(request);
1095 1387
1096 1388 /*
1097 1389 * check active locks
1098 1390 */
1099 1391
1100 1392 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
1101 1393
1102 -
1103 1394 if (lock) {
1104 1395 do {
1105 1396 if (BLOCKS(lock, request)) {
1106 - if (!request_will_wait)
1397 + if (!request_will_wait) {
1398 + flk_stale_lock_check(lock);
1107 1399 return (EAGAIN);
1400 + }
1108 1401 request_blocked_by_active = 1;
1109 1402 break;
1110 1403 }
1111 1404 /*
1112 1405 * Grant lock if it is for the same owner holding active
1113 1406 * lock that covers the request.
1114 1407 */
1115 1408
1116 1409 if (SAME_OWNER(lock, request) &&
1117 1410 COVERS(lock, request) &&
1118 1411 (request->l_type == F_RDLCK))
1119 1412 return (flk_execute_request(request));
1120 1413 lock = lock->l_next;
1121 1414 } while (lock->l_vnode == vp);
1122 1415 }
1123 1416
1124 1417 if (!request_blocked_by_active) {
1125 - lock_descriptor_t *lk[1];
1126 - lock_descriptor_t *first_glock = NULL;
1418 + lock_descriptor_t *lk[1];
1419 + lock_descriptor_t *first_glock = NULL;
1420 +
1127 1421 /*
1128 1422 * Shall we grant this?! NO!!
1129 1423 * What about those locks that were just granted and still
1130 1424 * in sleep queue. Those threads are woken up and so locks
1131 1425 * are almost active.
1132 1426 */
1133 1427 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
1134 1428 if (lock) {
1135 1429 do {
1136 1430 if (BLOCKS(lock, request)) {
1137 1431 if (IS_GRANTED(lock)) {
1138 1432 request_blocked_by_granted = 1;
1139 1433 } else {
1140 1434 request_blocked_by_sleeping = 1;
1141 1435 }
1142 1436 }
1143 1437
1144 1438 lock = lock->l_next;
1145 1439 } while ((lock->l_vnode == vp));
1146 1440 first_glock = lock->l_prev;
1147 1441 ASSERT(first_glock->l_vnode == vp);
1148 1442 }
1149 1443
1150 1444 if (request_blocked_by_granted)
1151 1445 goto block;
1152 1446
1153 1447 if (!request_blocked_by_sleeping) {
1154 1448 /*
1155 1449 * If the request isn't going to be blocked by a
1156 1450 * sleeping request, we know that it isn't going to
1157 1451 * be blocked; we can just execute the request --
1158 1452 * without performing costly deadlock detection.
1159 1453 */
1160 1454 ASSERT(!request_blocked_by_active);
1161 1455 return (flk_execute_request(request));
1162 1456 } else if (request->l_type == F_RDLCK) {
1163 1457 /*
1164 1458 * If we have a sleeping writer in the requested
1165 1459 * lock's range, block.
1166 1460 */
1167 1461 goto block;
1168 1462 }
1169 1463
1170 1464 lk[0] = request;
1171 1465 request->l_state |= RECOMPUTE_LOCK;
1172 1466 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
1173 1467 if (lock) {
1174 1468 do {
1175 1469 flk_recompute_dependencies(lock, lk, 1, 0);
1176 1470 lock = lock->l_next;
1177 1471 } while (lock->l_vnode == vp);
1178 1472 }
1179 1473 lock = first_glock;
1180 1474 if (lock) {
1181 1475 do {
1182 1476 if (IS_GRANTED(lock)) {
1183 - flk_recompute_dependencies(lock, lk, 1, 0);
1477 + flk_recompute_dependencies(lock, lk, 1,
1478 + 0);
1184 1479 }
1185 1480 lock = lock->l_prev;
1186 1481 } while ((lock->l_vnode == vp));
1187 1482 }
1188 1483 request->l_state &= ~RECOMPUTE_LOCK;
1189 1484 if (!NO_DEPENDENTS(request) && flk_check_deadlock(request))
1190 1485 return (EDEADLK);
1191 1486 return (flk_execute_request(request));
1192 1487 }
1193 1488
1194 1489 block:
1195 1490 if (request_will_wait)
1196 1491 flk_graph_uncolor(gp);
1197 1492
1198 1493 /* check sleeping locks */
1199 1494
1200 1495 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
1201 1496
1202 1497 /*
1203 1498 * If we find a sleeping write lock that is a superset of the
1204 1499 * region wanted by request we can be assured that by adding an
1205 1500 * edge to this write lock we have paths to all locks in the
1206 1501 * graph that blocks the request except in one case and that is why
1207 1502 * another check for SAME_OWNER in the loop below. The exception
1208 1503 * case is when this process that owns the sleeping write lock 'l1'
1209 1504 * has other locks l2, l3, l4 that are in the system and arrived
1210 1505 * before l1. l1 does not have path to these locks as they are from
1211 1506 * same process. We break when we find a second covering sleeping
1212 1507 * lock l5 owned by a process different from that owning l1, because
1213 1508 * there cannot be any of l2, l3, l4, etc., arrived before l5, and if
1214 1509 * it has l1 would have produced a deadlock already.
1215 1510 */
1216 1511
1217 1512 if (lock) {
1218 1513 do {
1219 1514 if (BLOCKS(lock, request)) {
1220 1515 if (!request_will_wait)
1221 1516 return (EAGAIN);
1222 1517 if (COVERS(lock, request) &&
1223 1518 lock->l_type == F_WRLCK) {
1224 1519 if (found_covering_lock &&
1225 1520 !SAME_OWNER(lock, covered_by)) {
1226 1521 found_covering_lock++;
1227 1522 break;
1228 1523 }
1229 1524 found_covering_lock = 1;
1230 1525 covered_by = lock;
1231 1526 }
1232 1527 if (found_covering_lock &&
1233 1528 !SAME_OWNER(lock, covered_by)) {
1234 1529 lock = lock->l_next;
1235 1530 continue;
1236 1531 }
1237 1532 if ((error = flk_add_edge(request, lock,
1238 1533 !found_covering_lock, 0)))
1239 1534 return (error);
1240 1535 }
1241 1536 lock = lock->l_next;
1242 1537 } while (lock->l_vnode == vp);
1243 1538 }
1244 1539
1245 -/*
1246 - * found_covering_lock == 2 iff at this point 'request' has paths
1247 - * to all locks that blocks 'request'. found_covering_lock == 1 iff at this
1248 - * point 'request' has paths to all locks that blocks 'request' whose owners
1249 - * are not same as the one that covers 'request' (covered_by above) and
1250 - * we can have locks whose owner is same as covered_by in the active list.
1251 - */
1540 + /*
1541 + * found_covering_lock == 2 iff at this point 'request' has paths to
1542 + * all locks that blocks 'request'. found_covering_lock == 1 iff at
1543 + * this point 'request' has paths to all locks that blocks 'request'
1544 + * whose owners are not same as the one that covers 'request'
1545 + * (covered_by above) and we can have locks whose owner is same as
1546 + * covered_by in the active list.
1547 + */
1252 1548
1253 1549 if (request_blocked_by_active && found_covering_lock != 2) {
1254 1550 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
1255 1551 ASSERT(lock != NULL);
1256 1552 do {
1257 1553 if (BLOCKS(lock, request)) {
1258 1554 if (found_covering_lock &&
1259 1555 !SAME_OWNER(lock, covered_by)) {
1260 1556 lock = lock->l_next;
1261 1557 continue;
1262 1558 }
1263 1559 if ((error = flk_add_edge(request, lock,
1264 1560 CHECK_CYCLE, 0)))
1265 1561 return (error);
1266 1562 }
1267 1563 lock = lock->l_next;
1268 1564 } while (lock->l_vnode == vp);
1269 1565 }
1270 1566
1271 1567 if (NOT_BLOCKED(request)) {
1272 1568 /*
1273 1569 * request not dependent on any other locks
1274 1570 * so execute this request
1275 1571 */
1276 1572 return (flk_execute_request(request));
1277 1573 } else {
1278 1574 /*
1279 1575 * check for deadlock
1280 1576 */
1281 1577 if (flk_check_deadlock(request))
1282 1578 return (EDEADLK);
1283 1579 /*
1284 1580 * this thread has to sleep
1285 1581 */
1286 1582 return (flk_wait_execute_request(request));
1287 1583 }
1288 1584 }
1289 1585
1290 1586 /*
1291 1587 * The actual execution of the request in the simple case is only to
1292 1588 * insert the 'request' in the list of active locks if it is not an
1293 1589 * UNLOCK.
1294 1590 * We have to consider the existing active locks' relation to
1295 1591 * this 'request' if they are owned by same process. flk_relation() does
1296 1592 * this job and sees to that the dependency graph information is maintained
1297 1593 * properly.
1298 1594 */
1299 1595
1300 1596 int
1301 1597 flk_execute_request(lock_descriptor_t *request)
1302 1598 {
1303 1599 graph_t *gp = request->l_graph;
1304 1600 vnode_t *vp = request->l_vnode;
1305 1601 lock_descriptor_t *lock, *lock1;
1306 1602 int done_searching = 0;
1307 1603
1308 1604 CHECK_SLEEPING_LOCKS(gp);
1309 1605 CHECK_ACTIVE_LOCKS(gp);
1310 1606
1311 1607 ASSERT(MUTEX_HELD(&gp->gp_mutex));
1312 1608
1313 1609 flk_set_state(request, FLK_START_STATE);
1314 1610
1315 1611 ASSERT(NOT_BLOCKED(request));
1316 1612
1317 1613 /* IO_LOCK requests are only to check status */
1318 1614
1319 1615 if (IS_IO_LOCK(request))
1320 1616 return (0);
1321 1617
1322 1618 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
1323 1619
1324 - if (lock == NULL && request->l_type == F_UNLCK)
1325 - return (0);
1326 - if (lock == NULL) {
1327 - flk_insert_active_lock(request);
1328 - return (0);
1620 + if (lock != NULL) {
1621 + /*
1622 + * There are some active locks so check for relations
1623 + */
1624 + do {
1625 + lock1 = lock->l_next;
1626 + if (SAME_OWNER(request, lock)) {
1627 + done_searching = flk_relation(lock, request);
1628 + }
1629 + lock = lock1;
1630 + } while (lock->l_vnode == vp && !done_searching);
1329 1631 }
1330 1632
1331 - do {
1332 - lock1 = lock->l_next;
1333 - if (SAME_OWNER(request, lock)) {
1334 - done_searching = flk_relation(lock, request);
1335 - }
1336 - lock = lock1;
1337 - } while (lock->l_vnode == vp && !done_searching);
1338 -
1339 1633 /*
1340 1634 * insert in active queue
1341 1635 */
1342 1636
1343 1637 if (request->l_type != F_UNLCK)
1344 1638 flk_insert_active_lock(request);
1345 1639
1346 1640 return (0);
1347 1641 }
1348 1642
1349 1643 /*
1350 1644  * 'request' is blocked by someone, therefore we put it into the sleep queue.
1351 1645 */
1352 1646 static int
1353 1647 flk_wait_execute_request(lock_descriptor_t *request)
1354 1648 {
1355 1649 graph_t *gp = request->l_graph;
1356 1650 callb_cpr_t *cprp; /* CPR info from callback */
1357 1651 struct flock_globals *fg;
1358 1652 int index;
1359 1653
1360 1654 ASSERT(MUTEX_HELD(&gp->gp_mutex));
1361 1655 ASSERT(IS_WILLING_TO_SLEEP(request));
1362 1656
1363 1657 flk_insert_sleeping_lock(request);
1364 1658
1365 1659 if (IS_LOCKMGR(request)) {
1366 1660 index = HASH_INDEX(request->l_vnode);
1367 1661 fg = flk_get_globals();
1368 1662
1369 1663 if (nlm_status_size == 0) { /* not booted as a cluster */
1370 1664 if (fg->lockmgr_status[index] != FLK_LOCKMGR_UP) {
1371 1665 flk_cancel_sleeping_lock(request, 1);
1372 1666 return (ENOLCK);
1373 1667 }
1374 1668 } else { /* booted as a cluster */
1375 1669 /*
1376 1670 * If the request is an NLM server lock request,
1377 1671 * and the NLM state of the lock request is not
1378 1672 * NLM_UP (because the NLM server is shutting
1379 1673 * down), then cancel the sleeping lock and
1380 1674 * return error ENOLCK that will encourage the
1381 1675 * client to retransmit.
1382 1676 */
1383 1677 if (!IS_NLM_UP(request)) {
1384 1678 flk_cancel_sleeping_lock(request, 1);
1385 1679 return (ENOLCK);
1386 1680 }
1387 1681 }
1388 1682 }
1389 1683
1390 1684 /* Clustering: For blocking PXFS locks, return */
1391 1685 if (IS_PXFS(request)) {
1392 1686 /*
1393 1687 * PXFS locks sleep on the client side.
1394 1688 * The callback argument is used to wake up the sleeper
1395 1689 * when the lock is granted.
1396 1690 * We return -1 (rather than an errno value) to indicate
1397 1691 * the client side should sleep
1398 1692 */
1399 1693 return (PXFS_LOCK_BLOCKED);
1400 1694 }
1401 1695
1402 1696 if (request->l_callbacks != NULL) {
1403 1697 /*
1404 1698 * To make sure the shutdown code works correctly, either
1405 1699 * the callback must happen after putting the lock on the
1406 1700 * sleep list, or we must check the shutdown status after
1407 1701 * returning from the callback (and before sleeping). At
1408 1702 * least for now, we'll use the first option. If a
1409 1703 * shutdown or signal or whatever happened while the graph
1410 1704 * mutex was dropped, that will be detected by
1411 1705 * wait_for_lock().
1412 1706 */
1413 1707 mutex_exit(&gp->gp_mutex);
1414 1708
1415 1709 cprp = flk_invoke_callbacks(request->l_callbacks,
1416 1710 FLK_BEFORE_SLEEP);
1417 1711
1418 1712 mutex_enter(&gp->gp_mutex);
1419 1713
1420 1714 if (cprp == NULL) {
1421 1715 wait_for_lock(request);
1422 1716 } else {
1423 1717 mutex_enter(cprp->cc_lockp);
1424 1718 CALLB_CPR_SAFE_BEGIN(cprp);
1425 1719 mutex_exit(cprp->cc_lockp);
1426 1720 wait_for_lock(request);
1427 1721 mutex_enter(cprp->cc_lockp);
1428 1722 CALLB_CPR_SAFE_END(cprp, cprp->cc_lockp);
1429 1723 mutex_exit(cprp->cc_lockp);
1430 1724 }
1431 1725
1432 1726 mutex_exit(&gp->gp_mutex);
1433 1727 (void) flk_invoke_callbacks(request->l_callbacks,
1434 1728 FLK_AFTER_SLEEP);
1435 1729 mutex_enter(&gp->gp_mutex);
1436 1730 } else {
1437 1731 wait_for_lock(request);
1438 1732 }
1439 1733
1440 1734 if (IS_LOCKMGR(request)) {
1441 1735 /*
1442 1736 * If the lock manager is shutting down, return an
1443 1737 * error that will encourage the client to retransmit.
1444 1738 */
1445 1739 if (fg->lockmgr_status[index] != FLK_LOCKMGR_UP &&
1446 1740 !IS_GRANTED(request)) {
1447 1741 flk_cancel_sleeping_lock(request, 1);
1448 1742 return (ENOLCK);
1449 1743 }
1450 1744 }
1451 1745
1452 1746 if (IS_INTERRUPTED(request)) {
1453 1747 /* we got a signal, or act like we did */
1454 1748 flk_cancel_sleeping_lock(request, 1);
1455 1749 return (EINTR);
1456 1750 }
1457 1751
1458 1752 /* Cancelled if some other thread has closed the file */
1459 1753
1460 1754 if (IS_CANCELLED(request)) {
1461 1755 flk_cancel_sleeping_lock(request, 1);
1462 1756 return (EBADF);
1463 1757 }
1464 1758
1465 1759 request->l_state &= ~GRANTED_LOCK;
1466 1760 REMOVE_SLEEP_QUEUE(request);
1467 1761 return (flk_execute_request(request));
1468 1762 }
1469 1763
1470 1764 /*
1471 1765  * This routine adds an edge between from and to because from depends
1472 1766  * on to. If asked to check for deadlock, it checks whether any lock
1473 1767  * reachable from "from_lock" is owned by the same process
1474 1768  * as "from_lock".
1475 1769 * NOTE: It is the caller's responsibility to make sure that the color
1476 1770 * of the graph is consistent between the calls to flk_add_edge as done
1477 1771 * in flk_process_request. This routine does not color and check for
1478 1772 * deadlock explicitly.
1479 1773 */
1480 1774
1481 1775 static int
1482 1776 flk_add_edge(lock_descriptor_t *from_lock, lock_descriptor_t *to_lock,
1483 1777 int check_cycle, int update_graph)
1484 1778 {
1485 1779 edge_t *edge;
1486 1780 edge_t *ep;
1487 1781 lock_descriptor_t *vertex;
1488 1782 lock_descriptor_t *vertex_stack;
1489 1783
1490 1784 STACK_INIT(vertex_stack);
1491 1785
1492 1786 /*
1493 1787  * If the to vertex already has mark_color, just return and
1494 1788  * don't add an edge, as it is already reachable from the
1495 1789  * from vertex.
1496 1790 */
1497 1791
1498 1792 if (COLORED(to_lock))
1499 1793 return (0);
1500 1794
1501 1795 edge = flk_get_edge();
1502 1796
1503 1797 /*
1504 1798 * set the from and to vertex
1505 1799 */
1506 1800
1507 1801 edge->from_vertex = from_lock;
1508 1802 edge->to_vertex = to_lock;
1509 1803
1510 1804 /*
1511 1805 * put in adjacency list of from vertex
1512 1806 */
1513 1807
1514 1808 from_lock->l_edge.edge_adj_next->edge_adj_prev = edge;
1515 1809 edge->edge_adj_next = from_lock->l_edge.edge_adj_next;
1516 1810 edge->edge_adj_prev = &from_lock->l_edge;
1517 1811 from_lock->l_edge.edge_adj_next = edge;
1518 1812
1519 1813 /*
1520 1814 * put in list of to vertex
1521 1815 */
1522 1816
1523 1817 to_lock->l_edge.edge_in_next->edge_in_prev = edge;
1524 1818 edge->edge_in_next = to_lock->l_edge.edge_in_next;
1525 1819 to_lock->l_edge.edge_in_next = edge;
1526 1820 edge->edge_in_prev = &to_lock->l_edge;
1527 1821
1528 1822
1529 1823 if (update_graph) {
1530 1824 flk_update_proc_graph(edge, 0);
1531 1825 return (0);
1532 1826 }
1533 1827 if (!check_cycle) {
1534 1828 return (0);
1535 1829 }
1536 1830
1537 1831 STACK_PUSH(vertex_stack, from_lock, l_stack);
1538 1832
1539 1833 while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
1540 1834
1541 1835 STACK_POP(vertex_stack, l_stack);
1542 1836
1543 1837 for (ep = FIRST_ADJ(vertex);
1544 1838 ep != HEAD(vertex);
1545 1839 ep = NEXT_ADJ(ep)) {
1546 1840 if (COLORED(ep->to_vertex))
1547 1841 continue;
1548 1842 COLOR(ep->to_vertex);
1549 1843 if (SAME_OWNER(ep->to_vertex, from_lock))
1550 1844 goto dead_lock;
1551 1845 STACK_PUSH(vertex_stack, ep->to_vertex, l_stack);
1552 1846 }
1553 1847 }
1554 1848 return (0);
1555 1849
1556 1850 dead_lock:
1557 1851
1558 1852 /*
1559 1853 * remove all edges
1560 1854 */
1561 1855
1562 1856 ep = FIRST_ADJ(from_lock);
1563 1857
1564 1858 while (ep != HEAD(from_lock)) {
1565 1859 IN_LIST_REMOVE(ep);
1566 1860 from_lock->l_sedge = NEXT_ADJ(ep);
1567 1861 ADJ_LIST_REMOVE(ep);
1568 1862 flk_free_edge(ep);
1569 1863 ep = from_lock->l_sedge;
1570 1864 }
1571 1865 return (EDEADLK);
1572 1866 }
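
The cycle check above is a depth-first walk over the lock graph that declares deadlock as soon as it reaches a lock owned by the same process as the requester. A minimal user-space sketch of the same idea, with simplified hypothetical types (not the kernel code, which also maintains colors across calls and unwinds the edges it added):

    #include <stdio.h>

    #define MAX_LOCKS 8

    /* Hypothetical, simplified lock vertex: owner, out-edges, visited mark. */
    struct lk {
        int owner;                      /* owning process id */
        int nadj;                       /* number of outgoing edges */
        struct lk *adj[MAX_LOCKS];      /* locks this lock waits on */
        int color;                      /* visited mark, 0 at entry */
    };

    /*
     * Return 1 if following "waits on" edges from 'from' reaches a lock
     * owned by the same process, i.e. adding the new edge would deadlock.
     */
    static int
    would_deadlock(struct lk *from)
    {
        struct lk *stack[MAX_LOCKS];
        int top = 0, i;

        stack[top++] = from;
        while (top > 0) {
            struct lk *v = stack[--top];

            for (i = 0; i < v->nadj; i++) {
                struct lk *w = v->adj[i];

                if (w->color)
                    continue;
                w->color = 1;
                if (w->owner == from->owner)
                    return (1);
                stack[top++] = w;
            }
        }
        return (0);
    }

    int
    main(void)
    {
        /* a (owner 1) waits on b (owner 2), and b waits on a: a cycle. */
        struct lk a = { 1, 0, { 0 }, 0 }, b = { 2, 0, { 0 }, 0 };

        a.adj[a.nadj++] = &b;
        b.adj[b.nadj++] = &a;
        (void) printf("deadlock: %d\n", would_deadlock(&a));
        return (0);
    }
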
1573 1867
1574 1868 /*
1575 1869 * Get an edge structure for representing the dependency between two locks.
1576 1870 */
1577 1871
1578 1872 static edge_t *
1579 1873 flk_get_edge()
1580 1874 {
1581 1875 edge_t *ep;
1582 1876
1583 1877 ASSERT(flk_edge_cache != NULL);
1584 1878
1585 1879 ep = kmem_cache_alloc(flk_edge_cache, KM_SLEEP);
1586 1880 edge_allocs++;
1587 1881 return (ep);
1588 1882 }
1589 1883
1590 1884 /*
1591 1885 * Free the edge structure.
1592 1886 */
1593 1887
1594 1888 static void
1595 1889 flk_free_edge(edge_t *ep)
1596 1890 {
1597 1891 edge_frees++;
1598 1892 kmem_cache_free(flk_edge_cache, (void *)ep);
1599 1893 }
1600 1894
1601 1895 /*
1602 - * Check the relationship of request with lock and perform the
1603 - * recomputation of dependencies, break lock if required, and return
1604 - * 1 if request cannot have any more relationship with the next
1896 + * Check the relationship of 'request' with 'lock' and perform the
1897 + * recomputation of dependencies, break 'lock' if required, and return
1898 + * 1 if 'request' cannot have any more relationship with the next
1605 1899 * active locks.
1900 + *
1606 1901 * The 'lock' and 'request' are compared and in case of overlap we
1607 1902 * delete the 'lock' and form new locks to represent the non-overlapped
1608 1903 * portion of original 'lock'. This function has side effects such as
1609 1904 * 'lock' will be freed, new locks will be added to the active list.
1610 1905 */
1611 1906
1612 1907 static int
1613 1908 flk_relation(lock_descriptor_t *lock, lock_descriptor_t *request)
1614 1909 {
1615 1910 int lock_effect;
1616 - lock_descriptor_t *lock1, *lock2;
1617 1911 lock_descriptor_t *topology[3];
1618 1912 int nvertex = 0;
1619 1913 int i;
1620 1914 edge_t *ep;
1621 - graph_t *gp = (lock->l_graph);
1915 + graph_t *gp = lock->l_graph;
1916 + boolean_t mergeable;
1622 1917
1918 + ASSERT(request->l_blocker == 0);
1623 1919
1624 1920 CHECK_SLEEPING_LOCKS(gp);
1625 1921 CHECK_ACTIVE_LOCKS(gp);
1626 1922
1627 1923 ASSERT(MUTEX_HELD(&gp->gp_mutex));
1628 1924
1629 1925 topology[0] = topology[1] = topology[2] = NULL;
1630 1926
1631 1927 if (request->l_type == F_UNLCK)
1632 1928 lock_effect = FLK_UNLOCK;
1633 1929 else if (request->l_type == F_RDLCK &&
1634 1930 lock->l_type == F_WRLCK)
1635 1931 lock_effect = FLK_DOWNGRADE;
1636 1932 else if (request->l_type == F_WRLCK &&
1637 1933 lock->l_type == F_RDLCK)
1638 1934 lock_effect = FLK_UPGRADE;
1639 1935 else
1640 1936 lock_effect = FLK_STAY_SAME;
1641 1937
1938 + /*
1939 +	 * The 'lock' and 'request' are merged only when the effect of both
1940 +	 * locks is the same (FLK_STAY_SAME) and their blocker status
1941 +	 * (l_blocker) is the same as well. We do not merge 'lock' and
1942 +	 * 'request' with different l_blocker values because such a merge
1943 +	 * might affect stale lock detection: it could cause false positives
1944 +	 * or miss some stale locks.
1945 + */
1946 + mergeable = lock_effect == FLK_STAY_SAME &&
1947 + lock->l_blocker == request->l_blocker;
1948 +
1642 1949 if (lock->l_end < request->l_start) {
1643 - if (lock->l_end == request->l_start - 1 &&
1644 - lock_effect == FLK_STAY_SAME) {
1645 - topology[0] = request;
1950 + /* If the 'lock' is just next to 'request', try to merge them */
1951 + if (lock->l_end == request->l_start - 1 && mergeable) {
1646 1952 request->l_start = lock->l_start;
1647 - nvertex = 1;
1648 1953 goto recompute;
1649 - } else {
1650 - return (0);
1651 1954 }
1955 +
1956 + /* Otherwise, they do not overlap, so return immediately */
1957 + return (0);
1652 1958 }
1653 1959
1654 - if (lock->l_start > request->l_end) {
1655 - if (request->l_end == lock->l_start - 1 &&
1656 - lock_effect == FLK_STAY_SAME) {
1657 - topology[0] = request;
1960 + if (request->l_end < lock->l_start) {
1961 + /* If the 'request' is just next to 'lock', try to merge them */
1962 + if (request->l_end == lock->l_start - 1 && mergeable) {
1658 1963 request->l_end = lock->l_end;
1659 - nvertex = 1;
1660 1964 goto recompute;
1965 + }
1966 +
1967 + /* Otherwise, they do not overlap, so return immediately */
1968 + return (1);
1969 + }
1970 +
1971 + /*
1972 +	 * Here we are sure the 'lock' and 'request' overlap, so the 'request'
1973 + * will replace the 'lock' (either fully, or at least partially).
1974 + */
1975 +
1976 + /*
1977 + * If the 'request' does not fully cover the 'lock' at the start,
1978 + * either move the start of the 'request' to cover the 'lock', or split
1979 + * the 'lock'.
1980 + */
1981 + if (lock->l_start < request->l_start) {
1982 + if (mergeable) {
1983 + request->l_start = lock->l_start;
1661 1984 } else {
1662 - return (1);
1985 + lock_descriptor_t *new_lock = flk_get_lock();
1986 +
1987 + COPY(new_lock, lock);
1988 + new_lock->l_end = request->l_start - 1;
1989 +
1990 + topology[nvertex++] = new_lock;
1663 1991 }
1664 1992 }
1665 1993
1994 + /*
1995 + * If the 'request' does not fully cover the 'lock' at the end, either
1996 + * move the end of the 'request' to cover the 'lock', or split the
1997 + * 'lock'.
1998 + */
1666 1999 if (request->l_end < lock->l_end) {
1667 - if (request->l_start > lock->l_start) {
1668 - if (lock_effect == FLK_STAY_SAME) {
1669 - request->l_start = lock->l_start;
1670 - request->l_end = lock->l_end;
1671 - topology[0] = request;
1672 - nvertex = 1;
1673 - } else {
1674 - lock1 = flk_get_lock();
1675 - lock2 = flk_get_lock();
1676 - COPY(lock1, lock);
1677 - COPY(lock2, lock);
1678 - lock1->l_start = lock->l_start;
1679 - lock1->l_end = request->l_start - 1;
1680 - lock2->l_start = request->l_end + 1;
1681 - lock2->l_end = lock->l_end;
1682 - topology[0] = lock1;
1683 - topology[1] = lock2;
1684 - topology[2] = request;
1685 - nvertex = 3;
1686 - }
1687 - } else if (request->l_start < lock->l_start) {
1688 - if (lock_effect == FLK_STAY_SAME) {
1689 - request->l_end = lock->l_end;
1690 - topology[0] = request;
1691 - nvertex = 1;
1692 - } else {
1693 - lock1 = flk_get_lock();
1694 - COPY(lock1, lock);
1695 - lock1->l_start = request->l_end + 1;
1696 - topology[0] = lock1;
1697 - topology[1] = request;
1698 - nvertex = 2;
1699 - }
1700 - } else {
1701 - if (lock_effect == FLK_STAY_SAME) {
1702 - request->l_start = lock->l_start;
1703 - request->l_end = lock->l_end;
1704 - topology[0] = request;
1705 - nvertex = 1;
1706 - } else {
1707 - lock1 = flk_get_lock();
1708 - COPY(lock1, lock);
1709 - lock1->l_start = request->l_end + 1;
1710 - topology[0] = lock1;
1711 - topology[1] = request;
1712 - nvertex = 2;
1713 - }
1714 - }
1715 - } else if (request->l_end > lock->l_end) {
1716 - if (request->l_start > lock->l_start) {
1717 - if (lock_effect == FLK_STAY_SAME) {
1718 - request->l_start = lock->l_start;
1719 - topology[0] = request;
1720 - nvertex = 1;
1721 - } else {
1722 - lock1 = flk_get_lock();
1723 - COPY(lock1, lock);
1724 - lock1->l_end = request->l_start - 1;
1725 - topology[0] = lock1;
1726 - topology[1] = request;
1727 - nvertex = 2;
1728 - }
1729 - } else if (request->l_start < lock->l_start) {
1730 - topology[0] = request;
1731 - nvertex = 1;
2000 + if (mergeable) {
2001 + request->l_end = lock->l_end;
1732 2002 } else {
1733 - topology[0] = request;
1734 - nvertex = 1;
2003 + lock_descriptor_t *new_lock = flk_get_lock();
2004 +
2005 + COPY(new_lock, lock);
2006 + new_lock->l_start = request->l_end + 1;
2007 +
2008 + topology[nvertex++] = new_lock;
1735 2009 }
1736 - } else {
1737 - if (request->l_start > lock->l_start) {
1738 - if (lock_effect == FLK_STAY_SAME) {
1739 - request->l_start = lock->l_start;
1740 - topology[0] = request;
1741 - nvertex = 1;
1742 - } else {
1743 - lock1 = flk_get_lock();
1744 - COPY(lock1, lock);
1745 - lock1->l_end = request->l_start - 1;
1746 - topology[0] = lock1;
1747 - topology[1] = request;
1748 - nvertex = 2;
1749 - }
1750 - } else if (request->l_start < lock->l_start) {
1751 - topology[0] = request;
1752 - nvertex = 1;
1753 - } else {
1754 - if (lock_effect != FLK_UNLOCK) {
1755 - topology[0] = request;
1756 - nvertex = 1;
1757 - } else {
1758 - flk_delete_active_lock(lock, 0);
1759 - flk_wakeup(lock, 1);
1760 - flk_free_lock(lock);
1761 - CHECK_SLEEPING_LOCKS(gp);
1762 - CHECK_ACTIVE_LOCKS(gp);
1763 - return (1);
1764 - }
1765 - }
1766 2010 }
1767 2011
1768 -recompute:
2012 + /*
2013 + * Log the blocker change
2014 + */
2015 + if (nvertex > 0 && lock->l_blocker < 0) {
2016 + if (nvertex == 1)
2017 + flk_stale_lock_shrink(lock, topology[0]);
2018 + if (nvertex == 2)
2019 + flk_stale_lock_split(lock, topology[0], topology[1]);
1769 2020
2021 + lock->l_blocker = 0;
2022 + }
2023 +
2024 +recompute:
1770 2025 /*
1771 2026  * For unlock we don't send the 'request' for recomputing
1772 2027 * dependencies because no lock will add an edge to this.
1773 2028 */
2029 + if (lock_effect != FLK_UNLOCK)
2030 + topology[nvertex++] = request;
1774 2031
1775 - if (lock_effect == FLK_UNLOCK) {
1776 - topology[nvertex-1] = NULL;
1777 - nvertex--;
1778 - }
1779 2032 for (i = 0; i < nvertex; i++) {
1780 2033 topology[i]->l_state |= RECOMPUTE_LOCK;
1781 2034 topology[i]->l_color = NO_COLOR;
1782 2035 }
1783 2036
1784 2037 ASSERT(FIRST_ADJ(lock) == HEAD(lock));
1785 2038
1786 2039 /*
1787 2040  * we remove each in edge of this vertex 'lock' from the adjacency
1788 2041  * list of its from vertex.
1789 2042 */
1790 -
1791 2043 ep = FIRST_IN(lock);
1792 2044 while (ep != HEAD(lock)) {
1793 2045 ADJ_LIST_REMOVE(ep);
1794 2046 ep = NEXT_IN(ep);
1795 2047 }
1796 2048
1797 2049 flk_delete_active_lock(lock, 0);
1798 2050
1799 2051 /* We are ready for recomputing the dependencies now */
1800 -
1801 2052 flk_recompute_dependencies(lock, topology, nvertex, 1);
1802 2053
1803 2054 for (i = 0; i < nvertex; i++) {
1804 2055 topology[i]->l_state &= ~RECOMPUTE_LOCK;
1805 2056 topology[i]->l_color = NO_COLOR;
1806 2057 }
1807 2058
1808 -
1809 2059 if (lock_effect == FLK_UNLOCK) {
1810 2060 nvertex++;
1811 2061 }
1812 2062 for (i = 0; i < nvertex - 1; i++) {
1813 2063 flk_insert_active_lock(topology[i]);
1814 2064 }
1815 2065
1816 -
1817 2066 if (lock_effect == FLK_DOWNGRADE || lock_effect == FLK_UNLOCK) {
1818 2067 flk_wakeup(lock, 0);
1819 2068 } else {
1820 2069 ep = FIRST_IN(lock);
1821 2070 while (ep != HEAD(lock)) {
1822 2071 lock->l_sedge = NEXT_IN(ep);
1823 2072 IN_LIST_REMOVE(ep);
1824 2073 flk_update_proc_graph(ep, 1);
1825 2074 flk_free_edge(ep);
1826 2075 ep = lock->l_sedge;
1827 2076 }
1828 2077 }
1829 2078 flk_free_lock(lock);
1830 2079
1831 2080 CHECK_SLEEPING_LOCKS(gp);
1832 2081 CHECK_ACTIVE_LOCKS(gp);
1833 2082 return (0);
1834 2083 }
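
Most of the work above is byte-range arithmetic: an adjacent or overlapping 'request' with the same lock effect and blocker status simply absorbs the 'lock', while a non-mergeable overlap leaves behind at most two fragments of the old lock. A stand-alone sketch of just that fragment computation, using plain integers instead of lock_descriptor_t (illustrative only):

    #include <stdio.h>

    /*
     * Given an existing lock [ls, le] and an overlapping request [rs, re],
     * compute the pieces of the old lock that the request does not cover.
     * Returns the number of pieces (0, 1 or 2) and fills out[][2].  This
     * mirrors the split cases in flk_relation() when the locks cannot be
     * merged.
     */
    static int
    split_lock(long long ls, long long le, long long rs, long long re,
        long long out[2][2])
    {
        int n = 0;

        if (ls < rs) {          /* piece to the left of the request */
            out[n][0] = ls;
            out[n][1] = rs - 1;
            n++;
        }
        if (re < le) {          /* piece to the right of the request */
            out[n][0] = re + 1;
            out[n][1] = le;
            n++;
        }
        return (n);
    }

    int
    main(void)
    {
        long long out[2][2];
        int i, n;

        /* Old lock covers 0-99, request covers 10-19: two fragments. */
        n = split_lock(0, 99, 10, 19, out);
        for (i = 0; i < n; i++)
            (void) printf("fragment %d: %lld-%lld\n",
                i, out[i][0], out[i][1]);
        return (0);
    }
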
1835 2084
1836 2085 /*
1837 2086 * Insert a lock into the active queue.
1838 2087 */
1839 2088
1840 2089 static void
1841 2090 flk_insert_active_lock(lock_descriptor_t *new_lock)
1842 2091 {
1843 2092 graph_t *gp = new_lock->l_graph;
1844 2093 vnode_t *vp = new_lock->l_vnode;
1845 2094 lock_descriptor_t *first_lock, *lock;
1846 2095
1847 2096 ASSERT(MUTEX_HELD(&gp->gp_mutex));
1848 2097
1849 2098 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
1850 2099 first_lock = lock;
1851 2100
1852 2101 if (first_lock != NULL) {
1853 2102 for (; (lock->l_vnode == vp &&
1854 2103 lock->l_start < new_lock->l_start); lock = lock->l_next)
1855 2104 ;
1856 2105 } else {
1857 2106 lock = ACTIVE_HEAD(gp);
1858 2107 }
1859 2108
1860 2109 lock->l_prev->l_next = new_lock;
1861 2110 new_lock->l_next = lock;
1862 2111 new_lock->l_prev = lock->l_prev;
1863 2112 lock->l_prev = new_lock;
1864 2113
1865 2114 if (first_lock == NULL || (new_lock->l_start <= first_lock->l_start)) {
1866 2115 vp->v_filocks = (struct filock *)new_lock;
1867 2116 }
1868 2117 flk_set_state(new_lock, FLK_ACTIVE_STATE);
1869 2118 new_lock->l_state |= ACTIVE_LOCK;
1870 2119
1871 2120 CHECK_ACTIVE_LOCKS(gp);
1872 2121 CHECK_SLEEPING_LOCKS(gp);
1873 2122 }
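
The per-vnode active list is a circular doubly linked list headed by a sentinel and kept sorted by starting offset, so insertion is a short walk followed by a four-pointer splice. A small stand-alone sketch of that splice with a generic node type (not the kernel structures, and without the v_filocks bookkeeping):

    #include <stdio.h>

    /* Hypothetical node in a circular doubly linked list with a sentinel. */
    struct node {
        long long start;
        struct node *next;
        struct node *prev;
    };

    static void
    insert_sorted(struct node *head, struct node *nn)
    {
        struct node *p = head->next;

        /* Walk past every node that starts before the new one. */
        while (p != head && p->start < nn->start)
            p = p->next;

        /* Splice the new node in just before 'p' (possibly the head). */
        nn->next = p;
        nn->prev = p->prev;
        p->prev->next = nn;
        p->prev = nn;
    }

    int
    main(void)
    {
        struct node head = { 0, &head, &head };
        struct node a = { 30 }, b = { 10 }, c = { 20 };
        struct node *p;

        insert_sorted(&head, &a);
        insert_sorted(&head, &b);
        insert_sorted(&head, &c);
        for (p = head.next; p != &head; p = p->next)
            (void) printf("%lld\n", p->start);  /* prints 10, 20, 30 */
        return (0);
    }
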
1874 2123
1875 2124 /*
1876 2125  * Delete the active lock: performs one of two functions depending on
1877 2126  * the value of the second parameter. One is to remove from the active
1878 2127  * lists only, and the other is to both remove and free the lock.
1879 2128 */
1880 2129
1881 2130 static void
1882 2131 flk_delete_active_lock(lock_descriptor_t *lock, int free_lock)
1883 2132 {
1884 2133 vnode_t *vp = lock->l_vnode;
1885 2134 graph_t *gp = lock->l_graph;
1886 2135
1887 2136 ASSERT(MUTEX_HELD(&gp->gp_mutex));
1888 2137 if (free_lock)
1889 2138 ASSERT(NO_DEPENDENTS(lock));
1890 2139 ASSERT(NOT_BLOCKED(lock));
1891 2140 ASSERT(IS_ACTIVE(lock));
1892 2141
1893 2142 ASSERT((vp->v_filocks != NULL));
1894 2143
2144 + if (lock->l_blocker < 0) {
2145 + /* Log the blocker release */
2146 + flk_stale_lock_release(lock);
2147 + lock->l_blocker = 0;
2148 + }
2149 +
1895 2150 if (vp->v_filocks == (struct filock *)lock) {
1896 2151 vp->v_filocks = (struct filock *)
1897 2152 ((lock->l_next->l_vnode == vp) ? lock->l_next :
1898 2153 NULL);
1899 2154 }
1900 2155 lock->l_next->l_prev = lock->l_prev;
1901 2156 lock->l_prev->l_next = lock->l_next;
1902 2157 lock->l_next = lock->l_prev = NULL;
1903 2158 flk_set_state(lock, FLK_DEAD_STATE);
1904 2159 lock->l_state &= ~ACTIVE_LOCK;
1905 2160
1906 2161 if (free_lock)
1907 2162 flk_free_lock(lock);
1908 2163 CHECK_ACTIVE_LOCKS(gp);
1909 2164 CHECK_SLEEPING_LOCKS(gp);
1910 2165 }
1911 2166
1912 2167 /*
1913 2168 * Insert into the sleep queue.
1914 2169 */
1915 2170
1916 2171 static void
1917 2172 flk_insert_sleeping_lock(lock_descriptor_t *request)
1918 2173 {
1919 2174 graph_t *gp = request->l_graph;
1920 2175 vnode_t *vp = request->l_vnode;
1921 2176 lock_descriptor_t *lock;
1922 2177
1923 2178 ASSERT(MUTEX_HELD(&gp->gp_mutex));
1924 2179 ASSERT(IS_INITIAL(request));
1925 2180
1926 2181 for (lock = gp->sleeping_locks.l_next; (lock != &gp->sleeping_locks &&
1927 2182 lock->l_vnode < vp); lock = lock->l_next)
1928 2183 ;
1929 2184
1930 2185 lock->l_prev->l_next = request;
1931 2186 request->l_prev = lock->l_prev;
1932 2187 lock->l_prev = request;
1933 2188 request->l_next = lock;
1934 2189 flk_set_state(request, FLK_SLEEPING_STATE);
1935 2190 request->l_state |= SLEEPING_LOCK;
1936 2191 }
1937 2192
1938 2193 /*
1939 2194 * Cancelling a sleeping lock implies removing a vertex from the
1940 2195 * dependency graph and therefore we should recompute the dependencies
1941 - * of all vertices that have a path to this vertex, w.r.t. all
2196 + * of all vertices that have a path to this vertex, w.r.t. all
1942 2197 * vertices reachable from this vertex.
1943 2198 */
1944 2199
1945 2200 void
1946 2201 flk_cancel_sleeping_lock(lock_descriptor_t *request, int remove_from_queue)
1947 2202 {
1948 2203 graph_t *gp = request->l_graph;
1949 2204 vnode_t *vp = request->l_vnode;
1950 2205 lock_descriptor_t **topology = NULL;
1951 2206 edge_t *ep;
1952 2207 lock_descriptor_t *vertex, *lock;
1953 2208 int nvertex = 0;
1954 2209 int i;
1955 2210 lock_descriptor_t *vertex_stack;
1956 2211
1957 2212 STACK_INIT(vertex_stack);
1958 2213
1959 2214 ASSERT(MUTEX_HELD(&gp->gp_mutex));
1960 2215 /*
1961 2216  * count the number of vertex pointers that have to be allocated:
1962 2217  * all vertices that are reachable from request.
1963 2218 */
1964 2219
1965 2220 STACK_PUSH(vertex_stack, request, l_stack);
1966 2221
1967 2222 while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
1968 2223 STACK_POP(vertex_stack, l_stack);
1969 2224 for (ep = FIRST_ADJ(vertex); ep != HEAD(vertex);
1970 2225 ep = NEXT_ADJ(ep)) {
1971 2226 if (IS_RECOMPUTE(ep->to_vertex))
1972 2227 continue;
1973 2228 ep->to_vertex->l_state |= RECOMPUTE_LOCK;
1974 2229 STACK_PUSH(vertex_stack, ep->to_vertex, l_stack);
1975 2230 nvertex++;
1976 2231 }
1977 2232 }
1978 2233
1979 2234 /*
1980 2235 * allocate memory for holding the vertex pointers
1981 2236 */
1982 2237
1983 2238 if (nvertex) {
1984 2239 topology = kmem_zalloc(nvertex * sizeof (lock_descriptor_t *),
1985 2240 KM_SLEEP);
1986 2241 }
1987 2242
1988 2243 /*
1989 2244 * one more pass to actually store the vertices in the
1990 2245 * allocated array.
1991 2246 * We first check sleeping locks and then active locks
1992 2247 * so that topology array will be in a topological
1993 2248 * order.
1994 2249 */
1995 2250
1996 2251 nvertex = 0;
1997 2252 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
1998 2253
1999 2254 if (lock) {
2000 2255 do {
2001 2256 if (IS_RECOMPUTE(lock)) {
2002 2257 lock->l_index = nvertex;
2003 2258 topology[nvertex++] = lock;
2004 2259 }
2005 2260 lock->l_color = NO_COLOR;
2006 2261 lock = lock->l_next;
2007 2262 } while (lock->l_vnode == vp);
2008 2263 }
2009 2264
2010 2265 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
2011 2266
2012 2267 if (lock) {
2013 2268 do {
2014 2269 if (IS_RECOMPUTE(lock)) {
2015 2270 lock->l_index = nvertex;
2016 2271 topology[nvertex++] = lock;
2017 2272 }
2018 2273 lock->l_color = NO_COLOR;
2019 2274 lock = lock->l_next;
2020 2275 } while (lock->l_vnode == vp);
2021 2276 }
2022 2277
2023 2278 /*
2024 2279 * remove in and out edges of request
2025 2280 * They are freed after updating proc_graph below.
2026 2281 */
2027 2282
2028 2283 for (ep = FIRST_IN(request); ep != HEAD(request); ep = NEXT_IN(ep)) {
2029 2284 ADJ_LIST_REMOVE(ep);
2030 2285 }
2031 2286
2032 2287
2033 2288 if (remove_from_queue)
2034 2289 REMOVE_SLEEP_QUEUE(request);
2035 2290
2036 2291 /* we are ready to recompute */
2037 2292
2038 2293 flk_recompute_dependencies(request, topology, nvertex, 1);
2039 2294
2040 2295 ep = FIRST_ADJ(request);
2041 2296 while (ep != HEAD(request)) {
2042 2297 IN_LIST_REMOVE(ep);
2043 2298 request->l_sedge = NEXT_ADJ(ep);
2044 2299 ADJ_LIST_REMOVE(ep);
2045 2300 flk_update_proc_graph(ep, 1);
2046 2301 flk_free_edge(ep);
2047 2302 ep = request->l_sedge;
2048 2303 }
2049 2304
2050 2305
2051 2306 /*
2052 2307 * unset the RECOMPUTE flag in those vertices
2053 2308 */
2054 2309
2055 2310 for (i = 0; i < nvertex; i++) {
2056 2311 topology[i]->l_state &= ~RECOMPUTE_LOCK;
2057 2312 }
2058 2313
2059 2314 /*
2060 2315 * free the topology
2061 2316 */
2062 2317 if (nvertex)
2063 - kmem_free((void *)topology,
2318 + kmem_free(topology,
2064 2319 (nvertex * sizeof (lock_descriptor_t *)));
2065 2320 /*
2066 2321 * Possibility of some locks unblocked now
2067 2322 */
2068 2323
2069 2324 flk_wakeup(request, 0);
2070 2325
2071 2326 /*
2072 2327 * we expect to have a correctly recomputed graph now.
2073 2328 */
2074 2329 flk_set_state(request, FLK_DEAD_STATE);
2075 2330 flk_free_lock(request);
2076 2331 CHECK_SLEEPING_LOCKS(gp);
2077 2332 CHECK_ACTIVE_LOCKS(gp);
2078 2333
2079 2334 }
2080 2335
2081 2336 /*
2082 2337  * Uncoloring the graph simply increments the mark value of the graph,
2083 2338  * and only when wrap-around takes place do we explicitly reset the
2084 2339  * color of all vertices in the graph.
2085 2340 */
2086 2341
2087 2342 static void
2088 2343 flk_graph_uncolor(graph_t *gp)
2089 2344 {
2090 2345 lock_descriptor_t *lock;
2091 2346
2092 2347 if (gp->mark == UINT_MAX) {
2093 2348 gp->mark = 1;
2094 2349 for (lock = ACTIVE_HEAD(gp)->l_next; lock != ACTIVE_HEAD(gp);
2095 2350 lock = lock->l_next)
2096 2351 lock->l_color = 0;
2097 2352
2098 2353 for (lock = SLEEPING_HEAD(gp)->l_next; lock != SLEEPING_HEAD(gp);
2099 2354 lock = lock->l_next)
2100 2355 lock->l_color = 0;
2101 2356 } else {
2102 2357 gp->mark++;
2103 2358 }
2104 2359 }
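
The mark acts as a generation counter: a vertex counts as colored only while its color equals the graph's current mark, so bumping the mark uncolors every vertex at once and only a wrap of the counter forces the explicit sweep above. A tiny illustration of the same trick, with hypothetical names:

    #include <stdio.h>
    #include <limits.h>

    /* Generation-counter coloring: one bump of 'mark' uncolors everything. */
    static unsigned int mark = 1;

    struct gnode {
        unsigned int color;
    };

    #define GCOLORED(n)  ((n)->color == mark)
    #define GCOLOR(n)    ((n)->color = mark)

    static void
    uncolor_all(struct gnode *nodes, int n)
    {
        int i;

        if (mark == UINT_MAX) {         /* rare wrap: explicit sweep */
            mark = 1;
            for (i = 0; i < n; i++)
                nodes[i].color = 0;
        } else {
            mark++;                     /* common case: O(1) */
        }
    }

    int
    main(void)
    {
        struct gnode nodes[2] = { { 0 }, { 0 } };

        GCOLOR(&nodes[0]);
        (void) printf("%d %d\n", GCOLORED(&nodes[0]), GCOLORED(&nodes[1]));
        uncolor_all(nodes, 2);
        (void) printf("%d %d\n", GCOLORED(&nodes[0]), GCOLORED(&nodes[1]));
        return (0);
    }
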
2105 2360
2106 2361 /*
2107 2362 * Wake up locks that are blocked on the given lock.
2108 2363 */
2109 2364
2110 2365 static void
2111 2366 flk_wakeup(lock_descriptor_t *lock, int adj_list_remove)
2112 2367 {
2113 2368 edge_t *ep;
2114 2369 graph_t *gp = lock->l_graph;
2115 2370 lock_descriptor_t *lck;
2116 2371
2117 2372 ASSERT(MUTEX_HELD(&gp->gp_mutex));
2118 2373 if (NO_DEPENDENTS(lock))
2119 2374 return;
2120 2375 ep = FIRST_IN(lock);
2121 2376 do {
2122 2377 /*
2123 2378 * delete the edge from the adjacency list
2124 2379  * of the from vertex. If there are no more
2125 2380  * adjacent edges for this vertex, wake this process.
2126 2381 */
2127 2382 lck = ep->from_vertex;
2128 2383 if (adj_list_remove)
2129 2384 ADJ_LIST_REMOVE(ep);
2130 2385 flk_update_proc_graph(ep, 1);
2131 2386 if (NOT_BLOCKED(lck)) {
2132 2387 GRANT_WAKEUP(lck);
2133 2388 }
2134 2389 lock->l_sedge = NEXT_IN(ep);
2135 2390 IN_LIST_REMOVE(ep);
2136 2391 flk_free_edge(ep);
2137 2392 ep = lock->l_sedge;
2138 2393 } while (ep != HEAD(lock));
2139 2394 ASSERT(NO_DEPENDENTS(lock));
2140 2395 }
2141 2396
2142 2397 /*
2143 2398  * The dependents of request are checked for their dependency against the
2144 2399 * locks in topology (called topology because the array is and should be in
2145 2400 * topological order for this algorithm, if not in topological order the
2146 2401 * inner loop below might add more edges than necessary. Topological ordering
2147 2402 * of vertices satisfies the property that all edges will be from left to
2148 2403 * right i.e., topology[i] can have an edge to topology[j], iff i<j)
2149 2404 * If lock l1 in the dependent set of request is dependent (blocked by)
2150 2405 * on lock l2 in topology but does not have a path to it, we add an edge
2151 2406 * in the inner loop below.
2152 2407 *
2153 2408 * We don't want to add an edge between l1 and l2 if there exists
2154 2409 * already a path from l1 to l2, so care has to be taken for those vertices
2155 2410 * that have two paths to 'request'. These vertices are referred to here
2156 2411 * as barrier locks.
2157 2412 *
2158 2413  * The barriers have to be found (those vertices that originally had two paths
2159 2414 * to request) because otherwise we may end up adding edges unnecessarily
2160 2415 * to vertices in topology, and thus barrier vertices can have an edge
2161 2416  * to a vertex in topology as well as a path to it.
2162 2417 */
2163 2418
2164 2419 static void
2165 2420 flk_recompute_dependencies(lock_descriptor_t *request,
2166 2421 lock_descriptor_t **topology, int nvertex, int update_graph)
2167 2422 {
2168 2423 lock_descriptor_t *vertex, *lock;
2169 2424 graph_t *gp = request->l_graph;
2170 2425 int i, count;
2171 2426 int barrier_found = 0;
2172 2427 edge_t *ep;
2173 2428 lock_descriptor_t *vertex_stack;
2174 2429
2175 2430 STACK_INIT(vertex_stack);
2176 2431
2177 2432 ASSERT(MUTEX_HELD(&gp->gp_mutex));
2178 2433 if (nvertex == 0)
2179 2434 return;
2180 2435 flk_graph_uncolor(request->l_graph);
2181 2436 barrier_found = flk_find_barriers(request);
2182 2437 request->l_state |= RECOMPUTE_DONE;
2183 2438
2184 2439 STACK_PUSH(vertex_stack, request, l_stack);
2185 2440 request->l_sedge = FIRST_IN(request);
2186 2441
2187 2442
2188 2443 while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
2189 2444 if (vertex->l_state & RECOMPUTE_DONE) {
2190 2445 count = 0;
2191 2446 goto next_in_edge;
2192 2447 }
2193 2448 if (IS_BARRIER(vertex)) {
2194 2449 /* decrement the barrier count */
2195 2450 if (vertex->l_index) {
2196 2451 vertex->l_index--;
2197 2452 /* this guy will be pushed again anyway ? */
2198 2453 STACK_POP(vertex_stack, l_stack);
2199 2454 if (vertex->l_index == 0) {
2200 2455 /*
2201 2456 * barrier is over we can recompute
2202 2457 * dependencies for this lock in the
2203 2458 * next stack pop
2204 2459 */
2205 2460 vertex->l_state &= ~BARRIER_LOCK;
2206 2461 }
2207 2462 continue;
2208 2463 }
2209 2464 }
2210 2465 vertex->l_state |= RECOMPUTE_DONE;
2211 2466 flk_graph_uncolor(gp);
2212 2467 count = flk_color_reachables(vertex);
2213 2468 for (i = 0; i < nvertex; i++) {
2214 2469 lock = topology[i];
2215 2470 if (COLORED(lock))
2216 2471 continue;
2217 2472 if (BLOCKS(lock, vertex)) {
2218 2473 (void) flk_add_edge(vertex, lock,
2219 2474 NO_CHECK_CYCLE, update_graph);
2220 2475 COLOR(lock);
2221 2476 count++;
2222 2477 count += flk_color_reachables(lock);
2223 2478 }
2224 2479
2225 2480 }
2226 2481
2227 2482 next_in_edge:
2228 2483 if (count == nvertex ||
2229 2484 vertex->l_sedge == HEAD(vertex)) {
2230 2485 /* prune the tree below this */
2231 2486 STACK_POP(vertex_stack, l_stack);
2232 2487 vertex->l_state &= ~RECOMPUTE_DONE;
2233 2488 /* update the barrier locks below this! */
2234 2489 if (vertex->l_sedge != HEAD(vertex) && barrier_found) {
2235 2490 flk_graph_uncolor(gp);
2236 2491 flk_update_barriers(vertex);
2237 2492 }
2238 2493 continue;
2239 2494 }
2240 2495
2241 2496 ep = vertex->l_sedge;
2242 2497 lock = ep->from_vertex;
2243 2498 STACK_PUSH(vertex_stack, lock, l_stack);
2244 2499 lock->l_sedge = FIRST_IN(lock);
2245 2500 vertex->l_sedge = NEXT_IN(ep);
2246 2501 }
2247 2502
2248 2503 }
2249 2504
2250 2505 /*
2251 2506  * Color all yet-uncolored vertices reachable from vertex, counting those
2252 2507  * that belong to topology (those that have RECOMPUTE_LOCK set in their state).
2253 2508 *
2254 2509 * Note: we need to use a different stack_link l_stack1 because this is
2255 2510 * called from flk_recompute_dependencies() that already uses a stack with
2256 2511 * l_stack as stack_link.
2257 2512 */
2258 2513
2259 2514 static int
2260 2515 flk_color_reachables(lock_descriptor_t *vertex)
2261 2516 {
2262 2517 lock_descriptor_t *ver, *lock;
2263 2518 int count;
2264 2519 edge_t *ep;
2265 2520 lock_descriptor_t *vertex_stack;
2266 2521
2267 2522 STACK_INIT(vertex_stack);
2268 2523
2269 2524 STACK_PUSH(vertex_stack, vertex, l_stack1);
2270 2525 count = 0;
2271 2526 while ((ver = STACK_TOP(vertex_stack)) != NULL) {
2272 2527
2273 2528 STACK_POP(vertex_stack, l_stack1);
2274 2529 for (ep = FIRST_ADJ(ver); ep != HEAD(ver);
2275 2530 ep = NEXT_ADJ(ep)) {
2276 2531 lock = ep->to_vertex;
2277 2532 if (COLORED(lock))
2278 2533 continue;
2279 2534 COLOR(lock);
2280 2535 if (IS_RECOMPUTE(lock))
2281 2536 count++;
2282 2537 STACK_PUSH(vertex_stack, lock, l_stack1);
2283 2538 }
2284 2539
2285 2540 }
2286 2541 return (count);
2287 2542 }
2288 2543
2289 2544 /*
2290 2545 * Called from flk_recompute_dependencies() this routine decrements
2291 2546 * the barrier count of barrier vertices that are reachable from lock.
2292 2547 */
2293 2548
2294 2549 static void
2295 2550 flk_update_barriers(lock_descriptor_t *lock)
2296 2551 {
2297 2552 lock_descriptor_t *vertex, *lck;
2298 2553 edge_t *ep;
2299 2554 lock_descriptor_t *vertex_stack;
2300 2555
2301 2556 STACK_INIT(vertex_stack);
2302 2557
2303 2558 STACK_PUSH(vertex_stack, lock, l_stack1);
2304 2559
2305 2560 while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
2306 2561 STACK_POP(vertex_stack, l_stack1);
2307 2562 for (ep = FIRST_IN(vertex); ep != HEAD(vertex);
2308 2563 ep = NEXT_IN(ep)) {
2309 2564 lck = ep->from_vertex;
2310 2565 if (COLORED(lck)) {
2311 2566 if (IS_BARRIER(lck)) {
2312 2567 ASSERT(lck->l_index > 0);
2313 2568 lck->l_index--;
2314 2569 if (lck->l_index == 0)
2315 2570 lck->l_state &= ~BARRIER_LOCK;
2316 2571 }
2317 2572 continue;
2318 2573 }
2319 2574 COLOR(lck);
2320 2575 if (IS_BARRIER(lck)) {
2321 2576 ASSERT(lck->l_index > 0);
2322 2577 lck->l_index--;
2323 2578 if (lck->l_index == 0)
2324 2579 lck->l_state &= ~BARRIER_LOCK;
2325 2580 }
2326 2581 STACK_PUSH(vertex_stack, lck, l_stack1);
2327 2582 }
2328 2583 }
2329 2584 }
2330 2585
2331 2586 /*
2332 2587 * Finds all vertices that are reachable from 'lock' more than once and
2333 2588  * marks them as barrier vertices, incrementing their barrier count.
2334 2589 * The barrier count is one minus the total number of paths from lock
2335 2590 * to that vertex.
2336 2591 */
2337 2592
2338 2593 static int
2339 2594 flk_find_barriers(lock_descriptor_t *lock)
2340 2595 {
2341 2596 lock_descriptor_t *vertex, *lck;
2342 2597 int found = 0;
2343 2598 edge_t *ep;
2344 2599 lock_descriptor_t *vertex_stack;
2345 2600
2346 2601 STACK_INIT(vertex_stack);
2347 2602
2348 2603 STACK_PUSH(vertex_stack, lock, l_stack1);
2349 2604
2350 2605 while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
2351 2606 STACK_POP(vertex_stack, l_stack1);
2352 2607 for (ep = FIRST_IN(vertex); ep != HEAD(vertex);
2353 2608 ep = NEXT_IN(ep)) {
2354 2609 lck = ep->from_vertex;
2355 2610 if (COLORED(lck)) {
2356 2611 /* this is a barrier */
2357 2612 lck->l_state |= BARRIER_LOCK;
2358 2613 /* index will have barrier count */
2359 2614 lck->l_index++;
2360 2615 if (!found)
2361 2616 found = 1;
2362 2617 continue;
2363 2618 }
2364 2619 COLOR(lck);
2365 2620 lck->l_index = 0;
2366 2621 STACK_PUSH(vertex_stack, lck, l_stack1);
2367 2622 }
2368 2623 }
2369 2624 return (found);
2370 2625 }
2371 2626
2372 2627 /*
2373 2628 * Finds the first lock that is mainly responsible for blocking this
2374 2629 * request. If there is no such lock, request->l_flock.l_type is set to
2375 2630 * F_UNLCK. Otherwise, request->l_flock is filled in with the particulars
2376 2631 * of the blocking lock.
2377 2632 *
2378 2633 * Note: It is possible a request is blocked by a sleeping lock because
2379 2634 * of the fairness policy used in flk_process_request() to construct the
2380 2635 * dependencies. (see comments before flk_process_request()).
2381 2636 */
2382 2637
2383 2638 static void
2384 2639 flk_get_first_blocking_lock(lock_descriptor_t *request)
2385 2640 {
2386 2641 graph_t *gp = request->l_graph;
2387 2642 vnode_t *vp = request->l_vnode;
2388 2643 lock_descriptor_t *lock, *blocker;
2389 2644
2390 2645 ASSERT(MUTEX_HELD(&gp->gp_mutex));
2391 2646 blocker = NULL;
2392 2647 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
2393 2648
2394 2649 if (lock) {
2395 2650 do {
2396 2651 if (BLOCKS(lock, request)) {
2397 2652 blocker = lock;
2398 2653 break;
2399 2654 }
2400 2655 lock = lock->l_next;
2401 2656 } while (lock->l_vnode == vp);
2402 2657 }
2403 2658
2404 2659 if (blocker == NULL && request->l_flock.l_type == F_RDLCK) {
2405 2660 /*
2406 2661 * No active lock is blocking this request, but if a read
2407 2662 * lock is requested, it may also get blocked by a waiting
2408 2663 * writer. So search all sleeping locks and see if there is
2409 2664 * a writer waiting.
2410 2665 */
2411 2666 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
2412 2667 if (lock) {
2413 2668 do {
2414 2669 if (BLOCKS(lock, request)) {
2415 2670 blocker = lock;
2416 2671 break;
2417 2672 }
2418 2673 lock = lock->l_next;
2419 2674 } while (lock->l_vnode == vp);
2420 2675 }
2421 2676 }
2422 2677
2423 2678 if (blocker) {
2424 2679 report_blocker(blocker, request);
2425 2680 } else
2426 2681 request->l_flock.l_type = F_UNLCK;
2427 2682 }
2428 2683
2429 2684 /*
2430 2685 * Get the graph_t structure associated with a vnode.
2431 2686 * If 'initialize' is non-zero, and the graph_t structure for this vnode has
2432 2687 * not yet been initialized, then a new element is allocated and returned.
2433 2688 */
2434 2689 graph_t *
2435 2690 flk_get_lock_graph(vnode_t *vp, int initialize)
2436 2691 {
2437 2692 graph_t *gp;
2438 2693 graph_t *gp_alloc = NULL;
2439 2694 int index = HASH_INDEX(vp);
2440 2695
2441 2696 if (initialize == FLK_USE_GRAPH) {
2442 2697 mutex_enter(&flock_lock);
2443 2698 gp = lock_graph[index];
2444 2699 mutex_exit(&flock_lock);
2445 2700 return (gp);
2446 2701 }
2447 2702
2448 2703 ASSERT(initialize == FLK_INIT_GRAPH);
2449 2704
2450 2705 if (lock_graph[index] == NULL) {
2451 2706
2452 2707 gp_alloc = kmem_zalloc(sizeof (graph_t), KM_SLEEP);
2453 2708
2454 2709 /* Initialize the graph */
2455 2710
2456 2711 gp_alloc->active_locks.l_next =
2457 2712 gp_alloc->active_locks.l_prev =
2458 2713 (lock_descriptor_t *)ACTIVE_HEAD(gp_alloc);
2459 2714 gp_alloc->sleeping_locks.l_next =
2460 2715 gp_alloc->sleeping_locks.l_prev =
2461 2716 (lock_descriptor_t *)SLEEPING_HEAD(gp_alloc);
2462 2717 gp_alloc->index = index;
2463 2718 mutex_init(&gp_alloc->gp_mutex, NULL, MUTEX_DEFAULT, NULL);
2464 2719 }
2465 2720
2466 2721 mutex_enter(&flock_lock);
2467 2722
2468 2723 gp = lock_graph[index];
2469 2724
2470 2725 /* Recheck the value within flock_lock */
2471 2726 if (gp == NULL) {
2472 2727 struct flock_globals *fg;
2473 2728
2474 2729 /* We must have previously allocated the graph_t structure */
2475 2730 ASSERT(gp_alloc != NULL);
2476 2731 lock_graph[index] = gp = gp_alloc;
2477 2732 /*
2478 2733 * The lockmgr status is only needed if KLM is loaded.
2479 2734 */
2480 2735 if (flock_zone_key != ZONE_KEY_UNINITIALIZED) {
2481 2736 fg = flk_get_globals();
2482 2737 fg->lockmgr_status[index] = fg->flk_lockmgr_status;
2483 2738 }
2484 2739 }
2485 2740
2486 2741 mutex_exit(&flock_lock);
2487 2742
2488 2743 if ((gp_alloc != NULL) && (gp != gp_alloc)) {
2489 2744 /* There was a race to allocate the graph_t and we lost */
2490 2745 mutex_destroy(&gp_alloc->gp_mutex);
2491 2746 kmem_free(gp_alloc, sizeof (graph_t));
2492 2747 }
2493 2748
2494 2749 return (gp);
2495 2750 }
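
The allocation pattern above is the usual one: build the candidate graph_t without holding flock_lock, recheck the hash slot while holding it, and throw the candidate away if another thread won the race. A user-space sketch of the same pattern with pthreads, using simplified hypothetical types (error handling omitted):

    #include <stdlib.h>
    #include <pthread.h>

    /* Simplified stand-in for a lock graph. */
    struct graph {
        pthread_mutex_t g_mutex;
    };

    static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;
    static struct graph *table[64];

    /* Return the graph for 'index', allocating it on first use. */
    static struct graph *
    get_graph(int index)
    {
        struct graph *gp, *gp_alloc = NULL;

        /* Allocate and initialize outside the table lock. */
        if (table[index] == NULL) {
            gp_alloc = calloc(1, sizeof (*gp_alloc));
            (void) pthread_mutex_init(&gp_alloc->g_mutex, NULL);
        }

        (void) pthread_mutex_lock(&table_lock);
        gp = table[index];
        if (gp == NULL)                 /* recheck under the lock */
            table[index] = gp = gp_alloc;
        (void) pthread_mutex_unlock(&table_lock);

        /* We lost the race: discard our candidate. */
        if (gp_alloc != NULL && gp != gp_alloc) {
            (void) pthread_mutex_destroy(&gp_alloc->g_mutex);
            free(gp_alloc);
        }
        return (gp);
    }

    int
    main(void)
    {
        return (get_graph(3) == get_graph(3) ? 0 : 1);
    }
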
2496 2751
2497 2752 /*
2498 2753 * PSARC case 1997/292
2499 2754 */
2500 2755 int
2501 2756 cl_flk_has_remote_locks_for_nlmid(vnode_t *vp, int nlmid)
2502 2757 {
2503 2758 lock_descriptor_t *lock;
2504 2759 int result = 0;
2505 2760 graph_t *gp;
2506 2761 int lock_nlmid;
2507 2762
2508 2763 /*
2509 2764 * Check to see if node is booted as a cluster. If not, return.
2510 2765 */
2511 2766 if ((cluster_bootflags & CLUSTER_BOOTED) == 0) {
2512 2767 return (0);
2513 2768 }
2514 2769
2515 2770 gp = flk_get_lock_graph(vp, FLK_USE_GRAPH);
2516 2771 if (gp == NULL) {
2517 2772 return (0);
2518 2773 }
2519 2774
2520 2775 mutex_enter(&gp->gp_mutex);
2521 2776
2522 2777 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
2523 2778
2524 2779 if (lock) {
2525 2780 while (lock->l_vnode == vp) {
2526 2781 /* get NLM id from sysid */
2527 2782 lock_nlmid = GETNLMID(lock->l_flock.l_sysid);
2528 2783
2529 2784 /*
2530 2785 * If NLM server request _and_ nlmid of lock matches
2531 2786 * nlmid of argument, then we've found a remote lock.
2532 2787 */
2533 2788 if (IS_LOCKMGR(lock) && nlmid == lock_nlmid) {
2534 2789 result = 1;
2535 2790 goto done;
2536 2791 }
2537 2792 lock = lock->l_next;
2538 2793 }
2539 2794 }
2540 2795
2541 2796 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
2542 2797
2543 2798 if (lock) {
2544 2799 while (lock->l_vnode == vp) {
2545 2800 /* get NLM id from sysid */
2546 2801 lock_nlmid = GETNLMID(lock->l_flock.l_sysid);
2547 2802
2548 2803 /*
2549 2804 * If NLM server request _and_ nlmid of lock matches
2550 2805 * nlmid of argument, then we've found a remote lock.
2551 2806 */
2552 2807 if (IS_LOCKMGR(lock) && nlmid == lock_nlmid) {
2553 2808 result = 1;
2554 2809 goto done;
2555 2810 }
2556 2811 lock = lock->l_next;
2557 2812 }
2558 2813 }
2559 2814
2560 2815 done:
2561 2816 mutex_exit(&gp->gp_mutex);
2562 2817 return (result);
2563 2818 }
2564 2819
2565 2820 /*
2566 2821 * Determine whether there are any locks for the given vnode with a remote
2567 2822 * sysid. Returns zero if not, non-zero if there are.
2568 2823 *
2569 2824 * Note that the return value from this function is potentially invalid
2570 2825 * once it has been returned. The caller is responsible for providing its
2571 2826 * own synchronization mechanism to ensure that the return value is useful
2572 2827 * (e.g., see nfs_lockcompletion()).
2573 2828 */
2574 2829 int
2575 2830 flk_has_remote_locks(vnode_t *vp)
2576 2831 {
2577 2832 lock_descriptor_t *lock;
2578 2833 int result = 0;
2579 2834 graph_t *gp;
2580 2835
2581 2836 gp = flk_get_lock_graph(vp, FLK_USE_GRAPH);
2582 2837 if (gp == NULL) {
2583 2838 return (0);
2584 2839 }
2585 2840
2586 2841 mutex_enter(&gp->gp_mutex);
2587 2842
2588 2843 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
2589 2844
2590 2845 if (lock) {
2591 2846 while (lock->l_vnode == vp) {
2592 2847 if (IS_REMOTE(lock)) {
2593 2848 result = 1;
2594 2849 goto done;
2595 2850 }
2596 2851 lock = lock->l_next;
2597 2852 }
2598 2853 }
2599 2854
2600 2855 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
2601 2856
2602 2857 if (lock) {
2603 2858 while (lock->l_vnode == vp) {
2604 2859 if (IS_REMOTE(lock)) {
2605 2860 result = 1;
2606 2861 goto done;
2607 2862 }
2608 2863 lock = lock->l_next;
2609 2864 }
2610 2865 }
2611 2866
2612 2867 done:
2613 2868 mutex_exit(&gp->gp_mutex);
2614 2869 return (result);
2615 2870 }
2616 2871
2617 2872 /*
2618 2873 * Determine whether there are any locks for the given vnode with a remote
2619 2874  * sysid matching the given sysid.
2620 2875 * Used by the new (open source) NFS Lock Manager (NLM)
2621 2876 */
2622 2877 int
2623 2878 flk_has_remote_locks_for_sysid(vnode_t *vp, int sysid)
2624 2879 {
2625 2880 lock_descriptor_t *lock;
2626 2881 int result = 0;
2627 2882 graph_t *gp;
2628 2883
2629 2884 if (sysid == 0)
2630 2885 return (0);
2631 2886
2632 2887 gp = flk_get_lock_graph(vp, FLK_USE_GRAPH);
2633 2888 if (gp == NULL) {
2634 2889 return (0);
2635 2890 }
2636 2891
2637 2892 mutex_enter(&gp->gp_mutex);
2638 2893
2639 2894 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
2640 2895
2641 2896 if (lock) {
2642 2897 while (lock->l_vnode == vp) {
2643 2898 if (lock->l_flock.l_sysid == sysid) {
2644 2899 result = 1;
2645 2900 goto done;
2646 2901 }
2647 2902 lock = lock->l_next;
2648 2903 }
2649 2904 }
2650 2905
2651 2906 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
2652 2907
2653 2908 if (lock) {
2654 2909 while (lock->l_vnode == vp) {
2655 2910 if (lock->l_flock.l_sysid == sysid) {
2656 2911 result = 1;
2657 2912 goto done;
2658 2913 }
2659 2914 lock = lock->l_next;
2660 2915 }
2661 2916 }
2662 2917
2663 2918 done:
2664 2919 mutex_exit(&gp->gp_mutex);
2665 2920 return (result);
2666 2921 }
2667 2922
2668 2923 /*
2669 2924 * Determine if there are any locks owned by the given sysid.
2670 2925 * Returns zero if not, non-zero if there are. Note that this return code
2671 2926 * could be derived from flk_get_{sleeping,active}_locks, but this routine
2672 2927 * avoids all the memory allocations of those routines.
2673 2928 *
2674 2929 * This routine has the same synchronization issues as
2675 2930 * flk_has_remote_locks.
2676 2931 */
2677 2932
2678 2933 int
2679 2934 flk_sysid_has_locks(int sysid, int lck_type)
2680 2935 {
2681 2936 int has_locks = 0;
2682 2937 lock_descriptor_t *lock;
2683 2938 graph_t *gp;
2684 2939 int i;
2685 2940
2686 2941 for (i = 0; i < HASH_SIZE && !has_locks; i++) {
2687 2942 mutex_enter(&flock_lock);
2688 2943 gp = lock_graph[i];
2689 2944 mutex_exit(&flock_lock);
2690 2945 if (gp == NULL) {
2691 2946 continue;
2692 2947 }
2693 2948
2694 2949 mutex_enter(&gp->gp_mutex);
2695 2950
2696 2951 if (lck_type & FLK_QUERY_ACTIVE) {
2697 2952 for (lock = ACTIVE_HEAD(gp)->l_next;
2698 2953 lock != ACTIVE_HEAD(gp) && !has_locks;
2699 2954 lock = lock->l_next) {
2700 2955 if (lock->l_flock.l_sysid == sysid)
2701 2956 has_locks = 1;
2702 2957 }
2703 2958 }
2704 2959
2705 2960 if (lck_type & FLK_QUERY_SLEEPING) {
2706 2961 for (lock = SLEEPING_HEAD(gp)->l_next;
2707 2962 lock != SLEEPING_HEAD(gp) && !has_locks;
2708 2963 lock = lock->l_next) {
2709 2964 if (lock->l_flock.l_sysid == sysid)
2710 2965 has_locks = 1;
2711 2966 }
2712 2967 }
2713 2968 mutex_exit(&gp->gp_mutex);
2714 2969 }
2715 2970
2716 2971 return (has_locks);
2717 2972 }
2718 2973
2719 2974
2720 2975 /*
2721 2976 * PSARC case 1997/292
2722 2977 *
2723 2978  * Requires: "sysid" is a pair [nlmid, sysid]. The lower half is a 16-bit
2724 2979 * quantity, the real sysid generated by the NLM server; the upper half
2725 2980 * identifies the node of the cluster where the NLM server ran.
2726 2981 * This routine is only called by an NLM server running in a cluster.
2727 2982 * Effects: Remove all locks held on behalf of the client identified
2728 2983 * by "sysid."
2729 2984 */
2730 2985 void
2731 2986 cl_flk_remove_locks_by_sysid(int sysid)
2732 2987 {
2733 2988 graph_t *gp;
2734 2989 int i;
2735 2990 lock_descriptor_t *lock, *nlock;
2736 2991
2737 2992 /*
2738 2993 * Check to see if node is booted as a cluster. If not, return.
2739 2994 */
2740 2995 if ((cluster_bootflags & CLUSTER_BOOTED) == 0) {
2741 2996 return;
2742 2997 }
2743 2998
2744 2999 ASSERT(sysid != 0);
2745 3000 for (i = 0; i < HASH_SIZE; i++) {
2746 3001 mutex_enter(&flock_lock);
2747 3002 gp = lock_graph[i];
2748 3003 mutex_exit(&flock_lock);
2749 3004
2750 3005 if (gp == NULL)
2751 3006 continue;
2752 3007
2753 3008 mutex_enter(&gp->gp_mutex); /* get mutex on lock graph */
2754 3009
2755 3010 /* signal sleeping requests so that they bail out */
2756 3011 lock = SLEEPING_HEAD(gp)->l_next;
2757 3012 while (lock != SLEEPING_HEAD(gp)) {
2758 3013 nlock = lock->l_next;
2759 3014 if (lock->l_flock.l_sysid == sysid) {
2760 3015 INTERRUPT_WAKEUP(lock);
2761 3016 }
2762 3017 lock = nlock;
2763 3018 }
2764 3019
2765 3020 /* delete active locks */
2766 3021 lock = ACTIVE_HEAD(gp)->l_next;
2767 3022 while (lock != ACTIVE_HEAD(gp)) {
2768 3023 nlock = lock->l_next;
2769 3024 if (lock->l_flock.l_sysid == sysid) {
2770 3025 flk_delete_active_lock(lock, 0);
2771 3026 flk_wakeup(lock, 1);
2772 3027 flk_free_lock(lock);
2773 3028 }
2774 3029 lock = nlock;
2775 3030 }
2776 3031 mutex_exit(&gp->gp_mutex); /* release mutex on lock graph */
2777 3032 }
2778 3033 }
2779 3034
2780 3035 /*
2781 3036  * Delete all locks in the system that belong to the sysid of the request.
2782 3037 */
2783 3038
2784 3039 static void
2785 3040 flk_delete_locks_by_sysid(lock_descriptor_t *request)
2786 3041 {
2787 3042 int sysid = request->l_flock.l_sysid;
2788 3043 lock_descriptor_t *lock, *nlock;
2789 3044 graph_t *gp;
2790 3045 int i;
2791 3046
2792 3047 ASSERT(MUTEX_HELD(&request->l_graph->gp_mutex));
2793 3048 ASSERT(sysid != 0);
2794 3049
2795 3050 mutex_exit(&request->l_graph->gp_mutex);
2796 3051
2797 3052 for (i = 0; i < HASH_SIZE; i++) {
2798 3053 mutex_enter(&flock_lock);
2799 3054 gp = lock_graph[i];
2800 3055 mutex_exit(&flock_lock);
2801 3056
2802 3057 if (gp == NULL)
2803 3058 continue;
2804 3059
2805 3060 mutex_enter(&gp->gp_mutex);
2806 3061
2807 3062 /* signal sleeping requests so that they bail out */
2808 3063 lock = SLEEPING_HEAD(gp)->l_next;
2809 3064 while (lock != SLEEPING_HEAD(gp)) {
2810 3065 nlock = lock->l_next;
2811 3066 if (lock->l_flock.l_sysid == sysid) {
2812 3067 INTERRUPT_WAKEUP(lock);
2813 3068 }
2814 3069 lock = nlock;
2815 3070 }
2816 3071
2817 3072 /* delete active locks */
2818 3073 lock = ACTIVE_HEAD(gp)->l_next;
2819 3074 while (lock != ACTIVE_HEAD(gp)) {
2820 3075 nlock = lock->l_next;
2821 3076 if (lock->l_flock.l_sysid == sysid) {
2822 3077 flk_delete_active_lock(lock, 0);
2823 3078 flk_wakeup(lock, 1);
2824 3079 flk_free_lock(lock);
2825 3080 }
2826 3081 lock = nlock;
2827 3082 }
2828 3083 mutex_exit(&gp->gp_mutex);
2829 3084 }
2830 3085
2831 3086 mutex_enter(&request->l_graph->gp_mutex);
2832 3087 }
2833 3088
2834 3089 /*
2835 3090 * Clustering: Deletes PXFS locks
2836 3091 * Effects: Delete all locks on files in the given file system and with the
2837 3092 * given PXFS id.
2838 3093 */
2839 3094 void
2840 3095 cl_flk_delete_pxfs_locks(struct vfs *vfsp, int pxfsid)
2841 3096 {
2842 3097 lock_descriptor_t *lock, *nlock;
2843 3098 graph_t *gp;
2844 3099 int i;
2845 3100
2846 3101 for (i = 0; i < HASH_SIZE; i++) {
2847 3102 mutex_enter(&flock_lock);
2848 3103 gp = lock_graph[i];
2849 3104 mutex_exit(&flock_lock);
2850 3105
2851 3106 if (gp == NULL)
2852 3107 continue;
2853 3108
2854 3109 mutex_enter(&gp->gp_mutex);
2855 3110
2856 3111 /* signal sleeping requests so that they bail out */
2857 3112 lock = SLEEPING_HEAD(gp)->l_next;
2858 3113 while (lock != SLEEPING_HEAD(gp)) {
2859 3114 nlock = lock->l_next;
2860 3115 if (lock->l_vnode->v_vfsp == vfsp) {
2861 3116 ASSERT(IS_PXFS(lock));
2862 3117 if (GETPXFSID(lock->l_flock.l_sysid) ==
2863 3118 pxfsid) {
2864 3119 flk_set_state(lock,
2865 3120 FLK_CANCELLED_STATE);
2866 3121 flk_cancel_sleeping_lock(lock, 1);
2867 3122 }
2868 3123 }
2869 3124 lock = nlock;
2870 3125 }
2871 3126
2872 3127 /* delete active locks */
2873 3128 lock = ACTIVE_HEAD(gp)->l_next;
2874 3129 while (lock != ACTIVE_HEAD(gp)) {
2875 3130 nlock = lock->l_next;
2876 3131 if (lock->l_vnode->v_vfsp == vfsp) {
2877 3132 ASSERT(IS_PXFS(lock));
2878 3133 if (GETPXFSID(lock->l_flock.l_sysid) ==
2879 3134 pxfsid) {
2880 3135 flk_delete_active_lock(lock, 0);
2881 3136 flk_wakeup(lock, 1);
2882 3137 flk_free_lock(lock);
2883 3138 }
2884 3139 }
2885 3140 lock = nlock;
2886 3141 }
2887 3142 mutex_exit(&gp->gp_mutex);
2888 3143 }
2889 3144 }
2890 3145
2891 3146 /*
2892 3147 * Search for a sleeping lock manager lock which matches exactly this lock
2893 3148 * request; if one is found, fake a signal to cancel it.
2894 3149 *
2895 3150 * Return 1 if a matching lock was found, 0 otherwise.
2896 3151 */
2897 3152
2898 3153 static int
2899 3154 flk_canceled(lock_descriptor_t *request)
2900 3155 {
2901 3156 lock_descriptor_t *lock, *nlock;
2902 3157 graph_t *gp = request->l_graph;
2903 3158 vnode_t *vp = request->l_vnode;
2904 3159
2905 3160 ASSERT(MUTEX_HELD(&gp->gp_mutex));
2906 3161 ASSERT(IS_LOCKMGR(request));
2907 3162 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
2908 3163
2909 3164 if (lock) {
2910 3165 while (lock->l_vnode == vp) {
2911 3166 nlock = lock->l_next;
2912 3167 if (SAME_OWNER(lock, request) &&
2913 3168 lock->l_start == request->l_start &&
2914 3169 lock->l_end == request->l_end) {
2915 3170 INTERRUPT_WAKEUP(lock);
2916 3171 return (1);
2917 3172 }
2918 3173 lock = nlock;
2919 3174 }
2920 3175 }
2921 3176 return (0);
2922 3177 }
2923 3178
2924 3179 /*
2925 3180 * Remove all non-OFD locks for the vnode belonging to the given pid and sysid.
2926 3181 * That is, since OFD locks are pid-less we'll never match on the incoming
2927 3182 * pid. OFD locks are removed earlier in the close() path via closef() and
2928 3183 * ofdcleanlock().
2929 3184 */
2930 3185 void
2931 3186 cleanlocks(vnode_t *vp, pid_t pid, int sysid)
2932 3187 {
2933 3188 graph_t *gp;
2934 3189 lock_descriptor_t *lock, *nlock;
2935 3190 lock_descriptor_t *link_stack;
2936 3191
2937 3192 STACK_INIT(link_stack);
2938 3193
2939 3194 gp = flk_get_lock_graph(vp, FLK_USE_GRAPH);
2940 3195
2941 3196 if (gp == NULL)
2942 3197 return;
2943 3198 mutex_enter(&gp->gp_mutex);
2944 3199
2945 3200 CHECK_SLEEPING_LOCKS(gp);
2946 3201 CHECK_ACTIVE_LOCKS(gp);
2947 3202
2948 3203 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
2949 3204
2950 3205 if (lock) {
2951 3206 do {
2952 3207 nlock = lock->l_next;
2953 3208 if ((lock->l_flock.l_pid == pid ||
2954 3209 pid == IGN_PID) &&
2955 3210 lock->l_flock.l_sysid == sysid) {
2956 3211 CANCEL_WAKEUP(lock);
2957 3212 }
2958 3213 lock = nlock;
2959 3214 } while (lock->l_vnode == vp);
2960 3215 }
2961 3216
2962 3217 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
2963 3218
2964 3219 if (lock) {
2965 3220 do {
2966 3221 nlock = lock->l_next;
2967 3222 if ((lock->l_flock.l_pid == pid ||
2968 3223 pid == IGN_PID) &&
2969 3224 lock->l_flock.l_sysid == sysid) {
2970 3225 flk_delete_active_lock(lock, 0);
2971 3226 STACK_PUSH(link_stack, lock, l_stack);
2972 3227 }
2973 3228 lock = nlock;
2974 3229 } while (lock->l_vnode == vp);
2975 3230 }
2976 3231
2977 3232 while ((lock = STACK_TOP(link_stack)) != NULL) {
2978 3233 STACK_POP(link_stack, l_stack);
2979 3234 flk_wakeup(lock, 1);
2980 3235 flk_free_lock(lock);
2981 3236 }
2982 3237
2983 3238 CHECK_SLEEPING_LOCKS(gp);
2984 3239 CHECK_ACTIVE_LOCKS(gp);
2985 3240 CHECK_OWNER_LOCKS(gp, pid, sysid, vp);
2986 3241 mutex_exit(&gp->gp_mutex);
2987 3242 }
2988 3243
2989 3244
2990 3245 /*
2991 3246 * Called from 'fs' read and write routines for files that have mandatory
2992 3247 * locking enabled.
2993 3248 */
2994 3249
2995 3250 int
2996 3251 chklock(struct vnode *vp, int iomode, u_offset_t offset, ssize_t len, int fmode,
2997 3252 caller_context_t *ct)
2998 3253 {
2999 - register int i;
3254 + int i;
3000 3255 struct flock64 bf;
3001 3256 int error = 0;
3002 3257
3003 3258 bf.l_type = (iomode & FWRITE) ? F_WRLCK : F_RDLCK;
3004 3259 bf.l_whence = 0;
3005 3260 bf.l_start = offset;
3006 3261 bf.l_len = len;
3007 3262 if (ct == NULL) {
3008 3263 bf.l_pid = curproc->p_pid;
3009 3264 bf.l_sysid = 0;
3010 3265 } else {
3011 3266 bf.l_pid = ct->cc_pid;
3012 3267 bf.l_sysid = ct->cc_sysid;
3013 3268 }
3014 3269 i = (fmode & (FNDELAY|FNONBLOCK)) ? INOFLCK : INOFLCK|SLPFLCK;
3015 3270 if ((i = reclock(vp, &bf, i, 0, offset, NULL)) != 0 ||
3016 3271 bf.l_type != F_UNLCK)
3017 3272 error = i ? i : EAGAIN;
3018 3273 return (error);
3019 3274 }
3020 3275
3021 3276 /*
3022 3277 * convoff - converts the given data (start, whence) to the
3023 3278 * given whence.
3024 3279 */
3025 3280 int
3026 3281 convoff(struct vnode *vp, struct flock64 *lckdat, int whence, offset_t offset)
3027 3282 {
3028 3283 int error;
3029 3284 struct vattr vattr;
3030 3285
3031 3286 if ((lckdat->l_whence == 2) || (whence == 2)) {
3032 3287 vattr.va_mask = AT_SIZE;
3033 3288 if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
3034 3289 return (error);
3035 3290 }
3036 3291
3037 3292 switch (lckdat->l_whence) {
3038 3293 case 1:
3039 3294 lckdat->l_start += offset;
3040 3295 break;
3041 3296 case 2:
3042 3297 lckdat->l_start += vattr.va_size;
3043 3298 /* FALLTHRU */
3044 3299 case 0:
3045 3300 break;
3046 3301 default:
3047 3302 return (EINVAL);
3048 3303 }
3049 3304
3050 3305 if (lckdat->l_start < 0)
3051 3306 return (EINVAL);
3052 3307
3053 3308 switch (whence) {
3054 3309 case 1:
3055 3310 lckdat->l_start -= offset;
3056 3311 break;
3057 3312 case 2:
3058 3313 lckdat->l_start -= vattr.va_size;
3059 3314 /* FALLTHRU */
3060 3315 case 0:
3061 3316 break;
3062 3317 default:
3063 3318 return (EINVAL);
3064 3319 }
3065 3320
3066 3321 lckdat->l_whence = (short)whence;
3067 3322 return (0);
3068 3323 }
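A short sketch of the usual caller: fcntl()-style code normalizes an flock64 to an absolute, SEEK_SET-based range before handing it to the lock code. The locals bf and fileoff are hypothetical and assumed to be filled in by the caller.

    struct flock64 bf;      /* copied in from the fcntl() argument */
    offset_t fileoff;       /* current offset of the open file */
    int error;

    /* Rewrite bf so that l_whence == 0 and l_start is absolute. */
    if ((error = convoff(vp, &bf, 0, fileoff)) != 0)
            return (error);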
3069 3324
3070 3325
3071 3326 /* proc_graph function definitions */
3072 3327
3073 3328 /*
3074 3329  * Checks for a deadlock caused by the new 'lock'. If a deadlock is found,
3075 3330  * the edges of this lock are freed and 1 is returned; otherwise 0.
3076 3331 */
3077 3332
3078 3333 static int
3079 3334 flk_check_deadlock(lock_descriptor_t *lock)
3080 3335 {
3081 3336 proc_vertex_t *start_vertex, *pvertex;
3082 3337 proc_vertex_t *dvertex;
3083 3338 proc_edge_t *pep, *ppep;
3084 3339 edge_t *ep, *nep;
3085 3340 proc_vertex_t *process_stack;
3086 3341
3087 3342 /*
3088 3343 * OFD style locks are not associated with any process so there is
3089 3344 * no proc graph for these. Thus we cannot, and do not, do deadlock
3090 3345 * detection.
3091 3346 */
3092 3347 if (lock->l_ofd != NULL)
3093 3348 return (0);
3094 3349
3095 3350 STACK_INIT(process_stack);
3096 3351
3097 3352 mutex_enter(&flock_lock);
3098 3353 start_vertex = flk_get_proc_vertex(lock);
3099 3354 ASSERT(start_vertex != NULL);
3100 3355
3101 3356 /* construct the edges from this process to other processes */
3102 3357
3103 3358 ep = FIRST_ADJ(lock);
3104 3359 while (ep != HEAD(lock)) {
3105 3360 proc_vertex_t *adj_proc;
3106 3361
3107 3362 adj_proc = flk_get_proc_vertex(ep->to_vertex);
3108 3363 for (pep = start_vertex->edge; pep != NULL; pep = pep->next) {
3109 3364 if (pep->to_proc == adj_proc) {
3110 3365 ASSERT(pep->refcount);
3111 3366 pep->refcount++;
3112 3367 break;
3113 3368 }
3114 3369 }
3115 3370 if (pep == NULL) {
3116 3371 pep = flk_get_proc_edge();
3117 3372 pep->to_proc = adj_proc;
3118 3373 pep->refcount = 1;
3119 3374 adj_proc->incount++;
3120 3375 pep->next = start_vertex->edge;
3121 3376 start_vertex->edge = pep;
3122 3377 }
3123 3378 ep = NEXT_ADJ(ep);
3124 3379 }
3125 3380
3126 3381 ep = FIRST_IN(lock);
3127 3382
3128 3383 while (ep != HEAD(lock)) {
3129 3384 proc_vertex_t *in_proc;
3130 3385
3131 3386 in_proc = flk_get_proc_vertex(ep->from_vertex);
3132 3387
3133 3388 for (pep = in_proc->edge; pep != NULL; pep = pep->next) {
3134 3389 if (pep->to_proc == start_vertex) {
3135 3390 ASSERT(pep->refcount);
3136 3391 pep->refcount++;
3137 3392 break;
3138 3393 }
3139 3394 }
3140 3395 if (pep == NULL) {
3141 3396 pep = flk_get_proc_edge();
3142 3397 pep->to_proc = start_vertex;
3143 3398 pep->refcount = 1;
3144 3399 start_vertex->incount++;
3145 3400 pep->next = in_proc->edge;
3146 3401 in_proc->edge = pep;
3147 3402 }
3148 3403 ep = NEXT_IN(ep);
3149 3404 }
3150 3405
3151 3406 if (start_vertex->incount == 0) {
3152 3407 mutex_exit(&flock_lock);
3153 3408 return (0);
3154 3409 }
3155 3410
3156 3411 flk_proc_graph_uncolor();
3157 3412
3158 3413 start_vertex->p_sedge = start_vertex->edge;
3159 3414
3160 3415 STACK_PUSH(process_stack, start_vertex, p_stack);
3161 3416
3162 3417 while ((pvertex = STACK_TOP(process_stack)) != NULL) {
3163 3418 for (pep = pvertex->p_sedge; pep != NULL; pep = pep->next) {
3164 3419 dvertex = pep->to_proc;
3165 3420 if (!PROC_ARRIVED(dvertex)) {
3166 3421 STACK_PUSH(process_stack, dvertex, p_stack);
3167 3422 dvertex->p_sedge = dvertex->edge;
3168 3423 PROC_ARRIVE(pvertex);
3169 3424 pvertex->p_sedge = pep->next;
3170 3425 break;
3171 3426 }
3172 3427 if (!PROC_DEPARTED(dvertex))
3173 3428 goto deadlock;
3174 3429 }
3175 3430 if (pep == NULL) {
3176 3431 PROC_DEPART(pvertex);
3177 3432 STACK_POP(process_stack, p_stack);
3178 3433 }
3179 3434 }
3180 3435 mutex_exit(&flock_lock);
3181 3436 return (0);
3182 3437
3183 3438 deadlock:
3184 3439
3185 3440 /* we remove all lock edges and proc edges */
3186 3441
3187 3442 ep = FIRST_ADJ(lock);
3188 3443 while (ep != HEAD(lock)) {
3189 3444 proc_vertex_t *adj_proc;
3190 3445 adj_proc = flk_get_proc_vertex(ep->to_vertex);
3191 3446 nep = NEXT_ADJ(ep);
3192 3447 IN_LIST_REMOVE(ep);
3193 3448 ADJ_LIST_REMOVE(ep);
3194 3449 flk_free_edge(ep);
3195 3450 ppep = start_vertex->edge;
3196 3451 for (pep = start_vertex->edge; pep != NULL; ppep = pep,
3197 3452 pep = ppep->next) {
3198 3453 if (pep->to_proc == adj_proc) {
3199 3454 pep->refcount--;
3200 3455 if (pep->refcount == 0) {
3201 3456 if (pep == ppep) {
3202 3457 start_vertex->edge = pep->next;
3203 3458 } else {
3204 3459 ppep->next = pep->next;
3205 3460 }
3206 3461 adj_proc->incount--;
3207 3462 flk_proc_release(adj_proc);
3208 3463 flk_free_proc_edge(pep);
3209 3464 }
3210 3465 break;
3211 3466 }
3212 3467 }
3213 3468 ep = nep;
3214 3469 }
3215 3470 ep = FIRST_IN(lock);
3216 3471 while (ep != HEAD(lock)) {
3217 3472 proc_vertex_t *in_proc;
3218 3473 in_proc = flk_get_proc_vertex(ep->from_vertex);
3219 3474 nep = NEXT_IN(ep);
3220 3475 IN_LIST_REMOVE(ep);
3221 3476 ADJ_LIST_REMOVE(ep);
3222 3477 flk_free_edge(ep);
3223 3478 ppep = in_proc->edge;
3224 3479 for (pep = in_proc->edge; pep != NULL; ppep = pep,
3225 3480 pep = ppep->next) {
3226 3481 if (pep->to_proc == start_vertex) {
3227 3482 pep->refcount--;
3228 3483 if (pep->refcount == 0) {
3229 3484 if (pep == ppep) {
3230 3485 in_proc->edge = pep->next;
3231 3486 } else {
3232 3487 ppep->next = pep->next;
3233 3488 }
3234 3489 start_vertex->incount--;
3235 3490 flk_proc_release(in_proc);
3236 3491 flk_free_proc_edge(pep);
3237 3492 }
3238 3493 break;
3239 3494 }
3240 3495 }
3241 3496 ep = nep;
3242 3497 }
3243 3498 flk_proc_release(start_vertex);
3244 3499 mutex_exit(&flock_lock);
3245 3500 return (1);
3246 3501 }
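For illustration only, a user-level sketch (not from this source) of the classic two-process cycle that this check turns into EDEADLK; fd and the byte ranges are hypothetical.

    struct flock fl;

    fl.l_type = F_WRLCK;
    fl.l_whence = SEEK_SET;

    /* process A */
    fl.l_start = 0;   fl.l_len = 100; (void) fcntl(fd, F_SETLK, &fl);
    fl.l_start = 100; fl.l_len = 100; (void) fcntl(fd, F_SETLKW, &fl); /* blocks on B */

    /* process B */
    fl.l_start = 100; fl.l_len = 100; (void) fcntl(fd, F_SETLK, &fl);
    fl.l_start = 0;   fl.l_len = 100; (void) fcntl(fd, F_SETLKW, &fl); /* fails, EDEADLK */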
3247 3502
3248 3503 /*
3249 3504  * Get a proc vertex. If the lock's pvertex value refers to the correct proc
3250 3505  * vertex in the list, we return that; otherwise we allocate one. If
3251 3506  * necessary, we also grow the list of vertices.
3252 3507 */
3253 3508
3254 3509 static proc_vertex_t *
3255 3510 flk_get_proc_vertex(lock_descriptor_t *lock)
3256 3511 {
3257 3512 int i;
3258 3513 proc_vertex_t *pv;
3259 3514 proc_vertex_t **palloc;
3260 3515
3261 3516 ASSERT(MUTEX_HELD(&flock_lock));
3262 3517 if (lock->pvertex != -1) {
3263 3518 ASSERT(lock->pvertex >= 0);
3264 3519 pv = pgraph.proc[lock->pvertex];
3265 3520 if (pv != NULL && PROC_SAME_OWNER(lock, pv)) {
3266 3521 return (pv);
3267 3522 }
3268 3523 }
3269 3524 for (i = 0; i < pgraph.gcount; i++) {
3270 3525 pv = pgraph.proc[i];
3271 3526 if (pv != NULL && PROC_SAME_OWNER(lock, pv)) {
3272 3527 lock->pvertex = pv->index = i;
3273 3528 return (pv);
3274 3529 }
3275 3530 }
3276 3531 pv = kmem_zalloc(sizeof (struct proc_vertex), KM_SLEEP);
3277 3532 pv->pid = lock->l_flock.l_pid;
3278 3533 pv->sysid = lock->l_flock.l_sysid;
3279 3534 flk_proc_vertex_allocs++;
3280 3535 if (pgraph.free != 0) {
3281 3536 for (i = 0; i < pgraph.gcount; i++) {
3282 3537 if (pgraph.proc[i] == NULL) {
3283 3538 pgraph.proc[i] = pv;
3284 3539 lock->pvertex = pv->index = i;
3285 3540 pgraph.free--;
3286 3541 return (pv);
3287 3542 }
3288 3543 }
3289 3544 }
3290 3545 palloc = kmem_zalloc((pgraph.gcount + PROC_CHUNK) *
3291 3546 sizeof (proc_vertex_t *), KM_SLEEP);
3292 3547
3293 3548 if (pgraph.proc) {
3294 3549 bcopy(pgraph.proc, palloc,
3295 3550 pgraph.gcount * sizeof (proc_vertex_t *));
3296 3551
3297 3552 kmem_free(pgraph.proc,
3298 3553 pgraph.gcount * sizeof (proc_vertex_t *));
3299 3554 }
3300 3555 pgraph.proc = palloc;
3301 3556 pgraph.free += (PROC_CHUNK - 1);
3302 3557 pv->index = lock->pvertex = pgraph.gcount;
3303 3558 pgraph.gcount += PROC_CHUNK;
3304 3559 pgraph.proc[pv->index] = pv;
3305 3560 return (pv);
3306 3561 }
3307 3562
3308 3563 /*
3309 3564 * Allocate a proc edge.
3310 3565 */
3311 3566
3312 3567 static proc_edge_t *
3313 3568 flk_get_proc_edge()
3314 3569 {
3315 3570 proc_edge_t *pep;
3316 3571
3317 3572 pep = kmem_zalloc(sizeof (proc_edge_t), KM_SLEEP);
3318 3573 flk_proc_edge_allocs++;
3319 3574 return (pep);
3320 3575 }
3321 3576
3322 3577 /*
3323 3578 * Free the proc edge. Called whenever its reference count goes to zero.
3324 3579 */
3325 3580
3326 3581 static void
3327 3582 flk_free_proc_edge(proc_edge_t *pep)
3328 3583 {
3329 3584 ASSERT(pep->refcount == 0);
3330 - kmem_free((void *)pep, sizeof (proc_edge_t));
3585 + kmem_free(pep, sizeof (proc_edge_t));
3331 3586 flk_proc_edge_frees++;
3332 3587 }
3333 3588
3334 3589 /*
3335 3590 * Color the graph explicitly done only when the mark value hits max value.
3336 3591 */
3337 3592
3338 3593 static void
3339 3594 flk_proc_graph_uncolor()
3340 3595 {
3341 3596 int i;
3342 3597
3343 3598 if (pgraph.mark == UINT_MAX) {
3344 3599 for (i = 0; i < pgraph.gcount; i++)
3345 3600 if (pgraph.proc[i] != NULL) {
3346 3601 pgraph.proc[i]->atime = 0;
3347 3602 pgraph.proc[i]->dtime = 0;
3348 3603 }
3349 3604 pgraph.mark = 1;
3350 3605 } else {
3351 3606 pgraph.mark++;
3352 3607 }
3353 3608 }
3354 3609
3355 3610 /*
3356 3611  * Release the proc vertex iff it has neither in edges nor out edges.
3357 3612 */
3358 3613
3359 3614 static void
3360 3615 flk_proc_release(proc_vertex_t *proc)
3361 3616 {
3362 3617 ASSERT(MUTEX_HELD(&flock_lock));
3363 3618 if (proc->edge == NULL && proc->incount == 0) {
3364 3619 pgraph.proc[proc->index] = NULL;
3365 3620 pgraph.free++;
3366 3621 kmem_free(proc, sizeof (proc_vertex_t));
3367 3622 flk_proc_vertex_frees++;
3368 3623 }
3369 3624 }
3370 3625
3371 3626 /*
3372 3627 * Updates process graph to reflect change in a lock_graph.
3373 3628 * Note: We should call this function only after we have a correctly
3374 3629 * recomputed lock graph. Otherwise we might miss a deadlock detection.
3375 3630  * e.g. in flk_relation() we call this function after
3376 3631  * flk_recompute_dependencies(); otherwise, if a process tries to lock a
3377 3632  * vnode hashed into another graph, it might sleep forever.
3378 3633 */
3379 3634
3380 3635 static void
3381 3636 flk_update_proc_graph(edge_t *ep, int delete)
3382 3637 {
3383 3638 proc_vertex_t *toproc, *fromproc;
3384 3639 proc_edge_t *pep, *prevpep;
3385 3640
3386 3641 mutex_enter(&flock_lock);
3387 3642
3388 3643 /*
3389 3644 * OFD style locks are not associated with any process so there is
3390 3645 * no proc graph for these.
3391 3646 */
3392 3647 if (ep->from_vertex->l_ofd != NULL) {
3393 3648 mutex_exit(&flock_lock);
3394 3649 return;
3395 3650 }
3396 3651
3397 3652 toproc = flk_get_proc_vertex(ep->to_vertex);
3398 3653 fromproc = flk_get_proc_vertex(ep->from_vertex);
3399 3654
3400 3655 if (!delete)
3401 3656 goto add;
3402 3657 pep = prevpep = fromproc->edge;
3403 3658
3404 3659 ASSERT(pep != NULL);
3405 3660 while (pep != NULL) {
3406 3661 if (pep->to_proc == toproc) {
3407 3662 ASSERT(pep->refcount > 0);
3408 3663 pep->refcount--;
3409 3664 if (pep->refcount == 0) {
3410 3665 if (pep == prevpep) {
3411 3666 fromproc->edge = pep->next;
3412 3667 } else {
3413 3668 prevpep->next = pep->next;
3414 3669 }
3415 3670 toproc->incount--;
3416 3671 flk_proc_release(toproc);
3417 3672 flk_free_proc_edge(pep);
3418 3673 }
3419 3674 break;
3420 3675 }
3421 3676 prevpep = pep;
3422 3677 pep = pep->next;
3423 3678 }
3424 3679 flk_proc_release(fromproc);
3425 3680 mutex_exit(&flock_lock);
3426 3681 return;
3427 3682 add:
3428 3683
3429 3684 pep = fromproc->edge;
3430 3685
3431 3686 while (pep != NULL) {
3432 3687 if (pep->to_proc == toproc) {
3433 3688 ASSERT(pep->refcount > 0);
3434 3689 pep->refcount++;
3435 3690 break;
3436 3691 }
3437 3692 pep = pep->next;
3438 3693 }
3439 3694 if (pep == NULL) {
3440 3695 pep = flk_get_proc_edge();
3441 3696 pep->to_proc = toproc;
3442 3697 pep->refcount = 1;
3443 3698 toproc->incount++;
3444 3699 pep->next = fromproc->edge;
3445 3700 fromproc->edge = pep;
3446 3701 }
3447 3702 mutex_exit(&flock_lock);
3448 3703 }
3449 3704
3450 3705 /*
3451 3706 * Set the control status for lock manager requests.
3452 3707 *
3453 3708 */
3454 3709
3455 3710 /*
3456 3711 * PSARC case 1997/292
3457 3712 *
3458 3713 * Requires: "nlmid" must be >= 1 and <= clconf_maximum_nodeid().
3459 3714 * Effects: Set the state of the NLM server identified by "nlmid"
3460 3715 * in the NLM registry to state "nlm_state."
3461 3716 * Raises exception no_such_nlm if "nlmid" doesn't identify a known
3462 3717 * NLM server to this LLM.
3463 3718 * Note that when this routine is called with NLM_SHUTTING_DOWN there
3464 3719  * may be lock requests that have gotten started but not finished. In
3465 3720 * particular, there may be blocking requests that are in the callback code
3466 3721 * before sleeping (so they're not holding the lock for the graph). If
3467 3722 * such a thread reacquires the graph's lock (to go to sleep) after
3468 3723 * NLM state in the NLM registry is set to a non-up value,
3469 3724 * it will notice the status and bail out. If the request gets
3470 3725 * granted before the thread can check the NLM registry, let it
3471 3726 * continue normally. It will get flushed when we are called with NLM_DOWN.
3472 3727 *
3473 3728 * Modifies: nlm_reg_obj (global)
3474 3729 * Arguments:
3475 3730 * nlmid (IN): id uniquely identifying an NLM server
3476 3731 * nlm_state (IN): NLM server state to change "nlmid" to
3477 3732 */
3478 3733 void
3479 3734 cl_flk_set_nlm_status(int nlmid, flk_nlm_status_t nlm_state)
3480 3735 {
3481 3736 /*
3482 3737 * Check to see if node is booted as a cluster. If not, return.
3483 3738 */
3484 3739 if ((cluster_bootflags & CLUSTER_BOOTED) == 0) {
3485 3740 return;
3486 3741 }
3487 3742
3488 3743 /*
3489 3744 * Check for development/debugging. It is possible to boot a node
3490 3745 * in non-cluster mode, and then run a special script, currently
3491 3746 * available only to developers, to bring up the node as part of a
3492 3747 * cluster. The problem is that running such a script does not
3493 3748 * result in the routine flk_init() being called and hence global array
3494 3749 * nlm_reg_status is NULL. The NLM thinks it's in cluster mode,
3495 3750 * but the LLM needs to do an additional check to see if the global
3496 3751 * array has been created or not. If nlm_reg_status is NULL, then
3497 3752 * return, else continue.
3498 3753 */
3499 3754 if (nlm_reg_status == NULL) {
3500 3755 return;
3501 3756 }
3502 3757
3503 3758 ASSERT(nlmid <= nlm_status_size && nlmid >= 0);
3504 3759 mutex_enter(&nlm_reg_lock);
3505 3760
3506 3761 if (FLK_REGISTRY_IS_NLM_UNKNOWN(nlm_reg_status, nlmid)) {
3507 3762 /*
3508 3763 * If the NLM server "nlmid" is unknown in the NLM registry,
3509 3764 * add it to the registry in the nlm shutting down state.
3510 3765 */
3511 3766 FLK_REGISTRY_CHANGE_NLM_STATE(nlm_reg_status, nlmid,
3512 3767 FLK_NLM_SHUTTING_DOWN);
3513 3768 } else {
3514 3769 /*
3515 3770 * Change the state of the NLM server identified by "nlmid"
3516 3771 * in the NLM registry to the argument "nlm_state."
3517 3772 */
3518 3773 FLK_REGISTRY_CHANGE_NLM_STATE(nlm_reg_status, nlmid,
3519 3774 nlm_state);
3520 3775 }
3521 3776
3522 3777 /*
3523 3778 * The reason we must register the NLM server that is shutting down
3524 3779 * with an LLM that doesn't already know about it (never sent a lock
3525 3780 * request) is to handle correctly a race between shutdown and a new
3526 3781 * lock request. Suppose that a shutdown request from the NLM server
3527 3782 * invokes this routine at the LLM, and a thread is spawned to
3528 3783 * service the request. Now suppose a new lock request is in
3529 3784 * progress and has already passed the first line of defense in
3530 3785  * reclock(), which denies new lock requests from NLM servers
3531 3786 * that are not in the NLM_UP state. After the current routine
3532 3787 * is invoked for both phases of shutdown, the routine will return,
3533 3788 * having done nothing, and the lock request will proceed and
3534 3789 * probably be granted. The problem is that the shutdown was ignored
3535 3790 * by the lock request because there was no record of that NLM server
3536 3791 * shutting down. We will be in the peculiar position of thinking
3537 3792 * that we've shutdown the NLM server and all locks at all LLMs have
3538 3793 * been discarded, but in fact there's still one lock held.
3539 3794 * The solution is to record the existence of NLM server and change
3540 3795 * its state immediately to NLM_SHUTTING_DOWN. The lock request in
3541 3796 * progress may proceed because the next phase NLM_DOWN will catch
3542 3797 * this lock and discard it.
3543 3798 */
3544 3799 mutex_exit(&nlm_reg_lock);
3545 3800
3546 3801 switch (nlm_state) {
3547 3802 case FLK_NLM_UP:
3548 3803 /*
3549 3804 * Change the NLM state of all locks still held on behalf of
3550 3805 * the NLM server identified by "nlmid" to NLM_UP.
3551 3806 */
3552 3807 cl_flk_change_nlm_state_all_locks(nlmid, FLK_NLM_UP);
3553 3808 break;
3554 3809
3555 3810 case FLK_NLM_SHUTTING_DOWN:
3556 3811 /*
3557 3812 * Wake up all sleeping locks for the NLM server identified
3558 3813 * by "nlmid." Note that eventually all woken threads will
3559 3814 * have their lock requests cancelled and descriptors
3560 3815 * removed from the sleeping lock list. Note that the NLM
3561 3816 * server state associated with each lock descriptor is
3562 3817 * changed to FLK_NLM_SHUTTING_DOWN.
3563 3818 */
3564 3819 cl_flk_wakeup_sleeping_nlm_locks(nlmid);
3565 3820 break;
3566 3821
3567 3822 case FLK_NLM_DOWN:
3568 3823 /*
3569 3824 * Discard all active, granted locks for this NLM server
3570 3825 * identified by "nlmid."
3571 3826 */
3572 3827 cl_flk_unlock_nlm_granted(nlmid);
3573 3828 break;
3574 3829
3575 3830 default:
3576 3831 panic("cl_set_nlm_status: bad status (%d)", nlm_state);
3577 3832 }
3578 3833 }
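A sketch of the per-NLM-server state sequence implied by the switch above; the calling context shown is hypothetical.

    /* NLM server "nlmid" is going away: wake sleepers, then flush its locks. */
    cl_flk_set_nlm_status(nlmid, FLK_NLM_SHUTTING_DOWN);
    cl_flk_set_nlm_status(nlmid, FLK_NLM_DOWN);

    /* ... later, once that NLM server is serving locks again ... */
    cl_flk_set_nlm_status(nlmid, FLK_NLM_UP);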
3579 3834
3580 3835 /*
3581 3836 * Set the control status for lock manager requests.
3582 3837 *
3583 3838 * Note that when this routine is called with FLK_WAKEUP_SLEEPERS, there
3584 3839  * may be lock requests that have gotten started but not finished. In
3585 3840 * particular, there may be blocking requests that are in the callback code
3586 3841 * before sleeping (so they're not holding the lock for the graph). If
3587 3842 * such a thread reacquires the graph's lock (to go to sleep) after
3588 3843 * flk_lockmgr_status is set to a non-up value, it will notice the status
3589 3844 * and bail out. If the request gets granted before the thread can check
3590 3845 * flk_lockmgr_status, let it continue normally. It will get flushed when
3591 3846 * we are called with FLK_LOCKMGR_DOWN.
3592 3847 */
3593 3848
3594 3849 void
3595 3850 flk_set_lockmgr_status(flk_lockmgr_status_t status)
3596 3851 {
3597 3852 int i;
3598 3853 graph_t *gp;
3599 3854 struct flock_globals *fg;
3600 3855
3601 3856 fg = flk_get_globals();
3602 3857 ASSERT(fg != NULL);
3603 3858
3604 3859 mutex_enter(&flock_lock);
3605 3860 fg->flk_lockmgr_status = status;
3606 3861 mutex_exit(&flock_lock);
3607 3862
3608 3863 /*
3609 3864 * If the lock manager is coming back up, all that's needed is to
3610 3865 * propagate this information to the graphs. If the lock manager
3611 3866 * is going down, additional action is required, and each graph's
3612 3867 * copy of the state is updated atomically with this other action.
3613 3868 */
3614 3869 switch (status) {
3615 3870 case FLK_LOCKMGR_UP:
3616 3871 for (i = 0; i < HASH_SIZE; i++) {
3617 3872 mutex_enter(&flock_lock);
3618 3873 gp = lock_graph[i];
3619 3874 mutex_exit(&flock_lock);
3620 3875 if (gp == NULL)
3621 3876 continue;
3622 3877 mutex_enter(&gp->gp_mutex);
3623 3878 fg->lockmgr_status[i] = status;
3624 3879 mutex_exit(&gp->gp_mutex);
3625 3880 }
3626 3881 break;
3627 3882 case FLK_WAKEUP_SLEEPERS:
3628 3883 wakeup_sleeping_lockmgr_locks(fg);
3629 3884 break;
3630 3885 case FLK_LOCKMGR_DOWN:
3631 3886 unlock_lockmgr_granted(fg);
3632 3887 break;
3633 3888 default:
3634 3889 panic("flk_set_lockmgr_status: bad status (%d)", status);
3635 3890 break;
3636 3891 }
3637 3892 }
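A sketch of the local lock-manager shutdown/restart ordering implied by the comment above; the caller shown is hypothetical.

    /* Lock manager going down: poke blocked requests, then discard granted locks. */
    flk_set_lockmgr_status(FLK_WAKEUP_SLEEPERS);
    flk_set_lockmgr_status(FLK_LOCKMGR_DOWN);

    /* Lock manager back up: just propagate the new status to the graphs. */
    flk_set_lockmgr_status(FLK_LOCKMGR_UP);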
3638 3893
3639 3894 /*
3640 3895 * This routine returns all the locks that are active or sleeping and are
3641 3896 * associated with a particular set of identifiers. If lock_state != 0, then
3642 3897 * only locks that match the lock_state are returned. If lock_state == 0, then
3643 3898 * all locks are returned. If pid == NOPID, the pid is ignored. If
3644 3899 * use_sysid is FALSE, then the sysid is ignored. If vp is NULL, then the
3645 3900 * vnode pointer is ignored.
3646 3901 *
3647 3902 * A list containing the vnode pointer and an flock structure
3648 3903 * describing the lock is returned. Each element in the list is
3649 3904 * dynamically allocated and must be freed by the caller. The
3650 3905 * last item in the list is denoted by a NULL value in the ll_next
3651 3906 * field.
3652 3907 *
3653 3908 * The vnode pointers returned are held. The caller is responsible
3654 3909 * for releasing these. Note that the returned list is only a snapshot of
3655 3910 * the current lock information, and that it is a snapshot of a moving
3656 3911 * target (only one graph is locked at a time).
3657 3912 */
3658 3913
3659 3914 locklist_t *
3660 3915 get_lock_list(int list_type, int lock_state, int sysid, boolean_t use_sysid,
3661 3916 pid_t pid, const vnode_t *vp, zoneid_t zoneid)
3662 3917 {
3663 3918 lock_descriptor_t *lock;
3664 3919 lock_descriptor_t *graph_head;
3665 3920 locklist_t listhead;
3666 3921 locklist_t *llheadp;
3667 3922 locklist_t *llp;
3668 3923 locklist_t *lltp;
3669 3924 graph_t *gp;
3670 3925 int i;
3671 3926 int first_index; /* graph index */
3672 3927 int num_indexes; /* graph index */
3673 3928
3674 3929 ASSERT((list_type == FLK_ACTIVE_STATE) ||
3675 3930 (list_type == FLK_SLEEPING_STATE));
3676 3931
3677 3932 /*
3678 3933 * Get a pointer to something to use as a list head while building
3679 3934 * the rest of the list.
3680 3935 */
3681 3936 llheadp = &listhead;
3682 3937 lltp = llheadp;
3683 3938 llheadp->ll_next = (locklist_t *)NULL;
3684 3939
3685 3940 /* Figure out which graphs we want to look at. */
3686 3941 if (vp == NULL) {
3687 3942 first_index = 0;
3688 3943 num_indexes = HASH_SIZE;
3689 3944 } else {
3690 3945 first_index = HASH_INDEX(vp);
3691 3946 num_indexes = 1;
3692 3947 }
3693 3948
3694 3949 for (i = first_index; i < first_index + num_indexes; i++) {
3695 3950 mutex_enter(&flock_lock);
3696 3951 gp = lock_graph[i];
3697 3952 mutex_exit(&flock_lock);
3698 3953 if (gp == NULL) {
3699 3954 continue;
3700 3955 }
3701 3956
3702 3957 mutex_enter(&gp->gp_mutex);
3703 3958 graph_head = (list_type == FLK_ACTIVE_STATE) ?
3704 3959 ACTIVE_HEAD(gp) : SLEEPING_HEAD(gp);
3705 3960 for (lock = graph_head->l_next;
3706 3961 lock != graph_head;
3707 3962 lock = lock->l_next) {
3708 3963 if (use_sysid && lock->l_flock.l_sysid != sysid)
3709 3964 continue;
3710 3965 if (pid != NOPID && lock->l_flock.l_pid != pid)
3711 3966 continue;
3712 3967 if (vp != NULL && lock->l_vnode != vp)
3713 3968 continue;
3714 3969 if (lock_state && !(lock_state & lock->l_state))
3715 3970 continue;
3716 3971 if (zoneid != lock->l_zoneid && zoneid != ALL_ZONES)
3717 3972 continue;
3718 3973 /*
3719 3974 * A matching lock was found. Allocate
3720 3975 * space for a new locklist entry and fill
3721 3976 * it in.
3722 3977 */
3723 3978 llp = kmem_alloc(sizeof (locklist_t), KM_SLEEP);
3724 3979 lltp->ll_next = llp;
3725 3980 VN_HOLD(lock->l_vnode);
3726 3981 llp->ll_vp = lock->l_vnode;
3727 3982 create_flock(lock, &(llp->ll_flock));
3728 3983 llp->ll_next = (locklist_t *)NULL;
3729 3984 lltp = llp;
3730 3985 }
3731 3986 mutex_exit(&gp->gp_mutex);
3732 3987 }
3733 3988
3734 3989 llp = llheadp->ll_next;
3735 3990 return (llp);
3736 3991 }
3737 3992
3738 3993 /*
3739 3994 * These two functions are simply interfaces to get_lock_list. They return
3740 3995 * a list of sleeping or active locks for the given sysid and pid. See
3741 3996 * get_lock_list for details.
3742 3997 *
3743 3998 * In either case we don't particularly care to specify the zone of interest;
3744 3999 * the sysid-space is global across zones, so the sysid will map to exactly one
3745 4000 * zone, and we'll return information for that zone.
3746 4001 */
3747 4002
3748 4003 locklist_t *
3749 4004 flk_get_sleeping_locks(int sysid, pid_t pid)
3750 4005 {
3751 4006 return (get_lock_list(FLK_SLEEPING_STATE, 0, sysid, B_TRUE, pid, NULL,
3752 4007 ALL_ZONES));
3753 4008 }
3754 4009
3755 4010 locklist_t *
3756 4011 flk_get_active_locks(int sysid, pid_t pid)
3757 4012 {
3758 4013 return (get_lock_list(FLK_ACTIVE_STATE, 0, sysid, B_TRUE, pid, NULL,
3759 4014 ALL_ZONES));
3760 4015 }
3761 4016
3762 4017 /*
3763 4018 * Another interface to get_lock_list. This one returns all the active
3764 4019 * locks for a given vnode. Again, see get_lock_list for details.
3765 4020 *
3766 4021 * We don't need to specify which zone's locks we're interested in. The matter
3767 4022 * would only be interesting if the vnode belonged to NFS, and NFS vnodes can't
3768 4023 * be used by multiple zones, so the list of locks will all be from the right
3769 4024 * zone.
3770 4025 */
3771 4026
3772 4027 locklist_t *
3773 4028 flk_active_locks_for_vp(const vnode_t *vp)
3774 4029 {
3775 4030 return (get_lock_list(FLK_ACTIVE_STATE, 0, 0, B_FALSE, NOPID, vp,
3776 4031 ALL_ZONES));
3777 4032 }
3778 4033
3779 4034 /*
3780 4035 * Another interface to get_lock_list. This one returns all the active
3781 4036 * nbmand locks for a given vnode. Again, see get_lock_list for details.
3782 4037 *
3783 4038 * See the comment for flk_active_locks_for_vp() for why we don't care to
3784 4039 * specify the particular zone of interest.
3785 4040 */
3786 4041 locklist_t *
3787 4042 flk_active_nbmand_locks_for_vp(const vnode_t *vp)
3788 4043 {
3789 4044 return (get_lock_list(FLK_ACTIVE_STATE, NBMAND_LOCK, 0, B_FALSE,
3790 4045 NOPID, vp, ALL_ZONES));
3791 4046 }
3792 4047
3793 4048 /*
3794 4049 * Another interface to get_lock_list. This one returns all the active
3795 4050 * nbmand locks for a given pid. Again, see get_lock_list for details.
3796 4051 *
3797 4052 * The zone doesn't need to be specified here; the locks held by a
3798 4053 * particular process will either be local (ie, non-NFS) or from the zone
3799 4054 * the process is executing in. This is because other parts of the system
3800 4055 * ensure that an NFS vnode can't be used in a zone other than that in
3801 4056 * which it was opened.
3802 4057 */
3803 4058 locklist_t *
3804 4059 flk_active_nbmand_locks(pid_t pid)
3805 4060 {
3806 4061 return (get_lock_list(FLK_ACTIVE_STATE, NBMAND_LOCK, 0, B_FALSE,
3807 4062 pid, NULL, ALL_ZONES));
3808 4063 }
3809 4064
3810 4065 /*
3811 4066 * Free up all entries in the locklist.
3812 4067 */
3813 4068 void
3814 4069 flk_free_locklist(locklist_t *llp)
3815 4070 {
3816 4071 locklist_t *next_llp;
3817 4072
3818 4073 while (llp) {
3819 4074 next_llp = llp->ll_next;
3820 4075 VN_RELE(llp->ll_vp);
3821 4076 kmem_free(llp, sizeof (*llp));
3822 4077 llp = next_llp;
3823 4078 }
3824 4079 }
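A minimal sketch of consuming the snapshot these interfaces return; vp is assumed from the surrounding context and the cmn_err() output is illustrative only.

    locklist_t *llp, *ll;

    llp = flk_active_locks_for_vp(vp);
    for (ll = llp; ll != NULL; ll = ll->ll_next) {
            cmn_err(CE_CONT, "pid %d sysid %d start %lld len %lld\n",
                ll->ll_flock.l_pid, ll->ll_flock.l_sysid,
                (longlong_t)ll->ll_flock.l_start,
                (longlong_t)ll->ll_flock.l_len);
    }
    /* Frees every entry and releases the vnode holds taken by get_lock_list(). */
    flk_free_locklist(llp);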
3825 4080
3826 4081 static void
3827 4082 cl_flk_change_nlm_state_all_locks(int nlmid, flk_nlm_status_t nlm_state)
3828 4083 {
3829 4084 /*
3830 4085 * For each graph "lg" in the hash table lock_graph do
3831 4086 * a. Get the list of sleeping locks
3832 4087 * b. For each lock descriptor in the list do
3833 4088 * i. If the requested lock is an NLM server request AND
3834 4089 * the nlmid is the same as the routine argument then
3835 4090 * change the lock descriptor's state field to
3836 4091 * "nlm_state."
3837 4092 * c. Get the list of active locks
3838 4093 * d. For each lock descriptor in the list do
3839 4094 * i. If the requested lock is an NLM server request AND
3840 4095 * the nlmid is the same as the routine argument then
3841 4096 * change the lock descriptor's state field to
3842 4097 * "nlm_state."
3843 4098 */
3844 4099
3845 4100 int i;
3846 4101 graph_t *gp; /* lock graph */
3847 4102 lock_descriptor_t *lock; /* lock */
3848 4103 lock_descriptor_t *nlock = NULL; /* next lock */
3849 4104 int lock_nlmid;
3850 4105
3851 4106 for (i = 0; i < HASH_SIZE; i++) {
3852 4107 mutex_enter(&flock_lock);
3853 4108 gp = lock_graph[i];
3854 4109 mutex_exit(&flock_lock);
3855 4110 if (gp == NULL) {
3856 4111 continue;
3857 4112 }
3858 4113
3859 4114 /* Get list of sleeping locks in current lock graph. */
3860 4115 mutex_enter(&gp->gp_mutex);
3861 4116 for (lock = SLEEPING_HEAD(gp)->l_next;
3862 4117 lock != SLEEPING_HEAD(gp);
3863 4118 lock = nlock) {
3864 4119 nlock = lock->l_next;
3865 4120 /* get NLM id */
3866 4121 lock_nlmid = GETNLMID(lock->l_flock.l_sysid);
3867 4122
3868 4123 /*
3869 4124 * If NLM server request AND nlmid of lock matches
3870 4125 * nlmid of argument, then set the NLM state of the
3871 4126 * lock to "nlm_state."
3872 4127 */
3873 4128 if (IS_LOCKMGR(lock) && nlmid == lock_nlmid) {
3874 4129 SET_NLM_STATE(lock, nlm_state);
3875 4130 }
3876 4131 }
3877 4132
3878 4133 /* Get list of active locks in current lock graph. */
3879 4134 for (lock = ACTIVE_HEAD(gp)->l_next;
3880 4135 lock != ACTIVE_HEAD(gp);
3881 4136 lock = nlock) {
3882 4137 nlock = lock->l_next;
3883 4138 /* get NLM id */
3884 4139 lock_nlmid = GETNLMID(lock->l_flock.l_sysid);
3885 4140
3886 4141 /*
3887 4142 * If NLM server request AND nlmid of lock matches
3888 4143 * nlmid of argument, then set the NLM state of the
3889 4144 * lock to "nlm_state."
3890 4145 */
3891 4146 if (IS_LOCKMGR(lock) && nlmid == lock_nlmid) {
3892 4147 ASSERT(IS_ACTIVE(lock));
3893 4148 SET_NLM_STATE(lock, nlm_state);
3894 4149 }
3895 4150 }
3896 4151 mutex_exit(&gp->gp_mutex);
3897 4152 }
3898 4153 }
3899 4154
3900 4155 /*
3901 4156 * Requires: "nlmid" >= 1 and <= clconf_maximum_nodeid().
3902 4157 * Effects: Find all sleeping lock manager requests _only_ for the NLM server
3903 4158 * identified by "nlmid." Poke those lock requests.
3904 4159 */
3905 4160 static void
3906 4161 cl_flk_wakeup_sleeping_nlm_locks(int nlmid)
3907 4162 {
3908 4163 lock_descriptor_t *lock;
3909 4164 lock_descriptor_t *nlock = NULL; /* next lock */
3910 4165 int i;
3911 4166 graph_t *gp;
3912 4167 int lock_nlmid;
3913 4168
3914 4169 for (i = 0; i < HASH_SIZE; i++) {
3915 4170 mutex_enter(&flock_lock);
3916 4171 gp = lock_graph[i];
3917 4172 mutex_exit(&flock_lock);
3918 4173 if (gp == NULL) {
3919 4174 continue;
3920 4175 }
3921 4176
3922 4177 mutex_enter(&gp->gp_mutex);
3923 4178 for (lock = SLEEPING_HEAD(gp)->l_next;
3924 4179 lock != SLEEPING_HEAD(gp);
3925 4180 lock = nlock) {
3926 4181 nlock = lock->l_next;
3927 4182 /*
3928 4183 * If NLM server request _and_ nlmid of lock matches
3929 4184 * nlmid of argument, then set the NLM state of the
3930 4185 * lock to NLM_SHUTTING_DOWN, and wake up sleeping
3931 4186 * request.
3932 4187 */
3933 4188 if (IS_LOCKMGR(lock)) {
3934 4189 /* get NLM id */
3935 4190 lock_nlmid =
3936 4191 GETNLMID(lock->l_flock.l_sysid);
3937 4192 if (nlmid == lock_nlmid) {
3938 4193 SET_NLM_STATE(lock,
3939 4194 FLK_NLM_SHUTTING_DOWN);
3940 4195 INTERRUPT_WAKEUP(lock);
3941 4196 }
3942 4197 }
3943 4198 }
3944 4199 mutex_exit(&gp->gp_mutex);
3945 4200 }
3946 4201 }
3947 4202
3948 4203 /*
3949 4204 * Requires: "nlmid" >= 1 and <= clconf_maximum_nodeid()
3950 4205 * Effects: Find all active (granted) lock manager locks _only_ for the
3951 4206 * NLM server identified by "nlmid" and release them.
3952 4207 */
3953 4208 static void
3954 4209 cl_flk_unlock_nlm_granted(int nlmid)
3955 4210 {
3956 4211 lock_descriptor_t *lock;
3957 4212 lock_descriptor_t *nlock = NULL; /* next lock */
3958 4213 int i;
3959 4214 graph_t *gp;
3960 4215 int lock_nlmid;
3961 4216
3962 4217 for (i = 0; i < HASH_SIZE; i++) {
3963 4218 mutex_enter(&flock_lock);
3964 4219 gp = lock_graph[i];
3965 4220 mutex_exit(&flock_lock);
3966 4221 if (gp == NULL) {
3967 4222 continue;
3968 4223 }
3969 4224
3970 4225 mutex_enter(&gp->gp_mutex);
3971 4226 for (lock = ACTIVE_HEAD(gp)->l_next;
3972 4227 lock != ACTIVE_HEAD(gp);
3973 4228 lock = nlock) {
3974 4229 nlock = lock->l_next;
3975 4230 ASSERT(IS_ACTIVE(lock));
3976 4231
3977 4232 /*
3978 4233 * If it's an NLM server request _and_ nlmid of
3979 4234 * the lock matches nlmid of argument, then
3980 4235  * remove the active lock from the list, wake up blocked
3981 4236 * threads, and free the storage for the lock.
3982 4237 * Note that there's no need to mark the NLM state
3983 4238 * of this lock to NLM_DOWN because the lock will
3984 4239 * be deleted anyway and its storage freed.
3985 4240 */
3986 4241 if (IS_LOCKMGR(lock)) {
3987 4242 /* get NLM id */
3988 4243 lock_nlmid = GETNLMID(lock->l_flock.l_sysid);
3989 4244 if (nlmid == lock_nlmid) {
3990 4245 flk_delete_active_lock(lock, 0);
3991 4246 flk_wakeup(lock, 1);
3992 4247 flk_free_lock(lock);
3993 4248 }
3994 4249 }
3995 4250 }
3996 4251 mutex_exit(&gp->gp_mutex);
3997 4252 }
3998 4253 }
3999 4254
4000 4255 /*
4001 4256 * Find all sleeping lock manager requests and poke them.
4002 4257 */
4003 4258 static void
4004 4259 wakeup_sleeping_lockmgr_locks(struct flock_globals *fg)
4005 4260 {
4006 4261 lock_descriptor_t *lock;
4007 4262 lock_descriptor_t *nlock = NULL; /* next lock */
4008 4263 int i;
4009 4264 graph_t *gp;
4010 4265 zoneid_t zoneid = getzoneid();
4011 4266
4012 4267 for (i = 0; i < HASH_SIZE; i++) {
4013 4268 mutex_enter(&flock_lock);
4014 4269 gp = lock_graph[i];
4015 4270 mutex_exit(&flock_lock);
4016 4271 if (gp == NULL) {
4017 4272 continue;
4018 4273 }
4019 4274
4020 4275 mutex_enter(&gp->gp_mutex);
4021 4276 fg->lockmgr_status[i] = FLK_WAKEUP_SLEEPERS;
4022 4277 for (lock = SLEEPING_HEAD(gp)->l_next;
4023 4278 lock != SLEEPING_HEAD(gp);
4024 4279 lock = nlock) {
4025 4280 nlock = lock->l_next;
4026 4281 if (IS_LOCKMGR(lock) && lock->l_zoneid == zoneid) {
4027 4282 INTERRUPT_WAKEUP(lock);
4028 4283 }
4029 4284 }
4030 4285 mutex_exit(&gp->gp_mutex);
4031 4286 }
4032 4287 }
4033 4288
4034 4289
4035 4290 /*
4036 4291 * Find all active (granted) lock manager locks and release them.
4037 4292 */
4038 4293 static void
4039 4294 unlock_lockmgr_granted(struct flock_globals *fg)
4040 4295 {
4041 4296 lock_descriptor_t *lock;
4042 4297 lock_descriptor_t *nlock = NULL; /* next lock */
4043 4298 int i;
4044 4299 graph_t *gp;
4045 4300 zoneid_t zoneid = getzoneid();
4046 4301
4047 4302 for (i = 0; i < HASH_SIZE; i++) {
4048 4303 mutex_enter(&flock_lock);
4049 4304 gp = lock_graph[i];
4050 4305 mutex_exit(&flock_lock);
4051 4306 if (gp == NULL) {
4052 4307 continue;
4053 4308 }
4054 4309
4055 4310 mutex_enter(&gp->gp_mutex);
4056 4311 fg->lockmgr_status[i] = FLK_LOCKMGR_DOWN;
4057 4312 for (lock = ACTIVE_HEAD(gp)->l_next;
4058 4313 lock != ACTIVE_HEAD(gp);
4059 4314 lock = nlock) {
4060 4315 nlock = lock->l_next;
4061 4316 if (IS_LOCKMGR(lock) && lock->l_zoneid == zoneid) {
4062 4317 ASSERT(IS_ACTIVE(lock));
4063 4318 flk_delete_active_lock(lock, 0);
4064 4319 flk_wakeup(lock, 1);
4065 4320 flk_free_lock(lock);
4066 4321 }
4067 4322 }
4068 4323 mutex_exit(&gp->gp_mutex);
4069 4324 }
4070 4325 }
4071 4326
4072 -
4073 4327 /*
4074 4328 * Wait until a lock is granted, cancelled, or interrupted.
4075 4329 */
4076 4330
4077 4331 static void
4078 4332 wait_for_lock(lock_descriptor_t *request)
4079 4333 {
4080 4334 graph_t *gp = request->l_graph;
4335 + vnode_t *vp = request->l_vnode;
4081 4336
4082 4337 ASSERT(MUTEX_HELD(&gp->gp_mutex));
4083 4338
4084 4339 while (!(IS_GRANTED(request)) && !(IS_CANCELLED(request)) &&
4085 4340 !(IS_INTERRUPTED(request))) {
4086 - if (!cv_wait_sig(&request->l_cv, &gp->gp_mutex)) {
4341 + lock_descriptor_t *lock;
4342 +
4343 + if (stale_lock_timeout == 0) {
4344 + /* The stale lock detection is disabled */
4345 + if (cv_wait_sig(&request->l_cv, &gp->gp_mutex) == 0) {
4346 + flk_set_state(request, FLK_INTERRUPTED_STATE);
4347 + request->l_state |= INTERRUPTED_LOCK;
4348 + }
4349 +
4350 + continue;
4351 + }
4352 +
4353 + SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
4354 +
4355 + if (lock != NULL) {
4356 + do {
4357 + if (BLOCKS(lock, request)) {
4358 + flk_stale_lock_check(lock);
4359 + break;
4360 + }
4361 + lock = lock->l_next;
4362 + } while (lock->l_vnode == vp);
4363 + }
4364 +
4365 + if (cv_timedwait_sig(&request->l_cv, &gp->gp_mutex,
4366 + ddi_get_lbolt() + SEC_TO_TICK(stale_lock_timeout)) == 0) {
4087 4367 flk_set_state(request, FLK_INTERRUPTED_STATE);
4088 4368 request->l_state |= INTERRUPTED_LOCK;
4089 4369 }
4090 4370 }
4091 4371 }
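For reference, a sketch of the cv_timedwait_sig(9F) return convention the loop above relies on; stale_lock_timeout and flk_stale_lock_check() are introduced elsewhere in this change.

    /*
     * cv_timedwait_sig() return values, as used above:
     *   -1  timeout expired   -> loop, re-run the stale-lock check on
     *                            whatever active lock still blocks the request
     *    0  signal received   -> mark the request INTERRUPTED and return
     *   >0  cv_signal()/cv_broadcast() -> loop, re-test GRANTED/CANCELLED
     */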
4092 4372
4093 4373 /*
4094 4374 * Create an flock structure from the existing lock information
4095 4375 *
4096 4376 * This routine is used to create flock structures for the lock manager
4097 4377 * to use in a reclaim request. Since the lock was originated on this
4098 4378 * host, it must be conforming to UNIX semantics, so no checking is
4099 4379 * done to make sure it falls within the lower half of the 32-bit range.
4100 4380 */
4101 4381
4102 4382 static void
4103 4383 create_flock(lock_descriptor_t *lp, flock64_t *flp)
4104 4384 {
4105 4385 ASSERT(lp->l_end == MAX_U_OFFSET_T || lp->l_end <= MAXEND);
4106 4386 ASSERT(lp->l_end >= lp->l_start);
4107 4387
4108 4388 flp->l_type = lp->l_type;
4109 4389 flp->l_whence = 0;
4110 4390 flp->l_start = lp->l_start;
4111 4391 flp->l_len = (lp->l_end == MAX_U_OFFSET_T) ? 0 :
4112 4392 (lp->l_end - lp->l_start + 1);
4113 4393 flp->l_sysid = lp->l_flock.l_sysid;
4114 4394 flp->l_pid = lp->l_flock.l_pid;
4115 4395 }
4116 4396
4117 4397 /*
4118 4398 * Convert flock_t data describing a lock range into unsigned long starting
4119 4399 * and ending points, which are put into lock_request. Returns 0 or an
4120 4400 * errno value.
4121 4401 */
4122 4402
4123 4403 int
4124 4404 flk_convert_lock_data(vnode_t *vp, flock64_t *flp,
4125 4405 u_offset_t *start, u_offset_t *end, offset_t offset)
4126 4406 {
4127 4407 struct vattr vattr;
4128 4408 int error;
4129 4409
4130 4410 /*
4131 4411 * Determine the starting point of the request
4132 4412 */
4133 4413 switch (flp->l_whence) {
4134 4414 case 0: /* SEEK_SET */
4135 4415 *start = (u_offset_t)flp->l_start;
4136 4416 break;
4137 4417 case 1: /* SEEK_CUR */
4138 4418 *start = (u_offset_t)(flp->l_start + offset);
4139 4419 break;
4140 4420 case 2: /* SEEK_END */
4141 4421 vattr.va_mask = AT_SIZE;
4142 - if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
4422 + if ((error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL)) != 0)
4143 4423 return (error);
4144 4424 *start = (u_offset_t)(flp->l_start + vattr.va_size);
4145 4425 break;
4146 4426 default:
4147 4427 return (EINVAL);
4148 4428 }
4149 4429
4150 4430 /*
4151 4431 * Determine the range covered by the request.
4152 4432 */
4153 4433 if (flp->l_len == 0)
4154 4434 *end = MAX_U_OFFSET_T;
4155 4435 else if ((offset_t)flp->l_len > 0) {
4156 4436 *end = (u_offset_t)(*start + (flp->l_len - 1));
4157 4437 } else {
4158 4438 /*
4159 4439 * Negative length; why do we even allow this ?
4160 4440 * Because this allows easy specification of
4161 4441 * the last n bytes of the file.
4162 4442 */
4163 4443 *end = *start;
4164 4444 *start += (u_offset_t)flp->l_len;
4165 4445 (*start)++;
4166 4446 }
4167 4447 return (0);
4168 4448 }
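A worked example of the negative-length case handled just above, with hypothetical values.

    /*
     * With l_whence = 0, l_start = 100 and l_len = -10:
     *      end   = 100
     *      start = 100 + (-10) + 1 = 91
     * i.e. the ten bytes ending at offset 100, the range [91, 100].
     */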
4169 4449
4170 4450 /*
4171 4451  * Check the validity of lock data. This can be used by the NFS
4172 4452 * frlock routines to check data before contacting the server. The
4173 4453 * server must support semantics that aren't as restrictive as
4174 4454 * the UNIX API, so the NFS client is required to check.
4175 - * The maximum is now passed in by the caller.
4455 + * The maximum is passed in by the caller.
4176 4456 */
4177 4457
4178 4458 int
4179 4459 flk_check_lock_data(u_offset_t start, u_offset_t end, offset_t max)
4180 4460 {
4181 4461 /*
4182 4462 * The end (length) for local locking should never be greater
4183 - * than MAXEND. However, the representation for
4463 + * than max. However, the representation for
4184 4464 * the entire file is MAX_U_OFFSET_T.
4185 4465 */
4186 4466 if ((start > max) ||
4187 4467 ((end > max) && (end != MAX_U_OFFSET_T))) {
4188 4468 return (EINVAL);
4189 4469 }
4190 4470 if (start > end) {
4191 4471 return (EINVAL);
4192 4472 }
4193 4473 return (0);
4194 4474 }
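A sketch of the usual calling pattern (reclock() follows this shape): convert the caller's flock64 first, then validate the resulting range. MAXEND here stands in for whatever maximum the caller supports.

    u_offset_t start, end;
    int error;

    if ((error = flk_convert_lock_data(vp, flp, &start, &end, offset)) != 0)
            return (error);
    if ((error = flk_check_lock_data(start, end, MAXEND)) != 0)
            return (error);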
4195 4475
4196 4476 /*
4197 4477 * Fill in request->l_flock with information about the lock blocking the
4198 4478 * request. The complexity here is that lock manager requests are allowed
4199 4479 * to see into the upper part of the 32-bit address range, whereas local
4200 4480 * requests are only allowed to see signed values.
4201 4481 *
4202 4482 * What should be done when "blocker" is a lock manager lock that uses the
4203 4483 * upper portion of the 32-bit range, but "request" is local? Since the
4204 4484 * request has already been determined to have been blocked by the blocker,
4205 4485 * at least some portion of "blocker" must be in the range of the request,
4206 4486 * or the request extends to the end of file. For the first case, the
4207 4487 * portion in the lower range is returned with the indication that it goes
4208 4488 * "to EOF." For the second case, the last byte of the lower range is
4209 4489 * returned with the indication that it goes "to EOF."
4210 4490 */
4211 4491
4212 4492 static void
4213 4493 report_blocker(lock_descriptor_t *blocker, lock_descriptor_t *request)
4214 4494 {
4215 4495 flock64_t *flrp; /* l_flock portion of request */
4216 4496
4217 4497 ASSERT(blocker != NULL);
4218 4498
4219 4499 flrp = &request->l_flock;
4220 4500 flrp->l_whence = 0;
4221 4501 flrp->l_type = blocker->l_type;
4222 4502 flrp->l_pid = blocker->l_flock.l_pid;
4223 4503 flrp->l_sysid = blocker->l_flock.l_sysid;
4224 4504 request->l_ofd = blocker->l_ofd;
4225 4505
4226 4506 if (IS_LOCKMGR(request)) {
4227 4507 flrp->l_start = blocker->l_start;
4228 4508 if (blocker->l_end == MAX_U_OFFSET_T)
4229 4509 flrp->l_len = 0;
4230 4510 else
4231 4511 flrp->l_len = blocker->l_end - blocker->l_start + 1;
4232 4512 } else {
4233 4513 if (blocker->l_start > MAXEND) {
4234 4514 flrp->l_start = MAXEND;
4235 4515 flrp->l_len = 0;
4236 4516 } else {
4237 4517 flrp->l_start = blocker->l_start;
4238 4518 if (blocker->l_end == MAX_U_OFFSET_T)
4239 4519 flrp->l_len = 0;
4240 4520 else
4241 4521 flrp->l_len = blocker->l_end -
4242 4522 blocker->l_start + 1;
4243 4523 }
4244 4524 }
4245 4525 }
4246 4526
4247 4527 /*
4248 4528 * PSARC case 1997/292
4249 4529 */
4250 4530 /*
4251 4531 * This is the public routine exported by flock.h.
4252 4532 */
4253 4533 void
4254 4534 cl_flk_change_nlm_state_to_unknown(int nlmid)
4255 4535 {
4256 4536 /*
4257 4537 * Check to see if node is booted as a cluster. If not, return.
4258 4538 */
4259 4539 if ((cluster_bootflags & CLUSTER_BOOTED) == 0) {
4260 4540 return;
4261 4541 }
4262 4542
4263 4543 /*
4264 4544 * See comment in cl_flk_set_nlm_status().
4265 4545 */
4266 4546 if (nlm_reg_status == NULL) {
4267 4547 return;
4268 4548 }
4269 4549
4270 4550 /*
4271 4551 * protect NLM registry state with a mutex.
4272 4552 */
4273 4553 ASSERT(nlmid <= nlm_status_size && nlmid >= 0);
4274 4554 mutex_enter(&nlm_reg_lock);
4275 4555 FLK_REGISTRY_CHANGE_NLM_STATE(nlm_reg_status, nlmid, FLK_NLM_UNKNOWN);
4276 4556 mutex_exit(&nlm_reg_lock);
4277 4557 }
4278 4558
4279 4559 /*
4280 4560 * Return non-zero if the given I/O request conflicts with an active NBMAND
4281 4561 * lock.
4282 4562 * If svmand is non-zero, it means look at all active locks, not just NBMAND
4283 4563 * locks.
4284 4564 */
4285 4565
4286 4566 int
4287 4567 nbl_lock_conflict(vnode_t *vp, nbl_op_t op, u_offset_t offset,
4288 4568 ssize_t length, int svmand, caller_context_t *ct)
4289 4569 {
4290 4570 int conflict = 0;
4291 4571 graph_t *gp;
4292 4572 lock_descriptor_t *lock;
4293 4573 pid_t pid;
4294 4574 int sysid;
4295 4575
4296 4576 if (ct == NULL) {
4297 4577 pid = curproc->p_pid;
4298 4578 sysid = 0;
4299 4579 } else {
4300 4580 pid = ct->cc_pid;
4301 4581 sysid = ct->cc_sysid;
4302 4582 }
4303 4583
4304 4584 mutex_enter(&flock_lock);
4305 4585 gp = lock_graph[HASH_INDEX(vp)];
4306 4586 mutex_exit(&flock_lock);
4307 4587 if (gp == NULL)
4308 4588 return (0);
4309 4589
4310 4590 mutex_enter(&gp->gp_mutex);
4311 4591 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
4312 4592
4313 4593 for (; lock && lock->l_vnode == vp; lock = lock->l_next) {
4314 4594 if ((svmand || (lock->l_state & NBMAND_LOCK)) &&
4315 4595 (lock->l_flock.l_sysid != sysid ||
4316 4596 lock->l_flock.l_pid != pid) &&
4317 4597 lock_blocks_io(op, offset, length,
4318 4598 lock->l_type, lock->l_start, lock->l_end)) {
4319 4599 conflict = 1;
4320 4600 break;
4321 4601 }
4322 4602 }
4323 4603 mutex_exit(&gp->gp_mutex);
4324 4604
4325 4605 return (conflict);
4326 4606 }
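A hedged sketch of the caller's side: the non-blocking-mandatory check a filesystem write path typically performs through the public nbl_conflict() wrapper; vp, uiop, cr and ct are assumed from the enclosing write routine.

    int svmand, error = 0;

    if (nbl_need_check(vp)) {
            nbl_start_crit(vp, RW_READER);
            error = nbl_svmand(vp, cr, &svmand);
            if (error == 0 &&
                nbl_conflict(vp, NBL_WRITE, uiop->uio_loffset,
                uiop->uio_resid, svmand, ct))
                    error = EACCES;
            nbl_end_crit(vp);
            if (error != 0)
                    return (error);
    }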
4327 4607
4328 4608 /*
4329 4609 * Return non-zero if the given I/O request conflicts with the given lock.
4330 4610 */
4331 4611
4332 4612 static int
4333 4613 lock_blocks_io(nbl_op_t op, u_offset_t offset, ssize_t length,
4334 4614 int lock_type, u_offset_t lock_start, u_offset_t lock_end)
4335 4615 {
4336 4616 ASSERT(op == NBL_READ || op == NBL_WRITE || op == NBL_READWRITE);
4337 4617 ASSERT(lock_type == F_RDLCK || lock_type == F_WRLCK);
4338 4618
4339 4619 if (op == NBL_READ && lock_type == F_RDLCK)
4340 4620 return (0);
4341 4621
4342 4622 if (offset <= lock_start && lock_start < offset + length)
4343 4623 return (1);
4344 4624 if (lock_start <= offset && offset <= lock_end)
4345 4625 return (1);
4346 4626
4347 4627 return (0);
4348 4628 }
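Two worked cases for the check above, with hypothetical ranges.

    /*
     * A read of [100, 149] against an F_WRLCK covering [120, 130]:
     *      lock_blocks_io(NBL_READ, 100, 50, F_WRLCK, 120, 130) == 1
     * Two read locks never conflict, regardless of overlap:
     *      lock_blocks_io(NBL_READ, 100, 50, F_RDLCK, 120, 130) == 0
     */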
4349 4629
4350 4630 #ifdef DEBUG
4351 4631 static void
4352 4632 check_active_locks(graph_t *gp)
4353 4633 {
4354 4634 lock_descriptor_t *lock, *lock1;
4355 4635 edge_t *ep;
4356 4636
4357 4637 for (lock = ACTIVE_HEAD(gp)->l_next; lock != ACTIVE_HEAD(gp);
4358 4638 lock = lock->l_next) {
4359 4639 ASSERT(IS_ACTIVE(lock));
4360 4640 ASSERT(NOT_BLOCKED(lock));
4361 4641 ASSERT(!IS_BARRIER(lock));
4362 4642
4363 4643 ep = FIRST_IN(lock);
4364 4644
4365 4645 while (ep != HEAD(lock)) {
4366 4646 ASSERT(IS_SLEEPING(ep->from_vertex));
4367 4647 ASSERT(!NOT_BLOCKED(ep->from_vertex));
4368 4648 ep = NEXT_IN(ep);
4369 4649 }
4370 4650
4371 4651 for (lock1 = lock->l_next; lock1 != ACTIVE_HEAD(gp);
4372 4652 lock1 = lock1->l_next) {
4373 4653 if (lock1->l_vnode == lock->l_vnode) {
4374 4654 if (BLOCKS(lock1, lock)) {
4375 4655 cmn_err(CE_PANIC,
4376 4656 "active lock %p blocks %p",
4377 4657 (void *)lock1, (void *)lock);
4378 4658 } else if (BLOCKS(lock, lock1)) {
4379 4659 cmn_err(CE_PANIC,
4380 4660 "active lock %p blocks %p",
4381 4661 (void *)lock, (void *)lock1);
4382 4662 }
4383 4663 }
4384 4664 }
4385 4665 }
4386 4666 }
4387 4667
4388 4668 /*
4389 4669  * Effect: This function checks to see if the transition from 'old_state' to
4390 4670 * 'new_state' is a valid one. It returns 0 if the transition is valid
4391 4671 * and 1 if it is not.
4392 4672 * For a map of valid transitions, see sys/flock_impl.h
4393 4673 */
4394 4674 static int
4395 4675 check_lock_transition(int old_state, int new_state)
4396 4676 {
4397 4677 switch (old_state) {
4398 4678 case FLK_INITIAL_STATE:
4399 4679 if ((new_state == FLK_START_STATE) ||
4400 4680 (new_state == FLK_SLEEPING_STATE) ||
4401 4681 (new_state == FLK_ACTIVE_STATE) ||
4402 4682 (new_state == FLK_DEAD_STATE)) {
4403 4683 return (0);
4404 4684 } else {
4405 4685 return (1);
4406 4686 }
4407 4687 case FLK_START_STATE:
4408 4688 if ((new_state == FLK_ACTIVE_STATE) ||
4409 4689 (new_state == FLK_DEAD_STATE)) {
4410 4690 return (0);
4411 4691 } else {
4412 4692 return (1);
4413 4693 }
4414 4694 case FLK_ACTIVE_STATE:
4415 4695 if (new_state == FLK_DEAD_STATE) {
4416 4696 return (0);
4417 4697 } else {
4418 4698 return (1);
4419 4699 }
4420 4700 case FLK_SLEEPING_STATE:
4421 4701 if ((new_state == FLK_GRANTED_STATE) ||
4422 4702 (new_state == FLK_INTERRUPTED_STATE) ||
4423 4703 (new_state == FLK_CANCELLED_STATE)) {
4424 4704 return (0);
4425 4705 } else {
4426 4706 return (1);
4427 4707 }
4428 4708 case FLK_GRANTED_STATE:
4429 4709 if ((new_state == FLK_START_STATE) ||
4430 4710 (new_state == FLK_INTERRUPTED_STATE) ||
4431 4711 (new_state == FLK_CANCELLED_STATE)) {
4432 4712 return (0);
4433 4713 } else {
4434 4714 return (1);
4435 4715 }
4436 4716 case FLK_CANCELLED_STATE:
4437 4717 if ((new_state == FLK_INTERRUPTED_STATE) ||
4438 4718 (new_state == FLK_DEAD_STATE)) {
4439 4719 return (0);
4440 4720 } else {
4441 4721 return (1);
4442 4722 }
4443 4723 case FLK_INTERRUPTED_STATE:
4444 4724 if (new_state == FLK_DEAD_STATE) {
4445 4725 return (0);
4446 4726 } else {
4447 4727 return (1);
4448 4728 }
4449 4729 case FLK_DEAD_STATE:
4450 4730 /* May be set more than once */
4451 4731 if (new_state == FLK_DEAD_STATE) {
4452 4732 return (0);
4453 4733 } else {
4454 4734 return (1);
4455 4735 }
4456 4736 default:
4457 4737 return (1);
4458 4738 }
4459 4739 }
4460 4740
4461 4741 static void
4462 4742 check_sleeping_locks(graph_t *gp)
4463 4743 {
4464 4744 lock_descriptor_t *lock1, *lock2;
4465 4745 edge_t *ep;
4466 4746 for (lock1 = SLEEPING_HEAD(gp)->l_next; lock1 != SLEEPING_HEAD(gp);
4467 4747 lock1 = lock1->l_next) {
4468 4748 ASSERT(!IS_BARRIER(lock1));
4469 4749 for (lock2 = lock1->l_next; lock2 != SLEEPING_HEAD(gp);
4470 4750 lock2 = lock2->l_next) {
4471 4751 if (lock1->l_vnode == lock2->l_vnode) {
4472 4752 if (BLOCKS(lock2, lock1)) {
4473 4753 ASSERT(!IS_GRANTED(lock1));
4474 4754 ASSERT(!NOT_BLOCKED(lock1));
4475 4755 path(lock1, lock2);
4476 4756 }
4477 4757 }
4478 4758 }
4479 4759
4480 4760 for (lock2 = ACTIVE_HEAD(gp)->l_next; lock2 != ACTIVE_HEAD(gp);
4481 4761 lock2 = lock2->l_next) {
4482 4762 ASSERT(!IS_BARRIER(lock1));
4483 4763 if (lock1->l_vnode == lock2->l_vnode) {
4484 4764 if (BLOCKS(lock2, lock1)) {
4485 4765 ASSERT(!IS_GRANTED(lock1));
4486 4766 ASSERT(!NOT_BLOCKED(lock1));
4487 4767 path(lock1, lock2);
4488 4768 }
4489 4769 }
4490 4770 }
4491 4771 ep = FIRST_ADJ(lock1);
4492 4772 while (ep != HEAD(lock1)) {
4493 4773 ASSERT(BLOCKS(ep->to_vertex, lock1));
4494 4774 ep = NEXT_ADJ(ep);
4495 4775 }
4496 4776 }
4497 4777 }
4498 4778
4499 4779 static int
4500 4780 level_two_path(lock_descriptor_t *lock1, lock_descriptor_t *lock2, int no_path)
4501 4781 {
4502 4782 edge_t *ep;
4503 4783 lock_descriptor_t *vertex;
4504 4784 lock_descriptor_t *vertex_stack;
4505 4785
4506 4786 STACK_INIT(vertex_stack);
4507 4787
4508 4788 flk_graph_uncolor(lock1->l_graph);
4509 4789 ep = FIRST_ADJ(lock1);
4510 4790 ASSERT(ep != HEAD(lock1));
4511 4791 while (ep != HEAD(lock1)) {
4512 4792 if (no_path)
4513 4793 ASSERT(ep->to_vertex != lock2);
4514 4794 STACK_PUSH(vertex_stack, ep->to_vertex, l_dstack);
4515 4795 COLOR(ep->to_vertex);
4516 4796 ep = NEXT_ADJ(ep);
4517 4797 }
4518 4798
4519 4799 while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
4520 4800 STACK_POP(vertex_stack, l_dstack);
4521 4801 for (ep = FIRST_ADJ(vertex); ep != HEAD(vertex);
4522 4802 ep = NEXT_ADJ(ep)) {
4523 4803 if (COLORED(ep->to_vertex))
4524 4804 continue;
4525 4805 COLOR(ep->to_vertex);
4526 4806 if (ep->to_vertex == lock2)
4527 4807 return (1);
4528 4808
4529 4809 STACK_PUSH(vertex_stack, ep->to_vertex, l_dstack);
4530 4810 }
4531 4811 }
4532 4812 return (0);
4533 4813 }
4534 4814
4535 4815 static void
4536 4816 check_owner_locks(graph_t *gp, pid_t pid, int sysid, vnode_t *vp)
4537 4817 {
4538 4818 lock_descriptor_t *lock;
4539 4819
4540 4820 /* Ignore OFD style locks since they're not process-wide. */
4541 4821 if (pid == 0)
4542 4822 return;
4543 4823
4544 4824 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
4545 4825
4546 4826 if (lock) {
4547 4827 while (lock != ACTIVE_HEAD(gp) && (lock->l_vnode == vp)) {
4548 4828 if (lock->l_flock.l_pid == pid &&
4549 4829 lock->l_flock.l_sysid == sysid)
4550 4830 cmn_err(CE_PANIC,
4551 4831 "owner pid %d's lock %p in active queue",
4552 4832 pid, (void *)lock);
4553 4833 lock = lock->l_next;
4554 4834 }
4555 4835 }
4556 4836 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
4557 4837
4558 4838 if (lock) {
4559 4839 while (lock != SLEEPING_HEAD(gp) && (lock->l_vnode == vp)) {
4560 4840 if (lock->l_flock.l_pid == pid &&
4561 4841 lock->l_flock.l_sysid == sysid)
4562 4842 cmn_err(CE_PANIC,
4563 4843 "owner pid %d's lock %p in sleep queue",
4564 4844 pid, (void *)lock);
4565 4845 lock = lock->l_next;
4566 4846 }
4567 4847 }
4568 4848 }
4569 4849
4570 4850 static int
4571 4851 level_one_path(lock_descriptor_t *lock1, lock_descriptor_t *lock2)
4572 4852 {
4573 4853 edge_t *ep = FIRST_ADJ(lock1);
4574 4854
4575 4855 while (ep != HEAD(lock1)) {
4576 4856 if (ep->to_vertex == lock2)
4577 4857 return (1);
4578 4858 else
4579 4859 ep = NEXT_ADJ(ep);
4580 4860 }
4581 4861 return (0);
4582 4862 }
4583 4863
4584 4864 static int
4585 4865 no_path(lock_descriptor_t *lock1, lock_descriptor_t *lock2)
4586 4866 {
4587 4867 return (!level_two_path(lock1, lock2, 1));
4588 4868 }
4589 4869
4590 4870 static void
4591 4871 path(lock_descriptor_t *lock1, lock_descriptor_t *lock2)
4592 4872 {
4593 4873 if (level_one_path(lock1, lock2)) {
4594 4874 if (level_two_path(lock1, lock2, 0) != 0) {
4595 4875 cmn_err(CE_WARN,
4596 4876 "one edge one path from lock1 %p lock2 %p",
4597 4877 (void *)lock1, (void *)lock2);
4598 4878 }
4599 4879 } else if (no_path(lock1, lock2)) {
4600 4880 cmn_err(CE_PANIC,
4601 4881 "No path from lock1 %p to lock2 %p",
4602 4882 (void *)lock1, (void *)lock2);
4603 4883 }
4604 4884 }
4605 4885 #endif /* DEBUG */