11083 support NFS server in zone
Portions contributed by: Dan Kruchinin <dan.kruchinin@nexenta.com>
Portions contributed by: Stepan Zastupov <stepan.zastupov@gmail.com>
Portions contributed by: Joyce McIntosh <joyce.mcintosh@nexenta.com>
Portions contributed by: Mike Zeller <mike@mikezeller.net>
Portions contributed by: Dan McDonald <danmcd@joyent.com>
Portions contributed by: Gordon Ross <gordon.w.ross@gmail.com>
Portions contributed by: Vitaliy Gusev <gusev.vitaliy@gmail.com>
Reviewed by: Rick McNeal <rick.mcneal@nexenta.com>
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Jason King <jbk@joyent.com>
Reviewed by: C Fraire <cfraire@me.com>
Change-Id: I22f289d357503f9b48a0bc2482cc4328a6d43d16
--- old/usr/src/uts/common/fs/nfs/nfs4_callback.c
+++ new/usr/src/uts/common/fs/nfs/nfs4_callback.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 */
25 25
26 26 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
27 27 /* All Rights Reserved */
28 28
29 29 #include <sys/param.h>
30 30 #include <sys/types.h>
31 31 #include <sys/systm.h>
32 32 #include <sys/cred.h>
33 33 #include <sys/vfs.h>
34 34 #include <sys/vnode.h>
35 35 #include <sys/pathname.h>
36 36 #include <sys/sysmacros.h>
37 37 #include <sys/kmem.h>
38 38 #include <sys/kstat.h>
39 39 #include <sys/mkdev.h>
40 40 #include <sys/mount.h>
41 41 #include <sys/statvfs.h>
42 42 #include <sys/errno.h>
43 43 #include <sys/debug.h>
44 44 #include <sys/cmn_err.h>
45 45 #include <sys/utsname.h>
46 46 #include <sys/bootconf.h>
47 47 #include <sys/modctl.h>
48 48 #include <sys/acl.h>
49 49 #include <sys/flock.h>
50 50 #include <sys/kstr.h>
51 51 #include <sys/stropts.h>
52 52 #include <sys/strsubr.h>
53 53 #include <sys/atomic.h>
54 54 #include <sys/disp.h>
55 55 #include <sys/policy.h>
56 56 #include <sys/list.h>
57 57 #include <sys/zone.h>
58 58
59 59 #include <rpc/types.h>
60 60 #include <rpc/auth.h>
61 61 #include <rpc/rpcsec_gss.h>
62 62 #include <rpc/clnt.h>
63 63 #include <rpc/xdr.h>
64 64
65 65 #include <nfs/nfs.h>
66 66 #include <nfs/nfs_clnt.h>
67 67 #include <nfs/mount.h>
68 68 #include <nfs/nfs_acl.h>
69 69
70 70 #include <fs/fs_subr.h>
71 71
72 72 #include <nfs/nfs4.h>
73 73 #include <nfs/rnode4.h>
74 74 #include <nfs/nfs4_clnt.h>
75 75 #include <nfs/nfssys.h>
76 76
77 77 #ifdef DEBUG
78 78 /*
79 79 * These are "special" state IDs and file handles that
80 80	 * match any delegation state ID or file handle. This
81 81 * is for testing purposes only.
82 82 */
83 83
84 84 stateid4 nfs4_deleg_any = { 0x7FFFFFF0 };
85 85 char nfs4_deleg_fh[] = "\0377\0376\0375\0374";
86 86 nfs_fh4 nfs4_deleg_anyfh = { sizeof (nfs4_deleg_fh)-1, nfs4_deleg_fh };
87 87 nfsstat4 cb4_getattr_fail = NFS4_OK;
88 88 nfsstat4 cb4_recall_fail = NFS4_OK;
89 89
90 90 int nfs4_callback_debug;
91 91 int nfs4_recall_debug;
92 92 int nfs4_drat_debug;
93 93
94 94 #endif
95 95
96 96 #define CB_NOTE(x) NFS4_DEBUG(nfs4_callback_debug, (CE_NOTE, x))
97 97 #define CB_WARN(x) NFS4_DEBUG(nfs4_callback_debug, (CE_WARN, x))
98 98 #define CB_WARN1(x, y) NFS4_DEBUG(nfs4_callback_debug, (CE_WARN, x, y))
99 99
100 100 enum nfs4_delegreturn_policy nfs4_delegreturn_policy = INACTIVE;
101 101
102 102 static zone_key_t nfs4_callback_zone_key;
103 103
104 104 /*
105 105 * NFS4_MAPSIZE is the number of bytes we are willing to consume
106 106	 * for the block allocation map when the server grants an NFS_LIMIT_BLOCK
107 107 * style delegation.
108 108 */
109 109
110 110 #define NFS4_MAPSIZE 8192
111 111 #define NFS4_MAPWORDS NFS4_MAPSIZE/sizeof (uint_t)
112 112 #define NbPW (NBBY*sizeof (uint_t))
113 113
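
A quick note on the arithmetic, for readers following along: with NFS4_MAPSIZE at 8192 bytes and a 32-bit uint_t, NFS4_MAPWORDS works out to 2048 words and NbPW to 32 bits per word, so the map can track 65536 blocks. A stand-alone sketch of the same sizing (macro names are illustrative; parentheses are added around the word count, which the kernel macro omits):

    #include <stdio.h>

    #define NBBY        8       /* bits per byte, as in the kernel */
    #define MAPSIZE     8192    /* bytes consumed by the block map */
    #define MAPWORDS    (MAPSIZE / sizeof (unsigned int))
    #define BITSPERWORD (NBBY * sizeof (unsigned int))

    int
    main(void)
    {
        /* 8192 bytes -> 2048 32-bit words -> 65536 trackable blocks */
        printf("words=%zu bits/word=%zu blocks=%zu\n",
            (size_t)MAPWORDS, (size_t)BITSPERWORD,
            (size_t)(MAPWORDS * BITSPERWORD));
        return (0);
    }
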
114 114 static int nfs4_num_prognums = 1024;
115 115 static SVC_CALLOUT_TABLE nfs4_cb_sct;
116 116
117 117 struct nfs4_dnode {
118 118 list_node_t linkage;
119 119 rnode4_t *rnodep;
120 120 int flags; /* Flags for nfs4delegreturn_impl() */
121 121 };
122 122
123 123 static const struct nfs4_callback_stats nfs4_callback_stats_tmpl = {
124 124 { "delegations", KSTAT_DATA_UINT64 },
125 125 { "cb_getattr", KSTAT_DATA_UINT64 },
126 126 { "cb_recall", KSTAT_DATA_UINT64 },
127 127 { "cb_null", KSTAT_DATA_UINT64 },
128 128 { "cb_dispatch", KSTAT_DATA_UINT64 },
129 129 { "delegaccept_r", KSTAT_DATA_UINT64 },
130 130 { "delegaccept_rw", KSTAT_DATA_UINT64 },
131 131 { "delegreturn", KSTAT_DATA_UINT64 },
132 132 { "callbacks", KSTAT_DATA_UINT64 },
133 133 { "claim_cur", KSTAT_DATA_UINT64 },
134 134 { "claim_cur_ok", KSTAT_DATA_UINT64 },
135 135 { "recall_trunc", KSTAT_DATA_UINT64 },
136 136 { "recall_failed", KSTAT_DATA_UINT64 },
137 137 { "return_limit_write", KSTAT_DATA_UINT64 },
138 138 { "return_limit_addmap", KSTAT_DATA_UINT64 },
139 139 { "deleg_recover", KSTAT_DATA_UINT64 },
140 140 { "cb_illegal", KSTAT_DATA_UINT64 }
141 141 };
142 142
143 143 struct nfs4_cb_port {
144 144 list_node_t linkage; /* linkage into per-zone port list */
145 145 char netid[KNC_STRSIZE];
146 146 char uaddr[KNC_STRSIZE];
147 147 char protofmly[KNC_STRSIZE];
148 148 char proto[KNC_STRSIZE];
149 149 };
150 150
151 151 static int cb_getattr_bytes;
152 152
153 153 struct cb_recall_pass {
154 154 rnode4_t *rp;
155 155 int flags; /* Flags for nfs4delegreturn_impl() */
156 156 bool_t truncate;
157 157 };
158 158
159 159 static nfs4_open_stream_t *get_next_deleg_stream(rnode4_t *, int);
160 160 static void nfs4delegreturn_thread(struct cb_recall_pass *);
161 161 static int deleg_reopen(vnode_t *, bool_t *, struct nfs4_callback_globals *,
162 162 int);
163 163 static void nfs4_dlistadd(rnode4_t *, struct nfs4_callback_globals *, int);
164 164 static void nfs4_dlistclean_impl(struct nfs4_callback_globals *, int);
165 165 static int nfs4delegreturn_impl(rnode4_t *, int,
166 166 struct nfs4_callback_globals *);
167 167 static void nfs4delegreturn_cleanup_impl(rnode4_t *, nfs4_server_t *,
168 168 struct nfs4_callback_globals *);
169 169
170 170 static void
171 171 cb_getattr(nfs_cb_argop4 *argop, nfs_cb_resop4 *resop, struct svc_req *req,
172 172 struct compound_state *cs, struct nfs4_callback_globals *ncg)
173 173 {
174 174 CB_GETATTR4args *args = &argop->nfs_cb_argop4_u.opcbgetattr;
175 175 CB_GETATTR4res *resp = &resop->nfs_cb_resop4_u.opcbgetattr;
176 176 rnode4_t *rp;
177 177 vnode_t *vp;
178 178 bool_t found = FALSE;
179 179 struct nfs4_server *sp;
180 180 struct fattr4 *fap;
181 181 rpc_inline_t *fdata;
182 182 long mapcnt;
183 183 fattr4_change change;
184 184 fattr4_size size;
185 185 uint_t rflag;
186 186
187 187 ncg->nfs4_callback_stats.cb_getattr.value.ui64++;
188 188
189 189 #ifdef DEBUG
190 190 /*
191 191 * error injection hook: set cb_getattr_fail global to
192 192	 * NFS4 protocol error to be returned
193 193 */
194 194 if (cb4_getattr_fail != NFS4_OK) {
195 195 *cs->statusp = resp->status = cb4_getattr_fail;
196 196 return;
197 197 }
198 198 #endif
199 199
200 200 resp->obj_attributes.attrmask = 0;
201 201
202 202 mutex_enter(&ncg->nfs4_cb_lock);
203 203 sp = ncg->nfs4prog2server[req->rq_prog - NFS4_CALLBACK];
204 204 mutex_exit(&ncg->nfs4_cb_lock);
205 205
206 206 if (nfs4_server_vlock(sp, 0) == FALSE) {
207 207
208 208 CB_WARN("cb_getattr: cannot find server\n");
209 209
210 210 *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
211 211 return;
212 212 }
213 213
214 214 /*
215 215 * In cb_compound, callback_ident was validated against rq_prog,
216 216 * but we couldn't verify that it was set to the value we provided
217 217	 * at setclientid time (because we didn't have the server struct yet).
218 218 * Now we have the server struct, but don't have callback_ident
219 219 * handy. So, validate server struct program number against req
220 220 * RPC's prog number. At this point, we know the RPC prog num
221 221 * is valid (else we wouldn't be here); however, we don't know
222 222 * that it was the prog number we supplied to this server at
223 223 * setclientid time. If the prog numbers aren't equivalent, then
224 224	 * log the problem and fail the request because cbserv
225 225 * and/or cbclient are confused. This will probably never happen.
226 226 */
227 227 if (sp->s_program != req->rq_prog) {
228 228 #ifdef DEBUG
229 229 zcmn_err(getzoneid(), CE_WARN,
230 230 "cb_getattr: wrong server program number srv=%d req=%d\n",
231 231 sp->s_program, req->rq_prog);
232 232 #else
233 233 zcmn_err(getzoneid(), CE_WARN,
234 234 "cb_getattr: wrong server program number\n");
235 235 #endif
236 236 mutex_exit(&sp->s_lock);
237 237 nfs4_server_rele(sp);
238 238 *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
239 239 return;
240 240 }
241 241
242 242 /*
243 243 * Search the delegation list for a matching file handle;
244 244 * mutex on sp prevents the list from changing.
245 245 */
246 246
247 247 rp = list_head(&sp->s_deleg_list);
248 248 for (; rp != NULL; rp = list_next(&sp->s_deleg_list, rp)) {
249 249 nfs4_fhandle_t fhandle;
250 250
251 251 sfh4_copyval(rp->r_fh, &fhandle);
252 252
253 253 if ((fhandle.fh_len == args->fh.nfs_fh4_len &&
254 254 bcmp(fhandle.fh_buf, args->fh.nfs_fh4_val,
255 255 fhandle.fh_len) == 0)) {
256 256
257 257 found = TRUE;
258 258 break;
259 259 }
260 260 #ifdef DEBUG
261 261 if (nfs4_deleg_anyfh.nfs_fh4_len == args->fh.nfs_fh4_len &&
262 262 bcmp(nfs4_deleg_anyfh.nfs_fh4_val, args->fh.nfs_fh4_val,
263 263 args->fh.nfs_fh4_len) == 0) {
264 264
265 265 found = TRUE;
266 266 break;
267 267 }
268 268 #endif
269 269 }
270 270
271 271 /*
272 272 * VN_HOLD the vnode before releasing s_lock to guarantee
273 273 * we have a valid vnode reference.
274 274 */
275 275 if (found == TRUE) {
276 276 vp = RTOV4(rp);
277 277 VN_HOLD(vp);
278 278 }
279 279
280 280 mutex_exit(&sp->s_lock);
281 281 nfs4_server_rele(sp);
282 282
283 283 if (found == FALSE) {
284 284
285 285 CB_WARN("cb_getattr: bad fhandle\n");
286 286
287 287 *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
288 288 return;
289 289 }
290 290
291 291 /*
292 292 * Figure out which attributes the server wants. We only
293 293 * offer FATTR4_CHANGE & FATTR4_SIZE; ignore the rest.
294 294 */
295 295 fdata = kmem_alloc(cb_getattr_bytes, KM_SLEEP);
296 296
297 297 /*
298 298 * Don't actually need to create XDR to encode these
299 299 * simple data structures.
300 300 * xdrmem_create(&xdr, fdata, cb_getattr_bytes, XDR_ENCODE);
301 301 */
302 302 fap = &resp->obj_attributes;
303 303
304 304 fap->attrmask = 0;
305 305 /* attrlist4_len starts at 0 and increases as attrs are processed */
306 306 fap->attrlist4 = (char *)fdata;
307 307 fap->attrlist4_len = 0;
308 308
309 309 /* don't supply attrs if request was zero */
310 310 if (args->attr_request != 0) {
311 311 if (args->attr_request & FATTR4_CHANGE_MASK) {
312 312 /*
313 313 * If the file is mmapped, then increment the change
314 314 * attribute and return it. This will guarantee that
315 315 * the server will perceive that the file has changed
316 316 * if there is any chance that the client application
317 317 * has changed it. Otherwise, just return the change
318 318 * attribute as it has been updated by nfs4write_deleg.
319 319 */
320 320
321 321 mutex_enter(&rp->r_statelock);
322 322 mapcnt = rp->r_mapcnt;
323 323 rflag = rp->r_flags;
324 324 mutex_exit(&rp->r_statelock);
325 325
326 326 mutex_enter(&rp->r_statev4_lock);
327 327 /*
328 328 * If object mapped, then always return new change.
329 329 * Otherwise, return change if object has dirty
330 330 * pages. If object doesn't have any dirty pages,
331 331 * then all changes have been pushed to server, so
332 332 * reset change to grant change.
333 333 */
334 334 if (mapcnt)
335 335 rp->r_deleg_change++;
336 336 else if (! (rflag & R4DIRTY))
337 337 rp->r_deleg_change = rp->r_deleg_change_grant;
338 338 change = rp->r_deleg_change;
339 339 mutex_exit(&rp->r_statev4_lock);
340 340
341 341 /*
342 342	 * Use inline XDR code directly; we know that we are
343 343	 * writing to a memory buffer and it has enough
344 344	 * space, so it cannot fail.
345 345 */
346 346 IXDR_PUT_U_HYPER(fdata, change);
347 347 fap->attrlist4_len += 2 * BYTES_PER_XDR_UNIT;
348 348 fap->attrmask |= FATTR4_CHANGE_MASK;
349 349 }
350 350
351 351 if (args->attr_request & FATTR4_SIZE_MASK) {
352 352 /*
353 353 * Use an atomic add of 0 to fetch a consistent view
354 354 * of r_size; this avoids having to take rw_lock
355 355 * which could cause a deadlock.
356 356 */
357 357 size = atomic_add_64_nv((uint64_t *)&rp->r_size, 0);
358 358
359 359 /*
360 360	 * Use inline XDR code directly; we know that we are
361 361	 * writing to a memory buffer and it has enough
362 362	 * space, so it cannot fail.
363 363 */
364 364 IXDR_PUT_U_HYPER(fdata, size);
365 365 fap->attrlist4_len += 2 * BYTES_PER_XDR_UNIT;
366 366 fap->attrmask |= FATTR4_SIZE_MASK;
367 367 }
368 368 }
369 369
370 370 VN_RELE(vp);
371 371
372 372 *cs->statusp = resp->status = NFS4_OK;
373 373 }
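
For readers unfamiliar with the inline XDR macros used in cb_getattr: IXDR_PUT_U_HYPER writes a 64-bit value into the preallocated reply buffer as two big-endian 32-bit XDR units, which is why no xdrmem stream has to be created and why attrlist4_len grows by 2 * BYTES_PER_XDR_UNIT per attribute. A rough user-space equivalent of that encoding step (the function name is illustrative, not the kernel macro):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>
    #include <arpa/inet.h>  /* htonl() */

    #define BYTES_PER_XDR_UNIT  4

    /* Append a 64-bit value as two big-endian XDR units; returns bytes added. */
    static size_t
    put_u_hyper(unsigned char *buf, uint64_t v)
    {
        uint32_t hi = htonl((uint32_t)(v >> 32));
        uint32_t lo = htonl((uint32_t)(v & 0xffffffffU));

        memcpy(buf, &hi, BYTES_PER_XDR_UNIT);
        memcpy(buf + BYTES_PER_XDR_UNIT, &lo, BYTES_PER_XDR_UNIT);
        return (2 * BYTES_PER_XDR_UNIT);
    }

    int
    main(void)
    {
        unsigned char attrlist[16];
        size_t len = 0;

        len += put_u_hyper(attrlist + len, 42);    /* e.g. fattr4_change */
        len += put_u_hyper(attrlist + len, 4096);  /* e.g. fattr4_size */
        printf("attrlist4_len=%zu\n", len);        /* 16, as in the code */
        return (0);
    }
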
374 374
375 375 static void
376 376 cb_getattr_free(nfs_cb_resop4 *resop)
377 377 {
378 378 if (resop->nfs_cb_resop4_u.opcbgetattr.obj_attributes.attrlist4)
379 379 kmem_free(resop->nfs_cb_resop4_u.opcbgetattr.
380 380 obj_attributes.attrlist4, cb_getattr_bytes);
381 381 }
382 382
383 383 static void
384 384 cb_recall(nfs_cb_argop4 *argop, nfs_cb_resop4 *resop, struct svc_req *req,
385 385 struct compound_state *cs, struct nfs4_callback_globals *ncg)
386 386 {
387 387 CB_RECALL4args * args = &argop->nfs_cb_argop4_u.opcbrecall;
388 388 CB_RECALL4res *resp = &resop->nfs_cb_resop4_u.opcbrecall;
389 389 rnode4_t *rp;
390 390 vnode_t *vp;
391 391 struct nfs4_server *sp;
392 392 bool_t found = FALSE;
393 393
394 394 ncg->nfs4_callback_stats.cb_recall.value.ui64++;
395 395
396 396 ASSERT(req->rq_prog >= NFS4_CALLBACK);
397 397 ASSERT(req->rq_prog < NFS4_CALLBACK+nfs4_num_prognums);
398 398
399 399 #ifdef DEBUG
400 400 /*
401 401 * error injection hook: set cb_recall_fail global to
402 402	 * NFS4 protocol error to be returned
403 403 */
404 404 if (cb4_recall_fail != NFS4_OK) {
405 405 *cs->statusp = resp->status = cb4_recall_fail;
406 406 return;
407 407 }
408 408 #endif
409 409
410 410 mutex_enter(&ncg->nfs4_cb_lock);
411 411 sp = ncg->nfs4prog2server[req->rq_prog - NFS4_CALLBACK];
412 412 mutex_exit(&ncg->nfs4_cb_lock);
413 413
414 414 if (nfs4_server_vlock(sp, 0) == FALSE) {
415 415
416 416 CB_WARN("cb_recall: cannot find server\n");
417 417
418 418 *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
419 419 return;
420 420 }
421 421
422 422 /*
423 423 * Search the delegation list for a matching file handle
424 424 * AND stateid; mutex on sp prevents the list from changing.
425 425 */
426 426
427 427 rp = list_head(&sp->s_deleg_list);
428 428 for (; rp != NULL; rp = list_next(&sp->s_deleg_list, rp)) {
429 429 mutex_enter(&rp->r_statev4_lock);
430 430
431 431 /* check both state id and file handle! */
432 432
433 433 if ((bcmp(&rp->r_deleg_stateid, &args->stateid,
434 434 sizeof (stateid4)) == 0)) {
435 435 nfs4_fhandle_t fhandle;
436 436
437 437 sfh4_copyval(rp->r_fh, &fhandle);
438 438 if ((fhandle.fh_len == args->fh.nfs_fh4_len &&
439 439 bcmp(fhandle.fh_buf, args->fh.nfs_fh4_val,
440 440 fhandle.fh_len) == 0)) {
441 441
442 442 found = TRUE;
443 443 break;
444 444 } else {
445 445 #ifdef DEBUG
446 446 CB_WARN("cb_recall: stateid OK, bad fh");
447 447 #endif
448 448 }
449 449 }
450 450 #ifdef DEBUG
451 451 if (bcmp(&args->stateid, &nfs4_deleg_any,
452 452 sizeof (stateid4)) == 0) {
453 453
454 454 found = TRUE;
455 455 break;
456 456 }
457 457 #endif
458 458 mutex_exit(&rp->r_statev4_lock);
459 459 }
460 460
461 461 /*
462 462 * VN_HOLD the vnode before releasing s_lock to guarantee
463 463 * we have a valid vnode reference. The async thread will
464 464 * release the hold when it's done.
465 465 */
466 466 if (found == TRUE) {
467 467 mutex_exit(&rp->r_statev4_lock);
468 468 vp = RTOV4(rp);
469 469 VN_HOLD(vp);
470 470 }
471 471 mutex_exit(&sp->s_lock);
472 472 nfs4_server_rele(sp);
473 473
474 474 if (found == FALSE) {
475 475
476 476 CB_WARN("cb_recall: bad stateid\n");
477 477
478 478 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
479 479 return;
480 480 }
481 481
482 482 /* Fire up a thread to do the delegreturn */
483 483 nfs4delegreturn_async(rp, NFS4_DR_RECALL|NFS4_DR_REOPEN,
484 484 args->truncate);
485 485
486 486 *cs->statusp = resp->status = 0;
487 487 }
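
Note that, unlike cb_getattr, the recall search insists on matching both the stateid and the file handle before it acts; both comparisons are straight byte compares. A stand-alone sketch of that double check (the types are illustrative; an NFSv4 stateid is 16 bytes on the wire):

    #include <stdbool.h>
    #include <string.h>

    typedef struct { unsigned char bytes[16]; } stateid_t;

    typedef struct fh {
        unsigned int    len;
        unsigned char   buf[128];
    } fh_t;

    /* True only when both the stateid and the file handle match. */
    static bool
    recall_matches(const stateid_t *sid, const fh_t *fh,
        const stateid_t *want_sid, const fh_t *want_fh)
    {
        if (memcmp(sid, want_sid, sizeof (*sid)) != 0)
            return (false);
        return (fh->len == want_fh->len &&
            memcmp(fh->buf, want_fh->buf, fh->len) == 0);
    }
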
488 488
489 489 /* ARGSUSED */
490 490 static void
491 491 cb_recall_free(nfs_cb_resop4 *resop)
492 492 {
493 493 /* nothing to do here, cb_recall doesn't kmem_alloc */
494 494 }
495 495
496 496 /*
497 497 * This function handles the CB_NULL proc call from an NFSv4 Server.
498 498 *
499 499 * We take note that the server has sent a CB_NULL for later processing
500 500 * in the recovery logic. It is noted so we may pause slightly after the
501 501 * setclientid and before reopening files. The pause is to allow the
502 502 * NFSv4 Server time to receive the CB_NULL reply and adjust any of
503 503 * its internal structures such that it has the opportunity to grant
504 504 * delegations to reopened files.
505 505 *
506 506 */
507 507
508 508 /* ARGSUSED */
509 509 static void
510 510 cb_null(CB_COMPOUND4args *args, CB_COMPOUND4res *resp, struct svc_req *req,
511 511 struct nfs4_callback_globals *ncg)
512 512 {
513 513 struct nfs4_server *sp;
514 514
515 515 ncg->nfs4_callback_stats.cb_null.value.ui64++;
516 516
517 517 ASSERT(req->rq_prog >= NFS4_CALLBACK);
518 518 ASSERT(req->rq_prog < NFS4_CALLBACK+nfs4_num_prognums);
519 519
520 520 mutex_enter(&ncg->nfs4_cb_lock);
521 521 sp = ncg->nfs4prog2server[req->rq_prog - NFS4_CALLBACK];
522 522 mutex_exit(&ncg->nfs4_cb_lock);
523 523
524 524 if (nfs4_server_vlock(sp, 0) != FALSE) {
525 525 sp->s_flags |= N4S_CB_PINGED;
526 526 cv_broadcast(&sp->wait_cb_null);
527 527 mutex_exit(&sp->s_lock);
528 528 nfs4_server_rele(sp);
529 529 }
530 530 }
531 531
532 532 /*
533 533 * cb_illegal args: void
534 534 * res : status (NFS4ERR_OP_CB_ILLEGAL)
535 535 */
536 536 /* ARGSUSED */
537 537 static void
538 538 cb_illegal(nfs_cb_argop4 *argop, nfs_cb_resop4 *resop, struct svc_req *req,
539 539 struct compound_state *cs, struct nfs4_callback_globals *ncg)
540 540 {
541 541 CB_ILLEGAL4res *resp = &resop->nfs_cb_resop4_u.opcbillegal;
542 542
543 543 ncg->nfs4_callback_stats.cb_illegal.value.ui64++;
544 544 resop->resop = OP_CB_ILLEGAL;
545 545 *cs->statusp = resp->status = NFS4ERR_OP_ILLEGAL;
546 546 }
547 547
548 548 static void
549 549 cb_compound(CB_COMPOUND4args *args, CB_COMPOUND4res *resp, struct svc_req *req,
550 550 struct nfs4_callback_globals *ncg)
551 551 {
552 552 uint_t i;
553 553 struct compound_state cs;
554 554 nfs_cb_argop4 *argop;
555 555 nfs_cb_resop4 *resop, *new_res;
556 556 uint_t op;
557 557
558 558 bzero(&cs, sizeof (cs));
559 559 cs.statusp = &resp->status;
560 560 cs.cont = TRUE;
561 561
562 562 /*
563 - * Form a reply tag by copying over the reqeuest tag.
563 + * Form a reply tag by copying over the request tag.
564 564 */
565 565 resp->tag.utf8string_len = args->tag.utf8string_len;
566 - resp->tag.utf8string_val = kmem_alloc(resp->tag.utf8string_len,
567 - KM_SLEEP);
568 - bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
569 - args->tag.utf8string_len);
566 + if (args->tag.utf8string_len != 0) {
567 + resp->tag.utf8string_val =
568 + kmem_alloc(resp->tag.utf8string_len, KM_SLEEP);
569 + bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
570 + args->tag.utf8string_len);
571 + } else {
572 + resp->tag.utf8string_val = NULL;
573 + }
570 574
571 575 /*
572 576 * XXX for now, minorversion should be zero
573 577 */
574 578 if (args->minorversion != CB4_MINORVERSION) {
575 579 resp->array_len = 0;
576 580 resp->array = NULL;
577 581 resp->status = NFS4ERR_MINOR_VERS_MISMATCH;
578 582 return;
579 583 }
580 584
581 585 #ifdef DEBUG
582 586 /*
583 587 * Verify callback_ident. It doesn't really matter if it's wrong
584 588 * because we don't really use callback_ident -- we use prog number
585 589 * of the RPC request instead. In this case, just print a DEBUG
586 590 * console message to reveal brokenness of cbclient (at bkoff/cthon).
587 591 */
588 592 if (args->callback_ident != req->rq_prog)
589 593 zcmn_err(getzoneid(), CE_WARN,
590 594 "cb_compound: cb_client using wrong "
591 595 "callback_ident(%d), should be %d",
592 596 args->callback_ident, req->rq_prog);
593 597 #endif
594 598
595 599 resp->array_len = args->array_len;
596 600 resp->array = kmem_zalloc(args->array_len * sizeof (nfs_cb_resop4),
597 601 KM_SLEEP);
598 602
599 603 for (i = 0; i < args->array_len && cs.cont; i++) {
600 604
601 605 argop = &args->array[i];
602 606 resop = &resp->array[i];
603 607 resop->resop = argop->argop;
604 608 op = (uint_t)resop->resop;
605 609
606 610 switch (op) {
607 611
608 612 case OP_CB_GETATTR:
609 613
610 614 cb_getattr(argop, resop, req, &cs, ncg);
611 615 break;
612 616
613 617 case OP_CB_RECALL:
614 618
615 619 cb_recall(argop, resop, req, &cs, ncg);
616 620 break;
617 621
618 622 case OP_CB_ILLEGAL:
619 623
620 624 /* fall through */
621 625
622 626 default:
623 627 /*
624 628 * Handle OP_CB_ILLEGAL and any undefined opcode.
625 629 * Currently, the XDR code will return BADXDR
626 630 * if cb op doesn't decode to legal value, so
627 631 * it really only handles OP_CB_ILLEGAL.
628 632 */
629 633 op = OP_CB_ILLEGAL;
630 634 cb_illegal(argop, resop, req, &cs, ncg);
631 635 }
632 636
633 637 if (*cs.statusp != NFS4_OK)
634 638 cs.cont = FALSE;
635 639
636 640 /*
637 641 * If not at last op, and if we are to stop, then
638 642 * compact the results array.
639 643 */
640 644 if ((i + 1) < args->array_len && !cs.cont) {
641 645
642 646 new_res = kmem_alloc(
643 647 (i+1) * sizeof (nfs_cb_resop4), KM_SLEEP);
644 648 bcopy(resp->array,
645 649 new_res, (i+1) * sizeof (nfs_cb_resop4));
646 650 kmem_free(resp->array,
647 651 args->array_len * sizeof (nfs_cb_resop4));
648 652
649 653 resp->array_len = i + 1;
650 654 resp->array = new_res;
651 655 }
652 656 }
653 657
654 658 }
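
When a compound stops early (an op fails and cs.cont goes FALSE before the last op), only the first i+1 result slots are meaningful, so cb_compound reallocates the results array down to exactly that length before replying. The same shrink-to-fit idiom as a stand-alone sketch, with malloc standing in for kmem_alloc (which, with KM_SLEEP, cannot fail):

    #include <stdlib.h>
    #include <string.h>

    /*
     * Compact 'arr' from old_len down to new_len elements of elsz bytes
     * each; returns the (possibly new, smaller) array.
     */
    static void *
    compact_results(void *arr, size_t old_len, size_t new_len, size_t elsz)
    {
        void *res;

        if (new_len >= old_len)
            return (arr);       /* nothing to trim */
        res = malloc(new_len * elsz);
        if (res == NULL)
            return (arr);       /* keep the oversized array */
        memcpy(res, arr, new_len * elsz);
        free(arr);
        return (res);
    }
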
655 659
656 660 static void
657 661 cb_compound_free(CB_COMPOUND4res *resp)
658 662 {
659 663 uint_t i, op;
660 664 nfs_cb_resop4 *resop;
661 665
662 666 if (resp->tag.utf8string_val) {
663 667 UTF8STRING_FREE(resp->tag)
664 668 }
665 669
666 670 for (i = 0; i < resp->array_len; i++) {
667 671
668 672 resop = &resp->array[i];
669 673 op = (uint_t)resop->resop;
670 674
671 675 switch (op) {
672 676
673 677 case OP_CB_GETATTR:
674 678
675 679 cb_getattr_free(resop);
676 680 break;
677 681
678 682 case OP_CB_RECALL:
679 683
680 684 cb_recall_free(resop);
681 685 break;
682 686
683 687 default:
684 688 break;
685 689 }
686 690 }
687 691
688 692 if (resp->array != NULL) {
689 693 kmem_free(resp->array,
690 694 resp->array_len * sizeof (nfs_cb_resop4));
691 695 }
692 696 }
693 697
694 698 static void
695 699 cb_dispatch(struct svc_req *req, SVCXPRT *xprt)
696 700 {
697 701 CB_COMPOUND4args args;
698 702 CB_COMPOUND4res res;
699 703 struct nfs4_callback_globals *ncg;
700 704
701 705 bool_t (*xdr_args)(), (*xdr_res)();
702 706 void (*proc)(CB_COMPOUND4args *, CB_COMPOUND4res *, struct svc_req *,
703 707 struct nfs4_callback_globals *);
704 708 void (*freeproc)(CB_COMPOUND4res *);
705 709
706 710 ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone());
707 711 ASSERT(ncg != NULL);
708 712
709 713 ncg->nfs4_callback_stats.cb_dispatch.value.ui64++;
710 714
711 715 switch (req->rq_proc) {
712 716 case CB_NULL:
713 717 xdr_args = xdr_void;
714 718 xdr_res = xdr_void;
715 719 proc = cb_null;
716 720 freeproc = NULL;
717 721 break;
718 722
719 723 case CB_COMPOUND:
720 724 xdr_args = xdr_CB_COMPOUND4args_clnt;
721 725 xdr_res = xdr_CB_COMPOUND4res;
722 726 proc = cb_compound;
723 727 freeproc = cb_compound_free;
724 728 break;
725 729
726 730 default:
727 731 CB_WARN("cb_dispatch: no proc\n");
728 732 svcerr_noproc(xprt);
729 733 return;
730 734 }
731 735
732 736 args.tag.utf8string_val = NULL;
733 737 args.array = NULL;
734 738
735 739 if (!SVC_GETARGS(xprt, xdr_args, (caddr_t)&args)) {
736 740
737 741 CB_WARN("cb_dispatch: cannot getargs\n");
738 742 svcerr_decode(xprt);
739 743 return;
740 744 }
741 745
742 746 (*proc)(&args, &res, req, ncg);
743 747
744 748 if (svc_sendreply(xprt, xdr_res, (caddr_t)&res) == FALSE) {
745 749
746 750 CB_WARN("cb_dispatch: bad sendreply\n");
747 751 svcerr_systemerr(xprt);
748 752 }
749 753
750 754 if (freeproc)
751 755 (*freeproc)(&res);
752 756
753 757 if (!SVC_FREEARGS(xprt, xdr_args, (caddr_t)&args)) {
754 758
755 759 CB_WARN("cb_dispatch: bad freeargs\n");
756 760 }
757 761 }
758 762
759 763 static rpcprog_t
760 764 nfs4_getnextprogram(struct nfs4_callback_globals *ncg)
761 765 {
762 766 int i, j;
763 767
764 768 j = ncg->nfs4_program_hint;
765 769 for (i = 0; i < nfs4_num_prognums; i++, j++) {
766 770
767 771 if (j >= nfs4_num_prognums)
768 772 j = 0;
769 773
770 774 if (ncg->nfs4prog2server[j] == NULL) {
771 775 ncg->nfs4_program_hint = j+1;
772 776 return (j+NFS4_CALLBACK);
773 777 }
774 778 }
775 779
776 780 return (0);
777 781 }
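
nfs4_getnextprogram is a hint-based circular scan: it resumes from wherever the previous search left off (nfs4_program_hint), wraps around the table at most once, and hands back the first free slot. A stand-alone model of the allocator over a plain pointer table (the kernel version returns j + NFS4_CALLBACK rather than the raw index):

    #define NSLOTS  1024

    static void *slots[NSLOTS];  /* NULL means the slot is free */
    static int   hint;           /* where the next search begins */

    /* Return a free slot index, or -1 if all slots are in use. */
    static int
    next_free_slot(void)
    {
        int i, j = hint;

        for (i = 0; i < NSLOTS; i++, j++) {
            if (j >= NSLOTS)
                j = 0;                  /* wrap around */
            if (slots[j] == NULL) {
                hint = j + 1;           /* resume after this one */
                return (j);
            }
        }
        return (-1);
    }

nfs4callback_destroy, just below, completes the scheme by pulling the hint back when it frees a slot earlier than the current hint, so released program numbers are rediscovered quickly.
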
778 782
779 783 void
780 784 nfs4callback_destroy(nfs4_server_t *np)
781 785 {
782 786 struct nfs4_callback_globals *ncg;
783 787 int i;
784 788
785 789 if (np->s_program == 0)
786 790 return;
787 791
788 792 ncg = np->zone_globals;
789 793 i = np->s_program - NFS4_CALLBACK;
790 794
791 795 mutex_enter(&ncg->nfs4_cb_lock);
792 796
793 797 ASSERT(ncg->nfs4prog2server[i] == np);
794 798
795 799 ncg->nfs4prog2server[i] = NULL;
796 800
797 801 if (i < ncg->nfs4_program_hint)
798 802 ncg->nfs4_program_hint = i;
799 803
800 804 mutex_exit(&ncg->nfs4_cb_lock);
801 805 }
802 806
803 807 /*
804 808	 * nfs4_setport - This function saves a netid and universal address for
805 809 * the callback program. These values will be used during setclientid.
806 810 */
807 811 static void
808 812 nfs4_setport(char *netid, char *uaddr, char *protofmly, char *proto,
809 813 struct nfs4_callback_globals *ncg)
810 814 {
811 815 struct nfs4_cb_port *p;
812 816 bool_t found = FALSE;
813 817
814 818 ASSERT(MUTEX_HELD(&ncg->nfs4_cb_lock));
815 819
816 820 p = list_head(&ncg->nfs4_cb_ports);
817 821 for (; p != NULL; p = list_next(&ncg->nfs4_cb_ports, p)) {
818 822 if (strcmp(p->netid, netid) == 0) {
819 823 found = TRUE;
820 824 break;
821 825 }
822 826 }
823 827 if (found == TRUE)
824 828 (void) strcpy(p->uaddr, uaddr);
825 829 else {
826 830 p = kmem_alloc(sizeof (*p), KM_SLEEP);
827 831
828 832 (void) strcpy(p->uaddr, uaddr);
829 833 (void) strcpy(p->netid, netid);
830 834 (void) strcpy(p->protofmly, protofmly);
831 835 (void) strcpy(p->proto, proto);
832 836 list_insert_head(&ncg->nfs4_cb_ports, p);
833 837 }
834 838 }
835 839
836 840 /*
837 841 * nfs4_cb_args - This function is used to construct the callback
838 842 * portion of the arguments needed for setclientid.
839 843 */
840 844
841 845 void
842 846 nfs4_cb_args(nfs4_server_t *np, struct knetconfig *knc, SETCLIENTID4args *args)
843 847 {
844 848 struct nfs4_cb_port *p;
845 849 bool_t found = FALSE;
846 850 rpcprog_t pgm;
847 851 struct nfs4_callback_globals *ncg = np->zone_globals;
848 852
849 853 /*
850 854 * This server structure may already have a program number
851 855 * assigned to it. This happens when the client has to
852 856 * re-issue SETCLIENTID. Just re-use the information.
853 857 */
854 858 if (np->s_program >= NFS4_CALLBACK &&
855 859 np->s_program < NFS4_CALLBACK + nfs4_num_prognums)
856 860 nfs4callback_destroy(np);
857 861
858 862 mutex_enter(&ncg->nfs4_cb_lock);
859 863
860 864 p = list_head(&ncg->nfs4_cb_ports);
861 865 for (; p != NULL; p = list_next(&ncg->nfs4_cb_ports, p)) {
862 866 if (strcmp(p->protofmly, knc->knc_protofmly) == 0 &&
863 867 strcmp(p->proto, knc->knc_proto) == 0) {
864 868 found = TRUE;
865 869 break;
866 870 }
867 871 }
868 872
869 873 if (found == FALSE) {
870 874
871 875 NFS4_DEBUG(nfs4_callback_debug,
872 876 (CE_WARN, "nfs4_cb_args: could not find netid for %s/%s\n",
873 877 knc->knc_protofmly, knc->knc_proto));
874 878
875 879 args->callback.cb_program = 0;
876 880 args->callback.cb_location.r_netid = NULL;
877 881 args->callback.cb_location.r_addr = NULL;
878 882 args->callback_ident = 0;
879 883 mutex_exit(&ncg->nfs4_cb_lock);
880 884 return;
881 885 }
882 886
883 887 if ((pgm = nfs4_getnextprogram(ncg)) == 0) {
884 888 CB_WARN("nfs4_cb_args: out of program numbers\n");
885 889
886 890 args->callback.cb_program = 0;
887 891 args->callback.cb_location.r_netid = NULL;
888 892 args->callback.cb_location.r_addr = NULL;
889 893 args->callback_ident = 0;
890 894 mutex_exit(&ncg->nfs4_cb_lock);
891 895 return;
892 896 }
893 897
894 898 ncg->nfs4prog2server[pgm-NFS4_CALLBACK] = np;
895 899 args->callback.cb_program = pgm;
896 900 args->callback.cb_location.r_netid = p->netid;
897 901 args->callback.cb_location.r_addr = p->uaddr;
898 902 args->callback_ident = pgm;
899 903
900 904 np->s_program = pgm;
901 905
902 906 mutex_exit(&ncg->nfs4_cb_lock);
903 907 }
904 908
905 909 static int
906 910 nfs4_dquery(struct nfs4_svc_args *arg, model_t model)
907 911 {
908 912 file_t *fp;
909 913 vnode_t *vp;
910 914 rnode4_t *rp;
911 915 int error;
912 916 STRUCT_HANDLE(nfs4_svc_args, uap);
913 917
914 918 STRUCT_SET_HANDLE(uap, model, arg);
915 919
916 920 if ((fp = getf(STRUCT_FGET(uap, fd))) == NULL)
917 921 return (EBADF);
918 922
919 923 vp = fp->f_vnode;
920 924
921 925 if (vp == NULL || vp->v_type != VREG ||
922 926 !vn_matchops(vp, nfs4_vnodeops)) {
923 927 releasef(STRUCT_FGET(uap, fd));
924 928 return (EBADF);
925 929 }
926 930
927 931 rp = VTOR4(vp);
928 932
929 933 /*
930 934 * I can't convince myself that we need locking here. The
931 935 * rnode cannot disappear and the value returned is instantly
932 936 * stale anway, so why bother?
933 937 */
934 938
935 939 error = suword32(STRUCT_FGETP(uap, netid), rp->r_deleg_type);
936 940 releasef(STRUCT_FGET(uap, fd));
937 941 return (error);
938 942 }
939 943
940 944
941 945 /*
942 946 * NFS4 client system call. This service does the
943 947 * necessary initialization for the callback program.
944 948 * This is fashioned after the server side interaction
945 949 * between nfsd and the kernel. On the client, the
946 950 * mount command forks and the child process does the
947 951 * necessary interaction with the kernel.
948 952 *
949 953 * uap->fd is the fd of an open transport provider
950 954 */
951 955 int
952 956 nfs4_svc(struct nfs4_svc_args *arg, model_t model)
953 957 {
954 958 file_t *fp;
955 959 int error;
956 960 int readsize;
957 961 char buf[KNC_STRSIZE], uaddr[KNC_STRSIZE];
958 962 char protofmly[KNC_STRSIZE], proto[KNC_STRSIZE];
959 963 size_t len;
960 964 STRUCT_HANDLE(nfs4_svc_args, uap);
961 965 struct netbuf addrmask;
962 966 int cmd;
963 967 SVCMASTERXPRT *cb_xprt;
964 968 struct nfs4_callback_globals *ncg;
965 969
966 970 #ifdef lint
967 971 model = model; /* STRUCT macros don't always refer to it */
968 972 #endif
969 973
970 974 STRUCT_SET_HANDLE(uap, model, arg);
971 975
972 976 if (STRUCT_FGET(uap, cmd) == NFS4_DQUERY)
973 977 return (nfs4_dquery(arg, model));
974 978
975 979 if (secpolicy_nfs(CRED()) != 0)
976 980 return (EPERM);
977 981
978 982 if ((fp = getf(STRUCT_FGET(uap, fd))) == NULL)
979 983 return (EBADF);
980 984
981 985 /*
982 986 * Set read buffer size to rsize
983 987 * and add room for RPC headers.
984 988 */
985 989 readsize = nfs3tsize() + (RPC_MAXDATASIZE - NFS_MAXDATA);
986 990 if (readsize < RPC_MAXDATASIZE)
987 991 readsize = RPC_MAXDATASIZE;
988 992
989 993 error = copyinstr((const char *)STRUCT_FGETP(uap, netid), buf,
990 994 KNC_STRSIZE, &len);
991 995 if (error) {
992 996 releasef(STRUCT_FGET(uap, fd));
993 997 return (error);
994 998 }
995 999
996 1000 cmd = STRUCT_FGET(uap, cmd);
997 1001
998 1002 if (cmd & NFS4_KRPC_START) {
999 1003 addrmask.len = STRUCT_FGET(uap, addrmask.len);
1000 1004 addrmask.maxlen = STRUCT_FGET(uap, addrmask.maxlen);
1001 1005 addrmask.buf = kmem_alloc(addrmask.maxlen, KM_SLEEP);
1002 1006 error = copyin(STRUCT_FGETP(uap, addrmask.buf), addrmask.buf,
1003 1007 addrmask.len);
1004 1008 if (error) {
1005 1009 releasef(STRUCT_FGET(uap, fd));
1006 1010 kmem_free(addrmask.buf, addrmask.maxlen);
1007 1011 return (error);
1008 1012 }
1009 1013 }
1010 1014 else
1011 1015 addrmask.buf = NULL;
1012 1016
1013 1017 error = copyinstr((const char *)STRUCT_FGETP(uap, addr), uaddr,
1014 1018 sizeof (uaddr), &len);
1015 1019 if (error) {
1016 1020 releasef(STRUCT_FGET(uap, fd));
1017 1021 if (addrmask.buf)
1018 1022 kmem_free(addrmask.buf, addrmask.maxlen);
1019 1023 return (error);
1020 1024 }
1021 1025
1022 1026 error = copyinstr((const char *)STRUCT_FGETP(uap, protofmly), protofmly,
1023 1027 sizeof (protofmly), &len);
1024 1028 if (error) {
1025 1029 releasef(STRUCT_FGET(uap, fd));
1026 1030 if (addrmask.buf)
1027 1031 kmem_free(addrmask.buf, addrmask.maxlen);
1028 1032 return (error);
1029 1033 }
1030 1034
1031 1035 error = copyinstr((const char *)STRUCT_FGETP(uap, proto), proto,
1032 1036 sizeof (proto), &len);
1033 1037 if (error) {
1034 1038 releasef(STRUCT_FGET(uap, fd));
1035 1039 if (addrmask.buf)
1036 1040 kmem_free(addrmask.buf, addrmask.maxlen);
1037 1041 return (error);
1038 1042 }
1039 1043
1040 1044 ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone());
1041 1045 ASSERT(ncg != NULL);
1042 1046
1043 1047 mutex_enter(&ncg->nfs4_cb_lock);
1044 1048 if (cmd & NFS4_SETPORT)
1045 1049 nfs4_setport(buf, uaddr, protofmly, proto, ncg);
1046 1050
1047 1051 if (cmd & NFS4_KRPC_START) {
1048 1052 error = svc_tli_kcreate(fp, readsize, buf, &addrmask, &cb_xprt,
1049 1053 &nfs4_cb_sct, NULL, NFS_CB_SVCPOOL_ID, FALSE);
1050 1054 if (error) {
1051 1055 CB_WARN1("nfs4_svc: svc_tli_kcreate failed %d\n",
1052 1056 error);
1053 1057 kmem_free(addrmask.buf, addrmask.maxlen);
1054 1058 }
1055 1059 }
1056 1060
1057 1061 mutex_exit(&ncg->nfs4_cb_lock);
1058 1062 releasef(STRUCT_FGET(uap, fd));
1059 1063 return (error);
1060 1064 }
1061 1065
1062 1066 struct nfs4_callback_globals *
1063 1067 nfs4_get_callback_globals(void)
1064 1068 {
1065 1069 return (zone_getspecific(nfs4_callback_zone_key, nfs_zone()));
1066 1070 }
1067 1071
1068 1072 static void *
1069 1073 nfs4_callback_init_zone(zoneid_t zoneid)
1070 1074 {
1071 1075 kstat_t *nfs4_callback_kstat;
1072 1076 struct nfs4_callback_globals *ncg;
1073 1077
1074 1078 ncg = kmem_zalloc(sizeof (*ncg), KM_SLEEP);
1075 1079
1076 1080 ncg->nfs4prog2server = kmem_zalloc(nfs4_num_prognums *
1077 1081 sizeof (struct nfs4_server *), KM_SLEEP);
1078 1082
1079 1083 /* initialize the dlist */
1080 1084 mutex_init(&ncg->nfs4_dlist_lock, NULL, MUTEX_DEFAULT, NULL);
1081 1085 list_create(&ncg->nfs4_dlist, sizeof (struct nfs4_dnode),
1082 1086 offsetof(struct nfs4_dnode, linkage));
1083 1087
1084 1088 /* initialize cb_port list */
1085 1089 mutex_init(&ncg->nfs4_cb_lock, NULL, MUTEX_DEFAULT, NULL);
1086 1090 list_create(&ncg->nfs4_cb_ports, sizeof (struct nfs4_cb_port),
1087 1091 offsetof(struct nfs4_cb_port, linkage));
1088 1092
1089 1093 /* get our own copy of the kstats */
1090 1094 bcopy(&nfs4_callback_stats_tmpl, &ncg->nfs4_callback_stats,
1091 1095 sizeof (nfs4_callback_stats_tmpl));
1092 1096 /* register "nfs:0:nfs4_callback_stats" for this zone */
1093 1097 if ((nfs4_callback_kstat =
1094 1098 kstat_create_zone("nfs", 0, "nfs4_callback_stats", "misc",
1095 1099 KSTAT_TYPE_NAMED,
1096 1100 sizeof (ncg->nfs4_callback_stats) / sizeof (kstat_named_t),
1097 1101 KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE,
1098 1102 zoneid)) != NULL) {
1099 1103 nfs4_callback_kstat->ks_data = &ncg->nfs4_callback_stats;
1100 1104 kstat_install(nfs4_callback_kstat);
1101 1105 }
1102 1106 return (ncg);
1103 1107 }
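
nfs4_callback_init_zone is the create hook of the standard illumos zone-key pattern this change leans on: zone_key_create() (called from nfs4_callback_init, further down) registers create/shutdown/destroy hooks, the create hook builds the zone's private nfs4_callback_globals, and consumers fetch it with zone_getspecific(). A toy user-space model of that lifecycle, with a fixed table standing in for the kernel's zone framework (all names here are illustrative):

    #include <assert.h>
    #include <stdlib.h>

    #define MAXZONES    8
    typedef int zoneid_t;

    static void *zone_data[MAXZONES];       /* one slot per zone */
    static void *(*create_hook)(zoneid_t);
    static void (*destroy_hook)(zoneid_t, void *);

    /* Like zone_key_create(): register the per-zone lifecycle hooks. */
    static void
    key_create(void *(*create)(zoneid_t), void (*destroy)(zoneid_t, void *))
    {
        create_hook = create;
        destroy_hook = destroy;
    }

    static void
    zone_boot(zoneid_t z)                   /* zone comes up */
    {
        zone_data[z] = create_hook(z);
    }

    static void
    zone_halt(zoneid_t z)                   /* zone goes away */
    {
        destroy_hook(z, zone_data[z]);
        zone_data[z] = NULL;
    }

    /* Like zone_getspecific(): fetch this zone's private globals. */
    static void *
    zone_getspecific_toy(zoneid_t z)
    {
        return (zone_data[z]);
    }

    static void *
    my_create(zoneid_t z)
    {
        (void) z;
        return (calloc(1, 64));             /* the zone's "globals" */
    }

    static void
    my_destroy(zoneid_t z, void *data)
    {
        (void) z;
        free(data);
    }

    int
    main(void)
    {
        key_create(my_create, my_destroy);
        zone_boot(1);
        assert(zone_getspecific_toy(1) != NULL);
        zone_halt(1);
        return (0);
    }
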
1104 1108
1105 1109 static void
1106 1110 nfs4_discard_delegations(struct nfs4_callback_globals *ncg)
1107 1111 {
1108 1112 nfs4_server_t *sp;
1109 1113 int i, num_removed;
1110 1114
1111 1115 /*
1112 1116 * It's OK here to just run through the registered "programs", as
1113 1117 * servers without programs won't have any delegations to handle.
1114 1118 */
1115 1119 for (i = 0; i < nfs4_num_prognums; i++) {
1116 1120 rnode4_t *rp;
1117 1121
1118 1122 mutex_enter(&ncg->nfs4_cb_lock);
1119 1123 sp = ncg->nfs4prog2server[i];
1120 1124 mutex_exit(&ncg->nfs4_cb_lock);
1121 1125
1122 1126 if (nfs4_server_vlock(sp, 1) == FALSE)
1123 1127 continue;
1124 1128 num_removed = 0;
1125 1129 while ((rp = list_head(&sp->s_deleg_list)) != NULL) {
1126 1130 mutex_enter(&rp->r_statev4_lock);
1127 1131 if (rp->r_deleg_type == OPEN_DELEGATE_NONE) {
1128 1132 /*
1129 1133 * We need to take matters into our own hands,
1130 1134 * as nfs4delegreturn_cleanup_impl() won't
1131 1135 * remove this from the list.
1132 1136 */
1133 1137 list_remove(&sp->s_deleg_list, rp);
1134 1138 mutex_exit(&rp->r_statev4_lock);
1135 1139 nfs4_dec_state_ref_count_nolock(sp,
1136 1140 VTOMI4(RTOV4(rp)));
1137 1141 num_removed++;
1138 1142 continue;
1139 1143 }
1140 1144 mutex_exit(&rp->r_statev4_lock);
1141 1145 VN_HOLD(RTOV4(rp));
1142 1146 mutex_exit(&sp->s_lock);
1143 1147 /*
1144 1148 * The following will remove the node from the list.
1145 1149 */
1146 1150 nfs4delegreturn_cleanup_impl(rp, sp, ncg);
1147 1151 VN_RELE(RTOV4(rp));
1148 1152 mutex_enter(&sp->s_lock);
1149 1153 }
1150 1154 mutex_exit(&sp->s_lock);
1151 1155 /* each removed list node reles a reference */
1152 1156 while (num_removed-- > 0)
1153 1157 nfs4_server_rele(sp);
1154 1158 /* remove our reference for nfs4_server_vlock */
1155 1159 nfs4_server_rele(sp);
1156 1160 }
1157 1161 }
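
nfs4_discard_delegations shows the usual discipline for draining a list when each element's teardown needs the list lock dropped: re-read list_head() after re-acquiring the lock instead of trusting a saved next pointer, because the list may have changed while the lock was out. A simplified stand-alone sketch of that shape (here the loop unlinks the node itself, whereas above the removal happens inside nfs4delegreturn_cleanup_impl):

    #include <pthread.h>
    #include <stdlib.h>

    typedef struct node {
        struct node *next;
    } node_t;

    static node_t          *head;
    static pthread_mutex_t  lock = PTHREAD_MUTEX_INITIALIZER;

    static void
    teardown(node_t *n)         /* heavy work; must run without 'lock' */
    {
        free(n);
    }

    static void
    drain_list(void)
    {
        node_t *n;

        pthread_mutex_lock(&lock);
        while ((n = head) != NULL) {    /* always re-read the head */
            head = n->next;             /* unlink under the lock */
            pthread_mutex_unlock(&lock);
            teardown(n);                /* lock dropped for the callee */
            pthread_mutex_lock(&lock);
        }
        pthread_mutex_unlock(&lock);
    }
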
1158 1162
1159 1163 /* ARGSUSED */
1160 1164 static void
1161 1165 nfs4_callback_shutdown_zone(zoneid_t zoneid, void *data)
1162 1166 {
1163 1167 struct nfs4_callback_globals *ncg = data;
1164 1168
1165 1169 /*
1166 1170 * Clean pending delegation return list.
1167 1171 */
1168 1172 nfs4_dlistclean_impl(ncg, NFS4_DR_DISCARD);
1169 1173
1170 1174 /*
1171 1175 * Discard all delegations.
1172 1176 */
1173 1177 nfs4_discard_delegations(ncg);
1174 1178 }
1175 1179
1176 1180 static void
1177 1181 nfs4_callback_fini_zone(zoneid_t zoneid, void *data)
1178 1182 {
1179 1183 struct nfs4_callback_globals *ncg = data;
1180 1184 struct nfs4_cb_port *p;
1181 1185 nfs4_server_t *sp, *next;
1182 1186 nfs4_server_t freelist;
1183 1187 int i;
1184 1188
1185 1189 kstat_delete_byname_zone("nfs", 0, "nfs4_callback_stats", zoneid);
1186 1190
1187 1191 /*
1188 1192 * Discard all delegations that may have crept in since we did the
1189 1193 * _shutdown.
1190 1194 */
1191 1195 nfs4_discard_delegations(ncg);
1192 1196 /*
1193 1197 * We're completely done with this zone and all associated
1194 1198 * nfs4_server_t's. Any remaining nfs4_server_ts should only have one
1195 1199 * more reference outstanding -- the reference we didn't release in
1196 1200 * nfs4_renew_lease_thread().
1197 1201 *
1198 1202 * Here we need to run through the global nfs4_server_lst as we need to
1199 1203 * deal with nfs4_server_ts without programs, as they also have threads
1200 1204 * created for them, and so have outstanding references that we need to
1201 1205 * release.
1202 1206 */
1203 1207 freelist.forw = &freelist;
1204 1208 freelist.back = &freelist;
1205 1209 mutex_enter(&nfs4_server_lst_lock);
1206 1210 sp = nfs4_server_lst.forw;
1207 1211 while (sp != &nfs4_server_lst) {
1208 1212 next = sp->forw;
1209 1213 if (sp->zoneid == zoneid) {
1210 1214 remque(sp);
1211 1215 insque(sp, &freelist);
1212 1216 }
1213 1217 sp = next;
1214 1218 }
1215 1219 mutex_exit(&nfs4_server_lst_lock);
1216 1220
1217 1221 sp = freelist.forw;
1218 1222 while (sp != &freelist) {
1219 1223 next = sp->forw;
1220 1224 nfs4_server_rele(sp); /* free the list's reference */
1221 1225 sp = next;
1222 1226 }
1223 1227
1224 1228 #ifdef DEBUG
1225 1229 for (i = 0; i < nfs4_num_prognums; i++) {
1226 1230 ASSERT(ncg->nfs4prog2server[i] == NULL);
1227 1231 }
1228 1232 #endif
1229 1233 kmem_free(ncg->nfs4prog2server, nfs4_num_prognums *
1230 1234 sizeof (struct nfs4_server *));
1231 1235
1232 1236 mutex_enter(&ncg->nfs4_cb_lock);
1233 1237 while ((p = list_head(&ncg->nfs4_cb_ports)) != NULL) {
1234 1238 list_remove(&ncg->nfs4_cb_ports, p);
1235 1239 kmem_free(p, sizeof (*p));
1236 1240 }
1237 1241 list_destroy(&ncg->nfs4_cb_ports);
1238 1242 mutex_destroy(&ncg->nfs4_cb_lock);
1239 1243 list_destroy(&ncg->nfs4_dlist);
1240 1244 mutex_destroy(&ncg->nfs4_dlist_lock);
1241 1245 kmem_free(ncg, sizeof (*ncg));
1242 1246 }
1243 1247
1244 1248 void
1245 1249 nfs4_callback_init(void)
1246 1250 {
1247 1251 int i;
1248 1252 SVC_CALLOUT *nfs4_cb_sc;
1249 1253
1250 1254 /* initialize the callback table */
1251 1255 nfs4_cb_sc = kmem_alloc(nfs4_num_prognums *
1252 1256 sizeof (SVC_CALLOUT), KM_SLEEP);
1253 1257
1254 1258 for (i = 0; i < nfs4_num_prognums; i++) {
1255 1259 nfs4_cb_sc[i].sc_prog = NFS4_CALLBACK+i;
1256 1260 nfs4_cb_sc[i].sc_versmin = NFS_CB;
1257 1261 nfs4_cb_sc[i].sc_versmax = NFS_CB;
1258 1262 nfs4_cb_sc[i].sc_dispatch = cb_dispatch;
1259 1263 }
1260 1264
1261 1265 nfs4_cb_sct.sct_size = nfs4_num_prognums;
1262 1266 nfs4_cb_sct.sct_free = FALSE;
1263 1267 nfs4_cb_sct.sct_sc = nfs4_cb_sc;
1264 1268
1265 1269 /*
1266 1270	 * Compute max bytes required for dynamically allocated parts
1267 1271 * of cb_getattr reply. Only size and change are supported now.
1268 1272 * If CB_GETATTR is changed to reply with additional attrs,
1269 1273 * additional sizes must be added below.
1270 1274 *
1271 1275 * fattr4_change + fattr4_size == uint64_t + uint64_t
1272 1276 */
1273 1277 cb_getattr_bytes = 2 * BYTES_PER_XDR_UNIT + 2 * BYTES_PER_XDR_UNIT;
1274 1278
1275 1279 zone_key_create(&nfs4_callback_zone_key, nfs4_callback_init_zone,
1276 1280 nfs4_callback_shutdown_zone, nfs4_callback_fini_zone);
1277 1281 }
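
nfs4_callback_init reserves a contiguous block of nfs4_num_prognums RPC program numbers, NFS4_CALLBACK through NFS4_CALLBACK + 1023, all routed to the same dispatcher; it is the program number, not a distinct handler, that identifies which server a callback belongs to. A minimal sketch of filling such a callout table (the types and the base value are illustrative stand-ins, not the kernel's SVC_CALLOUT):

    #include <stdlib.h>

    #define BASE_PROG   0x40000000u /* stand-in for NFS4_CALLBACK */
    #define NPROGS      1024        /* matches nfs4_num_prognums */

    typedef struct callout {
        unsigned int    prog;       /* RPC program number */
        unsigned int    versmin, versmax;
        void            (*dispatch)(void);
    } callout_t;

    static void
    cb_dispatch_stub(void)
    {
        /* ... decode the call and route it, as cb_dispatch() does ... */
    }

    static callout_t *
    build_callout_table(void)
    {
        callout_t *tbl = calloc(NPROGS, sizeof (callout_t));
        unsigned int i;

        if (tbl == NULL)
            return (NULL);
        for (i = 0; i < NPROGS; i++) {
            tbl[i].prog = BASE_PROG + i;    /* one prog per server */
            tbl[i].versmin = tbl[i].versmax = 1;
            tbl[i].dispatch = cb_dispatch_stub;
        }
        return (tbl);
    }
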
1278 1282
1279 1283 void
1280 1284 nfs4_callback_fini(void)
1281 1285 {
1282 1286 }
1283 1287
1284 1288 /*
1285 1289 * NB: This function can be called from the *wrong* zone (ie, the zone that
1286 1290 * 'rp' belongs to and the caller's zone may not be the same). This can happen
1287 1291 * if the zone is going away and we get called from nfs4_async_inactive(). In
1288 1292 * this case the globals will be NULL and we won't update the counters, which
1289 1293 * doesn't matter as the zone is going away anyhow.
1290 1294 */
1291 1295 static void
1292 1296 nfs4delegreturn_cleanup_impl(rnode4_t *rp, nfs4_server_t *np,
1293 1297 struct nfs4_callback_globals *ncg)
1294 1298 {
1295 1299 mntinfo4_t *mi = VTOMI4(RTOV4(rp));
1296 1300 boolean_t need_rele = B_FALSE;
1297 1301
1298 1302 /*
1299 1303 * Caller must be holding mi_recovlock in read mode
1300 1304 * to call here. This is provided by start_op.
1301 1305	 * Delegation management requires grabbing s_lock
1302 1306 * first and then r_statev4_lock.
1303 1307 */
1304 1308
1305 1309 if (np == NULL) {
1306 1310 np = find_nfs4_server_all(mi, 1);
1307 1311 if (np == NULL)
1308 1312 return;
1309 1313 need_rele = B_TRUE;
1310 1314 } else {
1311 1315 mutex_enter(&np->s_lock);
1312 1316 }
1313 1317
1314 1318 mutex_enter(&rp->r_statev4_lock);
1315 1319
1316 1320 if (rp->r_deleg_type == OPEN_DELEGATE_NONE) {
1317 1321 mutex_exit(&rp->r_statev4_lock);
1318 1322 mutex_exit(&np->s_lock);
1319 1323 if (need_rele)
1320 1324 nfs4_server_rele(np);
1321 1325 return;
1322 1326 }
1323 1327
1324 1328 /*
1325 1329 * Free the cred originally held when
1326 1330 * the delegation was granted. Caller must
1327 1331 * hold this cred if it wants to use it after
1328 1332 * this call.
1329 1333 */
1330 1334 crfree(rp->r_deleg_cred);
1331 1335 rp->r_deleg_cred = NULL;
1332 1336 rp->r_deleg_type = OPEN_DELEGATE_NONE;
1333 1337 rp->r_deleg_needs_recovery = OPEN_DELEGATE_NONE;
1334 1338 rp->r_deleg_needs_recall = FALSE;
1335 1339 rp->r_deleg_return_pending = FALSE;
1336 1340
1337 1341 /*
1338 1342 * Remove the rnode from the server's list and
1339 1343 * update the ref counts.
1340 1344 */
1341 1345 list_remove(&np->s_deleg_list, rp);
1342 1346 mutex_exit(&rp->r_statev4_lock);
1343 1347 nfs4_dec_state_ref_count_nolock(np, mi);
1344 1348 mutex_exit(&np->s_lock);
1345 1349 /* removed list node removes a reference */
1346 1350 nfs4_server_rele(np);
1347 1351 if (need_rele)
1348 1352 nfs4_server_rele(np);
1349 1353 if (ncg != NULL)
1350 1354 ncg->nfs4_callback_stats.delegations.value.ui64--;
1351 1355 }
1352 1356
1353 1357 void
1354 1358 nfs4delegreturn_cleanup(rnode4_t *rp, nfs4_server_t *np)
1355 1359 {
1356 1360 struct nfs4_callback_globals *ncg;
1357 1361
1358 1362 if (np != NULL) {
1359 1363 ncg = np->zone_globals;
1360 1364 } else if (nfs_zone() == VTOMI4(RTOV4(rp))->mi_zone) {
1361 1365 ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone());
1362 1366 ASSERT(ncg != NULL);
1363 1367 } else {
1364 1368 /*
1365 1369 * Request coming from the wrong zone.
1366 1370 */
1367 1371 ASSERT(getzoneid() == GLOBAL_ZONEID);
1368 1372 ncg = NULL;
1369 1373 }
1370 1374
1371 1375 nfs4delegreturn_cleanup_impl(rp, np, ncg);
1372 1376 }
1373 1377
1374 1378 static void
1375 1379 nfs4delegreturn_save_lost_rqst(int error, nfs4_lost_rqst_t *lost_rqstp,
1376 1380 cred_t *cr, vnode_t *vp)
1377 1381 {
1378 1382 if (error != ETIMEDOUT && error != EINTR &&
1379 1383 !NFS4_FRC_UNMT_ERR(error, vp->v_vfsp)) {
1380 1384 lost_rqstp->lr_op = 0;
1381 1385 return;
1382 1386 }
1383 1387
1384 1388 NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE,
1385 1389	    "nfs4delegreturn_save_lost_rqst: error %d", error));
1386 1390
1387 1391 lost_rqstp->lr_op = OP_DELEGRETURN;
1388 1392 /*
1389 1393 * The vp is held and rele'd via the recovery code.
1390 1394 * See nfs4_save_lost_rqst.
1391 1395 */
1392 1396 lost_rqstp->lr_vp = vp;
1393 1397 lost_rqstp->lr_dvp = NULL;
1394 1398 lost_rqstp->lr_oop = NULL;
1395 1399 lost_rqstp->lr_osp = NULL;
1396 1400 lost_rqstp->lr_lop = NULL;
1397 1401 lost_rqstp->lr_cr = cr;
1398 1402 lost_rqstp->lr_flk = NULL;
1399 1403 lost_rqstp->lr_putfirst = FALSE;
1400 1404 }
1401 1405
1402 1406 static void
1403 1407 nfs4delegreturn_otw(rnode4_t *rp, cred_t *cr, nfs4_error_t *ep)
1404 1408 {
1405 1409 COMPOUND4args_clnt args;
1406 1410 COMPOUND4res_clnt res;
1407 1411 nfs_argop4 argops[3];
1408 1412 nfs4_ga_res_t *garp = NULL;
1409 1413 hrtime_t t;
1410 1414 int numops;
1411 1415 int doqueue = 1;
1412 1416
1413 1417 args.ctag = TAG_DELEGRETURN;
1414 1418
1415 1419 numops = 3; /* PUTFH, GETATTR, DELEGRETURN */
1416 1420
1417 1421 args.array = argops;
1418 1422 args.array_len = numops;
1419 1423
1420 1424 argops[0].argop = OP_CPUTFH;
1421 1425 argops[0].nfs_argop4_u.opcputfh.sfh = rp->r_fh;
1422 1426
1423 1427 argops[1].argop = OP_GETATTR;
1424 1428 argops[1].nfs_argop4_u.opgetattr.attr_request = NFS4_VATTR_MASK;
1425 1429 argops[1].nfs_argop4_u.opgetattr.mi = VTOMI4(RTOV4(rp));
1426 1430
1427 1431 argops[2].argop = OP_DELEGRETURN;
1428 1432 argops[2].nfs_argop4_u.opdelegreturn.deleg_stateid =
1429 1433 rp->r_deleg_stateid;
1430 1434
1431 1435 t = gethrtime();
1432 1436 rfs4call(VTOMI4(RTOV4(rp)), &args, &res, cr, &doqueue, 0, ep);
1433 1437
1434 1438 if (ep->error)
1435 1439 return;
1436 1440
1437 1441 if (res.status == NFS4_OK) {
1438 1442 garp = &res.array[1].nfs_resop4_u.opgetattr.ga_res;
1439 1443 nfs4_attr_cache(RTOV4(rp), garp, t, cr, TRUE, NULL);
1440 1444
1441 1445 }
1442 1446 xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
1443 1447 }
1444 1448
1445 1449 int
1446 1450 nfs4_do_delegreturn(rnode4_t *rp, int flags, cred_t *cr,
1447 1451 struct nfs4_callback_globals *ncg)
1448 1452 {
1449 1453 vnode_t *vp = RTOV4(rp);
1450 1454 mntinfo4_t *mi = VTOMI4(vp);
1451 1455 nfs4_lost_rqst_t lost_rqst;
1452 1456 nfs4_recov_state_t recov_state;
1453 1457 bool_t needrecov = FALSE, recovonly, done = FALSE;
1454 1458 nfs4_error_t e = { 0, NFS4_OK, RPC_SUCCESS };
1455 1459
1456 1460 ncg->nfs4_callback_stats.delegreturn.value.ui64++;
1457 1461
1458 1462 while (!done) {
1459 1463 e.error = nfs4_start_fop(mi, vp, NULL, OH_DELEGRETURN,
1460 1464 &recov_state, &recovonly);
1461 1465
1462 1466 if (e.error) {
1463 1467 if (flags & NFS4_DR_FORCE) {
1464 1468 (void) nfs_rw_enter_sig(&mi->mi_recovlock,
1465 1469 RW_READER, 0);
1466 1470 nfs4delegreturn_cleanup_impl(rp, NULL, ncg);
1467 1471 nfs_rw_exit(&mi->mi_recovlock);
1468 1472 }
1469 1473 break;
1470 1474 }
1471 1475
1472 1476 /*
1473 1477 * Check to see if the delegation has already been
1474 1478 * returned by the recovery thread. The state of
1475 1479 * the delegation cannot change at this point due
1476 1480 * to start_fop and the r_deleg_recall_lock.
1477 1481 */
1478 1482 if (rp->r_deleg_type == OPEN_DELEGATE_NONE) {
1479 1483 e.error = 0;
1480 1484 nfs4_end_op(mi, vp, NULL, &recov_state, needrecov);
1481 1485 break;
1482 1486 }
1483 1487
1484 1488 if (recovonly) {
1485 1489 /*
1486 1490 * Delegation will be returned via the
1487 1491 * recovery framework. Build a lost request
1488 1492 * structure, start recovery and get out.
1489 1493 */
1490 1494 nfs4_error_init(&e, EINTR);
1491 1495 nfs4delegreturn_save_lost_rqst(e.error, &lost_rqst,
1492 1496 cr, vp);
1493 1497 (void) nfs4_start_recovery(&e, mi, vp,
1494 1498 NULL, &rp->r_deleg_stateid,
1495 1499 lost_rqst.lr_op == OP_DELEGRETURN ?
1496 1500 &lost_rqst : NULL, OP_DELEGRETURN, NULL,
1497 1501 NULL, NULL);
1498 1502 nfs4_end_op(mi, vp, NULL, &recov_state, needrecov);
1499 1503 break;
1500 1504 }
1501 1505
1502 1506 nfs4delegreturn_otw(rp, cr, &e);
1503 1507
1504 1508 /*
1505 1509 * Ignore some errors on delegreturn; no point in marking
1506 1510 * the file dead on a state destroying operation.
1507 1511 */
1508 1512 if (e.error == 0 && (nfs4_recov_marks_dead(e.stat) ||
1509 1513 e.stat == NFS4ERR_BADHANDLE ||
1510 1514 e.stat == NFS4ERR_STALE))
1511 1515 needrecov = FALSE;
1512 1516 else
1513 1517 needrecov = nfs4_needs_recovery(&e, TRUE, vp->v_vfsp);
1514 1518
1515 1519 if (needrecov) {
1516 1520 nfs4delegreturn_save_lost_rqst(e.error, &lost_rqst,
1517 1521 cr, vp);
1518 1522 (void) nfs4_start_recovery(&e, mi, vp,
1519 1523 NULL, &rp->r_deleg_stateid,
1520 1524 lost_rqst.lr_op == OP_DELEGRETURN ?
1521 1525 &lost_rqst : NULL, OP_DELEGRETURN, NULL,
1522 1526 NULL, NULL);
1523 1527 } else {
1524 1528 nfs4delegreturn_cleanup_impl(rp, NULL, ncg);
1525 1529 done = TRUE;
1526 1530 }
1527 1531
1528 1532 nfs4_end_op(mi, vp, NULL, &recov_state, needrecov);
1529 1533 }
1530 1534 return (e.error);
1531 1535 }
1532 1536
1533 1537 /*
1534 1538 * nfs4_resend_delegreturn - used to drive the delegreturn
1535 1539 * operation via the recovery thread.
1536 1540 */
1537 1541 void
1538 1542 nfs4_resend_delegreturn(nfs4_lost_rqst_t *lorp, nfs4_error_t *ep,
1539 1543 nfs4_server_t *np)
1540 1544 {
1541 1545 rnode4_t *rp = VTOR4(lorp->lr_vp);
1542 1546
1543 1547 /* If the file failed recovery, just quit. */
1544 1548 mutex_enter(&rp->r_statelock);
1545 1549 if (rp->r_flags & R4RECOVERR) {
1546 1550 ep->error = EIO;
1547 1551 }
1548 1552 mutex_exit(&rp->r_statelock);
1549 1553
1550 1554 if (!ep->error)
1551 1555 nfs4delegreturn_otw(rp, lorp->lr_cr, ep);
1552 1556
1553 1557 /*
1554 1558 * If recovery is now needed, then return the error
1555 1559 * and status and let the recovery thread handle it,
1556 1560 * including re-driving another delegreturn. Otherwise,
1557 1561 * just give up and clean up the delegation.
1558 1562 */
1559 1563 if (nfs4_needs_recovery(ep, TRUE, lorp->lr_vp->v_vfsp))
1560 1564 return;
1561 1565
1562 1566 if (rp->r_deleg_type != OPEN_DELEGATE_NONE)
1563 1567 nfs4delegreturn_cleanup(rp, np);
1564 1568
1565 1569 nfs4_error_zinit(ep);
1566 1570 }
1567 1571
1568 1572 /*
1569 1573 * nfs4delegreturn - general function to return a delegation.
1570 1574 *
1571 1575 * NFS4_DR_FORCE - return the delegation even if start_op fails
1572 1576 * NFS4_DR_PUSH - push modified data back to the server via VOP_PUTPAGE
1573 1577 * NFS4_DR_DISCARD - discard the delegation w/o delegreturn
1574 1578 * NFS4_DR_DID_OP - calling function already did nfs4_start_op
1575 1579 * NFS4_DR_RECALL - delegreturned initiated via CB_RECALL
1576 1580 * NFS4_DR_REOPEN - do file reopens, if applicable
1577 1581 */
1578 1582 static int
1579 1583 nfs4delegreturn_impl(rnode4_t *rp, int flags, struct nfs4_callback_globals *ncg)
1580 1584 {
1581 1585 int error = 0;
1582 1586 cred_t *cr = NULL;
1583 1587 vnode_t *vp;
1584 1588 bool_t needrecov = FALSE;
1585 1589 bool_t rw_entered = FALSE;
1586 1590 bool_t do_reopen;
1587 1591
1588 1592 vp = RTOV4(rp);
1589 1593
1590 1594 /*
1591 1595 * If NFS4_DR_DISCARD is set by itself, take a short-cut and
1592 1596 * discard without doing an otw DELEGRETURN. This may only be used
1593 1597 * by the recovery thread because it bypasses the synchronization
1594 1598 * with r_deleg_recall_lock and mi->mi_recovlock.
1595 1599 */
1596 1600 if (flags == NFS4_DR_DISCARD) {
1597 1601 nfs4delegreturn_cleanup_impl(rp, NULL, ncg);
1598 1602 return (0);
1599 1603 }
1600 1604
1601 1605 if (flags & NFS4_DR_DID_OP) {
1602 1606 /*
1603 1607 * Caller had already done start_op, which means the
1604 1608 * r_deleg_recall_lock is already held in READ mode
1605 1609 * so we cannot take it in write mode. Return the
1606 1610 * delegation asynchronously.
1607 1611 *
1608 1612 * Remove the NFS4_DR_DID_OP flag so we don't
1609 1613 * get stuck looping through here.
1610 1614 */
1611 1615 VN_HOLD(vp);
1612 1616 nfs4delegreturn_async(rp, (flags & ~NFS4_DR_DID_OP), FALSE);
1613 1617 return (0);
1614 1618 }
1615 1619
1616 1620 /*
1617 1621 * Verify we still have a delegation and crhold the credential.
1618 1622 */
1619 1623 mutex_enter(&rp->r_statev4_lock);
1620 1624 if (rp->r_deleg_type == OPEN_DELEGATE_NONE) {
1621 1625 mutex_exit(&rp->r_statev4_lock);
1622 1626 goto out;
1623 1627 }
1624 1628 cr = rp->r_deleg_cred;
1625 1629 ASSERT(cr != NULL);
1626 1630 crhold(cr);
1627 1631 mutex_exit(&rp->r_statev4_lock);
1628 1632
1629 1633 /*
1630 1634 * Push the modified data back to the server synchronously
1631 1635 * before doing DELEGRETURN.
1632 1636 */
1633 1637 if (flags & NFS4_DR_PUSH)
1634 1638 (void) VOP_PUTPAGE(vp, 0, 0, 0, cr, NULL);
1635 1639
1636 1640 /*
1637 1641 * Take r_deleg_recall_lock in WRITE mode, this will prevent
1638 1642 * nfs4_is_otw_open_necessary from trying to use the delegation
1639 1643 * while the DELEGRETURN is in progress.
1640 1644 */
1641 1645 (void) nfs_rw_enter_sig(&rp->r_deleg_recall_lock, RW_WRITER, FALSE);
1642 1646
1643 1647 rw_entered = TRUE;
1644 1648
1645 1649 if (rp->r_deleg_type == OPEN_DELEGATE_NONE)
1646 1650 goto out;
1647 1651
1648 1652 if (flags & NFS4_DR_REOPEN) {
1649 1653 /*
1650 1654 * If R4RECOVERRP is already set, then skip re-opening
1651 1655 * the delegation open streams and go straight to doing
1652 1656 * delegreturn. (XXX if the file has failed recovery, then the
1653 1657 * delegreturn attempt is likely to be futile.)
1654 1658 */
1655 1659 mutex_enter(&rp->r_statelock);
1656 1660 do_reopen = !(rp->r_flags & R4RECOVERRP);
1657 1661 mutex_exit(&rp->r_statelock);
1658 1662
1659 1663 if (do_reopen) {
1660 1664 error = deleg_reopen(vp, &needrecov, ncg, flags);
1661 1665 if (error != 0) {
1662 1666 if ((flags & (NFS4_DR_FORCE | NFS4_DR_RECALL))
1663 1667 == 0)
1664 1668 goto out;
1665 1669 } else if (needrecov) {
1666 1670 if ((flags & NFS4_DR_FORCE) == 0)
1667 1671 goto out;
1668 1672 }
1669 1673 }
1670 1674 }
1671 1675
1672 1676 if (flags & NFS4_DR_DISCARD) {
1673 1677 mntinfo4_t *mi = VTOMI4(RTOV4(rp));
1674 1678
1675 1679 mutex_enter(&rp->r_statelock);
1676 1680 /*
1677 1681	 * deleg_return_pending is cleared inside delegation_accept
1678 1682	 * when a delegation is accepted. If this flag has been
1679 1683 * cleared, then a new delegation has overwritten the one we
1680 1684 * were about to throw away.
1681 1685 */
1682 1686 if (!rp->r_deleg_return_pending) {
1683 1687 mutex_exit(&rp->r_statelock);
1684 1688 goto out;
1685 1689 }
1686 1690 mutex_exit(&rp->r_statelock);
1687 1691 (void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, FALSE);
1688 1692 nfs4delegreturn_cleanup_impl(rp, NULL, ncg);
1689 1693 nfs_rw_exit(&mi->mi_recovlock);
1690 1694 } else {
1691 1695 error = nfs4_do_delegreturn(rp, flags, cr, ncg);
1692 1696 }
1693 1697
1694 1698 out:
1695 1699 if (cr)
1696 1700 crfree(cr);
1697 1701 if (rw_entered)
1698 1702 nfs_rw_exit(&rp->r_deleg_recall_lock);
1699 1703 return (error);
1700 1704 }
1701 1705
1702 1706 int
1703 1707 nfs4delegreturn(rnode4_t *rp, int flags)
1704 1708 {
1705 1709 struct nfs4_callback_globals *ncg;
1706 1710
1707 1711 ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone());
1708 1712 ASSERT(ncg != NULL);
1709 1713
1710 1714 return (nfs4delegreturn_impl(rp, flags, ncg));
1711 1715 }
1712 1716
1713 1717 void
1714 1718 nfs4delegreturn_async(rnode4_t *rp, int flags, bool_t trunc)
1715 1719 {
1716 1720 struct cb_recall_pass *pp;
1717 1721
1718 1722 pp = kmem_alloc(sizeof (struct cb_recall_pass), KM_SLEEP);
1719 1723 pp->rp = rp;
1720 1724 pp->flags = flags;
1721 1725 pp->truncate = trunc;
1722 1726
1723 1727 /*
1724 1728 * Fire up a thread to do the actual delegreturn
1725 1729 * Caller must guarantee that the rnode doesn't
1726 1730 * vanish (by calling VN_HOLD).
1727 1731 */
1728 1732
1729 1733 (void) zthread_create(NULL, 0, nfs4delegreturn_thread, pp, 0,
1730 1734 minclsyspri);
1731 1735 }
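
A caller-side sketch (illustrative only; "rp" and its vnode are assumed
valid here): the vnode must be held before the async delegreturn is queued,
because the spawned nfs4delegreturn_thread() performs the matching
VN_RELE() when it finishes.

	vnode_t *vp = RTOV4(rp);

	VN_HOLD(vp);	/* released by nfs4delegreturn_thread() */
	nfs4delegreturn_async(rp, NFS4_DR_PUSH | NFS4_DR_REOPEN, FALSE);
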
1732 1736
1733 1737 static void
1734 1738 delegreturn_all_thread(rpcprog_t *pp)
1735 1739 {
1736 1740 nfs4_server_t *np;
1737 1741 bool_t found = FALSE;
1738 1742 rpcprog_t prog;
1739 1743 rnode4_t *rp;
1740 1744 vnode_t *vp;
1741 1745 zoneid_t zoneid = getzoneid();
1742 1746 struct nfs4_callback_globals *ncg;
1743 1747
1744 1748 NFS4_DEBUG(nfs4_drat_debug,
1745 1749 	    (CE_NOTE, "delegreturn_all_thread: prog %d\n", *pp));
1746 1750
1747 1751 prog = *pp;
1748 1752 kmem_free(pp, sizeof (*pp));
1749 1753 pp = NULL;
1750 1754
1751 1755 mutex_enter(&nfs4_server_lst_lock);
1752 1756 for (np = nfs4_server_lst.forw; np != &nfs4_server_lst; np = np->forw) {
1753 1757 if (np->zoneid == zoneid && np->s_program == prog) {
1754 1758 mutex_enter(&np->s_lock);
1755 1759 found = TRUE;
1756 1760 break;
1757 1761 }
1758 1762 }
1759 1763 mutex_exit(&nfs4_server_lst_lock);
1760 1764
1761 1765 /*
1762 1766 * It's possible that the nfs4_server which was using this
1763 1767 * program number has vanished since this thread is async.
1764 1768 * If so, just return. Your work here is finished, my friend.
1765 1769 */
1766 1770 if (!found)
1767 1771 goto out;
1768 1772
1769 1773 ncg = np->zone_globals;
1770 1774 while ((rp = list_head(&np->s_deleg_list)) != NULL) {
1771 1775 vp = RTOV4(rp);
1772 1776 VN_HOLD(vp);
1773 1777 mutex_exit(&np->s_lock);
1774 1778 (void) nfs4delegreturn_impl(rp, NFS4_DR_PUSH|NFS4_DR_REOPEN,
1775 1779 ncg);
1776 1780 VN_RELE(vp);
1777 1781
1778 1782 /* retake the s_lock for next trip through the loop */
1779 1783 mutex_enter(&np->s_lock);
1780 1784 }
1781 1785 mutex_exit(&np->s_lock);
1782 1786 out:
1783 1787 NFS4_DEBUG(nfs4_drat_debug,
1784 1788 	    (CE_NOTE, "delegreturn_all_thread: complete\n"));
1785 1789 zthread_exit();
1786 1790 }
1787 1791
1788 1792 void
1789 1793 nfs4_delegreturn_all(nfs4_server_t *sp)
1790 1794 {
1791 1795 rpcprog_t pro, *pp;
1792 1796
1793 1797 mutex_enter(&sp->s_lock);
1794 1798
1795 1799 /* Check to see if the delegation list is empty */
1796 1800
1797 1801 if (list_head(&sp->s_deleg_list) == NULL) {
1798 1802 mutex_exit(&sp->s_lock);
1799 1803 return;
1800 1804 }
1801 1805 /*
1802 1806 * Grab the program number; the async thread will use this
1803 1807 * to find the nfs4_server.
1804 1808 */
1805 1809 pro = sp->s_program;
1806 1810 mutex_exit(&sp->s_lock);
1807 1811 pp = kmem_alloc(sizeof (rpcprog_t), KM_SLEEP);
1808 1812 *pp = pro;
1809 1813 (void) zthread_create(NULL, 0, delegreturn_all_thread, pp, 0,
1810 1814 minclsyspri);
1811 1815 }
1812 1816
1813 1817
1814 1818 /*
1815 1819 * Discard any delegations
1816 1820 *
1817 1821  * Iterate over the server's s_deleg_list and
1818 1822 * for matching mount-point rnodes discard
1819 1823 * the delegation.
1820 1824 */
1821 1825 void
1822 1826 nfs4_deleg_discard(mntinfo4_t *mi, nfs4_server_t *sp)
1823 1827 {
1824 1828 rnode4_t *rp, *next;
1825 1829 mntinfo4_t *r_mi;
1826 1830 struct nfs4_callback_globals *ncg;
1827 1831
1828 1832 ASSERT(mutex_owned(&sp->s_lock));
1829 1833 ncg = sp->zone_globals;
1830 1834
1831 1835 for (rp = list_head(&sp->s_deleg_list); rp != NULL; rp = next) {
1832 1836 r_mi = VTOMI4(RTOV4(rp));
1833 1837 next = list_next(&sp->s_deleg_list, rp);
1834 1838
1835 1839 if (r_mi != mi) {
1836 1840 /*
1837 1841 			 * Skip if this rnode is not on the
1838 1842 			 * same mount-point.
1839 1843 */
1840 1844 continue;
1841 1845 }
1842 1846
1843 1847 ASSERT(rp->r_deleg_type == OPEN_DELEGATE_READ);
1844 1848
1845 1849 #ifdef DEBUG
1846 1850 if (nfs4_client_recov_debug) {
1847 1851 zprintf(getzoneid(),
1848 1852 "nfs4_deleg_discard: matched rnode %p "
1849 1853 "-- discarding delegation\n", (void *)rp);
1850 1854 }
1851 1855 #endif
1852 1856 mutex_enter(&rp->r_statev4_lock);
1853 1857 /*
1854 1858 * Free the cred originally held when the delegation
1855 1859 * was granted. Also need to decrement the refcnt
1856 1860 * on this server for each delegation we discard
1857 1861 		 * on this server for each delegation we discard.
1858 1862 if (rp->r_deleg_cred)
1859 1863 crfree(rp->r_deleg_cred);
1860 1864 rp->r_deleg_cred = NULL;
1861 1865 rp->r_deleg_type = OPEN_DELEGATE_NONE;
1862 1866 rp->r_deleg_needs_recovery = OPEN_DELEGATE_NONE;
1863 1867 rp->r_deleg_needs_recall = FALSE;
1864 1868 ASSERT(sp->s_refcnt > 1);
1865 1869 sp->s_refcnt--;
1866 1870 list_remove(&sp->s_deleg_list, rp);
1867 1871 mutex_exit(&rp->r_statev4_lock);
1868 1872 nfs4_dec_state_ref_count_nolock(sp, mi);
1869 1873 ncg->nfs4_callback_stats.delegations.value.ui64--;
1870 1874 }
1871 1875 }
1872 1876
1873 1877 /*
1874 1878 * Reopen any open streams that were covered by the given file's
1875 1879 * delegation.
1876 1880 * Returns zero or an errno value. If there was no error, *recovp
1877 1881 * indicates whether recovery was initiated.
1878 1882 */
1879 1883
1880 1884 static int
1881 1885 deleg_reopen(vnode_t *vp, bool_t *recovp, struct nfs4_callback_globals *ncg,
1882 1886 int flags)
1883 1887 {
1884 1888 nfs4_open_stream_t *osp;
1885 1889 nfs4_recov_state_t recov_state;
1886 1890 bool_t needrecov = FALSE;
1887 1891 mntinfo4_t *mi;
1888 1892 rnode4_t *rp;
1889 1893 nfs4_error_t e = { 0, NFS4_OK, RPC_SUCCESS };
1890 1894 int claimnull;
1891 1895
1892 1896 mi = VTOMI4(vp);
1893 1897 rp = VTOR4(vp);
1894 1898
1895 1899 recov_state.rs_flags = 0;
1896 1900 recov_state.rs_num_retry_despite_err = 0;
1897 1901
1898 1902 retry:
1899 1903 if ((e.error = nfs4_start_op(mi, vp, NULL, &recov_state)) != 0) {
1900 1904 return (e.error);
1901 1905 }
1902 1906
1903 1907 /*
1904 1908 	 * If we mean to discard the delegation, it must be BAD, so don't
1905 1909 * use it when doing the reopen or it will fail too.
1906 1910 */
1907 1911 claimnull = (flags & NFS4_DR_DISCARD);
1908 1912 /*
1909 1913 * Loop through the open streams for this rnode to find
1910 1914 * all of the ones created using the delegation state ID.
1911 1915 * Each of these needs to be re-opened.
1912 1916 */
1913 1917
1914 1918 while ((osp = get_next_deleg_stream(rp, claimnull)) != NULL) {
1915 1919
1916 1920 if (claimnull) {
1917 1921 nfs4_reopen(vp, osp, &e, CLAIM_NULL, FALSE, FALSE);
1918 1922 } else {
1919 1923 ncg->nfs4_callback_stats.claim_cur.value.ui64++;
1920 1924
1921 1925 nfs4_reopen(vp, osp, &e, CLAIM_DELEGATE_CUR, FALSE,
1922 1926 FALSE);
1923 1927 if (e.error == 0 && e.stat == NFS4_OK)
1924 1928 ncg->nfs4_callback_stats.
1925 1929 claim_cur_ok.value.ui64++;
1926 1930 }
1927 1931
1928 1932 if (e.error == EAGAIN) {
1929 1933 open_stream_rele(osp, rp);
1930 1934 nfs4_end_op(mi, vp, NULL, &recov_state, TRUE);
1931 1935 goto retry;
1932 1936 }
1933 1937
1934 1938 /*
1935 1939 	 * If error is EINTR, ETIMEDOUT, or NFS4_FRC_UNMT_ERR, then
1936 1940 * recovery has already been started inside of nfs4_reopen.
1937 1941 */
1938 1942 if (e.error == EINTR || e.error == ETIMEDOUT ||
1939 1943 NFS4_FRC_UNMT_ERR(e.error, vp->v_vfsp)) {
1940 1944 open_stream_rele(osp, rp);
1941 1945 break;
1942 1946 }
1943 1947
1944 1948 needrecov = nfs4_needs_recovery(&e, TRUE, vp->v_vfsp);
1945 1949
1946 1950 if (e.error != 0 && !needrecov) {
1947 1951 /*
1948 1952 * Recovery is not possible, but don't give up yet;
1949 1953 * we'd still like to do delegreturn after
1950 1954 * reopening as many streams as possible.
1951 1955 * Continue processing the open streams.
1952 1956 */
1953 1957
1954 1958 ncg->nfs4_callback_stats.recall_failed.value.ui64++;
1955 1959
1956 1960 } else if (needrecov) {
1957 1961 /*
1958 1962 * Start recovery and bail out. The recovery
1959 1963 * thread will take it from here.
1960 1964 */
1961 1965 (void) nfs4_start_recovery(&e, mi, vp, NULL, NULL,
1962 1966 NULL, OP_OPEN, NULL, NULL, NULL);
1963 1967 open_stream_rele(osp, rp);
1964 1968 *recovp = TRUE;
1965 1969 break;
1966 1970 }
1967 1971
1968 1972 open_stream_rele(osp, rp);
1969 1973 }
1970 1974
1971 1975 nfs4_end_op(mi, vp, NULL, &recov_state, needrecov);
1972 1976
1973 1977 return (e.error);
1974 1978 }
1975 1979
1976 1980 /*
1977 1981 * get_next_deleg_stream - returns the next open stream which
1978 1982  * represents a delegation for this rnode. In order to ensure
1979 1983 * forward progress, the caller must guarantee that each open
1980 1984 * stream returned is changed so that a future call won't return
1981 1985 * it again.
1982 1986 *
1983 1987 * There are several ways for the open stream to change. If the open
1984 1988 * stream is !os_delegation, then we aren't interested in it. Also, if
1985 1989 * either os_failed_reopen or !os_valid, then don't return the osp.
1986 1990 *
1987 1991 * If claimnull is false (doing reopen CLAIM_DELEGATE_CUR) then return
1988 1992 * the osp if it is an os_delegation open stream. Also, if the rnode still
1989 1993 * has r_deleg_return_pending, then return the os_delegation osp. Lastly,
1990 1994 * if the rnode's r_deleg_stateid is different from the osp's open_stateid,
1991 1995 * then return the osp.
1992 1996 *
1993 1997 * We have already taken the 'r_deleg_recall_lock' as WRITER, which
1994 1998 * prevents new OPENs from going OTW (as start_fop takes this
1995 1999 * lock in READ mode); thus, no new open streams can be created
1996 2000 * (which inherently means no new delegation open streams are
1997 2001 * being created).
1998 2002 */
1999 2003
2000 2004 static nfs4_open_stream_t *
2001 2005 get_next_deleg_stream(rnode4_t *rp, int claimnull)
2002 2006 {
2003 2007 nfs4_open_stream_t *osp;
2004 2008
2005 2009 ASSERT(nfs_rw_lock_held(&rp->r_deleg_recall_lock, RW_WRITER));
2006 2010
2007 2011 /*
2008 2012 * Search through the list of open streams looking for
2009 2013 * one that was created while holding the delegation.
2010 2014 */
2011 2015 mutex_enter(&rp->r_os_lock);
2012 2016 for (osp = list_head(&rp->r_open_streams); osp != NULL;
2013 2017 osp = list_next(&rp->r_open_streams, osp)) {
2014 2018 mutex_enter(&osp->os_sync_lock);
2015 2019 if (!osp->os_delegation || osp->os_failed_reopen ||
2016 2020 !osp->os_valid) {
2017 2021 mutex_exit(&osp->os_sync_lock);
2018 2022 continue;
2019 2023 }
2020 2024 if (!claimnull || rp->r_deleg_return_pending ||
2021 2025 !stateid4_cmp(&osp->open_stateid, &rp->r_deleg_stateid)) {
2022 2026 osp->os_ref_count++;
2023 2027 mutex_exit(&osp->os_sync_lock);
2024 2028 mutex_exit(&rp->r_os_lock);
2025 2029 return (osp);
2026 2030 }
2027 2031 mutex_exit(&osp->os_sync_lock);
2028 2032 }
2029 2033 mutex_exit(&rp->r_os_lock);
2030 2034
2031 2035 return (NULL);
2032 2036 }
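
The forward-progress contract can be illustrated with a hypothetical
consumer (a sketch, not part of this change): each returned stream must be
changed (e.g. re-opened, which replaces its open_stateid) and then
released, or the loop would keep getting the same stream back.

	/* Assumes r_deleg_recall_lock is already held as RW_WRITER. */
	nfs4_open_stream_t *osp;

	while ((osp = get_next_deleg_stream(rp, claimnull)) != NULL) {
		/* ... re-open the stream so it won't be returned again ... */
		open_stream_rele(osp, rp);
	}
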
2033 2037
2034 2038 static void
2035 2039 nfs4delegreturn_thread(struct cb_recall_pass *args)
2036 2040 {
2037 2041 rnode4_t *rp;
2038 2042 vnode_t *vp;
2039 2043 cred_t *cr;
2040 2044 int dtype, error, flags;
2041 2045 bool_t rdirty, rip;
2042 2046 kmutex_t cpr_lock;
2043 2047 callb_cpr_t cpr_info;
2044 2048 struct nfs4_callback_globals *ncg;
2045 2049
2046 2050 ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone());
2047 2051 ASSERT(ncg != NULL);
2048 2052
2049 2053 mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL);
2050 2054
2051 2055 CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr,
2052 2056 "nfsv4delegRtn");
2053 2057
2054 2058 rp = args->rp;
2055 2059 vp = RTOV4(rp);
2056 2060
2057 2061 mutex_enter(&rp->r_statev4_lock);
2058 2062 if (rp->r_deleg_type == OPEN_DELEGATE_NONE) {
2059 2063 mutex_exit(&rp->r_statev4_lock);
2060 2064 goto out;
2061 2065 }
2062 2066 mutex_exit(&rp->r_statev4_lock);
2063 2067
2064 2068 /*
2065 2069 * Take the read-write lock in read mode to prevent other
2066 2070 * threads from modifying the data during the recall. This
2067 2071 * doesn't affect mmappers.
2068 2072 */
2069 2073 (void) nfs_rw_enter_sig(&rp->r_rwlock, RW_READER, FALSE);
2070 2074
2071 2075 /* Proceed with delegreturn */
2072 2076
2073 2077 mutex_enter(&rp->r_statev4_lock);
2074 2078 if (rp->r_deleg_type == OPEN_DELEGATE_NONE) {
2075 2079 mutex_exit(&rp->r_statev4_lock);
2076 2080 nfs_rw_exit(&rp->r_rwlock);
2077 2081 goto out;
2078 2082 }
2079 2083 dtype = rp->r_deleg_type;
2080 2084 cr = rp->r_deleg_cred;
2081 2085 ASSERT(cr != NULL);
2082 2086 crhold(cr);
2083 2087 mutex_exit(&rp->r_statev4_lock);
2084 2088
2085 2089 flags = args->flags;
2086 2090
2087 2091 /*
2088 2092 * If the file is being truncated at the server, then throw
2089 2093 	 * away all of the pages; it doesn't matter what flavor of
2090 2094 * delegation we have.
2091 2095 */
2092 2096
2093 2097 if (args->truncate) {
2094 2098 ncg->nfs4_callback_stats.recall_trunc.value.ui64++;
2095 2099 nfs4_invalidate_pages(vp, 0, cr);
2096 2100 } else if (dtype == OPEN_DELEGATE_WRITE) {
2097 2101
2098 2102 mutex_enter(&rp->r_statelock);
2099 2103 rdirty = rp->r_flags & R4DIRTY;
2100 2104 mutex_exit(&rp->r_statelock);
2101 2105
2102 2106 if (rdirty) {
2103 2107 error = VOP_PUTPAGE(vp, 0, 0, 0, cr, NULL);
2104 2108
2105 2109 if (error)
2106 2110 CB_WARN1("nfs4delegreturn_thread:"
2107 2111 " VOP_PUTPAGE: %d\n", error);
2108 2112 }
2109 2113 /* turn off NFS4_DR_PUSH because we just did that above. */
2110 2114 flags &= ~NFS4_DR_PUSH;
2111 2115 }
2112 2116
2113 2117 mutex_enter(&rp->r_statelock);
2114 2118 rip = rp->r_flags & R4RECOVERRP;
2115 2119 mutex_exit(&rp->r_statelock);
2116 2120
2117 2121 /* If a failed recovery is indicated, discard the pages */
2118 2122
2119 2123 if (rip) {
2120 2124
2121 2125 error = VOP_PUTPAGE(vp, 0, 0, B_INVAL, cr, NULL);
2122 2126
2123 2127 if (error)
2124 2128 CB_WARN1("nfs4delegreturn_thread: VOP_PUTPAGE: %d\n",
2125 2129 error);
2126 2130 }
2127 2131
2128 2132 /*
2129 2133 * Pass the flags to nfs4delegreturn_impl, but be sure not to pass
2130 2134 * NFS4_DR_DID_OP, which just calls nfs4delegreturn_async again.
2131 2135 */
2132 2136 flags &= ~NFS4_DR_DID_OP;
2133 2137
2134 2138 (void) nfs4delegreturn_impl(rp, flags, ncg);
2135 2139
2136 2140 nfs_rw_exit(&rp->r_rwlock);
2137 2141 crfree(cr);
2138 2142 out:
2139 2143 kmem_free(args, sizeof (struct cb_recall_pass));
2140 2144 VN_RELE(vp);
2141 2145 mutex_enter(&cpr_lock);
2142 2146 CALLB_CPR_EXIT(&cpr_info);
2143 2147 mutex_destroy(&cpr_lock);
2144 2148 zthread_exit();
2145 2149 }
2146 2150
2147 2151 /*
2148 2152  * This function assumes that the caller is either doing recovery
2149 2153  * (and therefore cannot call nfs4_start_op) or has already called
2150 2154  * nfs4_start_op().
2151 2155 */
2152 2156 void
2153 2157 nfs4_delegation_accept(rnode4_t *rp, open_claim_type4 claim, OPEN4res *res,
2154 2158 nfs4_ga_res_t *garp, cred_t *cr)
2155 2159 {
2156 2160 open_read_delegation4 *orp;
2157 2161 open_write_delegation4 *owp;
2158 2162 nfs4_server_t *np;
2159 2163 bool_t already = FALSE;
2160 2164 bool_t recall = FALSE;
2161 2165 bool_t valid_garp = TRUE;
2162 2166 bool_t delegation_granted = FALSE;
2163 2167 bool_t dr_needed = FALSE;
2164 2168 bool_t recov;
2165 2169 int dr_flags = 0;
2166 2170 long mapcnt;
2167 2171 uint_t rflag;
2168 2172 mntinfo4_t *mi;
2169 2173 struct nfs4_callback_globals *ncg;
2170 2174 open_delegation_type4 odt;
2171 2175
2172 2176 ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone());
2173 2177 ASSERT(ncg != NULL);
2174 2178
2175 2179 mi = VTOMI4(RTOV4(rp));
2176 2180
2177 2181 /*
2178 2182 * Accept a delegation granted to the client via an OPEN.
2179 2183 * Set the delegation fields in the rnode and insert the
2180 2184 * rnode onto the list anchored in the nfs4_server_t. The
2181 2185 * proper locking order requires the nfs4_server_t first,
2182 2186 * even though it may not be needed in all cases.
2183 2187 *
2184 2188 * NB: find_nfs4_server returns with s_lock held.
2185 2189 */
2186 2190
2187 2191 if ((np = find_nfs4_server(mi)) == NULL)
2188 2192 return;
2189 2193
2190 2194 /* grab the statelock too, for examining r_mapcnt */
2191 2195 mutex_enter(&rp->r_statelock);
2192 2196 mutex_enter(&rp->r_statev4_lock);
2193 2197
2194 2198 if (rp->r_deleg_type == OPEN_DELEGATE_READ ||
2195 2199 rp->r_deleg_type == OPEN_DELEGATE_WRITE)
2196 2200 already = TRUE;
2197 2201
2198 2202 odt = res->delegation.delegation_type;
2199 2203
2200 2204 if (odt == OPEN_DELEGATE_READ) {
2201 2205
2202 2206 rp->r_deleg_type = res->delegation.delegation_type;
2203 2207 orp = &res->delegation.open_delegation4_u.read;
2204 2208 rp->r_deleg_stateid = orp->stateid;
2205 2209 rp->r_deleg_perms = orp->permissions;
2206 2210 if (claim == CLAIM_PREVIOUS)
2207 2211 if ((recall = orp->recall) != 0)
2208 2212 dr_needed = TRUE;
2209 2213
2210 2214 delegation_granted = TRUE;
2211 2215
2212 2216 ncg->nfs4_callback_stats.delegations.value.ui64++;
2213 2217 ncg->nfs4_callback_stats.delegaccept_r.value.ui64++;
2214 2218
2215 2219 } else if (odt == OPEN_DELEGATE_WRITE) {
2216 2220
2217 2221 rp->r_deleg_type = res->delegation.delegation_type;
2218 2222 owp = &res->delegation.open_delegation4_u.write;
2219 2223 rp->r_deleg_stateid = owp->stateid;
2220 2224 rp->r_deleg_perms = owp->permissions;
2221 2225 rp->r_deleg_limit = owp->space_limit;
2222 2226 if (claim == CLAIM_PREVIOUS)
2223 2227 if ((recall = owp->recall) != 0)
2224 2228 dr_needed = TRUE;
2225 2229
2226 2230 delegation_granted = TRUE;
2227 2231
2228 2232 if (garp == NULL || !garp->n4g_change_valid) {
2229 2233 valid_garp = FALSE;
2230 2234 rp->r_deleg_change = 0;
2231 2235 rp->r_deleg_change_grant = 0;
2232 2236 } else {
2233 2237 rp->r_deleg_change = garp->n4g_change;
2234 2238 rp->r_deleg_change_grant = garp->n4g_change;
2235 2239 }
2236 2240 mapcnt = rp->r_mapcnt;
2237 2241 rflag = rp->r_flags;
2238 2242
2239 2243 /*
2240 2244 * Update the delegation change attribute if
2241 2245 * there are mappers for the file is dirty. This
2242 2246 		 * there are mappers or the file is dirty. This
2243 2247 * reboot.
2244 2248 */
2245 2249 if (mapcnt > 0 || rflag & R4DIRTY)
2246 2250 rp->r_deleg_change++;
2247 2251
2248 2252 NFS4_DEBUG(nfs4_callback_debug, (CE_NOTE,
2249 2253 "nfs4_delegation_accept: r_deleg_change: 0x%x\n",
2250 2254 (int)(rp->r_deleg_change >> 32)));
2251 2255 NFS4_DEBUG(nfs4_callback_debug, (CE_NOTE,
2252 2256 "nfs4_delegation_accept: r_delg_change_grant: 0x%x\n",
2253 2257 (int)(rp->r_deleg_change_grant >> 32)));
2254 2258
2255 2259
2256 2260 ncg->nfs4_callback_stats.delegations.value.ui64++;
2257 2261 ncg->nfs4_callback_stats.delegaccept_rw.value.ui64++;
2258 2262 } else if (already) {
2259 2263 /*
2260 2264 * No delegation granted. If the rnode currently has
2261 2265 		 * one, then consider it tainted and return it.
2262 2266 */
2263 2267 dr_needed = TRUE;
2264 2268 }
2265 2269
2266 2270 if (delegation_granted) {
2267 2271 /* Add the rnode to the list. */
2268 2272 if (!already) {
2269 2273 crhold(cr);
2270 2274 rp->r_deleg_cred = cr;
2271 2275
2272 2276 ASSERT(mutex_owned(&np->s_lock));
2273 2277 list_insert_head(&np->s_deleg_list, rp);
2274 2278 /* added list node gets a reference */
2275 2279 np->s_refcnt++;
2276 2280 nfs4_inc_state_ref_count_nolock(np, mi);
2277 2281 }
2278 2282 rp->r_deleg_needs_recovery = OPEN_DELEGATE_NONE;
2279 2283 }
2280 2284
2281 2285 /*
2282 2286 * We've now safely accepted the delegation, if any. Drop the
2283 2287 * locks and figure out what post-processing is needed. We'd
2284 2288 * like to retain r_statev4_lock, but nfs4_server_rele takes
2285 2289 * s_lock which would be a lock ordering violation.
2286 2290 */
2287 2291 mutex_exit(&rp->r_statev4_lock);
2288 2292 mutex_exit(&rp->r_statelock);
2289 2293 mutex_exit(&np->s_lock);
2290 2294 nfs4_server_rele(np);
2291 2295
2292 2296 /*
2293 2297 * Check to see if we are in recovery. Remember that
2294 2298 * this function is protected by start_op, so a recovery
2295 2299 * cannot begin until we are out of here.
2296 2300 */
2297 2301 mutex_enter(&mi->mi_lock);
2298 2302 recov = mi->mi_recovflags & MI4_RECOV_ACTIV;
2299 2303 mutex_exit(&mi->mi_lock);
2300 2304
2301 2305 mutex_enter(&rp->r_statev4_lock);
2302 2306
2303 2307 if (nfs4_delegreturn_policy == IMMEDIATE || !valid_garp)
2304 2308 dr_needed = TRUE;
2305 2309
2306 2310 if (dr_needed && rp->r_deleg_return_pending == FALSE) {
2307 2311 if (recov) {
2308 2312 /*
2309 2313 * We cannot call delegreturn from inside
2310 2314 * of recovery or VOP_PUTPAGE will hang
2311 2315 * due to nfs4_start_fop call in
2312 2316 * nfs4write. Use dlistadd to add the
2313 2317 * rnode to the list of rnodes needing
2314 2318 * cleaning. We do not need to do reopen
2315 2319 * here because recov_openfiles will do it.
2316 2320 * In the non-recall case, just discard the
2317 2321 * delegation as it is no longer valid.
2318 2322 */
2319 2323 if (recall)
2320 2324 dr_flags = NFS4_DR_PUSH;
2321 2325 else
2322 2326 dr_flags = NFS4_DR_PUSH|NFS4_DR_DISCARD;
2323 2327
2324 2328 nfs4_dlistadd(rp, ncg, dr_flags);
2325 2329 dr_flags = 0;
2326 2330 } else {
2327 2331 /*
2328 2332 * Push the modified data back to the server,
2329 2333 * reopen any delegation open streams, and return
2330 2334 * the delegation. Drop the statev4_lock first!
2331 2335 */
2332 2336 dr_flags = NFS4_DR_PUSH|NFS4_DR_DID_OP|NFS4_DR_REOPEN;
2333 2337 }
2334 2338 }
2335 2339 mutex_exit(&rp->r_statev4_lock);
2336 2340 if (dr_flags)
2337 2341 (void) nfs4delegreturn_impl(rp, dr_flags, ncg);
2338 2342 }
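
A hypothetical call site (hedged sketch; the surrounding OPEN code is not
shown in this webrev): the function is invoked from OPEN processing while
start_op is in effect, with garp possibly NULL when no attributes came
back.

	/* res is the decoded OPEN4res for this rnode's file */
	if (res->status == NFS4_OK)
		nfs4_delegation_accept(rp, claim, res, garp, cr);
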
2339 2343
2340 2344 /*
2341 2345 * nfs4delegabandon - Abandon the delegation on an rnode4. This code
2342 2346 * is called when the client receives EXPIRED, BAD_STATEID, OLD_STATEID
2343 2347 * or BADSEQID and the recovery code is unable to recover. Push any
2344 2348 * dirty data back to the server and return the delegation (if any).
2345 2349 */
2346 2350
2347 2351 void
2348 2352 nfs4delegabandon(rnode4_t *rp)
2349 2353 {
2350 2354 vnode_t *vp;
2351 2355 struct cb_recall_pass *pp;
2352 2356 open_delegation_type4 dt;
2353 2357
2354 2358 mutex_enter(&rp->r_statev4_lock);
2355 2359 dt = rp->r_deleg_type;
2356 2360 mutex_exit(&rp->r_statev4_lock);
2357 2361
2358 2362 if (dt == OPEN_DELEGATE_NONE)
2359 2363 return;
2360 2364
2361 2365 vp = RTOV4(rp);
2362 2366 VN_HOLD(vp);
2363 2367
2364 2368 pp = kmem_alloc(sizeof (struct cb_recall_pass), KM_SLEEP);
2365 2369 pp->rp = rp;
2366 2370 /*
2367 2371 * Recovery on the file has failed and we want to return
2368 2372 * the delegation. We don't want to reopen files and
2369 2373 * nfs4delegreturn_thread() figures out what to do about
2370 2374 * the data. The only thing to do is attempt to return
2371 2375 * the delegation.
2372 2376 */
2373 2377 pp->flags = 0;
2374 2378 pp->truncate = FALSE;
2375 2379
2376 2380 /*
2377 2381 * Fire up a thread to do the delegreturn; this is
2378 2382 * necessary because we could be inside a GETPAGE or
2379 2383 * PUTPAGE and we cannot do another one.
2380 2384 */
2381 2385
2382 2386 (void) zthread_create(NULL, 0, nfs4delegreturn_thread, pp, 0,
2383 2387 minclsyspri);
2384 2388 }
2385 2389
2386 2390 static int
2387 2391 wait_for_recall1(vnode_t *vp, nfs4_op_hint_t op, nfs4_recov_state_t *rsp,
2388 2392 int flg)
2389 2393 {
2390 2394 rnode4_t *rp;
2391 2395 int error = 0;
2392 2396
2393 2397 #ifdef lint
2394 2398 op = op;
2395 2399 #endif
2396 2400
2397 2401 if (vp && vp->v_type == VREG) {
2398 2402 rp = VTOR4(vp);
2399 2403
2400 2404 /*
2401 2405 * Take r_deleg_recall_lock in read mode to synchronize
2402 2406 * with delegreturn.
2403 2407 */
2404 2408 error = nfs_rw_enter_sig(&rp->r_deleg_recall_lock,
2405 2409 RW_READER, INTR4(vp));
2406 2410
2407 2411 if (error == 0)
2408 2412 rsp->rs_flags |= flg;
2409 2413
2410 2414 }
2411 2415 return (error);
2412 2416 }
2413 2417
2414 2418 void
2415 2419 nfs4_end_op_recall(vnode_t *vp1, vnode_t *vp2, nfs4_recov_state_t *rsp)
2416 2420 {
2417 2421 NFS4_DEBUG(nfs4_recall_debug,
2418 2422 (CE_NOTE, "nfs4_end_op_recall: 0x%p, 0x%p\n",
2419 2423 (void *)vp1, (void *)vp2));
2420 2424
2421 2425 if (vp2 && rsp->rs_flags & NFS4_RS_RECALL_HELD2)
2422 2426 nfs_rw_exit(&VTOR4(vp2)->r_deleg_recall_lock);
2423 2427 if (vp1 && rsp->rs_flags & NFS4_RS_RECALL_HELD1)
2424 2428 nfs_rw_exit(&VTOR4(vp1)->r_deleg_recall_lock);
2425 2429 }
2426 2430
2427 2431 int
2428 2432 wait_for_recall(vnode_t *vp1, vnode_t *vp2, nfs4_op_hint_t op,
2429 2433 nfs4_recov_state_t *rsp)
2430 2434 {
2431 2435 int error;
2432 2436
2433 2437 NFS4_DEBUG(nfs4_recall_debug,
2434 2438 (CE_NOTE, "wait_for_recall: 0x%p, 0x%p\n",
2435 2439 (void *)vp1, (void *) vp2));
2436 2440
2437 2441 rsp->rs_flags &= ~(NFS4_RS_RECALL_HELD1|NFS4_RS_RECALL_HELD2);
2438 2442
2439 2443 if ((error = wait_for_recall1(vp1, op, rsp, NFS4_RS_RECALL_HELD1)) != 0)
2440 2444 return (error);
2441 2445
2442 2446 if ((error = wait_for_recall1(vp2, op, rsp, NFS4_RS_RECALL_HELD2))
2443 2447 != 0) {
2444 2448 if (rsp->rs_flags & NFS4_RS_RECALL_HELD1) {
2445 2449 nfs_rw_exit(&VTOR4(vp1)->r_deleg_recall_lock);
2446 2450 rsp->rs_flags &= ~NFS4_RS_RECALL_HELD1;
2447 2451 }
2448 2452
2449 2453 return (error);
2450 2454 }
2451 2455
2452 2456 return (0);
2453 2457 }
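
Usage sketch (assumptions: "vp1"/"vp2" are regular-file vnodes or NULL, and
OH_READ is just a representative op hint): wait_for_recall() records in
rs_flags which recall locks it actually took, and nfs4_end_op_recall()
drops only those.

	nfs4_recov_state_t rs;

	rs.rs_flags = 0;
	rs.rs_num_retry_despite_err = 0;
	if (wait_for_recall(vp1, vp2, OH_READ, &rs) == 0) {
		/* ... perform the operation ... */
		nfs4_end_op_recall(vp1, vp2, &rs);
	}
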
2454 2458
2455 2459 /*
2456 2460 * nfs4_dlistadd - Add this rnode to a list of rnodes to be
2457 2461 * DELEGRETURN'd at the end of recovery.
2458 2462 */
2459 2463
2460 2464 static void
2461 2465 nfs4_dlistadd(rnode4_t *rp, struct nfs4_callback_globals *ncg, int flags)
2462 2466 {
2463 2467 struct nfs4_dnode *dp;
2464 2468
2465 2469 ASSERT(mutex_owned(&rp->r_statev4_lock));
2466 2470 /*
2467 2471 * Mark the delegation as having a return pending.
2468 2472 * This will prevent the use of the delegation stateID
2469 2473 * by read, write, setattr and open.
2470 2474 */
2471 2475 rp->r_deleg_return_pending = TRUE;
2472 2476 dp = kmem_alloc(sizeof (*dp), KM_SLEEP);
2473 2477 VN_HOLD(RTOV4(rp));
2474 2478 dp->rnodep = rp;
2475 2479 dp->flags = flags;
2476 2480 mutex_enter(&ncg->nfs4_dlist_lock);
2477 2481 list_insert_head(&ncg->nfs4_dlist, dp);
2478 2482 #ifdef DEBUG
2479 2483 ncg->nfs4_dlistadd_c++;
2480 2484 #endif
2481 2485 mutex_exit(&ncg->nfs4_dlist_lock);
2482 2486 }
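
Illustrative pairing (a sketch under the file's own locking rules): an
rnode is queued while holding r_statev4_lock, and the deferred DELEGRETURN,
plus the VN_RELE of the hold taken at add time, happens later in
nfs4_dlistclean_impl().

	mutex_enter(&rp->r_statev4_lock);
	nfs4_dlistadd(rp, ncg, NFS4_DR_PUSH | NFS4_DR_DISCARD);
	mutex_exit(&rp->r_statev4_lock);

	/* ... later, at the end of recovery ... */
	nfs4_dlistclean_impl(ncg, 0);
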
2483 2487
2484 2488 /*
2485 2489  * nfs4_dlistclean_impl - Do DELEGRETURN for each rnode on the list
2486 2490 * of files awaiting cleaning. If the override_flags are non-zero
2487 2491 * then use them rather than the flags that were set when the rnode
2488 2492 * was added to the dlist.
2489 2493 */
2490 2494 static void
2491 2495 nfs4_dlistclean_impl(struct nfs4_callback_globals *ncg, int override_flags)
2492 2496 {
2493 2497 rnode4_t *rp;
2494 2498 struct nfs4_dnode *dp;
2495 2499 int flags;
2496 2500
2497 2501 ASSERT(override_flags == 0 || override_flags == NFS4_DR_DISCARD);
2498 2502
2499 2503 mutex_enter(&ncg->nfs4_dlist_lock);
2500 2504 while ((dp = list_head(&ncg->nfs4_dlist)) != NULL) {
2501 2505 #ifdef DEBUG
2502 2506 ncg->nfs4_dlistclean_c++;
2503 2507 #endif
2504 2508 list_remove(&ncg->nfs4_dlist, dp);
2505 2509 mutex_exit(&ncg->nfs4_dlist_lock);
2506 2510 rp = dp->rnodep;
2507 2511 flags = (override_flags != 0) ? override_flags : dp->flags;
2508 2512 kmem_free(dp, sizeof (*dp));
2509 2513 (void) nfs4delegreturn_impl(rp, flags, ncg);
2510 2514 VN_RELE(RTOV4(rp));
2511 2515 mutex_enter(&ncg->nfs4_dlist_lock);
2512 2516 }
2513 2517 mutex_exit(&ncg->nfs4_dlist_lock);
2514 2518 }
2515 2519
2516 2520 void
2517 2521 nfs4_dlistclean(void)
2518 2522 {
2519 2523 struct nfs4_callback_globals *ncg;
2520 2524
2521 2525 ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone());
2522 2526 ASSERT(ncg != NULL);
2523 2527
2524 2528 nfs4_dlistclean_impl(ncg, 0);
2525 2529 }