/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */

#include <sys/param.h>
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/cred.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/pathname.h>
#include <sys/sysmacros.h>
#include <sys/kmem.h>
#include <sys/kstat.h>
#include <sys/mkdev.h>
#include <sys/mount.h>
#include <sys/statvfs.h>
#include <sys/errno.h>
#include <sys/debug.h>
#include <sys/cmn_err.h>
#include <sys/utsname.h>
#include <sys/bootconf.h>
#include <sys/modctl.h>
#include <sys/acl.h>
#include <sys/flock.h>
#include <sys/kstr.h>
#include <sys/stropts.h>
#include <sys/strsubr.h>
#include <sys/atomic.h>
#include <sys/disp.h>
#include <sys/policy.h>
#include <sys/list.h>
#include <sys/zone.h>

#include <rpc/types.h>
#include <rpc/auth.h>
#include <rpc/rpcsec_gss.h>
#include <rpc/clnt.h>
#include <rpc/xdr.h>

#include <nfs/nfs.h>
#include <nfs/nfs_clnt.h>
#include <nfs/mount.h>
#include <nfs/nfs_acl.h>

#include <fs/fs_subr.h>

#include <nfs/nfs4.h>
#include <nfs/rnode4.h>
#include <nfs/nfs4_clnt.h>
#include <nfs/nfssys.h>

#ifdef  DEBUG
/*
 * These are "special" state IDs and file handles that
 * match any delegation state ID or file handle.  This
 * is for testing purposes only.
 */

stateid4 nfs4_deleg_any = { 0x7FFFFFF0 };
char nfs4_deleg_fh[] = "\0377\0376\0375\0374";
nfs_fh4 nfs4_deleg_anyfh = { sizeof (nfs4_deleg_fh)-1, nfs4_deleg_fh };
nfsstat4 cb4_getattr_fail = NFS4_OK;
nfsstat4 cb4_recall_fail = NFS4_OK;

int nfs4_callback_debug;
int nfs4_recall_debug;
int nfs4_drat_debug;

#endif

#define CB_NOTE(x)      NFS4_DEBUG(nfs4_callback_debug, (CE_NOTE, x))
#define CB_WARN(x)      NFS4_DEBUG(nfs4_callback_debug, (CE_WARN, x))
#define CB_WARN1(x, y)  NFS4_DEBUG(nfs4_callback_debug, (CE_WARN, x, y))

enum nfs4_delegreturn_policy nfs4_delegreturn_policy = INACTIVE;

static zone_key_t nfs4_callback_zone_key;

/*
 * NFS4_MAPSIZE is the number of bytes we are willing to consume
 * for the block allocation map when the server grants an
 * NFS_LIMIT_BLOCK style delegation.
 */

#define NFS4_MAPSIZE    8192
#define NFS4_MAPWORDS   (NFS4_MAPSIZE/sizeof (uint_t))
#define NbPW            (NBBY*sizeof (uint_t))
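
/*
 * Editorial sketch (an illustration, not part of the original code):
 * with NBBY == 8 and sizeof (uint_t) == 4, NFS4_MAPWORDS is 2048 and
 * NbPW is 32, so the map covers NFS4_MAPSIZE * NBBY == 65536 blocks.
 * Setting and testing the bit for a given block number in such a map
 * would look like:
 *
 *      uint_t map[NFS4_MAPWORDS];
 *
 *      map[blkno / NbPW] |= 1U << (blkno % NbPW);
 *      if (map[blkno / NbPW] & (1U << (blkno % NbPW)))
 *              ...
 */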

static int nfs4_num_prognums = 1024;
static SVC_CALLOUT_TABLE nfs4_cb_sct;

struct nfs4_dnode {
        list_node_t     linkage;
        rnode4_t        *rnodep;
        int             flags;          /* Flags for nfs4delegreturn_impl() */
};

static const struct nfs4_callback_stats nfs4_callback_stats_tmpl = {
        { "delegations",        KSTAT_DATA_UINT64 },
        { "cb_getattr",         KSTAT_DATA_UINT64 },
        { "cb_recall",          KSTAT_DATA_UINT64 },
        { "cb_null",            KSTAT_DATA_UINT64 },
        { "cb_dispatch",        KSTAT_DATA_UINT64 },
        { "delegaccept_r",      KSTAT_DATA_UINT64 },
        { "delegaccept_rw",     KSTAT_DATA_UINT64 },
        { "delegreturn",        KSTAT_DATA_UINT64 },
        { "callbacks",          KSTAT_DATA_UINT64 },
        { "claim_cur",          KSTAT_DATA_UINT64 },
        { "claim_cur_ok",       KSTAT_DATA_UINT64 },
        { "recall_trunc",       KSTAT_DATA_UINT64 },
        { "recall_failed",      KSTAT_DATA_UINT64 },
        { "return_limit_write", KSTAT_DATA_UINT64 },
        { "return_limit_addmap", KSTAT_DATA_UINT64 },
        { "deleg_recover",      KSTAT_DATA_UINT64 },
        { "cb_illegal",         KSTAT_DATA_UINT64 }
};

struct nfs4_cb_port {
        list_node_t             linkage; /* linkage into per-zone port list */
        char                    netid[KNC_STRSIZE];
        char                    uaddr[KNC_STRSIZE];
        char                    protofmly[KNC_STRSIZE];
        char                    proto[KNC_STRSIZE];
};

static int cb_getattr_bytes;

struct cb_recall_pass {
        rnode4_t        *rp;
        int             flags;          /* Flags for nfs4delegreturn_impl() */
        bool_t          truncate;
};

static nfs4_open_stream_t *get_next_deleg_stream(rnode4_t *, int);
static void nfs4delegreturn_thread(struct cb_recall_pass *);
static int deleg_reopen(vnode_t *, bool_t *, struct nfs4_callback_globals *,
    int);
static void nfs4_dlistadd(rnode4_t *, struct nfs4_callback_globals *, int);
static void nfs4_dlistclean_impl(struct nfs4_callback_globals *, int);
static int nfs4delegreturn_impl(rnode4_t *, int,
    struct nfs4_callback_globals *);
static void nfs4delegreturn_cleanup_impl(rnode4_t *, nfs4_server_t *,
    struct nfs4_callback_globals *);

static void
cb_getattr(nfs_cb_argop4 *argop, nfs_cb_resop4 *resop, struct svc_req *req,
    struct compound_state *cs, struct nfs4_callback_globals *ncg)
{
        CB_GETATTR4args *args = &argop->nfs_cb_argop4_u.opcbgetattr;
        CB_GETATTR4res *resp = &resop->nfs_cb_resop4_u.opcbgetattr;
        rnode4_t *rp;
        vnode_t *vp;
        bool_t found = FALSE;
        struct nfs4_server *sp;
        struct fattr4 *fap;
        rpc_inline_t *fdata;
        long mapcnt;
        fattr4_change change;
        fattr4_size size;
        uint_t rflag;

        ncg->nfs4_callback_stats.cb_getattr.value.ui64++;

#ifdef DEBUG
        /*
         * error injection hook: set the cb4_getattr_fail global to the
         * NFSv4 protocol error to be returned
         */
        if (cb4_getattr_fail != NFS4_OK) {
                *cs->statusp = resp->status = cb4_getattr_fail;
                return;
        }
#endif

        resp->obj_attributes.attrmask = 0;

        mutex_enter(&ncg->nfs4_cb_lock);
        sp = ncg->nfs4prog2server[req->rq_prog - NFS4_CALLBACK];
        mutex_exit(&ncg->nfs4_cb_lock);

        if (nfs4_server_vlock(sp, 0) == FALSE) {

                CB_WARN("cb_getattr: cannot find server\n");

                *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
                return;
        }

        /*
         * In cb_compound, callback_ident was validated against rq_prog,
         * but we couldn't verify that it was set to the value we provided
         * at setclientid time (because we didn't have the server struct
         * yet).  Now we have the server struct, but we don't have
         * callback_ident handy.  So, validate the server struct's program
         * number against the request's RPC prog number.  At this point, we
         * know the RPC prog num is valid (else we wouldn't be here);
         * however, we don't know that it was the prog number we supplied
         * to this server at setclientid time.  If the prog numbers aren't
         * equal, log the problem and fail the request, because either the
         * callback server and/or the callback client is confused.  This
         * will probably never happen.
         */
        if (sp->s_program != req->rq_prog) {
#ifdef DEBUG
                zcmn_err(getzoneid(), CE_WARN,
                    "cb_getattr: wrong server program number srv=%d req=%d\n",
                    sp->s_program, req->rq_prog);
#else
                zcmn_err(getzoneid(), CE_WARN,
                    "cb_getattr: wrong server program number\n");
#endif
                mutex_exit(&sp->s_lock);
                nfs4_server_rele(sp);
                *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
                return;
        }

        /*
         * Search the delegation list for a matching file handle;
         * mutex on sp prevents the list from changing.
         */

        rp = list_head(&sp->s_deleg_list);
        for (; rp != NULL; rp = list_next(&sp->s_deleg_list, rp)) {
                nfs4_fhandle_t fhandle;

                sfh4_copyval(rp->r_fh, &fhandle);

                if ((fhandle.fh_len == args->fh.nfs_fh4_len &&
                    bcmp(fhandle.fh_buf, args->fh.nfs_fh4_val,
                    fhandle.fh_len) == 0)) {

                        found = TRUE;
                        break;
                }
#ifdef  DEBUG
                if (nfs4_deleg_anyfh.nfs_fh4_len == args->fh.nfs_fh4_len &&
                    bcmp(nfs4_deleg_anyfh.nfs_fh4_val, args->fh.nfs_fh4_val,
                    args->fh.nfs_fh4_len) == 0) {

                        found = TRUE;
                        break;
                }
#endif
        }

        /*
         * VN_HOLD the vnode before releasing s_lock to guarantee
         * we have a valid vnode reference.
         */
        if (found == TRUE) {
                vp = RTOV4(rp);
                VN_HOLD(vp);
        }

        mutex_exit(&sp->s_lock);
        nfs4_server_rele(sp);

        if (found == FALSE) {

                CB_WARN("cb_getattr: bad fhandle\n");

                *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
                return;
        }

        /*
         * Figure out which attributes the server wants.  We only
         * offer FATTR4_CHANGE & FATTR4_SIZE; ignore the rest.
         */
        fdata = kmem_alloc(cb_getattr_bytes, KM_SLEEP);

        /*
         * We don't actually need to create an XDR stream to encode
         * these simple data structures; e.g., there is no need for:
         * xdrmem_create(&xdr, fdata, cb_getattr_bytes, XDR_ENCODE);
         */
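        /*
         * Editorial sketch of the encoded buffer (an illustration, not
         * code): each attribute below is a 64-bit value encoded as two
         * big-endian 32-bit XDR units, so a reply carrying both change
         * and size fills fdata as:
         *
         *      fdata[0..1]: change (high word, then low word)
         *      fdata[2..3]: size (high word, then low word)
         *
         * This is exactly what the IXDR_PUT_U_HYPER() calls below
         * produce, advancing fdata as they go.
         */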
        fap = &resp->obj_attributes;

        fap->attrmask = 0;
        /* attrlist4_len starts at 0 and increases as attrs are processed */
        fap->attrlist4 = (char *)fdata;
        fap->attrlist4_len = 0;

        /* don't supply attrs if request was zero */
        if (args->attr_request != 0) {
                if (args->attr_request & FATTR4_CHANGE_MASK) {
                        /*
                         * If the file is mmapped, then increment the change
                         * attribute and return it.  This will guarantee that
                         * the server will perceive that the file has changed
                         * if there is any chance that the client application
                         * has changed it.  Otherwise, just return the change
                         * attribute as it has been updated by nfs4write_deleg.
                         */

                        mutex_enter(&rp->r_statelock);
                        mapcnt = rp->r_mapcnt;
                        rflag = rp->r_flags;
                        mutex_exit(&rp->r_statelock);

                        mutex_enter(&rp->r_statev4_lock);
                        /*
                         * If object mapped, then always return new change.
                         * Otherwise, return change if object has dirty
                         * pages.  If object doesn't have any dirty pages,
                         * then all changes have been pushed to server, so
                         * reset change to grant change.
                         */
                        if (mapcnt)
                                rp->r_deleg_change++;
                        else if (! (rflag & R4DIRTY))
                                rp->r_deleg_change = rp->r_deleg_change_grant;
                        change = rp->r_deleg_change;
                        mutex_exit(&rp->r_statev4_lock);

                        /*
                         * Use the inline XDR code directly; we know we
                         * are writing to a memory buffer with enough
                         * space, so it cannot fail.
                         */
                        IXDR_PUT_U_HYPER(fdata, change);
                        fap->attrlist4_len += 2 * BYTES_PER_XDR_UNIT;
                        fap->attrmask |= FATTR4_CHANGE_MASK;
                }

                if (args->attr_request & FATTR4_SIZE_MASK) {
                        /*
                         * Use an atomic add of 0 to fetch a consistent view
                         * of r_size; this avoids having to take rw_lock
                         * which could cause a deadlock.
                         */
                        size = atomic_add_64_nv((uint64_t *)&rp->r_size, 0);

                        /*
                         * Use the inline XDR code directly; we know we
                         * are writing to a memory buffer with enough
                         * space, so it cannot fail.
                         */
                        IXDR_PUT_U_HYPER(fdata, size);
                        fap->attrlist4_len += 2 * BYTES_PER_XDR_UNIT;
                        fap->attrmask |= FATTR4_SIZE_MASK;
                }
        }

        VN_RELE(vp);

        *cs->statusp = resp->status = NFS4_OK;
}

static void
cb_getattr_free(nfs_cb_resop4 *resop)
{
        if (resop->nfs_cb_resop4_u.opcbgetattr.obj_attributes.attrlist4)
                kmem_free(resop->nfs_cb_resop4_u.opcbgetattr.
                    obj_attributes.attrlist4, cb_getattr_bytes);
}

static void
cb_recall(nfs_cb_argop4 *argop, nfs_cb_resop4 *resop, struct svc_req *req,
    struct compound_state *cs, struct nfs4_callback_globals *ncg)
{
        CB_RECALL4args *args = &argop->nfs_cb_argop4_u.opcbrecall;
        CB_RECALL4res *resp = &resop->nfs_cb_resop4_u.opcbrecall;
        rnode4_t *rp;
        vnode_t *vp;
        struct nfs4_server *sp;
        bool_t found = FALSE;

        ncg->nfs4_callback_stats.cb_recall.value.ui64++;

        ASSERT(req->rq_prog >= NFS4_CALLBACK);
        ASSERT(req->rq_prog < NFS4_CALLBACK+nfs4_num_prognums);

#ifdef DEBUG
        /*
         * error injection hook: set the cb4_recall_fail global to the
         * NFSv4 protocol error to be returned
         */
        if (cb4_recall_fail != NFS4_OK) {
                *cs->statusp = resp->status = cb4_recall_fail;
                return;
        }
#endif

        mutex_enter(&ncg->nfs4_cb_lock);
        sp = ncg->nfs4prog2server[req->rq_prog - NFS4_CALLBACK];
        mutex_exit(&ncg->nfs4_cb_lock);

        if (nfs4_server_vlock(sp, 0) == FALSE) {

                CB_WARN("cb_recall: cannot find server\n");

                *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
                return;
        }

        /*
         * Search the delegation list for a matching file handle
         * AND stateid; mutex on sp prevents the list from changing.
         */

        rp = list_head(&sp->s_deleg_list);
        for (; rp != NULL; rp = list_next(&sp->s_deleg_list, rp)) {
                mutex_enter(&rp->r_statev4_lock);

                /* check both state id and file handle! */

                if ((bcmp(&rp->r_deleg_stateid, &args->stateid,
                    sizeof (stateid4)) == 0)) {
                        nfs4_fhandle_t fhandle;

                        sfh4_copyval(rp->r_fh, &fhandle);
                        if ((fhandle.fh_len == args->fh.nfs_fh4_len &&
                            bcmp(fhandle.fh_buf, args->fh.nfs_fh4_val,
                            fhandle.fh_len) == 0)) {

                                found = TRUE;
                                break;
                        } else {
#ifdef  DEBUG
                                CB_WARN("cb_recall: stateid OK, bad fh");
#endif
                        }
                }
#ifdef  DEBUG
                if (bcmp(&args->stateid, &nfs4_deleg_any,
                    sizeof (stateid4)) == 0) {

                        found = TRUE;
                        break;
                }
#endif
                mutex_exit(&rp->r_statev4_lock);
        }

        /*
         * VN_HOLD the vnode before releasing s_lock to guarantee
         * we have a valid vnode reference.  The async thread will
         * release the hold when it's done.
         */
        if (found == TRUE) {
                mutex_exit(&rp->r_statev4_lock);
                vp = RTOV4(rp);
                VN_HOLD(vp);
        }
        mutex_exit(&sp->s_lock);
        nfs4_server_rele(sp);

        if (found == FALSE) {

                CB_WARN("cb_recall: bad stateid\n");

                *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
                return;
        }

        /* Fire up a thread to do the delegreturn */
        nfs4delegreturn_async(rp, NFS4_DR_RECALL|NFS4_DR_REOPEN,
            args->truncate);

        *cs->statusp = resp->status = 0;
}

/* ARGSUSED */
static void
cb_recall_free(nfs_cb_resop4 *resop)
{
        /* nothing to do here, cb_recall doesn't kmem_alloc */
}

/*
 * This function handles the CB_NULL proc call from an NFSv4 server.
 *
 * We note that the server has sent a CB_NULL for later processing in
 * the recovery logic.  The note lets us pause slightly after the
 * setclientid and before reopening files.  The pause gives the NFSv4
 * server time to receive the CB_NULL reply and adjust its internal
 * structures so that it has the opportunity to grant delegations to
 * the reopened files.
 */

/* ARGSUSED */
static void
cb_null(CB_COMPOUND4args *args, CB_COMPOUND4res *resp, struct svc_req *req,
    struct nfs4_callback_globals *ncg)
{
        struct nfs4_server *sp;

        ncg->nfs4_callback_stats.cb_null.value.ui64++;

        ASSERT(req->rq_prog >= NFS4_CALLBACK);
        ASSERT(req->rq_prog < NFS4_CALLBACK+nfs4_num_prognums);

        mutex_enter(&ncg->nfs4_cb_lock);
        sp = ncg->nfs4prog2server[req->rq_prog - NFS4_CALLBACK];
        mutex_exit(&ncg->nfs4_cb_lock);

        if (nfs4_server_vlock(sp, 0) != FALSE) {
                sp->s_flags |= N4S_CB_PINGED;
                cv_broadcast(&sp->wait_cb_null);
                mutex_exit(&sp->s_lock);
                nfs4_server_rele(sp);
        }
}

/*
 * cb_illegal   args: void
 *              res : status (NFS4ERR_OP_ILLEGAL)
 */
/* ARGSUSED */
static void
cb_illegal(nfs_cb_argop4 *argop, nfs_cb_resop4 *resop, struct svc_req *req,
    struct compound_state *cs, struct nfs4_callback_globals *ncg)
{
        CB_ILLEGAL4res *resp = &resop->nfs_cb_resop4_u.opcbillegal;

        ncg->nfs4_callback_stats.cb_illegal.value.ui64++;
        resop->resop = OP_CB_ILLEGAL;
        *cs->statusp = resp->status = NFS4ERR_OP_ILLEGAL;
}

static void
cb_compound(CB_COMPOUND4args *args, CB_COMPOUND4res *resp, struct svc_req *req,
    struct nfs4_callback_globals *ncg)
{
        uint_t i;
        struct compound_state cs;
        nfs_cb_argop4 *argop;
        nfs_cb_resop4 *resop, *new_res;
        uint_t op;

        bzero(&cs, sizeof (cs));
        cs.statusp = &resp->status;
        cs.cont = TRUE;

        /*
         * Form a reply tag by copying over the request tag.
         */
        resp->tag.utf8string_len = args->tag.utf8string_len;
        if (args->tag.utf8string_len != 0) {
                resp->tag.utf8string_val =
                    kmem_alloc(resp->tag.utf8string_len, KM_SLEEP);
                bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
                    args->tag.utf8string_len);
        } else {
                resp->tag.utf8string_val = NULL;
        }

        /*
         * XXX for now, minorversion should be zero
         */
        if (args->minorversion != CB4_MINORVERSION) {
                resp->array_len = 0;
                resp->array = NULL;
                resp->status = NFS4ERR_MINOR_VERS_MISMATCH;
                return;
        }

#ifdef DEBUG
        /*
         * Verify callback_ident.  It doesn't really matter if it's wrong,
         * because we don't really use callback_ident -- we use the prog
         * number of the RPC request instead.  In this case, just print a
         * DEBUG console message to reveal the brokenness of the cbclient
         * (at bkoff/cthon).
         */
        if (args->callback_ident != req->rq_prog)
                zcmn_err(getzoneid(), CE_WARN,
                    "cb_compound: cb_client using wrong "
                    "callback_ident(%d), should be %d",
                    args->callback_ident, req->rq_prog);
#endif

        resp->array_len = args->array_len;
        resp->array = kmem_zalloc(args->array_len * sizeof (nfs_cb_resop4),
            KM_SLEEP);

        for (i = 0; i < args->array_len && cs.cont; i++) {

                argop = &args->array[i];
                resop = &resp->array[i];
                resop->resop = argop->argop;
                op = (uint_t)resop->resop;

                switch (op) {

                case OP_CB_GETATTR:

                        cb_getattr(argop, resop, req, &cs, ncg);
                        break;

                case OP_CB_RECALL:

                        cb_recall(argop, resop, req, &cs, ncg);
                        break;

                case OP_CB_ILLEGAL:

                        /* fall through */

                default:
                        /*
                         * Handle OP_CB_ILLEGAL and any undefined opcode.
                         * Currently, the XDR code will return BADXDR
                         * if the cb op doesn't decode to a legal value,
                         * so it really only handles OP_CB_ILLEGAL.
                         */
                        op = OP_CB_ILLEGAL;
                        cb_illegal(argop, resop, req, &cs, ncg);
                }

                if (*cs.statusp != NFS4_OK)
                        cs.cont = FALSE;

                /*
                 * If we are not at the last op and we are to stop,
                 * compact the results array.
                 */
                if ((i + 1) < args->array_len && !cs.cont) {

                        new_res = kmem_alloc(
                            (i+1) * sizeof (nfs_cb_resop4), KM_SLEEP);
                        bcopy(resp->array,
                            new_res, (i+1) * sizeof (nfs_cb_resop4));
                        kmem_free(resp->array,
                            args->array_len * sizeof (nfs_cb_resop4));

                        resp->array_len = i + 1;
                        resp->array = new_res;
                }
        }
}

static void
cb_compound_free(CB_COMPOUND4res *resp)
{
        uint_t i, op;
        nfs_cb_resop4 *resop;

        if (resp->tag.utf8string_val) {
                UTF8STRING_FREE(resp->tag)
        }

        for (i = 0; i < resp->array_len; i++) {

                resop = &resp->array[i];
                op = (uint_t)resop->resop;

                switch (op) {

                case OP_CB_GETATTR:

                        cb_getattr_free(resop);
                        break;

                case OP_CB_RECALL:

                        cb_recall_free(resop);
                        break;

                default:
                        break;
                }
        }

        if (resp->array != NULL) {
                kmem_free(resp->array,
                    resp->array_len * sizeof (nfs_cb_resop4));
        }
}

static void
cb_dispatch(struct svc_req *req, SVCXPRT *xprt)
{
        CB_COMPOUND4args args;
        CB_COMPOUND4res res;
        struct nfs4_callback_globals *ncg;

        bool_t (*xdr_args)(), (*xdr_res)();
        void (*proc)(CB_COMPOUND4args *, CB_COMPOUND4res *, struct svc_req *,
            struct nfs4_callback_globals *);
        void (*freeproc)(CB_COMPOUND4res *);

        ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone());
        ASSERT(ncg != NULL);

        ncg->nfs4_callback_stats.cb_dispatch.value.ui64++;

        switch (req->rq_proc) {
        case CB_NULL:
                xdr_args = xdr_void;
                xdr_res = xdr_void;
                proc = cb_null;
                freeproc = NULL;
                break;

        case CB_COMPOUND:
                xdr_args = xdr_CB_COMPOUND4args_clnt;
                xdr_res = xdr_CB_COMPOUND4res;
                proc = cb_compound;
                freeproc = cb_compound_free;
                break;

        default:
                CB_WARN("cb_dispatch: no proc\n");
                svcerr_noproc(xprt);
                return;
        }

        args.tag.utf8string_val = NULL;
        args.array = NULL;

        if (!SVC_GETARGS(xprt, xdr_args, (caddr_t)&args)) {

                CB_WARN("cb_dispatch: cannot getargs\n");
                svcerr_decode(xprt);
                return;
        }

        (*proc)(&args, &res, req, ncg);

        if (svc_sendreply(xprt, xdr_res, (caddr_t)&res) == FALSE) {

                CB_WARN("cb_dispatch: bad sendreply\n");
                svcerr_systemerr(xprt);
        }

        if (freeproc)
                (*freeproc)(&res);

        if (!SVC_FREEARGS(xprt, xdr_args, (caddr_t)&args)) {

                CB_WARN("cb_dispatch: bad freeargs\n");
        }
}

static rpcprog_t
nfs4_getnextprogram(struct nfs4_callback_globals *ncg)
{
        int i, j;

        j = ncg->nfs4_program_hint;
        for (i = 0; i < nfs4_num_prognums; i++, j++) {

                if (j >= nfs4_num_prognums)
                        j = 0;

                if (ncg->nfs4prog2server[j] == NULL) {
                        ncg->nfs4_program_hint = j+1;
                        return (j+NFS4_CALLBACK);
                }
        }

        return (0);
}
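
/*
 * Editorial note: callback program numbers are allocated as
 * NFS4_CALLBACK + slot, where slot indexes nfs4prog2server[].  The
 * mapping is inverted in nfs4callback_destroy() and cb_dispatch(),
 * for example:
 *
 *      slot = np->s_program - NFS4_CALLBACK;
 *      sp = ncg->nfs4prog2server[req->rq_prog - NFS4_CALLBACK];
 */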

void
nfs4callback_destroy(nfs4_server_t *np)
{
        struct nfs4_callback_globals *ncg;
        int i;

        if (np->s_program == 0)
                return;

        ncg = np->zone_globals;
        i = np->s_program - NFS4_CALLBACK;

        mutex_enter(&ncg->nfs4_cb_lock);

        ASSERT(ncg->nfs4prog2server[i] == np);

        ncg->nfs4prog2server[i] = NULL;

        if (i < ncg->nfs4_program_hint)
                ncg->nfs4_program_hint = i;

        mutex_exit(&ncg->nfs4_cb_lock);
}

/*
 * nfs4_setport - This function saves a netid and universal address for
 * the callback program.  These values will be used during setclientid.
 */
static void
nfs4_setport(char *netid, char *uaddr, char *protofmly, char *proto,
    struct nfs4_callback_globals *ncg)
{
        struct nfs4_cb_port *p;
        bool_t found = FALSE;

        ASSERT(MUTEX_HELD(&ncg->nfs4_cb_lock));

        p = list_head(&ncg->nfs4_cb_ports);
        for (; p != NULL; p = list_next(&ncg->nfs4_cb_ports, p)) {
                if (strcmp(p->netid, netid) == 0) {
                        found = TRUE;
                        break;
                }
        }
        if (found == TRUE)
                (void) strcpy(p->uaddr, uaddr);
        else {
                p = kmem_alloc(sizeof (*p), KM_SLEEP);

                (void) strcpy(p->uaddr, uaddr);
                (void) strcpy(p->netid, netid);
                (void) strcpy(p->protofmly, protofmly);
                (void) strcpy(p->proto, proto);
                list_insert_head(&ncg->nfs4_cb_ports, p);
        }
}

/*
 * nfs4_cb_args - This function is used to construct the callback
 * portion of the arguments needed for setclientid.
 */

void
nfs4_cb_args(nfs4_server_t *np, struct knetconfig *knc, SETCLIENTID4args *args)
{
        struct nfs4_cb_port *p;
        bool_t found = FALSE;
        rpcprog_t pgm;
        struct nfs4_callback_globals *ncg = np->zone_globals;

        /*
         * This server structure may already have a program number
         * assigned to it.  This happens when the client has to
         * re-issue SETCLIENTID.  Just re-use the information.
         */
        if (np->s_program >= NFS4_CALLBACK &&
            np->s_program < NFS4_CALLBACK + nfs4_num_prognums)
                nfs4callback_destroy(np);

        mutex_enter(&ncg->nfs4_cb_lock);

        p = list_head(&ncg->nfs4_cb_ports);
        for (; p != NULL; p = list_next(&ncg->nfs4_cb_ports, p)) {
                if (strcmp(p->protofmly, knc->knc_protofmly) == 0 &&
                    strcmp(p->proto, knc->knc_proto) == 0) {
                        found = TRUE;
                        break;
                }
        }

        if (found == FALSE) {

                NFS4_DEBUG(nfs4_callback_debug,
                    (CE_WARN, "nfs4_cb_args: could not find netid for %s/%s\n",
                    knc->knc_protofmly, knc->knc_proto));

                args->callback.cb_program = 0;
                args->callback.cb_location.r_netid = NULL;
                args->callback.cb_location.r_addr = NULL;
                args->callback_ident = 0;
                mutex_exit(&ncg->nfs4_cb_lock);
                return;
        }

        if ((pgm = nfs4_getnextprogram(ncg)) == 0) {
                CB_WARN("nfs4_cb_args: out of program numbers\n");

                args->callback.cb_program = 0;
                args->callback.cb_location.r_netid = NULL;
                args->callback.cb_location.r_addr = NULL;
                args->callback_ident = 0;
                mutex_exit(&ncg->nfs4_cb_lock);
                return;
        }

        ncg->nfs4prog2server[pgm-NFS4_CALLBACK] = np;
        args->callback.cb_program = pgm;
        args->callback.cb_location.r_netid = p->netid;
        args->callback.cb_location.r_addr = p->uaddr;
        args->callback_ident = pgm;

        np->s_program = pgm;

        mutex_exit(&ncg->nfs4_cb_lock);
}

static int
nfs4_dquery(struct nfs4_svc_args *arg, model_t model)
{
        file_t *fp;
        vnode_t *vp;
        rnode4_t *rp;
        int error;
        STRUCT_HANDLE(nfs4_svc_args, uap);

        STRUCT_SET_HANDLE(uap, model, arg);

        if ((fp = getf(STRUCT_FGET(uap, fd))) == NULL)
                return (EBADF);

        vp = fp->f_vnode;

        if (vp == NULL || vp->v_type != VREG ||
            !vn_matchops(vp, nfs4_vnodeops)) {
                releasef(STRUCT_FGET(uap, fd));
                return (EBADF);
        }

        rp = VTOR4(vp);

        /*
         * I can't convince myself that we need locking here.  The
         * rnode cannot disappear and the value returned is instantly
         * stale anyway, so why bother?
         */

        error = suword32(STRUCT_FGETP(uap, netid), rp->r_deleg_type);
        releasef(STRUCT_FGET(uap, fd));
        return (error);
}

/*
 * NFS4 client system call.  This service does the necessary
 * initialization for the callback program.  It is fashioned after the
 * server-side interaction between nfsd and the kernel.  On the client,
 * the mount command forks, and the child process does the necessary
 * interaction with the kernel.
 *
 * uap->fd is the fd of an open transport provider
 */
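/*
 * Editorial sketch of the expected caller (hypothetical, for
 * illustration only): the mount child opens a transport endpoint and
 * invokes this service through the private nfssys() system call with
 * something on the order of:
 *
 *      struct nfs4_svc_args nsa;
 *
 *      nsa.fd = fd;
 *      nsa.cmd = NFS4_SETPORT | NFS4_KRPC_START;
 *      nsa.netid = netid;
 *      nsa.addr = uaddr;
 *      nsa.protofmly = protofmly;
 *      nsa.proto = proto;
 *      (void) _nfssys(NFS4_SVC, &nsa);
 */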
int
nfs4_svc(struct nfs4_svc_args *arg, model_t model)
{
        file_t *fp;
        int error;
        int readsize;
        char buf[KNC_STRSIZE], uaddr[KNC_STRSIZE];
        char protofmly[KNC_STRSIZE], proto[KNC_STRSIZE];
        size_t len;
        STRUCT_HANDLE(nfs4_svc_args, uap);
        struct netbuf addrmask;
        int cmd;
        SVCMASTERXPRT *cb_xprt;
        struct nfs4_callback_globals *ncg;

#ifdef lint
        model = model;          /* STRUCT macros don't always refer to it */
#endif

        STRUCT_SET_HANDLE(uap, model, arg);

        if (STRUCT_FGET(uap, cmd) == NFS4_DQUERY)
                return (nfs4_dquery(arg, model));

        if (secpolicy_nfs(CRED()) != 0)
                return (EPERM);

        if ((fp = getf(STRUCT_FGET(uap, fd))) == NULL)
                return (EBADF);

        /*
         * Set read buffer size to rsize
         * and add room for RPC headers.
         */
        readsize = nfs3tsize() + (RPC_MAXDATASIZE - NFS_MAXDATA);
        if (readsize < RPC_MAXDATASIZE)
                readsize = RPC_MAXDATASIZE;

        error = copyinstr((const char *)STRUCT_FGETP(uap, netid), buf,
            KNC_STRSIZE, &len);
        if (error) {
                releasef(STRUCT_FGET(uap, fd));
                return (error);
        }

        cmd = STRUCT_FGET(uap, cmd);

        if (cmd & NFS4_KRPC_START) {
                addrmask.len = STRUCT_FGET(uap, addrmask.len);
                addrmask.maxlen = STRUCT_FGET(uap, addrmask.maxlen);
                addrmask.buf = kmem_alloc(addrmask.maxlen, KM_SLEEP);
                error = copyin(STRUCT_FGETP(uap, addrmask.buf), addrmask.buf,
                    addrmask.len);
                if (error) {
                        releasef(STRUCT_FGET(uap, fd));
                        kmem_free(addrmask.buf, addrmask.maxlen);
                        return (error);
                }
        } else
                addrmask.buf = NULL;

        error = copyinstr((const char *)STRUCT_FGETP(uap, addr), uaddr,
            sizeof (uaddr), &len);
        if (error) {
                releasef(STRUCT_FGET(uap, fd));
                if (addrmask.buf)
                        kmem_free(addrmask.buf, addrmask.maxlen);
                return (error);
        }

        error = copyinstr((const char *)STRUCT_FGETP(uap, protofmly), protofmly,
            sizeof (protofmly), &len);
        if (error) {
                releasef(STRUCT_FGET(uap, fd));
                if (addrmask.buf)
                        kmem_free(addrmask.buf, addrmask.maxlen);
                return (error);
        }

        error = copyinstr((const char *)STRUCT_FGETP(uap, proto), proto,
            sizeof (proto), &len);
        if (error) {
                releasef(STRUCT_FGET(uap, fd));
                if (addrmask.buf)
                        kmem_free(addrmask.buf, addrmask.maxlen);
                return (error);
        }

        ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone());
        ASSERT(ncg != NULL);

        mutex_enter(&ncg->nfs4_cb_lock);
        if (cmd & NFS4_SETPORT)
                nfs4_setport(buf, uaddr, protofmly, proto, ncg);

        if (cmd & NFS4_KRPC_START) {
                error = svc_tli_kcreate(fp, readsize, buf, &addrmask, &cb_xprt,
                    &nfs4_cb_sct, NULL, NFS_CB_SVCPOOL_ID, FALSE);
                if (error) {
                        CB_WARN1("nfs4_svc: svc_tli_kcreate failed %d\n",
                            error);
                        kmem_free(addrmask.buf, addrmask.maxlen);
                }
        }

        mutex_exit(&ncg->nfs4_cb_lock);
        releasef(STRUCT_FGET(uap, fd));
        return (error);
}

struct nfs4_callback_globals *
nfs4_get_callback_globals(void)
{
        return (zone_getspecific(nfs4_callback_zone_key, nfs_zone()));
}

static void *
nfs4_callback_init_zone(zoneid_t zoneid)
{
        kstat_t *nfs4_callback_kstat;
        struct nfs4_callback_globals *ncg;

        ncg = kmem_zalloc(sizeof (*ncg), KM_SLEEP);

        ncg->nfs4prog2server = kmem_zalloc(nfs4_num_prognums *
            sizeof (struct nfs4_server *), KM_SLEEP);

        /* initialize the dlist */
        mutex_init(&ncg->nfs4_dlist_lock, NULL, MUTEX_DEFAULT, NULL);
        list_create(&ncg->nfs4_dlist, sizeof (struct nfs4_dnode),
            offsetof(struct nfs4_dnode, linkage));

        /* initialize cb_port list */
        mutex_init(&ncg->nfs4_cb_lock, NULL, MUTEX_DEFAULT, NULL);
        list_create(&ncg->nfs4_cb_ports, sizeof (struct nfs4_cb_port),
            offsetof(struct nfs4_cb_port, linkage));

        /* get our own copy of the kstats */
        bcopy(&nfs4_callback_stats_tmpl, &ncg->nfs4_callback_stats,
            sizeof (nfs4_callback_stats_tmpl));
        /* register "nfs:0:nfs4_callback_stats" for this zone */
        if ((nfs4_callback_kstat =
            kstat_create_zone("nfs", 0, "nfs4_callback_stats", "misc",
            KSTAT_TYPE_NAMED,
            sizeof (ncg->nfs4_callback_stats) / sizeof (kstat_named_t),
            KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE,
            zoneid)) != NULL) {
                nfs4_callback_kstat->ks_data = &ncg->nfs4_callback_stats;
                kstat_install(nfs4_callback_kstat);
        }
        return (ncg);
}
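
/*
 * Editorial note: the statistics registered above can be inspected
 * from userland in the owning zone, e.g. (assuming the standard
 * kstat(1M) utility):
 *
 *      $ kstat -m nfs -i 0 -n nfs4_callback_stats
 */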

static void
nfs4_discard_delegations(struct nfs4_callback_globals *ncg)
{
        nfs4_server_t *sp;
        int i, num_removed;

        /*
         * It's OK here to just run through the registered "programs", as
         * servers without programs won't have any delegations to handle.
         */
        for (i = 0; i < nfs4_num_prognums; i++) {
                rnode4_t *rp;

                mutex_enter(&ncg->nfs4_cb_lock);
                sp = ncg->nfs4prog2server[i];
                mutex_exit(&ncg->nfs4_cb_lock);

                if (nfs4_server_vlock(sp, 1) == FALSE)
                        continue;
                num_removed = 0;
                while ((rp = list_head(&sp->s_deleg_list)) != NULL) {
                        mutex_enter(&rp->r_statev4_lock);
                        if (rp->r_deleg_type == OPEN_DELEGATE_NONE) {
                                /*
                                 * We need to take matters into our own hands,
                                 * as nfs4delegreturn_cleanup_impl() won't
                                 * remove this from the list.
                                 */
                                list_remove(&sp->s_deleg_list, rp);
                                mutex_exit(&rp->r_statev4_lock);
                                nfs4_dec_state_ref_count_nolock(sp,
                                    VTOMI4(RTOV4(rp)));
                                num_removed++;
                                continue;
                        }
                        mutex_exit(&rp->r_statev4_lock);
                        VN_HOLD(RTOV4(rp));
                        mutex_exit(&sp->s_lock);
                        /*
                         * The following will remove the node from the list.
                         */
                        nfs4delegreturn_cleanup_impl(rp, sp, ncg);
                        VN_RELE(RTOV4(rp));
                        mutex_enter(&sp->s_lock);
                }
                mutex_exit(&sp->s_lock);
                /* each removed list node reles a reference */
                while (num_removed-- > 0)
                        nfs4_server_rele(sp);
                /* remove our reference for nfs4_server_vlock */
                nfs4_server_rele(sp);
        }
}

/* ARGSUSED */
static void
nfs4_callback_shutdown_zone(zoneid_t zoneid, void *data)
{
        struct nfs4_callback_globals *ncg = data;

        /*
         * Clean pending delegation return list.
         */
        nfs4_dlistclean_impl(ncg, NFS4_DR_DISCARD);

        /*
         * Discard all delegations.
         */
        nfs4_discard_delegations(ncg);
}

static void
nfs4_callback_fini_zone(zoneid_t zoneid, void *data)
{
        struct nfs4_callback_globals *ncg = data;
        struct nfs4_cb_port *p;
        nfs4_server_t *sp, *next;
        nfs4_server_t freelist;
        int i;

        kstat_delete_byname_zone("nfs", 0, "nfs4_callback_stats", zoneid);

        /*
         * Discard all delegations that may have crept in since we did the
         * _shutdown.
         */
        nfs4_discard_delegations(ncg);
        /*
         * We're completely done with this zone and all associated
         * nfs4_server_t's.  Any remaining nfs4_server_ts should only have one
         * more reference outstanding -- the reference we didn't release in
         * nfs4_renew_lease_thread().
         *
         * Here we need to run through the global nfs4_server_lst as we need to
         * deal with nfs4_server_ts without programs, as they also have threads
         * created for them, and so have outstanding references that we need to
         * release.
         */
        freelist.forw = &freelist;
        freelist.back = &freelist;
        mutex_enter(&nfs4_server_lst_lock);
        sp = nfs4_server_lst.forw;
        while (sp != &nfs4_server_lst) {
                next = sp->forw;
                if (sp->zoneid == zoneid) {
                        remque(sp);
                        insque(sp, &freelist);
                }
                sp = next;
        }
        mutex_exit(&nfs4_server_lst_lock);

        sp = freelist.forw;
        while (sp != &freelist) {
                next = sp->forw;
                nfs4_server_rele(sp);   /* free the list's reference */
                sp = next;
        }

#ifdef DEBUG
        for (i = 0; i < nfs4_num_prognums; i++) {
                ASSERT(ncg->nfs4prog2server[i] == NULL);
        }
#endif
        kmem_free(ncg->nfs4prog2server, nfs4_num_prognums *
            sizeof (struct nfs4_server *));

        mutex_enter(&ncg->nfs4_cb_lock);
        while ((p = list_head(&ncg->nfs4_cb_ports)) != NULL) {
                list_remove(&ncg->nfs4_cb_ports, p);
                kmem_free(p, sizeof (*p));
        }
        list_destroy(&ncg->nfs4_cb_ports);
        mutex_destroy(&ncg->nfs4_cb_lock);
        list_destroy(&ncg->nfs4_dlist);
        mutex_destroy(&ncg->nfs4_dlist_lock);
        kmem_free(ncg, sizeof (*ncg));
}

void
nfs4_callback_init(void)
{
        int i;
        SVC_CALLOUT *nfs4_cb_sc;

        /* initialize the callback table */
        nfs4_cb_sc = kmem_alloc(nfs4_num_prognums *
            sizeof (SVC_CALLOUT), KM_SLEEP);

        for (i = 0; i < nfs4_num_prognums; i++) {
                nfs4_cb_sc[i].sc_prog = NFS4_CALLBACK+i;
                nfs4_cb_sc[i].sc_versmin = NFS_CB;
                nfs4_cb_sc[i].sc_versmax = NFS_CB;
                nfs4_cb_sc[i].sc_dispatch = cb_dispatch;
        }

        nfs4_cb_sct.sct_size = nfs4_num_prognums;
        nfs4_cb_sct.sct_free = FALSE;
        nfs4_cb_sct.sct_sc = nfs4_cb_sc;

        /*
         * Compute the max bytes required for the dynamically allocated
         * parts of the cb_getattr reply.  Only size and change are
         * supported now.  If CB_GETATTR is changed to reply with
         * additional attrs, additional sizes must be added below.
         *
         * fattr4_change + fattr4_size == uint64_t + uint64_t
         */
        cb_getattr_bytes = 2 * BYTES_PER_XDR_UNIT + 2 * BYTES_PER_XDR_UNIT;
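
        /*
         * Editorial note: with BYTES_PER_XDR_UNIT == 4, this works out
         * to 8 + 8 == 16 bytes -- one XDR hyper (two 32-bit units) for
         * each of the two attributes encoded in cb_getattr().
         */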

        zone_key_create(&nfs4_callback_zone_key, nfs4_callback_init_zone,
            nfs4_callback_shutdown_zone, nfs4_callback_fini_zone);
}

void
nfs4_callback_fini(void)
{
}

/*
 * NB: This function can be called from the *wrong* zone (i.e., the zone
 * that 'rp' belongs to and the caller's zone may not be the same).  This
 * can happen if the zone is going away and we get called from
 * nfs4_async_inactive().  In this case the globals will be NULL and we
 * won't update the counters, which doesn't matter as the zone is going
 * away anyhow.
 */
static void
nfs4delegreturn_cleanup_impl(rnode4_t *rp, nfs4_server_t *np,
    struct nfs4_callback_globals *ncg)
{
        mntinfo4_t *mi = VTOMI4(RTOV4(rp));
        boolean_t need_rele = B_FALSE;

        /*
         * The caller must be holding mi_recovlock in read mode
         * to call here.  This is provided by start_op.
         * Delegation management requires grabbing s_lock
         * first and then r_statev4_lock.
         */

        if (np == NULL) {
                np = find_nfs4_server_all(mi, 1);
                if (np == NULL)
                        return;
                need_rele = B_TRUE;
        } else {
                mutex_enter(&np->s_lock);
        }

        mutex_enter(&rp->r_statev4_lock);

        if (rp->r_deleg_type == OPEN_DELEGATE_NONE) {
                mutex_exit(&rp->r_statev4_lock);
                mutex_exit(&np->s_lock);
                if (need_rele)
                        nfs4_server_rele(np);
                return;
        }

        /*
         * Free the cred originally held when
         * the delegation was granted.  Caller must
         * hold this cred if it wants to use it after
         * this call.
         */
        crfree(rp->r_deleg_cred);
        rp->r_deleg_cred = NULL;
        rp->r_deleg_type = OPEN_DELEGATE_NONE;
        rp->r_deleg_needs_recovery = OPEN_DELEGATE_NONE;
        rp->r_deleg_needs_recall = FALSE;
        rp->r_deleg_return_pending = FALSE;

        /*
         * Remove the rnode from the server's list and
         * update the ref counts.
         */
        list_remove(&np->s_deleg_list, rp);
        mutex_exit(&rp->r_statev4_lock);
        nfs4_dec_state_ref_count_nolock(np, mi);
        mutex_exit(&np->s_lock);
        /* removed list node removes a reference */
        nfs4_server_rele(np);
        if (need_rele)
                nfs4_server_rele(np);
        if (ncg != NULL)
                ncg->nfs4_callback_stats.delegations.value.ui64--;
}

void
nfs4delegreturn_cleanup(rnode4_t *rp, nfs4_server_t *np)
{
        struct nfs4_callback_globals *ncg;

        if (np != NULL) {
                ncg = np->zone_globals;
        } else if (nfs_zone() == VTOMI4(RTOV4(rp))->mi_zone) {
                ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone());
                ASSERT(ncg != NULL);
        } else {
                /*
                 * Request coming from the wrong zone.
                 */
                ASSERT(getzoneid() == GLOBAL_ZONEID);
                ncg = NULL;
        }

        nfs4delegreturn_cleanup_impl(rp, np, ncg);
}

static void
nfs4delegreturn_save_lost_rqst(int error, nfs4_lost_rqst_t *lost_rqstp,
    cred_t *cr, vnode_t *vp)
{
        if (error != ETIMEDOUT && error != EINTR &&
            !NFS4_FRC_UNMT_ERR(error, vp->v_vfsp)) {
                lost_rqstp->lr_op = 0;
                return;
        }

        NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE,
            "nfs4delegreturn_save_lost_rqst: error %d", error));

        lost_rqstp->lr_op = OP_DELEGRETURN;
        /*
         * The vp is held and rele'd via the recovery code.
         * See nfs4_save_lost_rqst.
         */
        lost_rqstp->lr_vp = vp;
        lost_rqstp->lr_dvp = NULL;
        lost_rqstp->lr_oop = NULL;
        lost_rqstp->lr_osp = NULL;
        lost_rqstp->lr_lop = NULL;
        lost_rqstp->lr_cr = cr;
        lost_rqstp->lr_flk = NULL;
        lost_rqstp->lr_putfirst = FALSE;
}

static void
nfs4delegreturn_otw(rnode4_t *rp, cred_t *cr, nfs4_error_t *ep)
{
        COMPOUND4args_clnt args;
        COMPOUND4res_clnt res;
        nfs_argop4 argops[3];
        nfs4_ga_res_t *garp = NULL;
        hrtime_t t;
        int numops;
        int doqueue = 1;

        args.ctag = TAG_DELEGRETURN;

        numops = 3;             /* PUTFH, GETATTR, DELEGRETURN */

        args.array = argops;
        args.array_len = numops;

        argops[0].argop = OP_CPUTFH;
        argops[0].nfs_argop4_u.opcputfh.sfh = rp->r_fh;

        argops[1].argop = OP_GETATTR;
        argops[1].nfs_argop4_u.opgetattr.attr_request = NFS4_VATTR_MASK;
        argops[1].nfs_argop4_u.opgetattr.mi = VTOMI4(RTOV4(rp));

        argops[2].argop = OP_DELEGRETURN;
        argops[2].nfs_argop4_u.opdelegreturn.deleg_stateid =
            rp->r_deleg_stateid;

        t = gethrtime();
        rfs4call(VTOMI4(RTOV4(rp)), &args, &res, cr, &doqueue, 0, ep);

        if (ep->error)
                return;

        if (res.status == NFS4_OK) {
                garp = &res.array[1].nfs_resop4_u.opgetattr.ga_res;
                nfs4_attr_cache(RTOV4(rp), garp, t, cr, TRUE, NULL);
        }
        xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
}

int
nfs4_do_delegreturn(rnode4_t *rp, int flags, cred_t *cr,
    struct nfs4_callback_globals *ncg)
{
        vnode_t *vp = RTOV4(rp);
        mntinfo4_t *mi = VTOMI4(vp);
        nfs4_lost_rqst_t lost_rqst;
        nfs4_recov_state_t recov_state;
        bool_t needrecov = FALSE, recovonly, done = FALSE;
        nfs4_error_t e = { 0, NFS4_OK, RPC_SUCCESS };

        ncg->nfs4_callback_stats.delegreturn.value.ui64++;

        while (!done) {
                e.error = nfs4_start_fop(mi, vp, NULL, OH_DELEGRETURN,
                    &recov_state, &recovonly);

                if (e.error) {
                        if (flags & NFS4_DR_FORCE) {
                                (void) nfs_rw_enter_sig(&mi->mi_recovlock,
                                    RW_READER, 0);
                                nfs4delegreturn_cleanup_impl(rp, NULL, ncg);
                                nfs_rw_exit(&mi->mi_recovlock);
                        }
                        break;
                }

                /*
                 * Check to see if the delegation has already been
                 * returned by the recovery thread.  The state of
                 * the delegation cannot change at this point due
                 * to start_fop and the r_deleg_recall_lock.
                 */
                if (rp->r_deleg_type == OPEN_DELEGATE_NONE) {
                        e.error = 0;
                        nfs4_end_op(mi, vp, NULL, &recov_state, needrecov);
                        break;
                }

                if (recovonly) {
                        /*
                         * Delegation will be returned via the
                         * recovery framework.  Build a lost request
                         * structure, start recovery and get out.
                         */
                        nfs4_error_init(&e, EINTR);
                        nfs4delegreturn_save_lost_rqst(e.error, &lost_rqst,
                            cr, vp);
                        (void) nfs4_start_recovery(&e, mi, vp,
                            NULL, &rp->r_deleg_stateid,
                            lost_rqst.lr_op == OP_DELEGRETURN ?
                            &lost_rqst : NULL, OP_DELEGRETURN, NULL,
                            NULL, NULL);
                        nfs4_end_op(mi, vp, NULL, &recov_state, needrecov);
                        break;
                }

                nfs4delegreturn_otw(rp, cr, &e);

                /*
                 * Ignore some errors on delegreturn; no point in marking
                 * the file dead on a state destroying operation.
                 */
                if (e.error == 0 && (nfs4_recov_marks_dead(e.stat) ||
                    e.stat == NFS4ERR_BADHANDLE ||
                    e.stat == NFS4ERR_STALE))
                        needrecov = FALSE;
                else
                        needrecov = nfs4_needs_recovery(&e, TRUE, vp->v_vfsp);

                if (needrecov) {
                        nfs4delegreturn_save_lost_rqst(e.error, &lost_rqst,
                            cr, vp);
                        (void) nfs4_start_recovery(&e, mi, vp,
                            NULL, &rp->r_deleg_stateid,
                            lost_rqst.lr_op == OP_DELEGRETURN ?
                            &lost_rqst : NULL, OP_DELEGRETURN, NULL,
                            NULL, NULL);
                } else {
                        nfs4delegreturn_cleanup_impl(rp, NULL, ncg);
                        done = TRUE;
                }

                nfs4_end_op(mi, vp, NULL, &recov_state, needrecov);
        }
        return (e.error);
}

/*
 * nfs4_resend_delegreturn - used to drive the delegreturn
 * operation via the recovery thread.
 */
void
nfs4_resend_delegreturn(nfs4_lost_rqst_t *lorp, nfs4_error_t *ep,
    nfs4_server_t *np)
{
        rnode4_t *rp = VTOR4(lorp->lr_vp);

        /* If the file failed recovery, just quit. */
        mutex_enter(&rp->r_statelock);
        if (rp->r_flags & R4RECOVERR) {
                ep->error = EIO;
        }
        mutex_exit(&rp->r_statelock);

        if (!ep->error)
                nfs4delegreturn_otw(rp, lorp->lr_cr, ep);

        /*
         * If recovery is now needed, then return the error
         * and status and let the recovery thread handle it,
         * including re-driving another delegreturn.  Otherwise,
         * just give up and clean up the delegation.
         */
        if (nfs4_needs_recovery(ep, TRUE, lorp->lr_vp->v_vfsp))
                return;

        if (rp->r_deleg_type != OPEN_DELEGATE_NONE)
                nfs4delegreturn_cleanup(rp, np);

        nfs4_error_zinit(ep);
}

1572 /*
1573  * nfs4delegreturn - general function to return a delegation.
1574  *
1575  * NFS4_DR_FORCE - return the delegation even if start_op fails
1576  * NFS4_DR_PUSH - push modified data back to the server via VOP_PUTPAGE
1577  * NFS4_DR_DISCARD - discard the delegation w/o delegreturn
1578  * NFS4_DR_DID_OP - calling function already did nfs4_start_op
1579  * NFS4_DR_RECALL - delegreturn initiated via CB_RECALL
1580  * NFS4_DR_REOPEN - do file reopens, if applicable
1581  */
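     /*
      * Illustrative flag combinations (a sketch, not an exhaustive
      * list): a recall typically pushes dirty data, reopens the
      * delegation open streams and then returns the delegation, e.g.
      *
      *         nfs4delegreturn(rp, NFS4_DR_PUSH | NFS4_DR_REOPEN | NFS4_DR_RECALL);
      *
      * while the recovery thread may simply discard a dead delegation:
      *
      *         nfs4delegreturn(rp, NFS4_DR_DISCARD);
      */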
1582 static int
1583 nfs4delegreturn_impl(rnode4_t *rp, int flags, struct nfs4_callback_globals *ncg)
1584 {
1585         int error = 0;
1586         cred_t *cr = NULL;
1587         vnode_t *vp;
1588         bool_t needrecov = FALSE;
1589         bool_t rw_entered = FALSE;
1590         bool_t do_reopen;
1591 
1592         vp = RTOV4(rp);
1593 
1594         /*
1595          * If NFS4_DR_DISCARD is set by itself, take a short-cut and
1596          * discard without doing an otw DELEGRETURN.  This may only be used
1597          * by the recovery thread because it bypasses the synchronization
1598          * with r_deleg_recall_lock and mi->mi_recovlock.
1599          */
1600         if (flags == NFS4_DR_DISCARD) {
1601                 nfs4delegreturn_cleanup_impl(rp, NULL, ncg);
1602                 return (0);
1603         }
1604 
1605         if (flags & NFS4_DR_DID_OP) {
1606                 /*
1607                  * Caller had already done start_op, which means the
1608                  * r_deleg_recall_lock is already held in READ mode
1609                  * so we cannot take it in write mode.  Return the
1610                  * delegation asynchronously.
1611                  *
1612                  * Remove the NFS4_DR_DID_OP flag so we don't
1613                  * get stuck looping through here.
1614                  */
1615                 VN_HOLD(vp);
1616                 nfs4delegreturn_async(rp, (flags & ~NFS4_DR_DID_OP), FALSE);
1617                 return (0);
1618         }
1619 
1620         /*
1621          * Verify we still have a delegation and crhold the credential.
1622          */
1623         mutex_enter(&rp->r_statev4_lock);
1624         if (rp->r_deleg_type == OPEN_DELEGATE_NONE) {
1625                 mutex_exit(&rp->r_statev4_lock);
1626                 goto out;
1627         }
1628         cr = rp->r_deleg_cred;
1629         ASSERT(cr != NULL);
1630         crhold(cr);
1631         mutex_exit(&rp->r_statev4_lock);
1632 
1633         /*
1634          * Push the modified data back to the server synchronously
1635          * before doing DELEGRETURN.
1636          */
1637         if (flags & NFS4_DR_PUSH)
1638                 (void) VOP_PUTPAGE(vp, 0, 0, 0, cr, NULL);
1639 
1640         /*
1641          * Take r_deleg_recall_lock in WRITE mode; this will prevent
1642          * nfs4_is_otw_open_necessary from trying to use the delegation
1643          * while the DELEGRETURN is in progress.
1644          */
1645         (void) nfs_rw_enter_sig(&rp->r_deleg_recall_lock, RW_WRITER, FALSE);
1646 
1647         rw_entered = TRUE;
1648 
1649         if (rp->r_deleg_type == OPEN_DELEGATE_NONE)
1650                 goto out;
1651 
1652         if (flags & NFS4_DR_REOPEN) {
1653                 /*
1654                  * If R4RECOVERRP is already set, then skip re-opening
1655                  * the delegation open streams and go straight to doing
1656                  * delegreturn.  (XXX if the file has failed recovery, then the
1657                  * delegreturn attempt is likely to be futile.)
1658                  */
1659                 mutex_enter(&rp->r_statelock);
1660                 do_reopen = !(rp->r_flags & R4RECOVERRP);
1661                 mutex_exit(&rp->r_statelock);
1662 
1663                 if (do_reopen) {
1664                         error = deleg_reopen(vp, &needrecov, ncg, flags);
1665                         if (error != 0) {
1666                                 if ((flags & (NFS4_DR_FORCE | NFS4_DR_RECALL))
1667                                     == 0)
1668                                         goto out;
1669                         } else if (needrecov) {
1670                                 if ((flags & NFS4_DR_FORCE) == 0)
1671                                         goto out;
1672                         }
1673                 }
1674         }
1675 
1676         if (flags & NFS4_DR_DISCARD) {
1677                 mntinfo4_t *mi = VTOMI4(RTOV4(rp));
1678 
1679                 mutex_enter(&rp->r_statelock);
1680                 /*
1681                  * r_deleg_return_pending is cleared inside of delegation_accept
1682                  * when a delegation is accepted.  If this flag has been
1683                  * cleared, then a new delegation has overwritten the one we
1684                  * were about to throw away.
1685                  */
1686                 if (!rp->r_deleg_return_pending) {
1687                         mutex_exit(&rp->r_statelock);
1688                         goto out;
1689                 }
1690                 mutex_exit(&rp->r_statelock);
1691                 (void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, FALSE);
1692                 nfs4delegreturn_cleanup_impl(rp, NULL, ncg);
1693                 nfs_rw_exit(&mi->mi_recovlock);
1694         } else {
1695                 error = nfs4_do_delegreturn(rp, flags, cr, ncg);
1696         }
1697 
1698 out:
1699         if (cr)
1700                 crfree(cr);
1701         if (rw_entered)
1702                 nfs_rw_exit(&rp->r_deleg_recall_lock);
1703         return (error);
1704 }
1705 
1706 int
1707 nfs4delegreturn(rnode4_t *rp, int flags)
1708 {
1709         struct nfs4_callback_globals *ncg;
1710 
1711         ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone());
1712         ASSERT(ncg != NULL);
1713 
1714         return (nfs4delegreturn_impl(rp, flags, ncg));
1715 }
1716 
1717 void
1718 nfs4delegreturn_async(rnode4_t *rp, int flags, bool_t trunc)
1719 {
1720         struct cb_recall_pass *pp;
1721 
1722         pp = kmem_alloc(sizeof (struct cb_recall_pass), KM_SLEEP);
1723         pp->rp = rp;
1724         pp->flags = flags;
1725         pp->truncate = trunc;
1726 
1727         /*
1728          * Fire up a thread to do the actual delegreturn.  The
1729          * caller must guarantee that the rnode doesn't
1730          * vanish (by calling VN_HOLD).
1731          */
1732 
1733         (void) zthread_create(NULL, 0, nfs4delegreturn_thread, pp, 0,
1734             minclsyspri);
1735 }
1736 
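     /*
      * Illustrative caller pattern for nfs4delegreturn_async() (a
      * sketch): the VN_HOLD must happen before the call; the matching
      * VN_RELE is done by nfs4delegreturn_thread() when it finishes:
      *
      *         VN_HOLD(vp);
      *         nfs4delegreturn_async(rp, NFS4_DR_PUSH | NFS4_DR_REOPEN, FALSE);
      */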
1737 static void
1738 delegreturn_all_thread(rpcprog_t *pp)
1739 {
1740         nfs4_server_t *np;
1741         bool_t found = FALSE;
1742         rpcprog_t prog;
1743         rnode4_t *rp;
1744         vnode_t *vp;
1745         zoneid_t zoneid = getzoneid();
1746         struct nfs4_callback_globals *ncg;
1747 
1748         NFS4_DEBUG(nfs4_drat_debug,
1749             (CE_NOTE, "delegreturn_all_thread: prog %d\n", *pp));
1750 
1751         prog = *pp;
1752         kmem_free(pp, sizeof (*pp));
1753         pp = NULL;
1754 
1755         mutex_enter(&nfs4_server_lst_lock);
1756         for (np = nfs4_server_lst.forw; np != &nfs4_server_lst; np = np->forw) {
1757                 if (np->zoneid == zoneid && np->s_program == prog) {
1758                         mutex_enter(&np->s_lock);
1759                         found = TRUE;
1760                         break;
1761                 }
1762         }
1763         mutex_exit(&nfs4_server_lst_lock);
1764 
1765         /*
1766          * It's possible that the nfs4_server which was using this
1767          * program number has vanished since this thread is async.
1768          * If so, just return.  Your work here is finished, my friend.
1769          */
1770         if (!found)
1771                 goto out;
1772 
1773         ncg = np->zone_globals;
1774         while ((rp = list_head(&np->s_deleg_list)) != NULL) {
1775                 vp = RTOV4(rp);
1776                 VN_HOLD(vp);
1777                 mutex_exit(&np->s_lock);
1778                 (void) nfs4delegreturn_impl(rp, NFS4_DR_PUSH|NFS4_DR_REOPEN,
1779                     ncg);
1780                 VN_RELE(vp);
1781 
1782                 /* retake the s_lock for next trip through the loop */
1783                 mutex_enter(&np->s_lock);
1784         }
1785         mutex_exit(&np->s_lock);
1786 out:
1787         NFS4_DEBUG(nfs4_drat_debug,
1788             (CE_NOTE, "delegreturn_all_thread: complete\n"));
1789         zthread_exit();
1790 }
1791 
1792 void
1793 nfs4_delegreturn_all(nfs4_server_t *sp)
1794 {
1795         rpcprog_t pro, *pp;
1796 
1797         mutex_enter(&sp->s_lock);
1798 
1799         /* Check to see if the delegation list is empty */
1800 
1801         if (list_head(&sp->s_deleg_list) == NULL) {
1802                 mutex_exit(&sp->s_lock);
1803                 return;
1804         }
1805         /*
1806          * Grab the program number; the async thread will use this
1807          * to find the nfs4_server.
1808          */
1809         pro = sp->s_program;
1810         mutex_exit(&sp->s_lock);
1811         pp = kmem_alloc(sizeof (rpcprog_t), KM_SLEEP);
1812         *pp = pro;
1813         (void) zthread_create(NULL, 0, delegreturn_all_thread, pp, 0,
1814             minclsyspri);
1815 }
1816 
1818 /*
1819  * Discard any delegations
1820  *
1821  * Iterate over the server's s_deleg_list and
1822  * for matching mount-point rnodes discard
1823  * the delegation.
1824  */
1825 void
1826 nfs4_deleg_discard(mntinfo4_t *mi, nfs4_server_t *sp)
1827 {
1828         rnode4_t *rp, *next;
1829         mntinfo4_t *r_mi;
1830         struct nfs4_callback_globals *ncg;
1831 
1832         ASSERT(mutex_owned(&sp->s_lock));
1833         ncg = sp->zone_globals;
1834 
1835         for (rp = list_head(&sp->s_deleg_list); rp != NULL; rp = next) {
1836                 r_mi = VTOMI4(RTOV4(rp));
1837                 next = list_next(&sp->s_deleg_list, rp);
1838 
1839                 if (r_mi != mi) {
1840                         /*
1841                          * Skip this rnode if it is not on the
1842                          * same mount-point.
1843                          */
1844                         continue;
1845                 }
1846 
1847                 ASSERT(rp->r_deleg_type == OPEN_DELEGATE_READ);
1848 
1849 #ifdef DEBUG
1850                 if (nfs4_client_recov_debug) {
1851                         zprintf(getzoneid(),
1852                             "nfs4_deleg_discard: matched rnode %p "
1853                             "-- discarding delegation\n", (void *)rp);
1854                 }
1855 #endif
1856                 mutex_enter(&rp->r_statev4_lock);
1857                 /*
1858                  * Free the cred originally held when the delegation
1859                  * was granted.  Also decrement the refcnt
1860                  * on this server for each delegation we discard.
1861                  */
1862                 if (rp->r_deleg_cred)
1863                         crfree(rp->r_deleg_cred);
1864                 rp->r_deleg_cred = NULL;
1865                 rp->r_deleg_type = OPEN_DELEGATE_NONE;
1866                 rp->r_deleg_needs_recovery = OPEN_DELEGATE_NONE;
1867                 rp->r_deleg_needs_recall = FALSE;
1868                 ASSERT(sp->s_refcnt > 1);
1869                 sp->s_refcnt--;
1870                 list_remove(&sp->s_deleg_list, rp);
1871                 mutex_exit(&rp->r_statev4_lock);
1872                 nfs4_dec_state_ref_count_nolock(sp, mi);
1873                 ncg->nfs4_callback_stats.delegations.value.ui64--;
1874         }
1875 }
1876 
1877 /*
1878  * Reopen any open streams that were covered by the given file's
1879  * delegation.
1880  * Returns zero or an errno value.  If there was no error, *recovp
1881  * indicates whether recovery was initiated.
1882  */
1883 
1884 static int
1885 deleg_reopen(vnode_t *vp, bool_t *recovp, struct nfs4_callback_globals *ncg,
1886     int flags)
1887 {
1888         nfs4_open_stream_t *osp;
1889         nfs4_recov_state_t recov_state;
1890         bool_t needrecov = FALSE;
1891         mntinfo4_t *mi;
1892         rnode4_t *rp;
1893         nfs4_error_t e = { 0, NFS4_OK, RPC_SUCCESS };
1894         int claimnull;
1895 
1896         mi = VTOMI4(vp);
1897         rp = VTOR4(vp);
1898 
1899         recov_state.rs_flags = 0;
1900         recov_state.rs_num_retry_despite_err = 0;
1901 
1902 retry:
1903         if ((e.error = nfs4_start_op(mi, vp, NULL, &recov_state)) != 0) {
1904                 return (e.error);
1905         }
1906 
1907         /*
1908          * If we mean to discard the delegation, it must be BAD, so don't
1909          * use it when doing the reopen or it will fail too.
1910          */
1911         claimnull = (flags & NFS4_DR_DISCARD);
1912         /*
1913          * Loop through the open streams for this rnode to find
1914          * all of the ones created using the delegation state ID.
1915          * Each of these needs to be re-opened.
1916          */
1917 
1918         while ((osp = get_next_deleg_stream(rp, claimnull)) != NULL) {
1919 
1920                 if (claimnull) {
1921                         nfs4_reopen(vp, osp, &e, CLAIM_NULL, FALSE, FALSE);
1922                 } else {
1923                         ncg->nfs4_callback_stats.claim_cur.value.ui64++;
1924 
1925                         nfs4_reopen(vp, osp, &e, CLAIM_DELEGATE_CUR, FALSE,
1926                             FALSE);
1927                         if (e.error == 0 && e.stat == NFS4_OK)
1928                                 ncg->nfs4_callback_stats.
1929                                     claim_cur_ok.value.ui64++;
1930                 }
1931 
1932                 if (e.error == EAGAIN) {
1933                         open_stream_rele(osp, rp);
1934                         nfs4_end_op(mi, vp, NULL, &recov_state, TRUE);
1935                         goto retry;
1936                 }
1937 
1938                 /*
1939                  * If the error is EINTR, ETIMEDOUT, or NFS4_FRC_UNMT_ERR, then
1940                  * recovery has already been started inside of nfs4_reopen.
1941                  */
1942                 if (e.error == EINTR || e.error == ETIMEDOUT ||
1943                     NFS4_FRC_UNMT_ERR(e.error, vp->v_vfsp)) {
1944                         open_stream_rele(osp, rp);
1945                         break;
1946                 }
1947 
1948                 needrecov = nfs4_needs_recovery(&e, TRUE, vp->v_vfsp);
1949 
1950                 if (e.error != 0 && !needrecov) {
1951                         /*
1952                          * Recovery is not possible, but don't give up yet;
1953                          * we'd still like to do delegreturn after
1954                          * reopening as many streams as possible.
1955                          * Continue processing the open streams.
1956                          */
1957 
1958                         ncg->nfs4_callback_stats.recall_failed.value.ui64++;
1959 
1960                 } else if (needrecov) {
1961                         /*
1962                          * Start recovery and bail out.  The recovery
1963                          * thread will take it from here.
1964                          */
1965                         (void) nfs4_start_recovery(&e, mi, vp, NULL, NULL,
1966                             NULL, OP_OPEN, NULL, NULL, NULL);
1967                         open_stream_rele(osp, rp);
1968                         *recovp = TRUE;
1969                         break;
1970                 }
1971 
1972                 open_stream_rele(osp, rp);
1973         }
1974 
1975         nfs4_end_op(mi, vp, NULL, &recov_state, needrecov);
1976 
1977         return (e.error);
1978 }
1979 
1980 /*
1981  * get_next_deleg_stream - returns the next open stream which
1982  * represents a delegation for this rnode.  In order to assure
1983  * forward progress, the caller must guarantee that each open
1984  * stream returned is changed so that a future call won't return
1985  * it again.
1986  *
1987  * There are several ways for the open stream to change.  If the open
1988  * stream is !os_delegation, then we aren't interested in it.  Also, if
1989  * either os_failed_reopen or !os_valid, then don't return the osp.
1990  *
1991  * If claimnull is false (doing reopen CLAIM_DELEGATE_CUR) then return
1992  * the osp if it is an os_delegation open stream.  Also, if the rnode still
1993  * has r_deleg_return_pending, then return the os_delegation osp.  Lastly,
1994  * if the rnode's r_deleg_stateid is different from the osp's open_stateid,
1995  * then return the osp.
1996  *
1997  * We have already taken the 'r_deleg_recall_lock' as WRITER, which
1998  * prevents new OPENs from going OTW (as start_fop takes this
1999  * lock in READ mode); thus, no new open streams can be created
2000  * (which inherently means no new delegation open streams are
2001  * being created).
2002  */
2003 
2004 static nfs4_open_stream_t *
2005 get_next_deleg_stream(rnode4_t *rp, int claimnull)
2006 {
2007         nfs4_open_stream_t      *osp;
2008 
2009         ASSERT(nfs_rw_lock_held(&rp->r_deleg_recall_lock, RW_WRITER));
2010 
2011         /*
2012          * Search through the list of open streams looking for
2013          * one that was created while holding the delegation.
2014          */
2015         mutex_enter(&rp->r_os_lock);
2016         for (osp = list_head(&rp->r_open_streams); osp != NULL;
2017             osp = list_next(&rp->r_open_streams, osp)) {
2018                 mutex_enter(&osp->os_sync_lock);
2019                 if (!osp->os_delegation || osp->os_failed_reopen ||
2020                     !osp->os_valid) {
2021                         mutex_exit(&osp->os_sync_lock);
2022                         continue;
2023                 }
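                     /*
                      * For CLAIM_DELEGATE_CUR reopens (!claimnull), any
                      * remaining delegation stream qualifies.  For
                      * CLAIM_NULL, return the stream only if a
                      * delegreturn is pending or its open stateid no
                      * longer matches the delegation stateid, as
                      * described in the block comment above.
                      */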
2024                 if (!claimnull || rp->r_deleg_return_pending ||
2025                     !stateid4_cmp(&osp->open_stateid, &rp->r_deleg_stateid)) {
2026                         osp->os_ref_count++;
2027                         mutex_exit(&osp->os_sync_lock);
2028                         mutex_exit(&rp->r_os_lock);
2029                         return (osp);
2030                 }
2031                 mutex_exit(&osp->os_sync_lock);
2032         }
2033         mutex_exit(&rp->r_os_lock);
2034 
2035         return (NULL);
2036 }
2037 
2038 static void
2039 nfs4delegreturn_thread(struct cb_recall_pass *args)
2040 {
2041         rnode4_t *rp;
2042         vnode_t *vp;
2043         cred_t *cr;
2044         int dtype, error, flags;
2045         bool_t rdirty, rip;
2046         kmutex_t cpr_lock;
2047         callb_cpr_t cpr_info;
2048         struct nfs4_callback_globals *ncg;
2049 
2050         ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone());
2051         ASSERT(ncg != NULL);
2052 
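             /*
              * Register with the CPR (checkpoint/resume) framework so
              * that this kernel thread can be suspended safely.
              */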
2053         mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL);
2054 
2055         CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr,
2056             "nfsv4delegRtn");
2057 
2058         rp = args->rp;
2059         vp = RTOV4(rp);
2060 
2061         mutex_enter(&rp->r_statev4_lock);
2062         if (rp->r_deleg_type == OPEN_DELEGATE_NONE) {
2063                 mutex_exit(&rp->r_statev4_lock);
2064                 goto out;
2065         }
2066         mutex_exit(&rp->r_statev4_lock);
2067 
2068         /*
2069          * Take the read-write lock in read mode to prevent other
2070          * threads from modifying the data during the recall.  This
2071          * doesn't affect mmappers.
2072          */
2073         (void) nfs_rw_enter_sig(&rp->r_rwlock, RW_READER, FALSE);
2074 
2075         /* Proceed with delegreturn */
2076 
2077         mutex_enter(&rp->r_statev4_lock);
2078         if (rp->r_deleg_type == OPEN_DELEGATE_NONE) {
2079                 mutex_exit(&rp->r_statev4_lock);
2080                 nfs_rw_exit(&rp->r_rwlock);
2081                 goto out;
2082         }
2083         dtype = rp->r_deleg_type;
2084         cr = rp->r_deleg_cred;
2085         ASSERT(cr != NULL);
2086         crhold(cr);
2087         mutex_exit(&rp->r_statev4_lock);
2088 
2089         flags = args->flags;
2090 
2091         /*
2092          * If the file is being truncated at the server, then throw
2093          * away all of the pages, it doesn't matter what flavor of
2094          * delegation we have.
2095          */
2096 
2097         if (args->truncate) {
2098                 ncg->nfs4_callback_stats.recall_trunc.value.ui64++;
2099                 nfs4_invalidate_pages(vp, 0, cr);
2100         } else if (dtype == OPEN_DELEGATE_WRITE) {
2101 
2102                 mutex_enter(&rp->r_statelock);
2103                 rdirty = rp->r_flags & R4DIRTY;
2104                 mutex_exit(&rp->r_statelock);
2105 
2106                 if (rdirty) {
2107                         error = VOP_PUTPAGE(vp, 0, 0, 0, cr, NULL);
2108 
2109                         if (error)
2110                                 CB_WARN1("nfs4delegreturn_thread:"
2111                                     " VOP_PUTPAGE: %d\n", error);
2112                 }
2113                 /* turn off NFS4_DR_PUSH because we just did that above. */
2114                 flags &= ~NFS4_DR_PUSH;
2115         }
2116 
2117         mutex_enter(&rp->r_statelock);
2118         rip = rp->r_flags & R4RECOVERRP;
2119         mutex_exit(&rp->r_statelock);
2120 
2121         /* If a failed recovery is indicated, discard the pages */
2122 
2123         if (rip) {
2124 
2125                 error = VOP_PUTPAGE(vp, 0, 0, B_INVAL, cr, NULL);
2126 
2127                 if (error)
2128                         CB_WARN1("nfs4delegreturn_thread: VOP_PUTPAGE: %d\n",
2129                             error);
2130         }
2131 
2132         /*
2133          * Pass the flags to nfs4delegreturn_impl, but be sure not to pass
2134          * NFS4_DR_DID_OP, which just calls nfs4delegreturn_async again.
2135          */
2136         flags &= ~NFS4_DR_DID_OP;
2137 
2138         (void) nfs4delegreturn_impl(rp, flags, ncg);
2139 
2140         nfs_rw_exit(&rp->r_rwlock);
2141         crfree(cr);
2142 out:
2143         kmem_free(args, sizeof (struct cb_recall_pass));
2144         VN_RELE(vp);
2145         mutex_enter(&cpr_lock);
2146         CALLB_CPR_EXIT(&cpr_info);
2147         mutex_destroy(&cpr_lock);
2148         zthread_exit();
2149 }
2150 
2151 /*
2152  * This function assumes that its caller is either doing recovery
2153  * (and therefore cannot call nfs4_start_op) or has already called
2154  * nfs4_start_op().
2155  */
2156 void
2157 nfs4_delegation_accept(rnode4_t *rp, open_claim_type4 claim, OPEN4res *res,
2158     nfs4_ga_res_t *garp, cred_t *cr)
2159 {
2160         open_read_delegation4 *orp;
2161         open_write_delegation4 *owp;
2162         nfs4_server_t *np;
2163         bool_t already = FALSE;
2164         bool_t recall = FALSE;
2165         bool_t valid_garp = TRUE;
2166         bool_t delegation_granted = FALSE;
2167         bool_t dr_needed = FALSE;
2168         bool_t recov;
2169         int dr_flags = 0;
2170         long mapcnt;
2171         uint_t rflag;
2172         mntinfo4_t *mi;
2173         struct nfs4_callback_globals *ncg;
2174         open_delegation_type4 odt;
2175 
2176         ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone());
2177         ASSERT(ncg != NULL);
2178 
2179         mi = VTOMI4(RTOV4(rp));
2180 
2181         /*
2182          * Accept a delegation granted to the client via an OPEN.
2183          * Set the delegation fields in the rnode and insert the
2184          * rnode onto the list anchored in the nfs4_server_t.  The
2185          * proper locking order requires the nfs4_server_t first,
2186          * even though it may not be needed in all cases.
2187          *
2188          * NB: find_nfs4_server returns with s_lock held.
2189          */
2190 
2191         if ((np = find_nfs4_server(mi)) == NULL)
2192                 return;
2193 
2194         /* grab the statelock too, for examining r_mapcnt */
2195         mutex_enter(&rp->r_statelock);
2196         mutex_enter(&rp->r_statev4_lock);
2197 
2198         if (rp->r_deleg_type == OPEN_DELEGATE_READ ||
2199             rp->r_deleg_type == OPEN_DELEGATE_WRITE)
2200                 already = TRUE;
2201 
2202         odt = res->delegation.delegation_type;
2203 
2204         if (odt == OPEN_DELEGATE_READ) {
2205 
2206                 rp->r_deleg_type = res->delegation.delegation_type;
2207                 orp = &res->delegation.open_delegation4_u.read;
2208                 rp->r_deleg_stateid = orp->stateid;
2209                 rp->r_deleg_perms = orp->permissions;
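                     /*
                      * On a CLAIM_PREVIOUS reclaim the server may set
                      * the recall bit, indicating that the delegation
                      * must be returned once the reclaim completes.
                      */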
2210                 if (claim == CLAIM_PREVIOUS)
2211                         if ((recall = orp->recall) != 0)
2212                                 dr_needed = TRUE;
2213 
2214                 delegation_granted = TRUE;
2215 
2216                 ncg->nfs4_callback_stats.delegations.value.ui64++;
2217                 ncg->nfs4_callback_stats.delegaccept_r.value.ui64++;
2218 
2219         } else if (odt == OPEN_DELEGATE_WRITE) {
2220 
2221                 rp->r_deleg_type = res->delegation.delegation_type;
2222                 owp = &res->delegation.open_delegation4_u.write;
2223                 rp->r_deleg_stateid = owp->stateid;
2224                 rp->r_deleg_perms = owp->permissions;
2225                 rp->r_deleg_limit = owp->space_limit;
2226                 if (claim == CLAIM_PREVIOUS)
2227                         if ((recall = owp->recall) != 0)
2228                                 dr_needed = TRUE;
2229 
2230                 delegation_granted = TRUE;
2231 
2232                 if (garp == NULL || !garp->n4g_change_valid) {
2233                         valid_garp = FALSE;
2234                         rp->r_deleg_change = 0;
2235                         rp->r_deleg_change_grant = 0;
2236                 } else {
2237                         rp->r_deleg_change = garp->n4g_change;
2238                         rp->r_deleg_change_grant = garp->n4g_change;
2239                 }
2240                 mapcnt = rp->r_mapcnt;
2241                 rflag = rp->r_flags;
2242 
2243                 /*
2244                  * Update the delegation change attribute if
2245                  * there are mappers or the file is dirty.  This
2246                  * might be the case during recovery after server
2247                  * reboot.
2248                  */
2249                 if (mapcnt > 0 || rflag & R4DIRTY)
2250                         rp->r_deleg_change++;
2251 
2252                 NFS4_DEBUG(nfs4_callback_debug, (CE_NOTE,
2253                     "nfs4_delegation_accept: r_deleg_change: 0x%x\n",
2254                     (int)(rp->r_deleg_change >> 32)));
2255                 NFS4_DEBUG(nfs4_callback_debug, (CE_NOTE,
2256                     "nfs4_delegation_accept: r_deleg_change_grant: 0x%x\n",
2257                     (int)(rp->r_deleg_change_grant >> 32)));
2258 
2260                 ncg->nfs4_callback_stats.delegations.value.ui64++;
2261                 ncg->nfs4_callback_stats.delegaccept_rw.value.ui64++;
2262         } else if (already) {
2263                 /*
2264                  * No delegation granted.  If the rnode currently
2265                  * has one, then consider it tainted and return it.
2266                  */
2267                 dr_needed = TRUE;
2268         }
2269 
2270         if (delegation_granted) {
2271                 /* Add the rnode to the list. */
2272                 if (!already) {
2273                         crhold(cr);
2274                         rp->r_deleg_cred = cr;
2275 
2276                         ASSERT(mutex_owned(&np->s_lock));
2277                         list_insert_head(&np->s_deleg_list, rp);
2278                         /* added list node gets a reference */
2279                         np->s_refcnt++;
2280                         nfs4_inc_state_ref_count_nolock(np, mi);
2281                 }
2282                 rp->r_deleg_needs_recovery = OPEN_DELEGATE_NONE;
2283         }
2284 
2285         /*
2286          * We've now safely accepted the delegation, if any.  Drop the
2287          * locks and figure out what post-processing is needed.  We'd
2288          * like to retain r_statev4_lock, but nfs4_server_rele takes
2289          * s_lock which would be a lock ordering violation.
2290          */
2291         mutex_exit(&rp->r_statev4_lock);
2292         mutex_exit(&rp->r_statelock);
2293         mutex_exit(&np->s_lock);
2294         nfs4_server_rele(np);
2295 
2296         /*
2297          * Check to see if we are in recovery.  Remember that
2298          * this function is protected by start_op, so a recovery
2299          * cannot begin until we are out of here.
2300          */
2301         mutex_enter(&mi->mi_lock);
2302         recov = mi->mi_recovflags & MI4_RECOV_ACTIV;
2303         mutex_exit(&mi->mi_lock);
2304 
2305         mutex_enter(&rp->r_statev4_lock);
2306 
2307         if (nfs4_delegreturn_policy == IMMEDIATE || !valid_garp)
2308                 dr_needed = TRUE;
2309 
2310         if (dr_needed && rp->r_deleg_return_pending == FALSE) {
2311                 if (recov) {
2312                         /*
2313                          * We cannot call delegreturn from inside
2314                          * of recovery or VOP_PUTPAGE will hang
2315                          * due to nfs4_start_fop call in
2316                          * nfs4write.  Use dlistadd to add the
2317                          * rnode to the list of rnodes needing
2318                          * cleaning.  We do not need to do reopen
2319                          * here because recov_openfiles will do it.
2320                          * In the non-recall case, just discard the
2321                          * delegation as it is no longer valid.
2322                          */
2323                         if (recall)
2324                                 dr_flags = NFS4_DR_PUSH;
2325                         else
2326                                 dr_flags = NFS4_DR_PUSH|NFS4_DR_DISCARD;
2327 
2328                         nfs4_dlistadd(rp, ncg, dr_flags);
2329                         dr_flags = 0;
2330                 } else {
2331                         /*
2332                          * Push the modified data back to the server,
2333                          * reopen any delegation open streams, and return
2334                          * the delegation.  Drop the statev4_lock first!
2335                          */
2336                         dr_flags = NFS4_DR_PUSH|NFS4_DR_DID_OP|NFS4_DR_REOPEN;
2337                 }
2338         }
2339         mutex_exit(&rp->r_statev4_lock);
2340         if (dr_flags)
2341                 (void) nfs4delegreturn_impl(rp, dr_flags, ncg);
2342 }
2343 
2344 /*
2345  * nfs4delegabandon - Abandon the delegation on an rnode4.  This code
2346  * is called when the client receives EXPIRED, BAD_STATEID, OLD_STATEID
2347  * or BADSEQID and the recovery code is unable to recover.  Push any
2348  * dirty data back to the server and return the delegation (if any).
2349  */
2350 
2351 void
2352 nfs4delegabandon(rnode4_t *rp)
2353 {
2354         vnode_t *vp;
2355         struct cb_recall_pass *pp;
2356         open_delegation_type4 dt;
2357 
2358         mutex_enter(&rp->r_statev4_lock);
2359         dt = rp->r_deleg_type;
2360         mutex_exit(&rp->r_statev4_lock);
2361 
2362         if (dt == OPEN_DELEGATE_NONE)
2363                 return;
2364 
2365         vp = RTOV4(rp);
2366         VN_HOLD(vp);
2367 
2368         pp = kmem_alloc(sizeof (struct cb_recall_pass), KM_SLEEP);
2369         pp->rp = rp;
2370         /*
2371          * Recovery on the file has failed and we want to return
2372          * the delegation.  We don't want to reopen files;
2373          * nfs4delegreturn_thread() figures out what to do about
2374          * the data.  The only thing to do is attempt to return
2375          * the delegation.
2376          */
2377         pp->flags = 0;
2378         pp->truncate = FALSE;
2379 
2380         /*
2381          * Fire up a thread to do the delegreturn; this is
2382          * necessary because we could be inside a GETPAGE or
2383          * PUTPAGE and we cannot do another one.
2384          */
2385 
2386         (void) zthread_create(NULL, 0, nfs4delegreturn_thread, pp, 0,
2387             minclsyspri);
2388 }
2389 
2390 static int
2391 wait_for_recall1(vnode_t *vp, nfs4_op_hint_t op, nfs4_recov_state_t *rsp,
2392     int flg)
2393 {
2394         rnode4_t *rp;
2395         int error = 0;
2396 
2397 #ifdef lint
2398         op = op;
2399 #endif
2400 
2401         if (vp && vp->v_type == VREG) {
2402                 rp = VTOR4(vp);
2403 
2404                 /*
2405                  * Take r_deleg_recall_lock in read mode to synchronize
2406                  * with delegreturn.
2407                  */
2408                 error = nfs_rw_enter_sig(&rp->r_deleg_recall_lock,
2409                     RW_READER, INTR4(vp));
2410 
2411                 if (error == 0)
2412                         rsp->rs_flags |= flg;
2413 
2414         }
2415         return (error);
2416 }
2417 
2418 void
2419 nfs4_end_op_recall(vnode_t *vp1, vnode_t *vp2, nfs4_recov_state_t *rsp)
2420 {
2421         NFS4_DEBUG(nfs4_recall_debug,
2422             (CE_NOTE, "nfs4_end_op_recall: 0x%p, 0x%p\n",
2423             (void *)vp1, (void *)vp2));
2424 
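             /* Release in the reverse of the order wait_for_recall took them. */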
2425         if (vp2 && rsp->rs_flags & NFS4_RS_RECALL_HELD2)
2426                 nfs_rw_exit(&VTOR4(vp2)->r_deleg_recall_lock);
2427         if (vp1 && rsp->rs_flags & NFS4_RS_RECALL_HELD1)
2428                 nfs_rw_exit(&VTOR4(vp1)->r_deleg_recall_lock);
2429 }
2430 
2431 int
2432 wait_for_recall(vnode_t *vp1, vnode_t *vp2, nfs4_op_hint_t op,
2433     nfs4_recov_state_t *rsp)
2434 {
2435         int error;
2436 
2437         NFS4_DEBUG(nfs4_recall_debug,
2438             (CE_NOTE, "wait_for_recall:    0x%p, 0x%p\n",
2439             (void *)vp1, (void *)vp2));
2440 
2441         rsp->rs_flags &= ~(NFS4_RS_RECALL_HELD1|NFS4_RS_RECALL_HELD2);
2442 
2443         if ((error = wait_for_recall1(vp1, op, rsp, NFS4_RS_RECALL_HELD1)) != 0)
2444                 return (error);
2445 
2446         if ((error = wait_for_recall1(vp2, op, rsp, NFS4_RS_RECALL_HELD2))
2447             != 0) {
2448                 if (rsp->rs_flags & NFS4_RS_RECALL_HELD1) {
2449                         nfs_rw_exit(&VTOR4(vp1)->r_deleg_recall_lock);
2450                         rsp->rs_flags &= ~NFS4_RS_RECALL_HELD1;
2451                 }
2452 
2453                 return (error);
2454         }
2455 
2456         return (0);
2457 }
2458 
2459 /*
2460  * nfs4_dlistadd - Add this rnode to a list of rnodes to be
2461  * DELEGRETURN'd at the end of recovery.
2462  */
2463 
2464 static void
2465 nfs4_dlistadd(rnode4_t *rp, struct nfs4_callback_globals *ncg, int flags)
2466 {
2467         struct nfs4_dnode *dp;
2468 
2469         ASSERT(mutex_owned(&rp->r_statev4_lock));
2470         /*
2471          * Mark the delegation as having a return pending.
2472          * This will prevent the use of the delegation stateid
2473          * by read, write, setattr and open.
2474          */
2475         rp->r_deleg_return_pending = TRUE;
2476         dp = kmem_alloc(sizeof (*dp), KM_SLEEP);
2477         VN_HOLD(RTOV4(rp));
2478         dp->rnodep = rp;
2479         dp->flags = flags;
2480         mutex_enter(&ncg->nfs4_dlist_lock);
2481         list_insert_head(&ncg->nfs4_dlist, dp);
2482 #ifdef  DEBUG
2483         ncg->nfs4_dlistadd_c++;
2484 #endif
2485         mutex_exit(&ncg->nfs4_dlist_lock);
2486 }
2487 
2488 /*
2489  * nfs4_dlistclean_impl - Do DELEGRETURN for each rnode on the list
2490  * of files awaiting cleaning.  If the override_flags are non-zero
2491  * then use them rather than the flags that were set when the rnode
2492  * was added to the dlist.
2493  */
2494 static void
2495 nfs4_dlistclean_impl(struct nfs4_callback_globals *ncg, int override_flags)
2496 {
2497         rnode4_t *rp;
2498         struct nfs4_dnode *dp;
2499         int flags;
2500 
2501         ASSERT(override_flags == 0 || override_flags == NFS4_DR_DISCARD);
2502 
2503         mutex_enter(&ncg->nfs4_dlist_lock);
2504         while ((dp = list_head(&ncg->nfs4_dlist)) != NULL) {
2505 #ifdef  DEBUG
2506                 ncg->nfs4_dlistclean_c++;
2507 #endif
2508                 list_remove(&ncg->nfs4_dlist, dp);
2509                 mutex_exit(&ncg->nfs4_dlist_lock);
2510                 rp = dp->rnodep;
2511                 flags = (override_flags != 0) ? override_flags : dp->flags;
2512                 kmem_free(dp, sizeof (*dp));
2513                 (void) nfs4delegreturn_impl(rp, flags, ncg);
2514                 VN_RELE(RTOV4(rp));
2515                 mutex_enter(&ncg->nfs4_dlist_lock);
2516         }
2517         mutex_exit(&ncg->nfs4_dlist_lock);
2518 }
2519 
2520 void
2521 nfs4_dlistclean(void)
2522 {
2523         struct nfs4_callback_globals *ncg;
2524 
2525         ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone());
2526         ASSERT(ncg != NULL);
2527 
2528         nfs4_dlistclean_impl(ncg, 0);
2529 }