1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 /*
  28  * Copyright 2018 Nexenta Systems, Inc.
  29  */
  30 
  31 #include <sys/systm.h>
  32 #include <rpc/auth.h>
  33 #include <rpc/clnt.h>
  34 #include <nfs/nfs4_kprot.h>
  35 #include <nfs/nfs4.h>
  36 #include <nfs/lm.h>
  37 #include <sys/cmn_err.h>
  38 #include <sys/disp.h>
  39 #include <sys/sdt.h>
  40 
  41 #include <sys/pathname.h>
  42 
  43 #include <sys/strsubr.h>
  44 #include <sys/ddi.h>
  45 
  46 #include <sys/vnode.h>
  47 #include <sys/sdt.h>
  48 #include <inet/common.h>
  49 #include <inet/ip.h>
  50 #include <inet/ip6.h>
  51 
/*
 * NOTE(review): not referenced in the part of the file reviewed here;
 * presumably a cap on read delegations -- confirm against its users.
 */
#define MAX_READ_DELEGATIONS 5

/* NOTE(review): not referenced in the reviewed part; meaning unconfirmed. */
static int rfs4_deleg_wlp = 5;
/* NOTE(review): presumably non-zero turns delegations off -- confirm. */
static int rfs4_deleg_disabled;
/*
 * Upper bound on the number of attempts rfs4_cbinfo_hold() makes to set
 * up a new callback path before it gives up and returns NULL.
 */
static int rfs4_max_setup_cb_tries = 5;

#ifdef DEBUG

/* NOTE(review): presumably a fault-injection switch for CB_GETATTR. */
static int rfs4_test_cbgetattr_fail = 0;
/* Bumped each time a CB_NULL call to a client's callback path succeeds. */
int rfs4_cb_null;
/* NOTE(review): debug knobs; not referenced in the reviewed part. */
int rfs4_cb_debug;
int rfs4_deleg_debug;

#endif
  66 
/*
 * Forward declarations of file-local (static) helpers so that they can
 * be referenced ahead of their definitions.
 */
static void rfs4_recall_file(rfs4_file_t *,
    void (*recall)(rfs4_deleg_state_t *, bool_t),
    bool_t, rfs4_client_t *);
static  void            rfs4_revoke_file(rfs4_file_t *);
static  void            rfs4_cb_chflush(rfs4_cbinfo_t *);
static  CLIENT          *rfs4_cb_getch(rfs4_cbinfo_t *);
static  void            rfs4_cb_freech(rfs4_cbinfo_t *, CLIENT *, bool_t);
static rfs4_deleg_state_t *rfs4_deleg_state(rfs4_state_t *,
    open_delegation_type4, int *);
  76 
  77 /*
  78  * Convert a universal address to an transport specific
  79  * address using inet_pton.
  80  */
  81 static int
  82 uaddr2sockaddr(int af, char *ua, void *ap, in_port_t *pp)
  83 {
  84         int dots = 0, i, j, len, k;
  85         unsigned char c;
  86         in_port_t port = 0;
  87 
  88         len = strlen(ua);
  89 
  90         for (i = len-1; i >= 0; i--) {
  91 
  92                 if (ua[i] == '.')
  93                         dots++;
  94 
  95                 if (dots == 2) {
  96 
  97                         ua[i] = '\0';
  98                         /*
  99                          * We use k to remember were to stick '.' back, since
 100                          * ua was kmem_allocateded from the pool len+1.
 101                          */
 102                         k = i;
 103                         if (inet_pton(af, ua, ap) == 1) {
 104 
 105                                 c = 0;
 106 
 107                                 for (j = i+1; j < len; j++) {
 108                                         if (ua[j] == '.') {
 109                                                 port = c << 8;
 110                                                 c = 0;
 111                                         } else if (ua[j] >= '0' &&
 112                                             ua[j] <= '9') {
 113                                                 c *= 10;
 114                                                 c += ua[j] - '0';
 115                                         } else {
 116                                                 ua[k] = '.';
 117                                                 return (EINVAL);
 118                                         }
 119                                 }
 120                                 port += c;
 121 
 122                                 *pp = htons(port);
 123 
 124                                 ua[k] = '.';
 125                                 return (0);
 126                         } else {
 127                                 ua[k] = '.';
 128                                 return (EINVAL);
 129                         }
 130                 }
 131         }
 132 
 133         return (EINVAL);
 134 }
 135 
 136 /*
 137  * Update the delegation policy with the
 138  * value of "new_policy"
 139  */
 140 void
 141 rfs4_set_deleg_policy(nfs4_srv_t *nsrv4, srv_deleg_policy_t new_policy)
 142 {
 143         rw_enter(&nsrv4->deleg_policy_lock, RW_WRITER);
 144         nsrv4->nfs4_deleg_policy = new_policy;
 145         rw_exit(&nsrv4->deleg_policy_lock);
 146 }
 147 
 148 void
 149 rfs4_hold_deleg_policy(nfs4_srv_t *nsrv4)
 150 {
 151         rw_enter(&nsrv4->deleg_policy_lock, RW_READER);
 152 }
 153 
 154 void
 155 rfs4_rele_deleg_policy(nfs4_srv_t *nsrv4)
 156 {
 157         rw_exit(&nsrv4->deleg_policy_lock);
 158 }
 159 
 160 srv_deleg_policy_t
 161 nfs4_get_deleg_policy()
 162 {
 163         nfs4_srv_t *nsrv4 = nfs4_get_srv();
 164         return (nsrv4->nfs4_deleg_policy);
 165 }
 166 
 167 
 168 /*
 169  * This free function is to be used when the client struct is being
 170  * released and nothing at all is needed of the callback info any
 171  * longer.
 172  */
 173 void
 174 rfs4_cbinfo_free(rfs4_cbinfo_t *cbp)
 175 {
 176         char *addr = cbp->cb_callback.cb_location.r_addr;
 177         char *netid = cbp->cb_callback.cb_location.r_netid;
 178 
 179         /* Free old address if any */
 180 
 181         if (addr)
 182                 kmem_free(addr, strlen(addr) + 1);
 183         if (netid)
 184                 kmem_free(netid, strlen(netid) + 1);
 185 
 186         addr = cbp->cb_newer.cb_callback.cb_location.r_addr;
 187         netid = cbp->cb_newer.cb_callback.cb_location.r_netid;
 188 
 189         if (addr)
 190                 kmem_free(addr, strlen(addr) + 1);
 191         if (netid)
 192                 kmem_free(netid, strlen(netid) + 1);
 193 
 194         if (cbp->cb_chc_free) {
 195                 rfs4_cb_chflush(cbp);
 196         }
 197 }
 198 
 199 /*
 200  * The server uses this to check the callback path supplied by the
 201  * client.  The callback connection is marked "in progress" while this
 202  * work is going on and then eventually marked either OK or FAILED.
 203  * This work can be done as part of a separate thread and at the end
 204  * of this the thread will exit or it may be done such that the caller
 205  * will continue with other work.
 206  */
 207 static void
 208 rfs4_do_cb_null(rfs4_client_t *cp)
 209 {
 210         struct timeval tv;
 211         CLIENT *ch;
 212         rfs4_cbstate_t newstate;
 213         rfs4_cbinfo_t *cbp = &cp->rc_cbinfo;
 214 
 215         mutex_enter(cbp->cb_lock);
 216         /* If another thread is doing CB_NULL RPC then return */
 217         if (cbp->cb_nullcaller == TRUE) {
 218                 mutex_exit(cbp->cb_lock);
 219                 rfs4_client_rele(cp);
 220                 zthread_exit();
 221         }
 222 
 223         /* Mark the cbinfo as having a thread in the NULL callback */
 224         cbp->cb_nullcaller = TRUE;
 225 
 226         /*
 227          * Are there other threads still using the cbinfo client
 228          * handles?  If so, this thread must wait before going and
 229          * mucking aroiund with the callback information
 230          */
 231         while (cbp->cb_refcnt != 0)
 232                 cv_wait(cbp->cb_cv_nullcaller, cbp->cb_lock);
 233 
 234         /*
 235          * This thread itself may find that new callback info has
 236          * arrived and is set up to handle this case and redrive the
 237          * call to the client's callback server.
 238          */
 239 retry:
 240         if (cbp->cb_newer.cb_new == TRUE &&
 241             cbp->cb_newer.cb_confirmed == TRUE) {
 242                 char *addr = cbp->cb_callback.cb_location.r_addr;
 243                 char *netid = cbp->cb_callback.cb_location.r_netid;
 244 
 245                 /*
 246                  * Free the old stuff if it exists; may be the first
 247                  * time through this path
 248                  */
 249                 if (addr)
 250                         kmem_free(addr, strlen(addr) + 1);
 251                 if (netid)
 252                         kmem_free(netid, strlen(netid) + 1);
 253 
 254                 /* Move over the addr/netid */
 255                 cbp->cb_callback.cb_location.r_addr =
 256                     cbp->cb_newer.cb_callback.cb_location.r_addr;
 257                 cbp->cb_newer.cb_callback.cb_location.r_addr = NULL;
 258                 cbp->cb_callback.cb_location.r_netid =
 259                     cbp->cb_newer.cb_callback.cb_location.r_netid;
 260                 cbp->cb_newer.cb_callback.cb_location.r_netid = NULL;
 261 
 262                 /* Get the program number */
 263                 cbp->cb_callback.cb_program =
 264                     cbp->cb_newer.cb_callback.cb_program;
 265                 cbp->cb_newer.cb_callback.cb_program = 0;
 266 
 267                 /* Don't forget the protocol's "cb_ident" field */
 268                 cbp->cb_ident = cbp->cb_newer.cb_ident;
 269                 cbp->cb_newer.cb_ident = 0;
 270 
 271                 /* no longer new */
 272                 cbp->cb_newer.cb_new = FALSE;
 273                 cbp->cb_newer.cb_confirmed = FALSE;
 274 
 275                 /* get rid of the old client handles that may exist */
 276                 rfs4_cb_chflush(cbp);
 277 
 278                 cbp->cb_state = CB_NONE;
 279                 cbp->cb_timefailed = 0; /* reset the clock */
 280                 cbp->cb_notified_of_cb_path_down = TRUE;
 281         }
 282 
 283         if (cbp->cb_state != CB_NONE) {
 284                 cv_broadcast(cbp->cb_cv);    /* let the others know */
 285                 cbp->cb_nullcaller = FALSE;
 286                 mutex_exit(cbp->cb_lock);
 287                 rfs4_client_rele(cp);
 288                 zthread_exit();
 289         }
 290 
 291         /* mark rfs4_client_t as CALLBACK NULL in progress */
 292         cbp->cb_state = CB_INPROG;
 293         mutex_exit(cbp->cb_lock);
 294 
 295         /* get/generate a client handle */
 296         if ((ch = rfs4_cb_getch(cbp)) == NULL) {
 297                 mutex_enter(cbp->cb_lock);
 298                 cbp->cb_state = CB_BAD;
 299                 cbp->cb_timefailed = gethrestime_sec(); /* observability */
 300                 goto retry;
 301         }
 302 
 303 
 304         tv.tv_sec = 30;
 305         tv.tv_usec = 0;
 306         if (clnt_call(ch, CB_NULL, xdr_void, NULL, xdr_void, NULL, tv) != 0) {
 307                 newstate = CB_BAD;
 308         } else {
 309                 newstate = CB_OK;
 310 #ifdef  DEBUG
 311                 rfs4_cb_null++;
 312 #endif
 313         }
 314 
 315         /* Check to see if the client has specified new callback info */
 316         mutex_enter(cbp->cb_lock);
 317         rfs4_cb_freech(cbp, ch, TRUE);
 318         if (cbp->cb_newer.cb_new == TRUE &&
 319             cbp->cb_newer.cb_confirmed == TRUE) {
 320                 goto retry;     /* give the CB_NULL another chance */
 321         }
 322 
 323         cbp->cb_state = newstate;
 324         if (cbp->cb_state == CB_BAD)
 325                 cbp->cb_timefailed = gethrestime_sec(); /* observability */
 326 
 327         cv_broadcast(cbp->cb_cv);    /* start up the other threads */
 328         cbp->cb_nullcaller = FALSE;
 329         mutex_exit(cbp->cb_lock);
 330         rfs4_client_rele(cp);
 331         zthread_exit();
 332 }
 333 
 334 /*
 335  * Given a client struct, inspect the callback info to see if the
 336  * callback path is up and available.
 337  *
 338  * If new callback path is available and no one has set it up then
 339  * try to set it up. If setup is not successful after 5 tries (5 secs)
 340  * then gives up and returns NULL.
 341  *
 342  * If callback path is being initialized, then wait for the CB_NULL RPC
 343  * call to occur.
 344  */
 345 static rfs4_cbinfo_t *
 346 rfs4_cbinfo_hold(rfs4_client_t *cp)
 347 {
 348         rfs4_cbinfo_t *cbp = &cp->rc_cbinfo;
 349         int retries = 0;
 350 
 351         mutex_enter(cbp->cb_lock);
 352 
 353         while (cbp->cb_newer.cb_new == TRUE && cbp->cb_nullcaller == FALSE) {
 354                 /*
 355                  * Looks like a new callback path may be available and
 356                  * noone has set it up.
 357                  */
 358                 mutex_exit(cbp->cb_lock);
 359                 rfs4_dbe_hold(cp->rc_dbe);
 360                 rfs4_do_cb_null(cp); /* caller will release client hold */
 361 
 362                 mutex_enter(cbp->cb_lock);
 363                 /*
 364                  * If callback path is no longer new, or it's being setup
 365                  * then stop and wait for it to be done.
 366                  */
 367                 if (cbp->cb_newer.cb_new == FALSE || cbp->cb_nullcaller == TRUE)
 368                         break;
 369                 mutex_exit(cbp->cb_lock);
 370 
 371                 if (++retries >= rfs4_max_setup_cb_tries)
 372                         return (NULL);
 373                 delay(hz);
 374                 mutex_enter(cbp->cb_lock);
 375         }
 376 
 377         /* Is there a thread working on doing the CB_NULL RPC? */
 378         if (cbp->cb_nullcaller == TRUE)
 379                 cv_wait(cbp->cb_cv, cbp->cb_lock);  /* if so, wait on it */
 380 
 381         /* If the callback path is not okay (up and running), just quit */
 382         if (cbp->cb_state != CB_OK) {
 383                 mutex_exit(cbp->cb_lock);
 384                 return (NULL);
 385         }
 386 
 387         /* Let someone know we are using the current callback info */
 388         cbp->cb_refcnt++;
 389         mutex_exit(cbp->cb_lock);
 390         return (cbp);
 391 }
 392 
 393 /*
 394  * The caller is done with the callback info.  It may be that the
 395  * caller's RPC failed and the NFSv4 client has actually provided new
 396  * callback information.  If so, let the caller know so they can
 397  * advantage of this and maybe retry the RPC that originally failed.
 398  */
 399 static int
 400 rfs4_cbinfo_rele(rfs4_cbinfo_t *cbp, rfs4_cbstate_t newstate)
 401 {
 402         int cb_new = FALSE;
 403 
 404         mutex_enter(cbp->cb_lock);
 405 
 406         /* The caller gets a chance to mark the callback info as bad */
 407         if (newstate != CB_NOCHANGE)
 408                 cbp->cb_state = newstate;
 409         if (newstate == CB_FAILED) {
 410                 cbp->cb_timefailed = gethrestime_sec(); /* observability */
 411                 cbp->cb_notified_of_cb_path_down = FALSE;
 412         }
 413 
 414         cbp->cb_refcnt--;    /* no longer using the information */
 415 
 416         /*
 417          * A thread may be waiting on this one to finish and if so,
 418          * let it know that it is okay to do the CB_NULL to the
 419          * client's callback server.
 420          */
 421         if (cbp->cb_refcnt == 0 && cbp->cb_nullcaller)
 422                 cv_broadcast(cbp->cb_cv_nullcaller);
 423 
 424         /*
 425          * If this is the last thread to use the callback info and
 426          * there is new callback information to try and no thread is
 427          * there ready to do the CB_NULL, then return true to teh
 428          * caller so they can do the CB_NULL
 429          */
 430         if (cbp->cb_refcnt == 0 &&
 431             cbp->cb_nullcaller == FALSE &&
 432             cbp->cb_newer.cb_new == TRUE &&
 433             cbp->cb_newer.cb_confirmed == TRUE)
 434                 cb_new = TRUE;
 435 
 436         mutex_exit(cbp->cb_lock);
 437 
 438         return (cb_new);
 439 }
 440 
 441 /*
 442  * Given the information in the callback info struct, create a client
 443  * handle that can be used by the server for its callback path.
 444  */
 445 static CLIENT *
 446 rfs4_cbch_init(rfs4_cbinfo_t *cbp)
 447 {
 448         struct knetconfig knc;
 449         vnode_t *vp;
 450         struct sockaddr_in addr4;
 451         struct sockaddr_in6 addr6;
 452         void *addr, *taddr;
 453         in_port_t *pp;
 454         int af;
 455         char *devnam;
 456         struct netbuf nb;
 457         int size;
 458         CLIENT *ch = NULL;
 459         int useresvport = 0;
 460 
 461         mutex_enter(cbp->cb_lock);
 462 
 463         if (cbp->cb_callback.cb_location.r_netid == NULL ||
 464             cbp->cb_callback.cb_location.r_addr == NULL) {
 465                 goto cb_init_out;
 466         }
 467 
 468         if (strcmp(cbp->cb_callback.cb_location.r_netid, "tcp") == 0) {
 469                 knc.knc_semantics = NC_TPI_COTS;
 470                 knc.knc_protofmly = "inet";
 471                 knc.knc_proto = "tcp";
 472                 devnam = "/dev/tcp";
 473                 af = AF_INET;
 474         } else if (strcmp(cbp->cb_callback.cb_location.r_netid, "udp")
 475             == 0) {
 476                 knc.knc_semantics = NC_TPI_CLTS;
 477                 knc.knc_protofmly = "inet";
 478                 knc.knc_proto = "udp";
 479                 devnam = "/dev/udp";
 480                 af = AF_INET;
 481         } else if (strcmp(cbp->cb_callback.cb_location.r_netid, "tcp6")
 482             == 0) {
 483                 knc.knc_semantics = NC_TPI_COTS;
 484                 knc.knc_protofmly = "inet6";
 485                 knc.knc_proto = "tcp";
 486                 devnam = "/dev/tcp6";
 487                 af = AF_INET6;
 488         } else if (strcmp(cbp->cb_callback.cb_location.r_netid, "udp6")
 489             == 0) {
 490                 knc.knc_semantics = NC_TPI_CLTS;
 491                 knc.knc_protofmly = "inet6";
 492                 knc.knc_proto = "udp";
 493                 devnam = "/dev/udp6";
 494                 af = AF_INET6;
 495         } else {
 496                 goto cb_init_out;
 497         }
 498 
 499         if (lookupname(devnam, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp) != 0) {
 500 
 501                 goto cb_init_out;
 502         }
 503 
 504         if (vp->v_type != VCHR) {
 505                 VN_RELE(vp);
 506                 goto cb_init_out;
 507         }
 508 
 509         knc.knc_rdev = vp->v_rdev;
 510 
 511         VN_RELE(vp);
 512 
 513         if (af == AF_INET) {
 514                 size = sizeof (addr4);
 515                 bzero(&addr4, size);
 516                 addr4.sin_family = (sa_family_t)af;
 517                 addr = &addr4.sin_addr;
 518                 pp = &addr4.sin_port;
 519                 taddr = &addr4;
 520         } else /* AF_INET6 */ {
 521                 size = sizeof (addr6);
 522                 bzero(&addr6, size);
 523                 addr6.sin6_family = (sa_family_t)af;
 524                 addr = &addr6.sin6_addr;
 525                 pp = &addr6.sin6_port;
 526                 taddr = &addr6;
 527         }
 528 
 529         if (uaddr2sockaddr(af,
 530             cbp->cb_callback.cb_location.r_addr, addr, pp)) {
 531 
 532                 goto cb_init_out;
 533         }
 534 
 535 
 536         nb.maxlen = nb.len = size;
 537         nb.buf = (char *)taddr;
 538 
 539         if (clnt_tli_kcreate(&knc, &nb, cbp->cb_callback.cb_program,
 540             NFS_CB, 0, 0, curthread->t_cred, &ch)) {
 541 
 542                 ch = NULL;
 543         }
 544 
 545         /* turn off reserved port usage */
 546         (void) CLNT_CONTROL(ch, CLSET_BINDRESVPORT, (char *)&useresvport);
 547 
 548 cb_init_out:
 549         mutex_exit(cbp->cb_lock);
 550         return (ch);
 551 }
 552 
 553 /*
 554  * Iterate over the client handle cache and
 555  * destroy it.
 556  */
 557 static void
 558 rfs4_cb_chflush(rfs4_cbinfo_t *cbp)
 559 {
 560         CLIENT *ch;
 561 
 562         while (cbp->cb_chc_free) {
 563                 cbp->cb_chc_free--;
 564                 ch = cbp->cb_chc[cbp->cb_chc_free];
 565                 cbp->cb_chc[cbp->cb_chc_free] = NULL;
 566                 if (ch) {
 567                         if (ch->cl_auth)
 568                                 auth_destroy(ch->cl_auth);
 569                         clnt_destroy(ch);
 570                 }
 571         }
 572 }
 573 
 574 /*
 575  * Return a client handle, either from a the small
 576  * rfs4_client_t cache or one that we just created.
 577  */
 578 static CLIENT *
 579 rfs4_cb_getch(rfs4_cbinfo_t *cbp)
 580 {
 581         CLIENT *cbch = NULL;
 582         uint32_t zilch = 0;
 583 
 584         mutex_enter(cbp->cb_lock);
 585 
 586         if (cbp->cb_chc_free) {
 587                 cbp->cb_chc_free--;
 588                 cbch = cbp->cb_chc[ cbp->cb_chc_free ];
 589                 mutex_exit(cbp->cb_lock);
 590                 (void) CLNT_CONTROL(cbch, CLSET_XID, (char *)&zilch);
 591                 return (cbch);
 592         }
 593 
 594         mutex_exit(cbp->cb_lock);
 595 
 596         /* none free so make it now */
 597         cbch = rfs4_cbch_init(cbp);
 598 
 599         return (cbch);
 600 }
 601 
 602 /*
 603  * Return the client handle to the small cache or
 604  * destroy it.
 605  */
 606 static void
 607 rfs4_cb_freech(rfs4_cbinfo_t *cbp, CLIENT *ch, bool_t lockheld)
 608 {
 609         if (lockheld == FALSE)
 610                 mutex_enter(cbp->cb_lock);
 611 
 612         if (cbp->cb_chc_free < RFS4_CBCH_MAX) {
 613                 cbp->cb_chc[ cbp->cb_chc_free++ ] = ch;
 614                 if (lockheld == FALSE)
 615                         mutex_exit(cbp->cb_lock);
 616                 return;
 617         }
 618         if (lockheld == FALSE)
 619                 mutex_exit(cbp->cb_lock);
 620 
 621         /*
 622          * cache maxed out of free entries, obliterate
 623          * this client handle, destroy it, throw it away.
 624          */
 625         if (ch->cl_auth)
 626                 auth_destroy(ch->cl_auth);
 627         clnt_destroy(ch);
 628 }
 629 
 630 /*
 631  * With the supplied callback information - initialize the client
 632  * callback data.  If there is a callback in progress, save the
 633  * callback info so that a thread can pick it up in the future.
 634  */
 635 void
 636 rfs4_client_setcb(rfs4_client_t *cp, cb_client4 *cb, uint32_t cb_ident)
 637 {
 638         char *addr = NULL;
 639         char *netid = NULL;
 640         rfs4_cbinfo_t *cbp = &cp->rc_cbinfo;
 641         size_t len;
 642 
 643         /* Set the call back for the client */
 644         if (cb->cb_location.r_addr && cb->cb_location.r_addr[0] != '\0' &&
 645             cb->cb_location.r_netid && cb->cb_location.r_netid[0] != '\0') {
 646                 len = strlen(cb->cb_location.r_addr) + 1;
 647                 addr = kmem_alloc(len, KM_SLEEP);
 648                 bcopy(cb->cb_location.r_addr, addr, len);
 649                 len = strlen(cb->cb_location.r_netid) + 1;
 650                 netid = kmem_alloc(len, KM_SLEEP);
 651                 bcopy(cb->cb_location.r_netid, netid, len);
 652         }
 653         /* ready to save the new information but first free old, if exists */
 654         mutex_enter(cbp->cb_lock);
 655 
 656         cbp->cb_newer.cb_callback.cb_program = cb->cb_program;
 657 
 658         if (cbp->cb_newer.cb_callback.cb_location.r_addr != NULL)
 659                 kmem_free(cbp->cb_newer.cb_callback.cb_location.r_addr,
 660                     strlen(cbp->cb_newer.cb_callback.cb_location.r_addr) + 1);
 661         cbp->cb_newer.cb_callback.cb_location.r_addr = addr;
 662 
 663         if (cbp->cb_newer.cb_callback.cb_location.r_netid != NULL)
 664                 kmem_free(cbp->cb_newer.cb_callback.cb_location.r_netid,
 665                     strlen(cbp->cb_newer.cb_callback.cb_location.r_netid) + 1);
 666         cbp->cb_newer.cb_callback.cb_location.r_netid = netid;
 667 
 668         cbp->cb_newer.cb_ident = cb_ident;
 669 
 670         if (addr && *addr && netid && *netid) {
 671                 cbp->cb_newer.cb_new = TRUE;
 672                 cbp->cb_newer.cb_confirmed = FALSE;
 673         } else {
 674                 cbp->cb_newer.cb_new = FALSE;
 675                 cbp->cb_newer.cb_confirmed = FALSE;
 676         }
 677 
 678         mutex_exit(cbp->cb_lock);
 679 }
 680 
 681 /*
 682  * The server uses this when processing SETCLIENTID_CONFIRM.  Callback
 683  * information may have been provided on SETCLIENTID and this call
 684  * marks that information as confirmed and then starts a thread to
 685  * test the callback path.
 686  */
 687 void
 688 rfs4_deleg_cb_check(rfs4_client_t *cp)
 689 {
 690         if (cp->rc_cbinfo.cb_newer.cb_new == FALSE)
 691                 return;
 692 
 693         cp->rc_cbinfo.cb_newer.cb_confirmed = TRUE;
 694 
 695         rfs4_dbe_hold(cp->rc_dbe); /* hold the client struct for thread */
 696 
 697         (void) zthread_create(NULL, 0, rfs4_do_cb_null, cp, 0,
 698             minclsyspri);
 699 }
 700 
 701 static void
 702 rfs4args_cb_recall_free(nfs_cb_argop4 *argop)
 703 {
 704         CB_RECALL4args  *rec_argp;
 705 
 706         rec_argp = &argop->nfs_cb_argop4_u.opcbrecall;
 707         if (rec_argp->fh.nfs_fh4_val)
 708                 kmem_free(rec_argp->fh.nfs_fh4_val, rec_argp->fh.nfs_fh4_len);
 709 }
 710 
 711 /* ARGSUSED */
 712 static void
 713 rfs4args_cb_getattr_free(nfs_cb_argop4 *argop)
 714 {
 715         CB_GETATTR4args *argp;
 716 
 717         argp = &argop->nfs_cb_argop4_u.opcbgetattr;
 718         if (argp->fh.nfs_fh4_val)
 719                 kmem_free(argp->fh.nfs_fh4_val, argp->fh.nfs_fh4_len);
 720 }
 721 
 722 static void
 723 rfs4freeargres(CB_COMPOUND4args *args, CB_COMPOUND4res *resp)
 724 {
 725         int i, arglen;
 726         nfs_cb_argop4 *argop;
 727 
 728         /*
 729          * First free any special args alloc'd for specific ops.
 730          */
 731         arglen = args->array_len;
 732         argop = args->array;
 733         for (i = 0; i < arglen; i++, argop++) {
 734 
 735                 switch (argop->argop) {
 736                 case OP_CB_RECALL:
 737                         rfs4args_cb_recall_free(argop);
 738                         break;
 739 
 740                 case OP_CB_GETATTR:
 741                         rfs4args_cb_getattr_free(argop);
 742                         break;
 743 
 744                 default:
 745                         return;
 746                 }
 747         }
 748 
 749         if (args->tag.utf8string_len > 0)
 750                 UTF8STRING_FREE(args->tag)
 751 
 752         kmem_free(args->array, arglen * sizeof (nfs_cb_argop4));
 753         if (resp)
 754                 xdr_free(xdr_CB_COMPOUND4res, (caddr_t)resp);
 755 }
 756 
 757 /*
 758  * General callback routine for the server to the client.
 759  */
 760 static enum clnt_stat
 761 rfs4_do_callback(rfs4_client_t *cp, CB_COMPOUND4args *args,
 762     CB_COMPOUND4res *res, struct timeval timeout)
 763 {
 764         rfs4_cbinfo_t *cbp;
 765         CLIENT *ch;
 766         /* start with this in case cb_getch() fails */
 767         enum clnt_stat  stat = RPC_FAILED;
 768 
 769         res->tag.utf8string_val = NULL;
 770         res->array = NULL;
 771 
 772 retry:
 773         cbp = rfs4_cbinfo_hold(cp);
 774         if (cbp == NULL)
 775                 return (stat);
 776 
 777         /* get a client handle */
 778         if ((ch = rfs4_cb_getch(cbp)) != NULL) {
 779                 /*
 780                  * reset the cb_ident since it may have changed in
 781                  * rfs4_cbinfo_hold()
 782                  */
 783                 args->callback_ident = cbp->cb_ident;
 784 
 785                 stat = clnt_call(ch, CB_COMPOUND, xdr_CB_COMPOUND4args_srv,
 786                     (caddr_t)args, xdr_CB_COMPOUND4res,
 787                     (caddr_t)res, timeout);
 788 
 789                 /* free client handle */
 790                 rfs4_cb_freech(cbp, ch, FALSE);
 791         }
 792 
 793         /*
 794          * If the rele says that there may be new callback info then
 795          * retry this sequence and it may succeed as a result of the
 796          * new callback path
 797          */
 798         if (rfs4_cbinfo_rele(cbp,
 799             (stat == RPC_SUCCESS ? CB_NOCHANGE : CB_FAILED)) == TRUE)
 800                 goto retry;
 801 
 802         return (stat);
 803 }
 804 
 805 /*
 806  * Used by the NFSv4 server to get attributes for a file while
 807  * handling the case where a file has been write delegated.  For the
 808  * time being, VOP_GETATTR() is called and CB_GETATTR processing is
 809  * not undertaken.  This call site is maintained in case the server is
 810  * updated in the future to handle write delegation space guarantees.
 811  */
 812 nfsstat4
 813 rfs4_vop_getattr(vnode_t *vp, vattr_t *vap, int flag, cred_t *cr)
 814 {
 815 
 816         int error;
 817 
 818         error = VOP_GETATTR(vp, vap, flag, cr, NULL);
 819         return (puterrno4(error));
 820 }
 821 
 822 /*
 823  * This is used everywhere in the v2/v3 server to allow the
 824  * integration of all NFS versions and the support of delegation.  For
 825  * now, just call the VOP_GETATTR().  If the NFSv4 server is enhanced
 826  * in the future to provide space guarantees for write delegations
 827  * then this call site should be expanded to interact with the client.
 828  */
 829 int
 830 rfs4_delegated_getattr(vnode_t *vp, vattr_t *vap, int flag, cred_t *cr)
 831 {
 832         return (VOP_GETATTR(vp, vap, flag, cr, NULL));
 833 }
 834 
 835 /*
 836  * Place the actual cb_recall otw call to client.
 837  */
 838 static void
 839 rfs4_do_cb_recall(rfs4_deleg_state_t *dsp, bool_t trunc)
 840 {
 841         CB_COMPOUND4args        cb4_args;
 842         CB_COMPOUND4res         cb4_res;
 843         CB_RECALL4args          *rec_argp;
 844         CB_RECALL4res           *rec_resp;
 845         nfs_cb_argop4           *argop;
 846         int                     numops;
 847         int                     argoplist_size;
 848         struct timeval          timeout;
 849         nfs_fh4                 *fhp;
 850         enum clnt_stat          call_stat;
 851 
 852         /*
 853          * set up the compound args
 854          */
 855         numops = 1;     /* CB_RECALL only */
 856 
 857         argoplist_size = numops * sizeof (nfs_cb_argop4);
 858         argop = kmem_zalloc(argoplist_size, KM_SLEEP);
 859         argop->argop = OP_CB_RECALL;
 860         rec_argp = &argop->nfs_cb_argop4_u.opcbrecall;
 861 
 862         (void) str_to_utf8("cb_recall", &cb4_args.tag);
 863         cb4_args.minorversion = CB4_MINORVERSION;
 864         /* cb4_args.callback_ident is set in rfs4_do_callback() */
 865         cb4_args.array_len = numops;
 866         cb4_args.array = argop;
 867 
 868         /*
 869          * fill in the args struct
 870          */
 871         bcopy(&dsp->rds_delegid.stateid, &rec_argp->stateid, sizeof (stateid4));
 872         rec_argp->truncate = trunc;
 873 
 874         fhp = &dsp->rds_finfo->rf_filehandle;
 875         rec_argp->fh.nfs_fh4_val = kmem_alloc(sizeof (char) *
 876             fhp->nfs_fh4_len, KM_SLEEP);
 877         nfs_fh4_copy(fhp, &rec_argp->fh);
 878 
 879         /* Keep track of when we did this for observability */
 880         dsp->rds_time_recalled = gethrestime_sec();
 881 
 882         /*
 883          * Set up the timeout for the callback and make the actual call.
 884          * Timeout will be 80% of the lease period for this server.
 885          */
 886         timeout.tv_sec = (rfs4_lease_time * 80) / 100;
 887         timeout.tv_usec = 0;
 888 
 889         DTRACE_NFSV4_3(cb__recall__start, rfs4_client_t *, dsp->rds_client,
 890             rfs4_deleg_state_t *, dsp, CB_RECALL4args *, rec_argp);
 891 
 892         call_stat = rfs4_do_callback(dsp->rds_client, &cb4_args, &cb4_res,
 893             timeout);
 894 
 895         rec_resp = (cb4_res.array_len == 0) ? NULL :
 896             &cb4_res.array[0].nfs_cb_resop4_u.opcbrecall;
 897 
 898         DTRACE_NFSV4_3(cb__recall__done, rfs4_client_t *, dsp->rds_client,
 899             rfs4_deleg_state_t *, dsp, CB_RECALL4res *, rec_resp);
 900 
 901         if (call_stat != RPC_SUCCESS || cb4_res.status != NFS4_OK) {
 902                 rfs4_return_deleg(dsp, TRUE);
 903         }
 904 
 905         rfs4freeargres(&cb4_args, &cb4_res);
 906 }
 907 
 908 struct recall_arg {
 909         rfs4_deleg_state_t *dsp;
 910         void (*recall)(rfs4_deleg_state_t *, bool_t trunc);
 911         bool_t trunc;
 912 };
 913 
 914 static void
 915 do_recall(struct recall_arg *arg)
 916 {
 917         rfs4_deleg_state_t *dsp = arg->dsp;
 918         rfs4_file_t *fp = dsp->rds_finfo;
 919         callb_cpr_t cpr_info;
 920         kmutex_t cpr_lock;
 921 
 922         mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL);
 923         CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr, "nfsv4Recall");
 924 
 925         /*
 926          * It is possible that before this thread starts
 927          * the client has send us a return_delegation, and
 928          * if that is the case we do not need to send the
 929          * recall callback.
 930          */
 931         if (dsp->rds_dtype != OPEN_DELEGATE_NONE) {
 932                 DTRACE_PROBE3(nfss__i__recall,
 933                     struct recall_arg *, arg,
 934                     struct rfs4_deleg_state_t *, dsp,
 935                     struct rfs4_file_t *, fp);
 936 
 937                 if (arg->recall)
 938                         (void) (*arg->recall)(dsp, arg->trunc);
 939         }
 940 
 941         mutex_enter(fp->rf_dinfo.rd_recall_lock);
 942         /*
 943          * Recall count may go negative if the parent thread that is
 944          * creating the individual callback threads does not modify
 945          * the recall_count field before the callback thread actually
 946          * gets a response from the CB_RECALL
 947          */
 948         fp->rf_dinfo.rd_recall_count--;
 949         if (fp->rf_dinfo.rd_recall_count == 0)
 950                 cv_signal(fp->rf_dinfo.rd_recall_cv);
 951         mutex_exit(fp->rf_dinfo.rd_recall_lock);
 952 
 953         mutex_enter(&cpr_lock);
 954         CALLB_CPR_EXIT(&cpr_info);
 955         mutex_destroy(&cpr_lock);
 956 
 957         rfs4_deleg_state_rele(dsp); /* release the hold for this thread */
 958         kmem_free(arg, sizeof (struct recall_arg));
 959         zthread_exit();
 960 }
 961 
 962 struct master_recall_args {
 963     rfs4_file_t *fp;
 964     void (*recall)(rfs4_deleg_state_t *, bool_t);
 965     bool_t trunc;
 966 };
 967 
 968 static void
 969 do_recall_file(struct master_recall_args *map)
 970 {
 971         rfs4_file_t *fp = map->fp;
 972         rfs4_deleg_state_t *dsp;
 973         struct recall_arg *arg;
 974         callb_cpr_t cpr_info;
 975         kmutex_t cpr_lock;
 976         int32_t recall_count;
 977 
 978         rfs4_dbe_lock(fp->rf_dbe);
 979 
 980         /* Recall already in progress ? */
 981         mutex_enter(fp->rf_dinfo.rd_recall_lock);
 982         if (fp->rf_dinfo.rd_recall_count != 0) {
 983                 mutex_exit(fp->rf_dinfo.rd_recall_lock);
 984                 rfs4_dbe_rele_nolock(fp->rf_dbe);
 985                 rfs4_dbe_unlock(fp->rf_dbe);
 986                 kmem_free(map, sizeof (struct master_recall_args));
 987                 zthread_exit();
 988         }
 989 
 990         mutex_exit(fp->rf_dinfo.rd_recall_lock);
 991 
 992         mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL);
 993         CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr, "v4RecallFile");
 994 
 995         recall_count = 0;
 996         for (dsp = list_head(&fp->rf_delegstatelist); dsp != NULL;
 997             dsp = list_next(&fp->rf_delegstatelist, dsp)) {
 998 
 999                 rfs4_dbe_lock(dsp->rds_dbe);
1000                 /*
1001                  * if this delegation state
1002                  * is being reaped skip it
1003                  */
1004                 if (rfs4_dbe_is_invalid(dsp->rds_dbe)) {
1005                         rfs4_dbe_unlock(dsp->rds_dbe);
1006                         continue;
1007                 }
1008 
1009                 /* hold for receiving thread */
1010                 rfs4_dbe_hold(dsp->rds_dbe);
1011                 rfs4_dbe_unlock(dsp->rds_dbe);
1012 
1013                 arg = kmem_alloc(sizeof (struct recall_arg), KM_SLEEP);
1014                 arg->recall = map->recall;
1015                 arg->trunc = map->trunc;
1016                 arg->dsp = dsp;
1017 
1018                 recall_count++;
1019 
1020                 (void) zthread_create(NULL, 0, do_recall, arg, 0,
1021                     minclsyspri);
1022         }
1023 
1024         rfs4_dbe_unlock(fp->rf_dbe);
1025 
1026         mutex_enter(fp->rf_dinfo.rd_recall_lock);
1027         /*
1028          * Recall count may go negative if the parent thread that is
1029          * creating the individual callback threads does not modify
1030          * the recall_count field before the callback thread actually
1031          * gets a response from the CB_RECALL
1032          */
1033         fp->rf_dinfo.rd_recall_count += recall_count;
1034         while (fp->rf_dinfo.rd_recall_count)
1035                 cv_wait(fp->rf_dinfo.rd_recall_cv, fp->rf_dinfo.rd_recall_lock);
1036 
1037         mutex_exit(fp->rf_dinfo.rd_recall_lock);
1038 
1039         DTRACE_PROBE1(nfss__i__recall_done, rfs4_file_t *, fp);
1040         rfs4_file_rele(fp);
1041         kmem_free(map, sizeof (struct master_recall_args));
1042         mutex_enter(&cpr_lock);
1043         CALLB_CPR_EXIT(&cpr_info);
1044         mutex_destroy(&cpr_lock);
1045         zthread_exit();
1046 }
1047 
1048 static void
1049 rfs4_recall_file(rfs4_file_t *fp,
1050     void (*recall)(rfs4_deleg_state_t *, bool_t trunc),
1051     bool_t trunc, rfs4_client_t *cp)
1052 {
1053         struct master_recall_args *args;
1054 
1055         rfs4_dbe_lock(fp->rf_dbe);
1056         if (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
1057                 rfs4_dbe_unlock(fp->rf_dbe);
1058                 return;
1059         }
1060         rfs4_dbe_hold(fp->rf_dbe);   /* hold for new thread */
1061 
1062         /*
1063          * Mark the time we started the recall processing.
1064          * If it has been previously recalled, do not reset the
1065          * timer since this is used for the revocation decision.
1066          */
1067         if (fp->rf_dinfo.rd_time_recalled == 0)
1068                 fp->rf_dinfo.rd_time_recalled = gethrestime_sec();
1069         fp->rf_dinfo.rd_ever_recalled = TRUE; /* used for policy decision */
1070         /* Client causing recall not always available */
1071         if (cp)
1072                 fp->rf_dinfo.rd_conflicted_client = cp->rc_clientid;
1073 
1074         rfs4_dbe_unlock(fp->rf_dbe);
1075 
1076         args = kmem_alloc(sizeof (struct master_recall_args), KM_SLEEP);
1077         args->fp = fp;
1078         args->recall = recall;
1079         args->trunc = trunc;
1080 
1081         (void) zthread_create(NULL, 0, do_recall_file, args, 0,
1082             minclsyspri);
1083 }
1084 
1085 void
1086 rfs4_recall_deleg(rfs4_file_t *fp, bool_t trunc, rfs4_client_t *cp)
1087 {
1088         time_t elapsed1, elapsed2;
1089 
1090         if (fp->rf_dinfo.rd_time_recalled != 0) {
1091                 elapsed1 = gethrestime_sec() - fp->rf_dinfo.rd_time_recalled;
1092                 elapsed2 = gethrestime_sec() - fp->rf_dinfo.rd_time_lastwrite;
1093                 /* First check to see if a revocation should occur */
1094                 if (elapsed1 > rfs4_lease_time &&
1095                     elapsed2 > rfs4_lease_time) {
1096                         rfs4_revoke_file(fp);
1097                         return;
1098                 }
1099                 /*
1100                  * Next check to see if a recall should be done again
1101                  * so quickly.
1102                  */
1103                 if (elapsed1 <= ((rfs4_lease_time * 20) / 100))
1104                         return;
1105         }
1106         rfs4_recall_file(fp, rfs4_do_cb_recall, trunc, cp);
1107 }
1108 
1109 /*
1110  * rfs4_check_recall is called from rfs4_do_open to determine if the current
1111  * open conflicts with the delegation.
1112  * Return true if we need recall otherwise false.
1113  * Assumes entry locks for sp and sp->rs_finfo are held.
1114  */
1115 bool_t
1116 rfs4_check_recall(rfs4_state_t *sp, uint32_t access)
1117 {
1118         open_delegation_type4 dtype = sp->rs_finfo->rf_dinfo.rd_dtype;
1119 
1120         switch (dtype) {
1121         case OPEN_DELEGATE_NONE:
1122                 /* Not currently delegated so there is nothing to do */
1123                 return (FALSE);
1124         case OPEN_DELEGATE_READ:
1125                 /*
1126                  * If the access is only asking for READ then there is
1127                  * no conflict and nothing to do.  If it is asking
1128                  * for write, then there will be conflict and the read
1129                  * delegation should be recalled.
1130                  */
1131                 if (access == OPEN4_SHARE_ACCESS_READ)
1132                         return (FALSE);
1133                 else
1134                         return (TRUE);
1135         case OPEN_DELEGATE_WRITE:
1136                 /* Check to see if this client has the delegation */
1137                 return (rfs4_is_deleg(sp));
1138         }
1139 
1140         return (FALSE);
1141 }
1142 
1143 /*
1144  * Return the "best" allowable delegation available given the current
1145  * delegation type and the desired access and deny modes on the file.
1146  * At the point that this routine is called we know that the access and
1147  * deny modes are consistent with the file modes.
1148  */
1149 static open_delegation_type4
1150 rfs4_check_delegation(rfs4_state_t *sp, rfs4_file_t *fp)
1151 {
1152         open_delegation_type4 dtype = fp->rf_dinfo.rd_dtype;
1153         uint32_t access = sp->rs_share_access;
1154         uint32_t deny = sp->rs_share_deny;
1155         int readcnt = 0;
1156         int writecnt = 0;
1157 
1158         switch (dtype) {
1159         case OPEN_DELEGATE_NONE:
1160                 /*
1161                  * Determine if more than just this OPEN have the file
1162                  * open and if so, no delegation may be provided to
1163                  * the client.
1164                  */
1165                 if (access & OPEN4_SHARE_ACCESS_WRITE)
1166                         writecnt++;
1167                 if (access & OPEN4_SHARE_ACCESS_READ)
1168                         readcnt++;
1169 
1170                 if (fp->rf_access_read > readcnt ||
1171                     fp->rf_access_write > writecnt)
1172                         return (OPEN_DELEGATE_NONE);
1173 
1174                 /*
1175                  * If the client is going to write, or if the client
1176                  * has exclusive access, return a write delegation.
1177                  */
1178                 if ((access & OPEN4_SHARE_ACCESS_WRITE) ||
1179                     (deny & (OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE)))
1180                         return (OPEN_DELEGATE_WRITE);
1181                 /*
1182                  * If we don't want to write or we've haven't denied read
1183                  * access to others, return a read delegation.
1184                  */
1185                 if ((access & ~OPEN4_SHARE_ACCESS_WRITE) ||
1186                     (deny & ~OPEN4_SHARE_DENY_READ))
1187                         return (OPEN_DELEGATE_READ);
1188 
1189                 /* Shouldn't get here */
1190                 return (OPEN_DELEGATE_NONE);
1191 
1192         case OPEN_DELEGATE_READ:
1193                 /*
1194                  * If the file is delegated for read but we wan't to
1195                  * write or deny others to read then we can't delegate
1196                  * the file. We shouldn't get here since the delegation should
1197                  * have been recalled already.
1198                  */
1199                 if ((access & OPEN4_SHARE_ACCESS_WRITE) ||
1200                     (deny & OPEN4_SHARE_DENY_READ))
1201                         return (OPEN_DELEGATE_NONE);
1202                 return (OPEN_DELEGATE_READ);
1203 
1204         case OPEN_DELEGATE_WRITE:
1205                 return (OPEN_DELEGATE_WRITE);
1206         }
1207 
1208         /* Shouldn't get here */
1209         return (OPEN_DELEGATE_NONE);
1210 }
1211 
1212 /*
1213  * Given the desired delegation type and the "history" of the file
1214  * determine the actual delegation type to return.
1215  */
1216 static open_delegation_type4
1217 rfs4_delegation_policy(nfs4_srv_t *nsrv4, open_delegation_type4 dtype,
1218     rfs4_dinfo_t *dinfo, clientid4 cid)
1219 {
1220         time_t elapsed;
1221 
1222         if (nsrv4->nfs4_deleg_policy != SRV_NORMAL_DELEGATE)
1223                 return (OPEN_DELEGATE_NONE);
1224 
1225         /*
1226          * Has this file/delegation ever been recalled?  If not then
1227          * no further checks for a delegation race need to be done.
1228          * However if a recall has occurred, then check to see if a
1229          * client has caused its own delegation recall to occur.  If
1230          * not, then has a delegation for this file been returned
1231          * recently?  If so, then do not assign a new delegation to
1232          * avoid a "delegation race" between the original client and
1233          * the new/conflicting client.
1234          */
1235         if (dinfo->rd_ever_recalled == TRUE) {
1236                 if (dinfo->rd_conflicted_client != cid) {
1237                         elapsed = gethrestime_sec() - dinfo->rd_time_returned;
1238                         if (elapsed < rfs4_lease_time)
1239                                 return (OPEN_DELEGATE_NONE);
1240                 }
1241         }
1242 
1243         /* Limit the number of read grants */
1244         if (dtype == OPEN_DELEGATE_READ &&
1245             dinfo->rd_rdgrants > MAX_READ_DELEGATIONS)
1246                 return (OPEN_DELEGATE_NONE);
1247 
1248         /*
1249          * Should consider limiting total number of read/write
1250          * delegations the server will permit.
1251          */
1252 
1253         return (dtype);
1254 }
1255 
1256 /*
1257  * Try and grant a delegation for an open give the state. The routine
1258  * returns the delegation type granted. This could be OPEN_DELEGATE_NONE.
1259  *
1260  * The state and associate file entry must be locked
1261  */
1262 rfs4_deleg_state_t *
1263 rfs4_grant_delegation(delegreq_t dreq, rfs4_state_t *sp, int *recall)
1264 {
1265         nfs4_srv_t *nsrv4;
1266         rfs4_file_t *fp = sp->rs_finfo;
1267         open_delegation_type4 dtype;
1268         int no_delegation;
1269 
1270         ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
1271         ASSERT(rfs4_dbe_islocked(fp->rf_dbe));
1272 
1273         nsrv4 = nfs4_get_srv();
1274 
1275         /* Is the server even providing delegations? */
1276         if (nsrv4->nfs4_deleg_policy == SRV_NEVER_DELEGATE || dreq == DELEG_NONE)
1277                 return (NULL);
1278 
1279         /* Check to see if delegations have been temporarily disabled */
1280         mutex_enter(&nsrv4->deleg_lock);
1281         no_delegation = rfs4_deleg_disabled;
1282         mutex_exit(&nsrv4->deleg_lock);
1283 
1284         if (no_delegation)
1285                 return (NULL);
1286 
1287         /* Don't grant a delegation if a deletion is impending. */
1288         if (fp->rf_dinfo.rd_hold_grant > 0) {
1289                 return (NULL);
1290         }
1291 
1292         /*
1293          * Don't grant a delegation if there are any lock manager
1294          * (NFSv2/v3) locks for the file.  This is a bit of a hack (e.g.,
1295          * if there are only read locks we should be able to grant a
1296          * read-only delegation), but it's good enough for now.
1297          *
1298          * MT safety: the lock manager checks for conflicting delegations
1299          * before processing a lock request.  That check will block until
1300          * we are done here.  So if the lock manager acquires a lock after
1301          * we decide to grant the delegation, the delegation will get
1302          * immediately recalled (if there's a conflict), so we're safe.
1303          */
1304         if (lm_vp_active(fp->rf_vp)) {
1305                 return (NULL);
1306         }
1307 
1308         /*
1309          * Based on the type of delegation request passed in, take the
1310          * appropriate action (DELEG_NONE is handled above)
1311          */
1312         switch (dreq) {
1313 
1314         case DELEG_READ:
1315         case DELEG_WRITE:
1316                 /*
1317                  * The server "must" grant the delegation in this case.
1318                  * Client is using open previous
1319                  */
1320                 dtype = (open_delegation_type4)dreq;
1321                 *recall = 1;
1322                 break;
1323         case DELEG_ANY:
1324                 /*
1325                  * If a valid callback path does not exist, no delegation may
1326                  * be granted.
1327                  */
1328                 if (sp->rs_owner->ro_client->rc_cbinfo.cb_state != CB_OK)
1329                         return (NULL);
1330 
1331                 /*
1332                  * If the original operation which caused time_rm_delayed
1333                  * to be set hasn't been retried and completed for one
1334                  * full lease period, clear it and allow delegations to
1335                  * get granted again.
1336                  */
1337                 if (fp->rf_dinfo.rd_time_rm_delayed > 0 &&
1338                     gethrestime_sec() >
1339                     fp->rf_dinfo.rd_time_rm_delayed + rfs4_lease_time)
1340                         fp->rf_dinfo.rd_time_rm_delayed = 0;
1341 
1342                 /*
1343                  * If we are waiting for a delegation to be returned then
1344                  * don't delegate this file. We do this for correctness as
1345                  * well as if the file is being recalled we would likely
1346                  * recall this file again.
1347                  */
1348 
1349                 if (fp->rf_dinfo.rd_time_recalled != 0 ||
1350                     fp->rf_dinfo.rd_time_rm_delayed != 0)
1351                         return (NULL);
1352 
1353                 /* Get the "best" delegation candidate */
1354                 dtype = rfs4_check_delegation(sp, fp);
1355 
1356                 if (dtype == OPEN_DELEGATE_NONE)
1357                         return (NULL);
1358 
1359                 /*
1360                  * Based on policy and the history of the file get the
1361                  * actual delegation.
1362                  */
1363                 dtype = rfs4_delegation_policy(nsrv4, dtype, &fp->rf_dinfo,
1364                     sp->rs_owner->ro_client->rc_clientid);
1365 
1366                 if (dtype == OPEN_DELEGATE_NONE)
1367                         return (NULL);
1368                 break;
1369         default:
1370                 return (NULL);
1371         }
1372 
1373         /* set the delegation for the state */
1374         return (rfs4_deleg_state(sp, dtype, recall));
1375 }
1376 
1377 void
1378 rfs4_set_deleg_response(rfs4_deleg_state_t *dsp, open_delegation4 *dp,
1379     nfsace4 *ace,  int recall)
1380 {
1381         open_write_delegation4 *wp;
1382         open_read_delegation4 *rp;
1383         nfs_space_limit4 *spl;
1384         nfsace4 nace;
1385 
1386         /*
1387          * We need to allocate a new copy of the who string.
1388          * this string will be freed by the rfs4_op_open dis_resfree
1389          * routine. We need to do this allocation since replays will
1390          * be allocated and rfs4_compound can't tell the difference from
1391          * a replay and an inital open. N.B. if an ace is passed in, it
1392          * the caller's responsibility to free it.
1393          */
1394 
1395         if (ace == NULL) {
1396                 /*
1397                  * Default is to deny all access, the client will have
1398                  * to contact the server.  XXX Do we want to actually
1399                  * set a deny for every one, or do we simply want to
1400                  * construct an entity that will match no one?
1401                  */
1402                 nace.type = ACE4_ACCESS_DENIED_ACE_TYPE;
1403                 nace.flag = 0;
1404                 nace.access_mask = ACE4_VALID_MASK_BITS;
1405                 (void) str_to_utf8(ACE4_WHO_EVERYONE, &nace.who);
1406         } else {
1407                 nace.type = ace->type;
1408                 nace.flag = ace->flag;
1409                 nace.access_mask = ace->access_mask;
1410                 (void) utf8_copy(&ace->who, &nace.who);
1411         }
1412 
1413         dp->delegation_type = dsp->rds_dtype;
1414 
1415         switch (dsp->rds_dtype) {
1416         case OPEN_DELEGATE_NONE:
1417                 break;
1418         case OPEN_DELEGATE_READ:
1419                 rp = &dp->open_delegation4_u.read;
1420                 rp->stateid = dsp->rds_delegid.stateid;
1421                 rp->recall = (bool_t)recall;
1422                 rp->permissions = nace;
1423                 break;
1424         case OPEN_DELEGATE_WRITE:
1425                 wp = &dp->open_delegation4_u.write;
1426                 wp->stateid = dsp->rds_delegid.stateid;
1427                 wp->recall = (bool_t)recall;
1428                 spl = &wp->space_limit;
1429                 spl->limitby = NFS_LIMIT_SIZE;
1430                 spl->nfs_space_limit4_u.filesize = 0;
1431                 wp->permissions = nace;
1432                 break;
1433         }
1434 }
1435 
1436 /*
1437  * Check if the file is delegated via the provided file struct.
1438  * Return TRUE if it is delegated.  This is intended for use by
1439  * the v4 server.  The v2/v3 server code should use rfs4_check_delegated().
1440  *
1441  * Note that if the file is found to have a delegation, it is
1442  * recalled, unless the clientid of the caller matches the clientid of the
1443  * delegation. If the caller has specified, there is a slight delay
1444  * inserted in the hopes that the delegation will be returned quickly.
1445  */
1446 bool_t
1447 rfs4_check_delegated_byfp(int mode, rfs4_file_t *fp,
1448     bool_t trunc, bool_t do_delay, bool_t is_rm, clientid4 *cp)
1449 {
1450         rfs4_deleg_state_t *dsp;
1451 
1452         nfs4_srv_t *nsrv4 = nfs4_get_srv();
1453 
1454         /* Is delegation enabled? */
1455         if (nsrv4->nfs4_deleg_policy == SRV_NEVER_DELEGATE)
1456                 return (FALSE);
1457 
1458         /* do we have a delegation on this file? */
1459         rfs4_dbe_lock(fp->rf_dbe);
1460         if (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
1461                 if (is_rm)
1462                         fp->rf_dinfo.rd_hold_grant++;
1463                 rfs4_dbe_unlock(fp->rf_dbe);
1464                 return (FALSE);
1465         }
1466         /*
1467          * do we have a write delegation on this file or are we
1468          * requesting write access to a file with any type of existing
1469          * delegation?
1470          */
1471         if (mode == FWRITE || fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE) {
1472                 if (cp != NULL) {
1473                         dsp = list_head(&fp->rf_delegstatelist);
1474                         if (dsp == NULL) {
1475                                 rfs4_dbe_unlock(fp->rf_dbe);
1476                                 return (FALSE);
1477                         }
1478                         /*
1479                          * Does the requestor already own the delegation?
1480                          */
1481                         if (dsp->rds_client->rc_clientid == *(cp)) {
1482                                 rfs4_dbe_unlock(fp->rf_dbe);
1483                                 return (FALSE);
1484                         }
1485                 }
1486 
1487                 rfs4_dbe_unlock(fp->rf_dbe);
1488                 rfs4_recall_deleg(fp, trunc, NULL);
1489 
1490                 if (!do_delay) {
1491                         rfs4_dbe_lock(fp->rf_dbe);
1492                         fp->rf_dinfo.rd_time_rm_delayed = gethrestime_sec();
1493                         rfs4_dbe_unlock(fp->rf_dbe);
1494                         return (TRUE);
1495                 }
1496 
1497                 delay(NFS4_DELEGATION_CONFLICT_DELAY);
1498 
1499                 rfs4_dbe_lock(fp->rf_dbe);
1500                 if (fp->rf_dinfo.rd_dtype != OPEN_DELEGATE_NONE) {
1501                         fp->rf_dinfo.rd_time_rm_delayed = gethrestime_sec();
1502                         rfs4_dbe_unlock(fp->rf_dbe);
1503                         return (TRUE);
1504                 }
1505         }
1506         if (is_rm)
1507                 fp->rf_dinfo.rd_hold_grant++;
1508         rfs4_dbe_unlock(fp->rf_dbe);
1509         return (FALSE);
1510 }
1511 
1512 /*
1513  * Check if the file is delegated in the case of a v2 or v3 access.
1514  * Return TRUE if it is delegated which in turn means that v2 should
1515  * drop the request and in the case of v3 JUKEBOX should be returned.
1516  */
1517 bool_t
1518 rfs4_check_delegated(int mode, vnode_t *vp, bool_t trunc)
1519 {
1520         nfs4_srv_t *nsrv4;
1521         rfs4_file_t *fp;
1522         bool_t create = FALSE;
1523         bool_t rc = FALSE;
1524 
1525         nsrv4 = nfs4_get_srv();
1526         rfs4_hold_deleg_policy(nsrv4);
1527 
1528         /* Is delegation enabled? */
1529         if (nsrv4->nfs4_deleg_policy != SRV_NEVER_DELEGATE) {
1530                 fp = rfs4_findfile(vp, NULL, &create);
1531                 if (fp != NULL) {
1532                         if (rfs4_check_delegated_byfp(mode, fp, trunc,
1533                             TRUE, FALSE, NULL)) {
1534                                 rc = TRUE;
1535                         }
1536                         rfs4_file_rele(fp);
1537                 }
1538         }
1539         rfs4_rele_deleg_policy(nsrv4);
1540         return (rc);
1541 }
1542 
1543 /*
1544  * Release a hold on the hold_grant counter which
1545  * prevents delegation from being granted while a remove
1546  * or a rename is in progress.
1547  */
1548 void
1549 rfs4_clear_dont_grant(rfs4_file_t *fp)
1550 {
1551         nfs4_srv_t *nsrv4 = nfs4_get_srv();
1552 
1553         if (nsrv4->nfs4_deleg_policy == SRV_NEVER_DELEGATE)
1554                 return;
1555         rfs4_dbe_lock(fp->rf_dbe);
1556         ASSERT(fp->rf_dinfo.rd_hold_grant > 0);
1557         fp->rf_dinfo.rd_hold_grant--;
1558         fp->rf_dinfo.rd_time_rm_delayed = 0;
1559         rfs4_dbe_unlock(fp->rf_dbe);
1560 }
1561 
1562 /*
1563  * State support for delegation.
1564  * Set the state delegation type for this state;
1565  * This routine is called from open via rfs4_grant_delegation and the entry
1566  * locks on sp and sp->rs_finfo are assumed.
1567  */
1568 static rfs4_deleg_state_t *
1569 rfs4_deleg_state(rfs4_state_t *sp, open_delegation_type4 dtype, int *recall)
1570 {
1571         rfs4_file_t *fp = sp->rs_finfo;
1572         bool_t create = TRUE;
1573         rfs4_deleg_state_t *dsp;
1574         vnode_t *vp;
1575         int open_prev = *recall;
1576         int ret;
1577         int fflags = 0;
1578 
1579         ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
1580         ASSERT(rfs4_dbe_islocked(fp->rf_dbe));
1581 
1582         /* Shouldn't happen */
1583         if (fp->rf_dinfo.rd_recall_count != 0 ||
1584             (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_READ &&
1585             dtype != OPEN_DELEGATE_READ)) {
1586                 return (NULL);
1587         }
1588 
1589         /* Unlock to avoid deadlock */
1590         rfs4_dbe_unlock(fp->rf_dbe);
1591         rfs4_dbe_unlock(sp->rs_dbe);
1592 
1593         dsp = rfs4_finddeleg(sp, &create);
1594 
1595         rfs4_dbe_lock(sp->rs_dbe);
1596         rfs4_dbe_lock(fp->rf_dbe);
1597 
1598         if (dsp == NULL)
1599                 return (NULL);
1600 
1601         /*
1602          * It is possible that since we dropped the lock
1603          * in order to call finddeleg, the rfs4_file_t
1604          * was marked such that we should not grant a
1605          * delegation, if so bail out.
1606          */
1607         if (fp->rf_dinfo.rd_hold_grant > 0) {
1608                 rfs4_deleg_state_rele(dsp);
1609                 return (NULL);
1610         }
1611 
1612         if (create == FALSE) {
1613                 if (sp->rs_owner->ro_client == dsp->rds_client &&
1614                     dsp->rds_dtype == dtype) {
1615                         return (dsp);
1616                 } else {
1617                         rfs4_deleg_state_rele(dsp);
1618                         return (NULL);
1619                 }
1620         }
1621 
1622         /*
1623          * Check that this file has not been delegated to another
1624          * client
1625          */
1626         if (fp->rf_dinfo.rd_recall_count != 0 ||
1627             fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE ||
1628             (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_READ &&
1629             dtype != OPEN_DELEGATE_READ)) {
1630                 rfs4_deleg_state_rele(dsp);
1631                 return (NULL);
1632         }
1633 
1634         vp = fp->rf_vp;
1635         /* vnevent_support returns 0 if file system supports vnevents */
1636         if (vnevent_support(vp, NULL)) {
1637                 rfs4_deleg_state_rele(dsp);
1638                 return (NULL);
1639         }
1640 
1641         /* Calculate the fflags for this OPEN. */
1642         if (sp->rs_share_access & OPEN4_SHARE_ACCESS_READ)
1643                 fflags |= FREAD;
1644         if (sp->rs_share_access & OPEN4_SHARE_ACCESS_WRITE)
1645                 fflags |= FWRITE;
1646 
1647         *recall = 0;
1648         /*
1649          * Before granting a delegation we need to know if anyone else has
1650          * opened the file in a conflicting mode.  However, first we need to
1651          * know how we opened the file to check the counts properly.
1652          */
1653         if (dtype == OPEN_DELEGATE_READ) {
1654                 if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) ||
1655                     (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) ||
1656                     vn_is_mapped(vp, V_WRITE)) {
1657                         if (open_prev) {
1658                                 *recall = 1;
1659                         } else {
1660                                 rfs4_deleg_state_rele(dsp);
1661                                 return (NULL);
1662                         }
1663                 }
1664                 ret = fem_install(vp, deleg_rdops, (void *)fp, OPUNIQ,
1665                     rfs4_mon_hold, rfs4_mon_rele);
1666                 if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) ||
1667                     (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) ||
1668                     vn_is_mapped(vp, V_WRITE)) {
1669                         if (open_prev) {
1670                                 *recall = 1;
1671                         } else {
1672                                 (void) fem_uninstall(vp, deleg_rdops,
1673                                     (void *)fp);
1674                                 rfs4_deleg_state_rele(dsp);
1675                                 return (NULL);
1676                         }
1677                 }
1678                 /*
1679                  * Because a client can hold onto a delegation after the
1680                  * file has been closed, we need to keep track of the
1681                  * access to this file.  Otherwise the CIFS server would
1682                  * not know about the client accessing the file and could
1683                  * inappropriately grant an OPLOCK.
1684                  * fem_install() returns EBUSY when asked to install a
1685                  * OPUNIQ monitor more than once.  Therefore, check the
1686                  * return code because we only want this done once.
1687                  */
1688                 if (ret == 0)
1689                         vn_open_upgrade(vp, FREAD);
1690         } else { /* WRITE */
1691                 if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) ||
1692                     (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) ||
1693                     ((fflags & FREAD) && vn_has_other_opens(vp, V_READ)) ||
1694                     (((fflags & FREAD) == 0) && vn_is_opened(vp, V_READ)) ||
1695                     vn_is_mapped(vp, V_RDORWR)) {
1696                         if (open_prev) {
1697                                 *recall = 1;
1698                         } else {
1699                                 rfs4_deleg_state_rele(dsp);
1700                                 return (NULL);
1701                         }
1702                 }
1703                 ret = fem_install(vp, deleg_wrops, (void *)fp, OPUNIQ,
1704                     rfs4_mon_hold, rfs4_mon_rele);
1705                 if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) ||
1706                     (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) ||
1707                     ((fflags & FREAD) && vn_has_other_opens(vp, V_READ)) ||
1708                     (((fflags & FREAD) == 0) && vn_is_opened(vp, V_READ)) ||
1709                     vn_is_mapped(vp, V_RDORWR)) {
1710                         if (open_prev) {
1711                                 *recall = 1;
1712                         } else {
1713                                 (void) fem_uninstall(vp, deleg_wrops,
1714                                     (void *)fp);
1715                                 rfs4_deleg_state_rele(dsp);
1716                                 return (NULL);
1717                         }
1718                 }
1719                 /*
1720                  * Because a client can hold onto a delegation after the
1721                  * file has been closed, we need to keep track of the
1722                  * access to this file.  Otherwise the CIFS server would
1723                  * not know about the client accessing the file and could
1724                  * inappropriately grant an OPLOCK.
1725                  * fem_install() returns EBUSY when asked to install a
1726                  * OPUNIQ monitor more than once.  Therefore, check the
1727                  * return code because we only want this done once.
1728                  */
1729                 if (ret == 0)
1730                         vn_open_upgrade(vp, FREAD|FWRITE);
1731         }
1732         /* Place on delegation list for file */
1733         ASSERT(!list_link_active(&dsp->rds_node));
1734         list_insert_tail(&fp->rf_delegstatelist, dsp);
1735 
1736         dsp->rds_dtype = fp->rf_dinfo.rd_dtype = dtype;
1737 
1738         /* Update delegation stats for this file */
1739         fp->rf_dinfo.rd_time_lastgrant = gethrestime_sec();
1740 
1741         /* reset since this is a new delegation */
1742         fp->rf_dinfo.rd_conflicted_client = 0;
1743         fp->rf_dinfo.rd_ever_recalled = FALSE;
1744 
1745         if (dtype == OPEN_DELEGATE_READ)
1746                 fp->rf_dinfo.rd_rdgrants++;
1747         else
1748                 fp->rf_dinfo.rd_wrgrants++;
1749 
1750         return (dsp);
1751 }
1752 
1753 /*
1754  * State routine for the server when a delegation is returned.
1755  */
1756 void
1757 rfs4_return_deleg(rfs4_deleg_state_t *dsp, bool_t revoked)
1758 {
1759         rfs4_file_t *fp = dsp->rds_finfo;
1760         open_delegation_type4 dtypewas;
1761 
1762         rfs4_dbe_lock(fp->rf_dbe);
1763 
1764         /* nothing to do if no longer on list */
1765         if (!list_link_active(&dsp->rds_node)) {
1766                 rfs4_dbe_unlock(fp->rf_dbe);
1767                 return;
1768         }
1769 
1770         /* Remove state from recall list */
1771         list_remove(&fp->rf_delegstatelist, dsp);
1772 
1773         if (list_is_empty(&fp->rf_delegstatelist)) {
1774                 dtypewas = fp->rf_dinfo.rd_dtype;
1775                 fp->rf_dinfo.rd_dtype = OPEN_DELEGATE_NONE;
1776                 rfs4_dbe_cv_broadcast(fp->rf_dbe);
1777 
1778                 /* if file system was unshared, the vp will be NULL */
1779                 if (fp->rf_vp != NULL) {
1780                         /*
1781                          * Once a delegation is no longer held by any client,
1782                          * the monitor is uninstalled.  At this point, the
1783                          * client must send OPEN otw, so we don't need the
1784                          * reference on the vnode anymore.  The open
1785                          * downgrade removes the reference put on earlier.
1786                          */
1787                         if (dtypewas == OPEN_DELEGATE_READ) {
1788                                 (void) fem_uninstall(fp->rf_vp, deleg_rdops,
1789                                     (void *)fp);
1790                                 vn_open_downgrade(fp->rf_vp, FREAD);
1791                         } else if (dtypewas == OPEN_DELEGATE_WRITE) {
1792                                 (void) fem_uninstall(fp->rf_vp, deleg_wrops,
1793                                     (void *)fp);
1794                                 vn_open_downgrade(fp->rf_vp, FREAD|FWRITE);
1795                         }
1796                 }
1797         }
1798 
1799         switch (dsp->rds_dtype) {
1800         case OPEN_DELEGATE_READ:
1801                 fp->rf_dinfo.rd_rdgrants--;
1802                 break;
1803         case OPEN_DELEGATE_WRITE:
1804                 fp->rf_dinfo.rd_wrgrants--;
1805                 break;
1806         default:
1807                 break;
1808         }
1809 
1810         /* used in the policy decision */
1811         fp->rf_dinfo.rd_time_returned = gethrestime_sec();
1812 
1813         /*
1814          * reset the time_recalled field so future delegations are not
1815          * accidentally revoked
1816          */
1817         if ((fp->rf_dinfo.rd_rdgrants + fp->rf_dinfo.rd_wrgrants) == 0)
1818                 fp->rf_dinfo.rd_time_recalled = 0;
1819 
1820         rfs4_dbe_unlock(fp->rf_dbe);
1821 
1822         rfs4_dbe_lock(dsp->rds_dbe);
1823 
1824         dsp->rds_dtype = OPEN_DELEGATE_NONE;
1825 
1826         if (revoked == TRUE)
1827                 dsp->rds_time_revoked = gethrestime_sec();
1828 
1829         rfs4_dbe_invalidate(dsp->rds_dbe);
1830 
1831         rfs4_dbe_unlock(dsp->rds_dbe);
1832 
1833         if (revoked == TRUE) {
1834                 rfs4_dbe_lock(dsp->rds_client->rc_dbe);
1835                 dsp->rds_client->rc_deleg_revoked++;      /* observability */
1836                 rfs4_dbe_unlock(dsp->rds_client->rc_dbe);
1837         }
1838 }
1839 
1840 static void
1841 rfs4_revoke_file(rfs4_file_t *fp)
1842 {
1843         rfs4_deleg_state_t *dsp;
1844 
1845         /*
1846          * The lock for rfs4_file_t must be held when traversing the
1847          * delegation list but that lock needs to be released to call
1848          * rfs4_return_deleg()
1849          */
1850         rfs4_dbe_lock(fp->rf_dbe);
1851         while (dsp = list_head(&fp->rf_delegstatelist)) {
1852                 rfs4_dbe_hold(dsp->rds_dbe);
1853                 rfs4_dbe_unlock(fp->rf_dbe);
1854                 rfs4_return_deleg(dsp, TRUE);
1855                 rfs4_deleg_state_rele(dsp);
1856                 rfs4_dbe_lock(fp->rf_dbe);
1857         }
1858         rfs4_dbe_unlock(fp->rf_dbe);
1859 }
1860 
1861 /*
1862  * A delegation is assumed to be present on the file associated with
1863  * "sp".  Check to see if the delegation matches is associated with
1864  * the same client as referenced by "sp".  If it is not, TRUE is
1865  * returned.  If the delegation DOES match the client (or no
1866  * delegation is present), return FALSE.
1867  * Assume the state entry and file entry are locked.
1868  */
1869 bool_t
1870 rfs4_is_deleg(rfs4_state_t *sp)
1871 {
1872         rfs4_deleg_state_t *dsp;
1873         rfs4_file_t *fp = sp->rs_finfo;
1874         rfs4_client_t *cp = sp->rs_owner->ro_client;
1875 
1876         ASSERT(rfs4_dbe_islocked(fp->rf_dbe));
1877         for (dsp = list_head(&fp->rf_delegstatelist); dsp != NULL;
1878             dsp = list_next(&fp->rf_delegstatelist, dsp)) {
1879                 if (cp != dsp->rds_client) {
1880                         return (TRUE);
1881                 }
1882         }
1883         return (FALSE);
1884 }
1885 
1886 void
1887 rfs4_disable_delegation(void)
1888 {
1889         nfs4_srv_t *nsrv4;
1890 
1891         nsrv4 = nfs4_get_srv();
1892         mutex_enter(&nsrv4->deleg_lock);
1893         rfs4_deleg_disabled++;
1894         mutex_exit(&nsrv4->deleg_lock);
1895 }
1896 
1897 void
1898 rfs4_enable_delegation(void)
1899 {
1900         nfs4_srv_t *nsrv4;
1901 
1902         nsrv4 = nfs4_get_srv();
1903         mutex_enter(&nsrv4->deleg_lock);
1904         ASSERT(rfs4_deleg_disabled > 0);
1905         rfs4_deleg_disabled--;
1906         mutex_exit(&nsrv4->deleg_lock);
1907 }
1908 
1909 void
1910 rfs4_mon_hold(void *arg)
1911 {
1912         rfs4_file_t *fp = arg;
1913 
1914         rfs4_dbe_hold(fp->rf_dbe);
1915 }
1916 
1917 void
1918 rfs4_mon_rele(void *arg)
1919 {
1920         rfs4_file_t *fp = arg;
1921 
1922         rfs4_dbe_rele_nolock(fp->rf_dbe);
1923 }