Fix NFS design problems re. multiple zone keys
Make NFS server zone-specific data all have the same lifetime
Fix rfs4_clean_state_exi
Fix exi_cache_reclaim
Fix mistakes in zone keys work
More fixes re. exi_zoneid and exi_tree
(danmcd -> Keep some ASSERT()s around for readability.)
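
The hunks below (in nfs4_get_deleg_policy() and rfs4_grant_delegation()) replace
direct zone_getspecific(rfs4_zone_key, curzone) lookups with the nfs4_get_srv()
accessor, so every caller obtains the per-zone nfs4_srv_t the same way and the
zone-specific server data shares one lifetime.  The accessor itself is defined
outside this file and is not shown in this webrev; the sketch below is only an
illustration of what such a wrapper might look like, assuming the data is still
registered under rfs4_zone_key (the ASSERT mirrors the "keep some ASSERT()s
around for readability" note above).

/*
 * Sketch only -- not part of this change.  Assumes the per-zone NFSv4
 * server state is still attached to the zone via rfs4_zone_key.
 */
nfs4_srv_t *
nfs4_get_srv(void)
{
        nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);

        ASSERT(nsrv4 != NULL);
        return (nsrv4);
}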
    
      
    
          --- old/usr/src/uts/common/fs/nfs/nfs4_srv_deleg.c
          +++ new/usr/src/uts/common/fs/nfs/nfs4_srv_deleg.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  24   24   * Use is subject to license terms.
  25   25   */
  26   26  
  27   27  /*
  28   28   * Copyright 2018 Nexenta Systems, Inc.
  29   29   */
  30   30  
  31   31  #include <sys/systm.h>
  32   32  #include <rpc/auth.h>
  33   33  #include <rpc/clnt.h>
  34   34  #include <nfs/nfs4_kprot.h>
  35   35  #include <nfs/nfs4.h>
  36   36  #include <nfs/lm.h>
  37   37  #include <sys/cmn_err.h>
  38   38  #include <sys/disp.h>
  39   39  #include <sys/sdt.h>
  40   40  
  41   41  #include <sys/pathname.h>
  42   42  
  43   43  #include <sys/strsubr.h>
  44   44  #include <sys/ddi.h>
  45   45  
  46   46  #include <sys/vnode.h>
  47   47  #include <sys/sdt.h>
  48   48  #include <inet/common.h>
  49   49  #include <inet/ip.h>
  50   50  #include <inet/ip6.h>
  51   51  
  52   52  #define MAX_READ_DELEGATIONS 5
  53   53  
  54   54  static int rfs4_deleg_wlp = 5;
  55   55  static int rfs4_deleg_disabled;
  56   56  static int rfs4_max_setup_cb_tries = 5;
  57   57  
  58   58  #ifdef DEBUG
  59   59  
  60   60  static int rfs4_test_cbgetattr_fail = 0;
  61   61  int rfs4_cb_null;
  62   62  int rfs4_cb_debug;
  63   63  int rfs4_deleg_debug;
  64   64  
  65   65  #endif
  66   66  
  67   67  static void rfs4_recall_file(rfs4_file_t *,
  68   68      void (*recall)(rfs4_deleg_state_t *, bool_t),
  69   69      bool_t, rfs4_client_t *);
  70   70  static  void            rfs4_revoke_file(rfs4_file_t *);
  71   71  static  void            rfs4_cb_chflush(rfs4_cbinfo_t *);
  72   72  static  CLIENT          *rfs4_cb_getch(rfs4_cbinfo_t *);
  73   73  static  void            rfs4_cb_freech(rfs4_cbinfo_t *, CLIENT *, bool_t);
  74   74  static rfs4_deleg_state_t *rfs4_deleg_state(rfs4_state_t *,
  75   75      open_delegation_type4, int *);
  76   76  
  77   77  /*
   78   78   * Convert a universal address to a transport-specific
  79   79   * address using inet_pton.
  80   80   */
  81   81  static int
  82   82  uaddr2sockaddr(int af, char *ua, void *ap, in_port_t *pp)
  83   83  {
  84   84          int dots = 0, i, j, len, k;
  85   85          unsigned char c;
  86   86          in_port_t port = 0;
  87   87  
  88   88          len = strlen(ua);
  89   89  
  90   90          for (i = len-1; i >= 0; i--) {
  91   91  
  92   92                  if (ua[i] == '.')
  93   93                          dots++;
  94   94  
  95   95                  if (dots == 2) {
  96   96  
  97   97                          ua[i] = '\0';
  98   98                          /*
   99   99                           * We use k to remember where to stick '.' back, since
  100  100                           * ua was kmem_alloc'd from the pool with size len+1.
 101  101                           */
 102  102                          k = i;
 103  103                          if (inet_pton(af, ua, ap) == 1) {
 104  104  
 105  105                                  c = 0;
 106  106  
 107  107                                  for (j = i+1; j < len; j++) {
 108  108                                          if (ua[j] == '.') {
 109  109                                                  port = c << 8;
 110  110                                                  c = 0;
 111  111                                          } else if (ua[j] >= '0' &&
 112  112                                              ua[j] <= '9') {
 113  113                                                  c *= 10;
 114  114                                                  c += ua[j] - '0';
 115  115                                          } else {
 116  116                                                  ua[k] = '.';
 117  117                                                  return (EINVAL);
 118  118                                          }
 119  119                                  }
 120  120                                  port += c;
 121  121  
 122  122                                  *pp = htons(port);
 123  123  
 124  124                                  ua[k] = '.';
 125  125                                  return (0);
 126  126                          } else {
 127  127                                  ua[k] = '.';
 128  128                                  return (EINVAL);
 129  129                          }
 130  130                  }
 131  131          }
 132  132  
 133  133          return (EINVAL);
 134  134  }
 135  135  
 136  136  /*
 137  137   * Update the delegation policy with the
 138  138   * value of "new_policy"
 139  139   */
 140  140  void
 141  141  rfs4_set_deleg_policy(nfs4_srv_t *nsrv4, srv_deleg_policy_t new_policy)
 142  142  {
 143  143          rw_enter(&nsrv4->deleg_policy_lock, RW_WRITER);
 144  144          nsrv4->nfs4_deleg_policy = new_policy;
 145  145          rw_exit(&nsrv4->deleg_policy_lock);
 146  146  }
 147  147  
 148  148  void
 149  149  rfs4_hold_deleg_policy(nfs4_srv_t *nsrv4)
 150  150  {
 151  151          rw_enter(&nsrv4->deleg_policy_lock, RW_READER);
 152  152  }
  
 153  153  
 154  154  void
 155  155  rfs4_rele_deleg_policy(nfs4_srv_t *nsrv4)
 156  156  {
 157  157          rw_exit(&nsrv4->deleg_policy_lock);
 158  158  }
 159  159  
 160  160  srv_deleg_policy_t
 161  161  nfs4_get_deleg_policy()
 162  162  {
 163      -        nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
      163 +        nfs4_srv_t *nsrv4 = nfs4_get_srv();
 164  164          return (nsrv4->nfs4_deleg_policy);
 165  165  }
 166  166  
 167  167  
 168  168  /*
 169  169   * This free function is to be used when the client struct is being
 170  170   * released and nothing at all is needed of the callback info any
 171  171   * longer.
 172  172   */
 173  173  void
 174  174  rfs4_cbinfo_free(rfs4_cbinfo_t *cbp)
 175  175  {
 176  176          char *addr = cbp->cb_callback.cb_location.r_addr;
 177  177          char *netid = cbp->cb_callback.cb_location.r_netid;
 178  178  
 179  179          /* Free old address if any */
 180  180  
 181  181          if (addr)
 182  182                  kmem_free(addr, strlen(addr) + 1);
 183  183          if (netid)
 184  184                  kmem_free(netid, strlen(netid) + 1);
 185  185  
 186  186          addr = cbp->cb_newer.cb_callback.cb_location.r_addr;
 187  187          netid = cbp->cb_newer.cb_callback.cb_location.r_netid;
 188  188  
 189  189          if (addr)
 190  190                  kmem_free(addr, strlen(addr) + 1);
 191  191          if (netid)
 192  192                  kmem_free(netid, strlen(netid) + 1);
 193  193  
 194  194          if (cbp->cb_chc_free) {
 195  195                  rfs4_cb_chflush(cbp);
 196  196          }
 197  197  }
 198  198  
 199  199  /*
 200  200   * The server uses this to check the callback path supplied by the
 201  201   * client.  The callback connection is marked "in progress" while this
 202  202   * work is going on and then eventually marked either OK or FAILED.
 203  203   * This work can be done as part of a separate thread and at the end
 204  204   * of this the thread will exit or it may be done such that the caller
 205  205   * will continue with other work.
 206  206   */
 207  207  static void
 208  208  rfs4_do_cb_null(rfs4_client_t *cp)
 209  209  {
 210  210          struct timeval tv;
 211  211          CLIENT *ch;
 212  212          rfs4_cbstate_t newstate;
 213  213          rfs4_cbinfo_t *cbp = &cp->rc_cbinfo;
 214  214  
 215  215          mutex_enter(cbp->cb_lock);
 216  216          /* If another thread is doing CB_NULL RPC then return */
 217  217          if (cbp->cb_nullcaller == TRUE) {
 218  218                  mutex_exit(cbp->cb_lock);
 219  219                  rfs4_client_rele(cp);
 220  220                  zthread_exit();
 221  221          }
 222  222  
 223  223          /* Mark the cbinfo as having a thread in the NULL callback */
 224  224          cbp->cb_nullcaller = TRUE;
 225  225  
 226  226          /*
 227  227           * Are there other threads still using the cbinfo client
 228  228           * handles?  If so, this thread must wait before going and
  229  229           * mucking around with the callback information.
 230  230           */
 231  231          while (cbp->cb_refcnt != 0)
 232  232                  cv_wait(cbp->cb_cv_nullcaller, cbp->cb_lock);
 233  233  
 234  234          /*
 235  235           * This thread itself may find that new callback info has
 236  236           * arrived and is set up to handle this case and redrive the
 237  237           * call to the client's callback server.
 238  238           */
 239  239  retry:
 240  240          if (cbp->cb_newer.cb_new == TRUE &&
 241  241              cbp->cb_newer.cb_confirmed == TRUE) {
 242  242                  char *addr = cbp->cb_callback.cb_location.r_addr;
 243  243                  char *netid = cbp->cb_callback.cb_location.r_netid;
 244  244  
 245  245                  /*
 246  246                   * Free the old stuff if it exists; may be the first
 247  247                   * time through this path
 248  248                   */
 249  249                  if (addr)
 250  250                          kmem_free(addr, strlen(addr) + 1);
 251  251                  if (netid)
 252  252                          kmem_free(netid, strlen(netid) + 1);
 253  253  
 254  254                  /* Move over the addr/netid */
 255  255                  cbp->cb_callback.cb_location.r_addr =
 256  256                      cbp->cb_newer.cb_callback.cb_location.r_addr;
 257  257                  cbp->cb_newer.cb_callback.cb_location.r_addr = NULL;
 258  258                  cbp->cb_callback.cb_location.r_netid =
 259  259                      cbp->cb_newer.cb_callback.cb_location.r_netid;
 260  260                  cbp->cb_newer.cb_callback.cb_location.r_netid = NULL;
 261  261  
 262  262                  /* Get the program number */
 263  263                  cbp->cb_callback.cb_program =
 264  264                      cbp->cb_newer.cb_callback.cb_program;
 265  265                  cbp->cb_newer.cb_callback.cb_program = 0;
 266  266  
 267  267                  /* Don't forget the protocol's "cb_ident" field */
 268  268                  cbp->cb_ident = cbp->cb_newer.cb_ident;
 269  269                  cbp->cb_newer.cb_ident = 0;
 270  270  
 271  271                  /* no longer new */
 272  272                  cbp->cb_newer.cb_new = FALSE;
 273  273                  cbp->cb_newer.cb_confirmed = FALSE;
 274  274  
 275  275                  /* get rid of the old client handles that may exist */
 276  276                  rfs4_cb_chflush(cbp);
 277  277  
 278  278                  cbp->cb_state = CB_NONE;
 279  279                  cbp->cb_timefailed = 0; /* reset the clock */
 280  280                  cbp->cb_notified_of_cb_path_down = TRUE;
 281  281          }
 282  282  
 283  283          if (cbp->cb_state != CB_NONE) {
 284  284                  cv_broadcast(cbp->cb_cv);       /* let the others know */
 285  285                  cbp->cb_nullcaller = FALSE;
 286  286                  mutex_exit(cbp->cb_lock);
 287  287                  rfs4_client_rele(cp);
 288  288                  zthread_exit();
 289  289          }
 290  290  
 291  291          /* mark rfs4_client_t as CALLBACK NULL in progress */
 292  292          cbp->cb_state = CB_INPROG;
 293  293          mutex_exit(cbp->cb_lock);
 294  294  
 295  295          /* get/generate a client handle */
 296  296          if ((ch = rfs4_cb_getch(cbp)) == NULL) {
 297  297                  mutex_enter(cbp->cb_lock);
 298  298                  cbp->cb_state = CB_BAD;
 299  299                  cbp->cb_timefailed = gethrestime_sec(); /* observability */
 300  300                  goto retry;
 301  301          }
 302  302  
 303  303  
 304  304          tv.tv_sec = 30;
 305  305          tv.tv_usec = 0;
 306  306          if (clnt_call(ch, CB_NULL, xdr_void, NULL, xdr_void, NULL, tv) != 0) {
 307  307                  newstate = CB_BAD;
 308  308          } else {
 309  309                  newstate = CB_OK;
 310  310  #ifdef  DEBUG
 311  311                  rfs4_cb_null++;
 312  312  #endif
 313  313          }
 314  314  
 315  315          /* Check to see if the client has specified new callback info */
 316  316          mutex_enter(cbp->cb_lock);
 317  317          rfs4_cb_freech(cbp, ch, TRUE);
 318  318          if (cbp->cb_newer.cb_new == TRUE &&
 319  319              cbp->cb_newer.cb_confirmed == TRUE) {
 320  320                  goto retry;     /* give the CB_NULL another chance */
 321  321          }
 322  322  
 323  323          cbp->cb_state = newstate;
 324  324          if (cbp->cb_state == CB_BAD)
 325  325                  cbp->cb_timefailed = gethrestime_sec(); /* observability */
 326  326  
 327  327          cv_broadcast(cbp->cb_cv);       /* start up the other threads */
 328  328          cbp->cb_nullcaller = FALSE;
 329  329          mutex_exit(cbp->cb_lock);
 330  330          rfs4_client_rele(cp);
 331  331          zthread_exit();
 332  332  }
 333  333  
 334  334  /*
 335  335   * Given a client struct, inspect the callback info to see if the
 336  336   * callback path is up and available.
 337  337   *
  338  338   * If a new callback path is available and no one has set it up, then
  339  339   * try to set it up.  If setup is not successful after 5 tries (5 secs),
  340  340   * it gives up and returns NULL.
 341  341   *
 342  342   * If callback path is being initialized, then wait for the CB_NULL RPC
 343  343   * call to occur.
 344  344   */
 345  345  static rfs4_cbinfo_t *
 346  346  rfs4_cbinfo_hold(rfs4_client_t *cp)
 347  347  {
 348  348          rfs4_cbinfo_t *cbp = &cp->rc_cbinfo;
 349  349          int retries = 0;
 350  350  
 351  351          mutex_enter(cbp->cb_lock);
 352  352  
 353  353          while (cbp->cb_newer.cb_new == TRUE && cbp->cb_nullcaller == FALSE) {
 354  354                  /*
 355  355                   * Looks like a new callback path may be available and
  356  356                   * no one has set it up.
 357  357                   */
 358  358                  mutex_exit(cbp->cb_lock);
 359  359                  rfs4_dbe_hold(cp->rc_dbe);
 360  360                  rfs4_do_cb_null(cp); /* caller will release client hold */
 361  361  
 362  362                  mutex_enter(cbp->cb_lock);
 363  363                  /*
  364  364                   * If the callback path is no longer new, or it's being set up,
 365  365                   * then stop and wait for it to be done.
 366  366                   */
 367  367                  if (cbp->cb_newer.cb_new == FALSE || cbp->cb_nullcaller == TRUE)
 368  368                          break;
 369  369                  mutex_exit(cbp->cb_lock);
 370  370  
 371  371                  if (++retries >= rfs4_max_setup_cb_tries)
 372  372                          return (NULL);
 373  373                  delay(hz);
 374  374                  mutex_enter(cbp->cb_lock);
 375  375          }
 376  376  
 377  377          /* Is there a thread working on doing the CB_NULL RPC? */
 378  378          if (cbp->cb_nullcaller == TRUE)
 379  379                  cv_wait(cbp->cb_cv, cbp->cb_lock);  /* if so, wait on it */
 380  380  
 381  381          /* If the callback path is not okay (up and running), just quit */
 382  382          if (cbp->cb_state != CB_OK) {
 383  383                  mutex_exit(cbp->cb_lock);
 384  384                  return (NULL);
 385  385          }
 386  386  
 387  387          /* Let someone know we are using the current callback info */
 388  388          cbp->cb_refcnt++;
 389  389          mutex_exit(cbp->cb_lock);
 390  390          return (cbp);
 391  391  }
 392  392  
 393  393  /*
 394  394   * The caller is done with the callback info.  It may be that the
 395  395   * caller's RPC failed and the NFSv4 client has actually provided new
 396  396   * callback information.  If so, let the caller know so they can
  397  397   * take advantage of this and maybe retry the RPC that originally failed.
 398  398   */
 399  399  static int
 400  400  rfs4_cbinfo_rele(rfs4_cbinfo_t *cbp, rfs4_cbstate_t newstate)
 401  401  {
 402  402          int cb_new = FALSE;
 403  403  
 404  404          mutex_enter(cbp->cb_lock);
 405  405  
 406  406          /* The caller gets a chance to mark the callback info as bad */
 407  407          if (newstate != CB_NOCHANGE)
 408  408                  cbp->cb_state = newstate;
 409  409          if (newstate == CB_FAILED) {
 410  410                  cbp->cb_timefailed = gethrestime_sec(); /* observability */
 411  411                  cbp->cb_notified_of_cb_path_down = FALSE;
 412  412          }
 413  413  
 414  414          cbp->cb_refcnt--;       /* no longer using the information */
 415  415  
 416  416          /*
 417  417           * A thread may be waiting on this one to finish and if so,
 418  418           * let it know that it is okay to do the CB_NULL to the
 419  419           * client's callback server.
 420  420           */
 421  421          if (cbp->cb_refcnt == 0 && cbp->cb_nullcaller)
 422  422                  cv_broadcast(cbp->cb_cv_nullcaller);
 423  423  
 424  424          /*
 425  425           * If this is the last thread to use the callback info and
 426  426           * there is new callback information to try and no thread is
  427  427           * there ready to do the CB_NULL, then return true to the
  428  428           * caller so they can do the CB_NULL.
 429  429           */
 430  430          if (cbp->cb_refcnt == 0 &&
 431  431              cbp->cb_nullcaller == FALSE &&
 432  432              cbp->cb_newer.cb_new == TRUE &&
 433  433              cbp->cb_newer.cb_confirmed == TRUE)
 434  434                  cb_new = TRUE;
 435  435  
 436  436          mutex_exit(cbp->cb_lock);
 437  437  
 438  438          return (cb_new);
 439  439  }
 440  440  
 441  441  /*
 442  442   * Given the information in the callback info struct, create a client
 443  443   * handle that can be used by the server for its callback path.
 444  444   */
 445  445  static CLIENT *
 446  446  rfs4_cbch_init(rfs4_cbinfo_t *cbp)
 447  447  {
 448  448          struct knetconfig knc;
 449  449          vnode_t *vp;
 450  450          struct sockaddr_in addr4;
 451  451          struct sockaddr_in6 addr6;
 452  452          void *addr, *taddr;
 453  453          in_port_t *pp;
 454  454          int af;
 455  455          char *devnam;
 456  456          struct netbuf nb;
 457  457          int size;
 458  458          CLIENT *ch = NULL;
 459  459          int useresvport = 0;
 460  460  
 461  461          mutex_enter(cbp->cb_lock);
 462  462  
 463  463          if (cbp->cb_callback.cb_location.r_netid == NULL ||
 464  464              cbp->cb_callback.cb_location.r_addr == NULL) {
 465  465                  goto cb_init_out;
 466  466          }
 467  467  
 468  468          if (strcmp(cbp->cb_callback.cb_location.r_netid, "tcp") == 0) {
 469  469                  knc.knc_semantics = NC_TPI_COTS;
 470  470                  knc.knc_protofmly = "inet";
 471  471                  knc.knc_proto = "tcp";
 472  472                  devnam = "/dev/tcp";
 473  473                  af = AF_INET;
 474  474          } else if (strcmp(cbp->cb_callback.cb_location.r_netid, "udp")
 475  475              == 0) {
 476  476                  knc.knc_semantics = NC_TPI_CLTS;
 477  477                  knc.knc_protofmly = "inet";
 478  478                  knc.knc_proto = "udp";
 479  479                  devnam = "/dev/udp";
 480  480                  af = AF_INET;
 481  481          } else if (strcmp(cbp->cb_callback.cb_location.r_netid, "tcp6")
 482  482              == 0) {
 483  483                  knc.knc_semantics = NC_TPI_COTS;
 484  484                  knc.knc_protofmly = "inet6";
 485  485                  knc.knc_proto = "tcp";
 486  486                  devnam = "/dev/tcp6";
 487  487                  af = AF_INET6;
 488  488          } else if (strcmp(cbp->cb_callback.cb_location.r_netid, "udp6")
 489  489              == 0) {
 490  490                  knc.knc_semantics = NC_TPI_CLTS;
 491  491                  knc.knc_protofmly = "inet6";
 492  492                  knc.knc_proto = "udp";
 493  493                  devnam = "/dev/udp6";
 494  494                  af = AF_INET6;
 495  495          } else {
 496  496                  goto cb_init_out;
 497  497          }
 498  498  
 499  499          if (lookupname(devnam, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp) != 0) {
 500  500  
 501  501                  goto cb_init_out;
 502  502          }
 503  503  
 504  504          if (vp->v_type != VCHR) {
 505  505                  VN_RELE(vp);
 506  506                  goto cb_init_out;
 507  507          }
 508  508  
 509  509          knc.knc_rdev = vp->v_rdev;
 510  510  
 511  511          VN_RELE(vp);
 512  512  
 513  513          if (af == AF_INET) {
 514  514                  size = sizeof (addr4);
 515  515                  bzero(&addr4, size);
 516  516                  addr4.sin_family = (sa_family_t)af;
 517  517                  addr = &addr4.sin_addr;
 518  518                  pp = &addr4.sin_port;
 519  519                  taddr = &addr4;
 520  520          } else /* AF_INET6 */ {
 521  521                  size = sizeof (addr6);
 522  522                  bzero(&addr6, size);
 523  523                  addr6.sin6_family = (sa_family_t)af;
 524  524                  addr = &addr6.sin6_addr;
 525  525                  pp = &addr6.sin6_port;
 526  526                  taddr = &addr6;
 527  527          }
 528  528  
 529  529          if (uaddr2sockaddr(af,
 530  530              cbp->cb_callback.cb_location.r_addr, addr, pp)) {
 531  531  
 532  532                  goto cb_init_out;
 533  533          }
 534  534  
 535  535  
 536  536          nb.maxlen = nb.len = size;
 537  537          nb.buf = (char *)taddr;
 538  538  
 539  539          if (clnt_tli_kcreate(&knc, &nb, cbp->cb_callback.cb_program,
 540  540              NFS_CB, 0, 0, curthread->t_cred, &ch)) {
 541  541  
 542  542                  ch = NULL;
 543  543          }
 544  544  
 545  545          /* turn off reserved port usage */
 546  546          (void) CLNT_CONTROL(ch, CLSET_BINDRESVPORT, (char *)&useresvport);
 547  547  
 548  548  cb_init_out:
 549  549          mutex_exit(cbp->cb_lock);
 550  550          return (ch);
 551  551  }
 552  552  
 553  553  /*
  554  554   * Iterate over the client handle cache and
  555  555   * destroy its entries.
 556  556   */
 557  557  static void
 558  558  rfs4_cb_chflush(rfs4_cbinfo_t *cbp)
 559  559  {
 560  560          CLIENT *ch;
 561  561  
 562  562          while (cbp->cb_chc_free) {
 563  563                  cbp->cb_chc_free--;
 564  564                  ch = cbp->cb_chc[cbp->cb_chc_free];
 565  565                  cbp->cb_chc[cbp->cb_chc_free] = NULL;
 566  566                  if (ch) {
 567  567                          if (ch->cl_auth)
 568  568                                  auth_destroy(ch->cl_auth);
 569  569                          clnt_destroy(ch);
 570  570                  }
 571  571          }
 572  572  }
 573  573  
 574  574  /*
  575  575   * Return a client handle, either from the small
 576  576   * rfs4_client_t cache or one that we just created.
 577  577   */
 578  578  static CLIENT *
 579  579  rfs4_cb_getch(rfs4_cbinfo_t *cbp)
 580  580  {
 581  581          CLIENT *cbch = NULL;
 582  582          uint32_t zilch = 0;
 583  583  
 584  584          mutex_enter(cbp->cb_lock);
 585  585  
 586  586          if (cbp->cb_chc_free) {
 587  587                  cbp->cb_chc_free--;
 588  588                  cbch = cbp->cb_chc[ cbp->cb_chc_free ];
 589  589                  mutex_exit(cbp->cb_lock);
 590  590                  (void) CLNT_CONTROL(cbch, CLSET_XID, (char *)&zilch);
 591  591                  return (cbch);
 592  592          }
 593  593  
 594  594          mutex_exit(cbp->cb_lock);
 595  595  
 596  596          /* none free so make it now */
 597  597          cbch = rfs4_cbch_init(cbp);
 598  598  
 599  599          return (cbch);
 600  600  }
 601  601  
 602  602  /*
 603  603   * Return the client handle to the small cache or
 604  604   * destroy it.
 605  605   */
 606  606  static void
 607  607  rfs4_cb_freech(rfs4_cbinfo_t *cbp, CLIENT *ch, bool_t lockheld)
 608  608  {
 609  609          if (lockheld == FALSE)
 610  610                  mutex_enter(cbp->cb_lock);
 611  611  
 612  612          if (cbp->cb_chc_free < RFS4_CBCH_MAX) {
 613  613                  cbp->cb_chc[ cbp->cb_chc_free++ ] = ch;
 614  614                  if (lockheld == FALSE)
 615  615                          mutex_exit(cbp->cb_lock);
 616  616                  return;
 617  617          }
 618  618          if (lockheld == FALSE)
 619  619                  mutex_exit(cbp->cb_lock);
 620  620  
 621  621          /*
 622  622           * cache maxed out of free entries, obliterate
 623  623           * this client handle, destroy it, throw it away.
 624  624           */
 625  625          if (ch->cl_auth)
 626  626                  auth_destroy(ch->cl_auth);
 627  627          clnt_destroy(ch);
 628  628  }
 629  629  
 630  630  /*
 631  631   * With the supplied callback information - initialize the client
 632  632   * callback data.  If there is a callback in progress, save the
 633  633   * callback info so that a thread can pick it up in the future.
 634  634   */
 635  635  void
 636  636  rfs4_client_setcb(rfs4_client_t *cp, cb_client4 *cb, uint32_t cb_ident)
 637  637  {
 638  638          char *addr = NULL;
 639  639          char *netid = NULL;
 640  640          rfs4_cbinfo_t *cbp = &cp->rc_cbinfo;
 641  641          size_t len;
 642  642  
 643  643          /* Set the call back for the client */
 644  644          if (cb->cb_location.r_addr && cb->cb_location.r_addr[0] != '\0' &&
 645  645              cb->cb_location.r_netid && cb->cb_location.r_netid[0] != '\0') {
 646  646                  len = strlen(cb->cb_location.r_addr) + 1;
 647  647                  addr = kmem_alloc(len, KM_SLEEP);
 648  648                  bcopy(cb->cb_location.r_addr, addr, len);
 649  649                  len = strlen(cb->cb_location.r_netid) + 1;
 650  650                  netid = kmem_alloc(len, KM_SLEEP);
 651  651                  bcopy(cb->cb_location.r_netid, netid, len);
 652  652          }
  653  653          /* ready to save the new information, but first free the old if it exists */
 654  654          mutex_enter(cbp->cb_lock);
 655  655  
 656  656          cbp->cb_newer.cb_callback.cb_program = cb->cb_program;
 657  657  
 658  658          if (cbp->cb_newer.cb_callback.cb_location.r_addr != NULL)
 659  659                  kmem_free(cbp->cb_newer.cb_callback.cb_location.r_addr,
 660  660                      strlen(cbp->cb_newer.cb_callback.cb_location.r_addr) + 1);
 661  661          cbp->cb_newer.cb_callback.cb_location.r_addr = addr;
 662  662  
 663  663          if (cbp->cb_newer.cb_callback.cb_location.r_netid != NULL)
 664  664                  kmem_free(cbp->cb_newer.cb_callback.cb_location.r_netid,
 665  665                      strlen(cbp->cb_newer.cb_callback.cb_location.r_netid) + 1);
 666  666          cbp->cb_newer.cb_callback.cb_location.r_netid = netid;
 667  667  
 668  668          cbp->cb_newer.cb_ident = cb_ident;
 669  669  
 670  670          if (addr && *addr && netid && *netid) {
 671  671                  cbp->cb_newer.cb_new = TRUE;
 672  672                  cbp->cb_newer.cb_confirmed = FALSE;
 673  673          } else {
 674  674                  cbp->cb_newer.cb_new = FALSE;
 675  675                  cbp->cb_newer.cb_confirmed = FALSE;
 676  676          }
 677  677  
 678  678          mutex_exit(cbp->cb_lock);
 679  679  }
 680  680  
 681  681  /*
 682  682   * The server uses this when processing SETCLIENTID_CONFIRM.  Callback
 683  683   * information may have been provided on SETCLIENTID and this call
 684  684   * marks that information as confirmed and then starts a thread to
 685  685   * test the callback path.
 686  686   */
 687  687  void
 688  688  rfs4_deleg_cb_check(rfs4_client_t *cp)
 689  689  {
 690  690          if (cp->rc_cbinfo.cb_newer.cb_new == FALSE)
 691  691                  return;
 692  692  
 693  693          cp->rc_cbinfo.cb_newer.cb_confirmed = TRUE;
 694  694  
 695  695          rfs4_dbe_hold(cp->rc_dbe); /* hold the client struct for thread */
 696  696  
 697  697          (void) zthread_create(NULL, 0, rfs4_do_cb_null, cp, 0,
 698  698              minclsyspri);
 699  699  }
 700  700  
 701  701  static void
 702  702  rfs4args_cb_recall_free(nfs_cb_argop4 *argop)
 703  703  {
 704  704          CB_RECALL4args  *rec_argp;
 705  705  
 706  706          rec_argp = &argop->nfs_cb_argop4_u.opcbrecall;
 707  707          if (rec_argp->fh.nfs_fh4_val)
 708  708                  kmem_free(rec_argp->fh.nfs_fh4_val, rec_argp->fh.nfs_fh4_len);
 709  709  }
 710  710  
 711  711  /* ARGSUSED */
 712  712  static void
 713  713  rfs4args_cb_getattr_free(nfs_cb_argop4 *argop)
 714  714  {
 715  715          CB_GETATTR4args *argp;
 716  716  
 717  717          argp = &argop->nfs_cb_argop4_u.opcbgetattr;
 718  718          if (argp->fh.nfs_fh4_val)
 719  719                  kmem_free(argp->fh.nfs_fh4_val, argp->fh.nfs_fh4_len);
 720  720  }
 721  721  
 722  722  static void
 723  723  rfs4freeargres(CB_COMPOUND4args *args, CB_COMPOUND4res *resp)
 724  724  {
 725  725          int i, arglen;
 726  726          nfs_cb_argop4 *argop;
 727  727  
 728  728          /*
 729  729           * First free any special args alloc'd for specific ops.
 730  730           */
 731  731          arglen = args->array_len;
 732  732          argop = args->array;
 733  733          for (i = 0; i < arglen; i++, argop++) {
 734  734  
 735  735                  switch (argop->argop) {
 736  736                  case OP_CB_RECALL:
 737  737                          rfs4args_cb_recall_free(argop);
 738  738                          break;
 739  739  
 740  740                  case OP_CB_GETATTR:
 741  741                          rfs4args_cb_getattr_free(argop);
 742  742                          break;
 743  743  
 744  744                  default:
 745  745                          return;
 746  746                  }
 747  747          }
 748  748  
 749  749          if (args->tag.utf8string_len > 0)
 750  750                  UTF8STRING_FREE(args->tag)
 751  751  
 752  752          kmem_free(args->array, arglen * sizeof (nfs_cb_argop4));
 753  753          if (resp)
 754  754                  xdr_free(xdr_CB_COMPOUND4res, (caddr_t)resp);
 755  755  }
 756  756  
 757  757  /*
 758  758   * General callback routine for the server to the client.
 759  759   */
 760  760  static enum clnt_stat
 761  761  rfs4_do_callback(rfs4_client_t *cp, CB_COMPOUND4args *args,
 762  762      CB_COMPOUND4res *res, struct timeval timeout)
 763  763  {
 764  764          rfs4_cbinfo_t *cbp;
 765  765          CLIENT *ch;
 766  766          /* start with this in case cb_getch() fails */
 767  767          enum clnt_stat  stat = RPC_FAILED;
 768  768  
 769  769          res->tag.utf8string_val = NULL;
 770  770          res->array = NULL;
 771  771  
 772  772  retry:
 773  773          cbp = rfs4_cbinfo_hold(cp);
 774  774          if (cbp == NULL)
 775  775                  return (stat);
 776  776  
 777  777          /* get a client handle */
 778  778          if ((ch = rfs4_cb_getch(cbp)) != NULL) {
 779  779                  /*
 780  780                   * reset the cb_ident since it may have changed in
 781  781                   * rfs4_cbinfo_hold()
 782  782                   */
 783  783                  args->callback_ident = cbp->cb_ident;
 784  784  
 785  785                  stat = clnt_call(ch, CB_COMPOUND, xdr_CB_COMPOUND4args_srv,
 786  786                      (caddr_t)args, xdr_CB_COMPOUND4res,
 787  787                      (caddr_t)res, timeout);
 788  788  
 789  789                  /* free client handle */
 790  790                  rfs4_cb_freech(cbp, ch, FALSE);
 791  791          }
 792  792  
 793  793          /*
 794  794           * If the rele says that there may be new callback info then
 795  795           * retry this sequence and it may succeed as a result of the
 796  796           * new callback path
 797  797           */
 798  798          if (rfs4_cbinfo_rele(cbp,
 799  799              (stat == RPC_SUCCESS ? CB_NOCHANGE : CB_FAILED)) == TRUE)
 800  800                  goto retry;
 801  801  
 802  802          return (stat);
 803  803  }
 804  804  
 805  805  /*
 806  806   * Used by the NFSv4 server to get attributes for a file while
 807  807   * handling the case where a file has been write delegated.  For the
 808  808   * time being, VOP_GETATTR() is called and CB_GETATTR processing is
 809  809   * not undertaken.  This call site is maintained in case the server is
 810  810   * updated in the future to handle write delegation space guarantees.
 811  811   */
 812  812  nfsstat4
 813  813  rfs4_vop_getattr(vnode_t *vp, vattr_t *vap, int flag, cred_t *cr)
 814  814  {
 815  815  
 816  816          int error;
 817  817  
 818  818          error = VOP_GETATTR(vp, vap, flag, cr, NULL);
 819  819          return (puterrno4(error));
 820  820  }
 821  821  
 822  822  /*
 823  823   * This is used everywhere in the v2/v3 server to allow the
 824  824   * integration of all NFS versions and the support of delegation.  For
 825  825   * now, just call the VOP_GETATTR().  If the NFSv4 server is enhanced
 826  826   * in the future to provide space guarantees for write delegations
 827  827   * then this call site should be expanded to interact with the client.
 828  828   */
 829  829  int
 830  830  rfs4_delegated_getattr(vnode_t *vp, vattr_t *vap, int flag, cred_t *cr)
 831  831  {
 832  832          return (VOP_GETATTR(vp, vap, flag, cr, NULL));
 833  833  }
 834  834  
 835  835  /*
 836  836   * Place the actual cb_recall otw call to client.
 837  837   */
 838  838  static void
 839  839  rfs4_do_cb_recall(rfs4_deleg_state_t *dsp, bool_t trunc)
 840  840  {
 841  841          CB_COMPOUND4args        cb4_args;
 842  842          CB_COMPOUND4res         cb4_res;
 843  843          CB_RECALL4args          *rec_argp;
 844  844          CB_RECALL4res           *rec_resp;
 845  845          nfs_cb_argop4           *argop;
 846  846          int                     numops;
 847  847          int                     argoplist_size;
 848  848          struct timeval          timeout;
 849  849          nfs_fh4                 *fhp;
 850  850          enum clnt_stat          call_stat;
 851  851  
 852  852          /*
 853  853           * set up the compound args
 854  854           */
 855  855          numops = 1;     /* CB_RECALL only */
 856  856  
 857  857          argoplist_size = numops * sizeof (nfs_cb_argop4);
 858  858          argop = kmem_zalloc(argoplist_size, KM_SLEEP);
 859  859          argop->argop = OP_CB_RECALL;
 860  860          rec_argp = &argop->nfs_cb_argop4_u.opcbrecall;
 861  861  
 862  862          (void) str_to_utf8("cb_recall", &cb4_args.tag);
 863  863          cb4_args.minorversion = CB4_MINORVERSION;
 864  864          /* cb4_args.callback_ident is set in rfs4_do_callback() */
 865  865          cb4_args.array_len = numops;
 866  866          cb4_args.array = argop;
 867  867  
 868  868          /*
 869  869           * fill in the args struct
 870  870           */
 871  871          bcopy(&dsp->rds_delegid.stateid, &rec_argp->stateid, sizeof (stateid4));
 872  872          rec_argp->truncate = trunc;
 873  873  
 874  874          fhp = &dsp->rds_finfo->rf_filehandle;
 875  875          rec_argp->fh.nfs_fh4_val = kmem_alloc(sizeof (char) *
 876  876              fhp->nfs_fh4_len, KM_SLEEP);
 877  877          nfs_fh4_copy(fhp, &rec_argp->fh);
 878  878  
 879  879          /* Keep track of when we did this for observability */
 880  880          dsp->rds_time_recalled = gethrestime_sec();
 881  881  
 882  882          /*
 883  883           * Set up the timeout for the callback and make the actual call.
 884  884           * Timeout will be 80% of the lease period for this server.
 885  885           */
 886  886          timeout.tv_sec = (rfs4_lease_time * 80) / 100;
 887  887          timeout.tv_usec = 0;
 888  888  
 889  889          DTRACE_NFSV4_3(cb__recall__start, rfs4_client_t *, dsp->rds_client,
 890  890              rfs4_deleg_state_t *, dsp, CB_RECALL4args *, rec_argp);
 891  891  
 892  892          call_stat = rfs4_do_callback(dsp->rds_client, &cb4_args, &cb4_res,
 893  893              timeout);
 894  894  
 895  895          rec_resp = (cb4_res.array_len == 0) ? NULL :
 896  896              &cb4_res.array[0].nfs_cb_resop4_u.opcbrecall;
 897  897  
 898  898          DTRACE_NFSV4_3(cb__recall__done, rfs4_client_t *, dsp->rds_client,
 899  899              rfs4_deleg_state_t *, dsp, CB_RECALL4res *, rec_resp);
 900  900  
 901  901          if (call_stat != RPC_SUCCESS || cb4_res.status != NFS4_OK) {
 902  902                  rfs4_return_deleg(dsp, TRUE);
 903  903          }
 904  904  
 905  905          rfs4freeargres(&cb4_args, &cb4_res);
 906  906  }
 907  907  
 908  908  struct recall_arg {
 909  909          rfs4_deleg_state_t *dsp;
 910  910          void (*recall)(rfs4_deleg_state_t *, bool_t trunc);
 911  911          bool_t trunc;
 912  912  };
 913  913  
 914  914  static void
 915  915  do_recall(struct recall_arg *arg)
 916  916  {
 917  917          rfs4_deleg_state_t *dsp = arg->dsp;
 918  918          rfs4_file_t *fp = dsp->rds_finfo;
 919  919          callb_cpr_t cpr_info;
 920  920          kmutex_t cpr_lock;
 921  921  
 922  922          mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL);
 923  923          CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr, "nfsv4Recall");
 924  924  
 925  925          /*
 926  926           * It is possible that before this thread starts
  927  927           * the client has sent us a return_delegation, and
 928  928           * if that is the case we do not need to send the
 929  929           * recall callback.
 930  930           */
 931  931          if (dsp->rds_dtype != OPEN_DELEGATE_NONE) {
 932  932                  DTRACE_PROBE3(nfss__i__recall,
 933  933                      struct recall_arg *, arg,
 934  934                      struct rfs4_deleg_state_t *, dsp,
 935  935                      struct rfs4_file_t *, fp);
 936  936  
 937  937                  if (arg->recall)
 938  938                          (void) (*arg->recall)(dsp, arg->trunc);
 939  939          }
 940  940  
 941  941          mutex_enter(fp->rf_dinfo.rd_recall_lock);
 942  942          /*
 943  943           * Recall count may go negative if the parent thread that is
 944  944           * creating the individual callback threads does not modify
 945  945           * the recall_count field before the callback thread actually
 946  946           * gets a response from the CB_RECALL
 947  947           */
 948  948          fp->rf_dinfo.rd_recall_count--;
 949  949          if (fp->rf_dinfo.rd_recall_count == 0)
 950  950                  cv_signal(fp->rf_dinfo.rd_recall_cv);
 951  951          mutex_exit(fp->rf_dinfo.rd_recall_lock);
 952  952  
 953  953          mutex_enter(&cpr_lock);
 954  954          CALLB_CPR_EXIT(&cpr_info);
 955  955          mutex_destroy(&cpr_lock);
 956  956  
 957  957          rfs4_deleg_state_rele(dsp); /* release the hold for this thread */
 958  958          kmem_free(arg, sizeof (struct recall_arg));
 959  959          zthread_exit();
 960  960  }
 961  961  
 962  962  struct master_recall_args {
 963  963      rfs4_file_t *fp;
 964  964      void (*recall)(rfs4_deleg_state_t *, bool_t);
 965  965      bool_t trunc;
 966  966  };
 967  967  
 968  968  static void
 969  969  do_recall_file(struct master_recall_args *map)
 970  970  {
 971  971          rfs4_file_t *fp = map->fp;
 972  972          rfs4_deleg_state_t *dsp;
 973  973          struct recall_arg *arg;
 974  974          callb_cpr_t cpr_info;
 975  975          kmutex_t cpr_lock;
 976  976          int32_t recall_count;
 977  977  
 978  978          rfs4_dbe_lock(fp->rf_dbe);
 979  979  
 980  980          /* Recall already in progress ? */
 981  981          mutex_enter(fp->rf_dinfo.rd_recall_lock);
 982  982          if (fp->rf_dinfo.rd_recall_count != 0) {
 983  983                  mutex_exit(fp->rf_dinfo.rd_recall_lock);
 984  984                  rfs4_dbe_rele_nolock(fp->rf_dbe);
 985  985                  rfs4_dbe_unlock(fp->rf_dbe);
 986  986                  kmem_free(map, sizeof (struct master_recall_args));
 987  987                  zthread_exit();
 988  988          }
 989  989  
 990  990          mutex_exit(fp->rf_dinfo.rd_recall_lock);
 991  991  
 992  992          mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL);
 993  993          CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr, "v4RecallFile");
 994  994  
 995  995          recall_count = 0;
 996  996          for (dsp = list_head(&fp->rf_delegstatelist); dsp != NULL;
 997  997              dsp = list_next(&fp->rf_delegstatelist, dsp)) {
 998  998  
 999  999                  rfs4_dbe_lock(dsp->rds_dbe);
1000 1000                  /*
1001 1001                   * if this delegation state
1002 1002                   * is being reaped skip it
1003 1003                   */
1004 1004                  if (rfs4_dbe_is_invalid(dsp->rds_dbe)) {
1005 1005                          rfs4_dbe_unlock(dsp->rds_dbe);
1006 1006                          continue;
1007 1007                  }
1008 1008  
1009 1009                  /* hold for receiving thread */
1010 1010                  rfs4_dbe_hold(dsp->rds_dbe);
1011 1011                  rfs4_dbe_unlock(dsp->rds_dbe);
1012 1012  
1013 1013                  arg = kmem_alloc(sizeof (struct recall_arg), KM_SLEEP);
1014 1014                  arg->recall = map->recall;
1015 1015                  arg->trunc = map->trunc;
1016 1016                  arg->dsp = dsp;
1017 1017  
1018 1018                  recall_count++;
1019 1019  
1020 1020                  (void) zthread_create(NULL, 0, do_recall, arg, 0,
1021 1021                      minclsyspri);
1022 1022          }
1023 1023  
1024 1024          rfs4_dbe_unlock(fp->rf_dbe);
1025 1025  
1026 1026          mutex_enter(fp->rf_dinfo.rd_recall_lock);
1027 1027          /*
1028 1028           * Recall count may go negative if the parent thread that is
1029 1029           * creating the individual callback threads does not modify
1030 1030           * the recall_count field before the callback thread actually
1031 1031           * gets a response from the CB_RECALL
1032 1032           */
1033 1033          fp->rf_dinfo.rd_recall_count += recall_count;
1034 1034          while (fp->rf_dinfo.rd_recall_count)
1035 1035                  cv_wait(fp->rf_dinfo.rd_recall_cv, fp->rf_dinfo.rd_recall_lock);
1036 1036  
1037 1037          mutex_exit(fp->rf_dinfo.rd_recall_lock);
1038 1038  
1039 1039          DTRACE_PROBE1(nfss__i__recall_done, rfs4_file_t *, fp);
1040 1040          rfs4_file_rele(fp);
1041 1041          kmem_free(map, sizeof (struct master_recall_args));
1042 1042          mutex_enter(&cpr_lock);
1043 1043          CALLB_CPR_EXIT(&cpr_info);
1044 1044          mutex_destroy(&cpr_lock);
1045 1045          zthread_exit();
1046 1046  }
1047 1047  
1048 1048  static void
1049 1049  rfs4_recall_file(rfs4_file_t *fp,
1050 1050      void (*recall)(rfs4_deleg_state_t *, bool_t trunc),
1051 1051      bool_t trunc, rfs4_client_t *cp)
1052 1052  {
1053 1053          struct master_recall_args *args;
1054 1054  
1055 1055          rfs4_dbe_lock(fp->rf_dbe);
1056 1056          if (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
1057 1057                  rfs4_dbe_unlock(fp->rf_dbe);
1058 1058                  return;
1059 1059          }
1060 1060          rfs4_dbe_hold(fp->rf_dbe);      /* hold for new thread */
1061 1061  
1062 1062          /*
1063 1063           * Mark the time we started the recall processing.
1064 1064           * If it has been previously recalled, do not reset the
1065 1065           * timer since this is used for the revocation decision.
1066 1066           */
1067 1067          if (fp->rf_dinfo.rd_time_recalled == 0)
1068 1068                  fp->rf_dinfo.rd_time_recalled = gethrestime_sec();
1069 1069          fp->rf_dinfo.rd_ever_recalled = TRUE; /* used for policy decision */
1070 1070          /* Client causing recall not always available */
1071 1071          if (cp)
1072 1072                  fp->rf_dinfo.rd_conflicted_client = cp->rc_clientid;
1073 1073  
1074 1074          rfs4_dbe_unlock(fp->rf_dbe);
1075 1075  
1076 1076          args = kmem_alloc(sizeof (struct master_recall_args), KM_SLEEP);
1077 1077          args->fp = fp;
1078 1078          args->recall = recall;
1079 1079          args->trunc = trunc;
1080 1080  
1081 1081          (void) zthread_create(NULL, 0, do_recall_file, args, 0,
1082 1082              minclsyspri);
1083 1083  }
1084 1084  
1085 1085  void
1086 1086  rfs4_recall_deleg(rfs4_file_t *fp, bool_t trunc, rfs4_client_t *cp)
1087 1087  {
1088 1088          time_t elapsed1, elapsed2;
1089 1089  
1090 1090          if (fp->rf_dinfo.rd_time_recalled != 0) {
1091 1091                  elapsed1 = gethrestime_sec() - fp->rf_dinfo.rd_time_recalled;
1092 1092                  elapsed2 = gethrestime_sec() - fp->rf_dinfo.rd_time_lastwrite;
1093 1093                  /* First check to see if a revocation should occur */
1094 1094                  if (elapsed1 > rfs4_lease_time &&
1095 1095                      elapsed2 > rfs4_lease_time) {
1096 1096                          rfs4_revoke_file(fp);
1097 1097                          return;
1098 1098                  }
1099 1099                  /*
 1100 1100                   * Next, check whether the last recall was done so recently
 1101 1101                   * that another recall should not be sent yet.
1102 1102                   */
1103 1103                  if (elapsed1 <= ((rfs4_lease_time * 20) / 100))
1104 1104                          return;
1105 1105          }
1106 1106          rfs4_recall_file(fp, rfs4_do_cb_recall, trunc, cp);
1107 1107  }
1108 1108  
1109 1109  /*
1110 1110   * rfs4_check_recall is called from rfs4_do_open to determine if the current
1111 1111   * open conflicts with the delegation.
 1112 1112   * Return true if a recall is needed, otherwise false.
1113 1113   * Assumes entry locks for sp and sp->rs_finfo are held.
1114 1114   */
1115 1115  bool_t
1116 1116  rfs4_check_recall(rfs4_state_t *sp, uint32_t access)
1117 1117  {
1118 1118          open_delegation_type4 dtype = sp->rs_finfo->rf_dinfo.rd_dtype;
1119 1119  
1120 1120          switch (dtype) {
1121 1121          case OPEN_DELEGATE_NONE:
1122 1122                  /* Not currently delegated so there is nothing to do */
1123 1123                  return (FALSE);
1124 1124          case OPEN_DELEGATE_READ:
1125 1125                  /*
1126 1126                   * If the access is only asking for READ then there is
1127 1127                   * no conflict and nothing to do.  If it is asking
 1128 1128                   * for write, then there will be a conflict and the read
1129 1129                   * delegation should be recalled.
1130 1130                   */
1131 1131                  if (access == OPEN4_SHARE_ACCESS_READ)
1132 1132                          return (FALSE);
1133 1133                  else
1134 1134                          return (TRUE);
1135 1135          case OPEN_DELEGATE_WRITE:
1136 1136                  /* Check to see if this client has the delegation */
1137 1137                  return (rfs4_is_deleg(sp));
1138 1138          }
1139 1139  
1140 1140          return (FALSE);
1141 1141  }
1142 1142  
1143 1143  /*
1144 1144   * Return the "best" allowable delegation available given the current
1145 1145   * delegation type and the desired access and deny modes on the file.
1146 1146   * At the point that this routine is called we know that the access and
1147 1147   * deny modes are consistent with the file modes.
1148 1148   */
1149 1149  static open_delegation_type4
1150 1150  rfs4_check_delegation(rfs4_state_t *sp, rfs4_file_t *fp)
1151 1151  {
1152 1152          open_delegation_type4 dtype = fp->rf_dinfo.rd_dtype;
1153 1153          uint32_t access = sp->rs_share_access;
1154 1154          uint32_t deny = sp->rs_share_deny;
1155 1155          int readcnt = 0;
1156 1156          int writecnt = 0;
1157 1157  
1158 1158          switch (dtype) {
1159 1159          case OPEN_DELEGATE_NONE:
1160 1160                  /*
1161 1161                   * Determine if more than just this OPEN have the file
1162 1162                   * open and if so, no delegation may be provided to
1163 1163                   * the client.
1164 1164                   */
1165 1165                  if (access & OPEN4_SHARE_ACCESS_WRITE)
1166 1166                          writecnt++;
1167 1167                  if (access & OPEN4_SHARE_ACCESS_READ)
1168 1168                          readcnt++;
1169 1169  
1170 1170                  if (fp->rf_access_read > readcnt ||
1171 1171                      fp->rf_access_write > writecnt)
1172 1172                          return (OPEN_DELEGATE_NONE);
1173 1173  
1174 1174                  /*
1175 1175                   * If the client is going to write, or if the client
1176 1176                   * has exclusive access, return a write delegation.
1177 1177                   */
1178 1178                  if ((access & OPEN4_SHARE_ACCESS_WRITE) ||
1179 1179                      (deny & (OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE)))
1180 1180                          return (OPEN_DELEGATE_WRITE);
1181 1181                  /*
 1182 1182                   * If we don't want to write or we haven't denied read
1183 1183                   * access to others, return a read delegation.
1184 1184                   */
1185 1185                  if ((access & ~OPEN4_SHARE_ACCESS_WRITE) ||
1186 1186                      (deny & ~OPEN4_SHARE_DENY_READ))
1187 1187                          return (OPEN_DELEGATE_READ);
1188 1188  
1189 1189                  /* Shouldn't get here */
1190 1190                  return (OPEN_DELEGATE_NONE);
1191 1191  
1192 1192          case OPEN_DELEGATE_READ:
1193 1193                  /*
 1194 1194                   * If the file is delegated for read but we want to
 1195 1195                   * write or deny others read access, then we can't delegate
1196 1196                   * the file. We shouldn't get here since the delegation should
1197 1197                   * have been recalled already.
1198 1198                   */
1199 1199                  if ((access & OPEN4_SHARE_ACCESS_WRITE) ||
1200 1200                      (deny & OPEN4_SHARE_DENY_READ))
1201 1201                          return (OPEN_DELEGATE_NONE);
1202 1202                  return (OPEN_DELEGATE_READ);
1203 1203  
1204 1204          case OPEN_DELEGATE_WRITE:
1205 1205                  return (OPEN_DELEGATE_WRITE);
1206 1206          }
1207 1207  
1208 1208          /* Shouldn't get here */
1209 1209          return (OPEN_DELEGATE_NONE);
1210 1210  }
1211 1211  
1212 1212  /*
1213 1213   * Given the desired delegation type and the "history" of the file
1214 1214   * determine the actual delegation type to return.
1215 1215   */
1216 1216  static open_delegation_type4
1217 1217  rfs4_delegation_policy(nfs4_srv_t *nsrv4, open_delegation_type4 dtype,
1218 1218      rfs4_dinfo_t *dinfo, clientid4 cid)
1219 1219  {
1220 1220          time_t elapsed;
1221 1221  
1222 1222          if (nsrv4->nfs4_deleg_policy != SRV_NORMAL_DELEGATE)
1223 1223                  return (OPEN_DELEGATE_NONE);
1224 1224  
1225 1225          /*
1226 1226           * Has this file/delegation ever been recalled?  If not then
1227 1227           * no further checks for a delegation race need to be done.
1228 1228           * However if a recall has occurred, then check to see if a
1229 1229           * client has caused its own delegation recall to occur.  If
1230 1230           * not, then has a delegation for this file been returned
1231 1231           * recently?  If so, then do not assign a new delegation to
1232 1232           * avoid a "delegation race" between the original client and
1233 1233           * the new/conflicting client.
1234 1234           */
1235 1235          if (dinfo->rd_ever_recalled == TRUE) {
1236 1236                  if (dinfo->rd_conflicted_client != cid) {
1237 1237                          elapsed = gethrestime_sec() - dinfo->rd_time_returned;
1238 1238                          if (elapsed < rfs4_lease_time)
1239 1239                                  return (OPEN_DELEGATE_NONE);
1240 1240                  }
1241 1241          }
1242 1242  
1243 1243          /* Limit the number of read grants */
1244 1244          if (dtype == OPEN_DELEGATE_READ &&
1245 1245              dinfo->rd_rdgrants > MAX_READ_DELEGATIONS)
1246 1246                  return (OPEN_DELEGATE_NONE);
1247 1247  
1248 1248          /*
1249 1249           * Should consider limiting total number of read/write
1250 1250           * delegations the server will permit.
1251 1251           */
1252 1252  
1253 1253          return (dtype);
1254 1254  }
1255 1255  
1256 1256  /*
 1257 1257   * Try to grant a delegation for an open given the state.  The routine
1258 1258   * returns the delegation type granted. This could be OPEN_DELEGATE_NONE.
1259 1259   *
 1260 1260   * The state and associated file entry must be locked.
1261 1261   */
1262 1262  rfs4_deleg_state_t *
  
1263 1263  rfs4_grant_delegation(delegreq_t dreq, rfs4_state_t *sp, int *recall)
1264 1264  {
1265 1265          nfs4_srv_t *nsrv4;
1266 1266          rfs4_file_t *fp = sp->rs_finfo;
1267 1267          open_delegation_type4 dtype;
1268 1268          int no_delegation;
1269 1269  
1270 1270          ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
1271 1271          ASSERT(rfs4_dbe_islocked(fp->rf_dbe));
1272 1272  
1273      -        nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
     1273 +        nsrv4 = nfs4_get_srv();
1274 1274  
1275 1275          /* Is the server even providing delegations? */
1276 1276          if (nsrv4->nfs4_deleg_policy == SRV_NEVER_DELEGATE || dreq == DELEG_NONE)
1277 1277                  return (NULL);
1278 1278  
1279 1279          /* Check to see if delegations have been temporarily disabled */
1280 1280          mutex_enter(&nsrv4->deleg_lock);
1281 1281          no_delegation = rfs4_deleg_disabled;
1282 1282          mutex_exit(&nsrv4->deleg_lock);
1283 1283  
1284 1284          if (no_delegation)
1285 1285                  return (NULL);
1286 1286  
1287 1287          /* Don't grant a delegation if a deletion is impending. */
1288 1288          if (fp->rf_dinfo.rd_hold_grant > 0) {
1289 1289                  return (NULL);
1290 1290          }
1291 1291  
1292 1292          /*
1293 1293           * Don't grant a delegation if there are any lock manager
1294 1294           * (NFSv2/v3) locks for the file.  This is a bit of a hack (e.g.,
1295 1295           * if there are only read locks we should be able to grant a
1296 1296           * read-only delegation), but it's good enough for now.
1297 1297           *
1298 1298           * MT safety: the lock manager checks for conflicting delegations
1299 1299           * before processing a lock request.  That check will block until
1300 1300           * we are done here.  So if the lock manager acquires a lock after
1301 1301           * we decide to grant the delegation, the delegation will get
1302 1302           * immediately recalled (if there's a conflict), so we're safe.
1303 1303           */
1304 1304          if (lm_vp_active(fp->rf_vp)) {
1305 1305                  return (NULL);
1306 1306          }
1307 1307  
1308 1308          /*
1309 1309           * Based on the type of delegation request passed in, take the
1310 1310           * appropriate action (DELEG_NONE is handled above)
1311 1311           */
1312 1312          switch (dreq) {
1313 1313  
1314 1314          case DELEG_READ:
1315 1315          case DELEG_WRITE:
1316 1316                  /*
1317 1317                   * The server "must" grant the delegation in this case.
1318 1318                   * Client is using open previous
1319 1319                   */
1320 1320                  dtype = (open_delegation_type4)dreq;
1321 1321                  *recall = 1;
1322 1322                  break;
1323 1323          case DELEG_ANY:
1324 1324                  /*
1325 1325                   * If a valid callback path does not exist, no delegation may
1326 1326                   * be granted.
1327 1327                   */
1328 1328                  if (sp->rs_owner->ro_client->rc_cbinfo.cb_state != CB_OK)
1329 1329                          return (NULL);
1330 1330  
1331 1331                  /*
1332 1332                   * If the original operation which caused time_rm_delayed
1333 1333                   * to be set hasn't been retried and completed for one
1334 1334                   * full lease period, clear it and allow delegations to
1335 1335                   * get granted again.
1336 1336                   */
1337 1337                  if (fp->rf_dinfo.rd_time_rm_delayed > 0 &&
1338 1338                      gethrestime_sec() >
1339 1339                      fp->rf_dinfo.rd_time_rm_delayed + rfs4_lease_time)
1340 1340                          fp->rf_dinfo.rd_time_rm_delayed = 0;
1341 1341  
1342 1342                  /*
1343 1343                   * If we are waiting for a delegation to be returned then
1344 1344                   * don't delegate this file.  We do this for correctness, and
1345 1345                   * also because if the file is being recalled we would likely
1346 1346                   * have to recall the new delegation again.
1347 1347                   */
1348 1348  
1349 1349                  if (fp->rf_dinfo.rd_time_recalled != 0 ||
1350 1350                      fp->rf_dinfo.rd_time_rm_delayed != 0)
1351 1351                          return (NULL);
1352 1352  
1353 1353                  /* Get the "best" delegation candidate */
1354 1354                  dtype = rfs4_check_delegation(sp, fp);
1355 1355  
1356 1356                  if (dtype == OPEN_DELEGATE_NONE)
1357 1357                          return (NULL);
1358 1358  
1359 1359                  /*
1360 1360                   * Based on policy and the history of the file get the
1361 1361                   * actual delegation.
1362 1362                   */
1363 1363                  dtype = rfs4_delegation_policy(nsrv4, dtype, &fp->rf_dinfo,
1364 1364                      sp->rs_owner->ro_client->rc_clientid);
1365 1365  
1366 1366                  if (dtype == OPEN_DELEGATE_NONE)
1367 1367                          return (NULL);
1368 1368                  break;
1369 1369          default:
1370 1370                  return (NULL);
1371 1371          }
1372 1372  
1373 1373          /* set the delegation for the state */
1374 1374          return (rfs4_deleg_state(sp, dtype, recall));
1375 1375  }
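
The substantive change in this hunk (and the ones below) replaces the open-coded zone_getspecific(rfs4_zone_key, curzone) lookups with nfs4_get_srv(). The accessor itself is added elsewhere in this changeset; the sketch below shows only its assumed shape, a single per-zone lookup plus a sanity ASSERT, and the real version may resolve through the consolidated NFS server globals rather than rfs4_zone_key directly:

        /* Assumed shape only -- the actual definition lives in the nfs_server changes. */
        nfs4_srv_t *
        nfs4_get_srv(void)
        {
                nfs4_srv_t *srv = zone_getspecific(rfs4_zone_key, curzone);

                /* Server zone-specific data now shares the zone's lifetime. */
                ASSERT(srv != NULL);
                return (srv);
        }
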
1376 1376  
1377 1377  void
1378 1378  rfs4_set_deleg_response(rfs4_deleg_state_t *dsp, open_delegation4 *dp,
1379 1379      nfsace4 *ace,  int recall)
1380 1380  {
1381 1381          open_write_delegation4 *wp;
1382 1382          open_read_delegation4 *rp;
1383 1383          nfs_space_limit4 *spl;
1384 1384          nfsace4 nace;
1385 1385  
1386 1386          /*
1387 1387           * We need to allocate a new copy of the who string.  This
1388 1388           * string will be freed by the rfs4_op_open dis_resfree
1389 1389           * routine.  We need to do this allocation since replays will
1390 1390           * be allocated and rfs4_compound can't tell the difference
1391 1391           * between a replay and an initial open.  N.B. if an ace is
1392 1392           * passed in, it is the caller's responsibility to free it.
1393 1393           */
1394 1394  
1395 1395          if (ace == NULL) {
1396 1396                  /*
1397 1397                   * Default is to deny all access, the client will have
1398 1398                   * to contact the server.  XXX Do we want to actually
1399 1399                   * set a deny for everyone, or do we simply want to
1400 1400                   * construct an entity that will match no one?
1401 1401                   */
1402 1402                  nace.type = ACE4_ACCESS_DENIED_ACE_TYPE;
1403 1403                  nace.flag = 0;
1404 1404                  nace.access_mask = ACE4_VALID_MASK_BITS;
1405 1405                  (void) str_to_utf8(ACE4_WHO_EVERYONE, &nace.who);
1406 1406          } else {
1407 1407                  nace.type = ace->type;
1408 1408                  nace.flag = ace->flag;
1409 1409                  nace.access_mask = ace->access_mask;
1410 1410                  (void) utf8_copy(&ace->who, &nace.who);
1411 1411          }
1412 1412  
1413 1413          dp->delegation_type = dsp->rds_dtype;
1414 1414  
1415 1415          switch (dsp->rds_dtype) {
1416 1416          case OPEN_DELEGATE_NONE:
1417 1417                  break;
1418 1418          case OPEN_DELEGATE_READ:
1419 1419                  rp = &dp->open_delegation4_u.read;
1420 1420                  rp->stateid = dsp->rds_delegid.stateid;
1421 1421                  rp->recall = (bool_t)recall;
1422 1422                  rp->permissions = nace;
1423 1423                  break;
1424 1424          case OPEN_DELEGATE_WRITE:
1425 1425                  wp = &dp->open_delegation4_u.write;
1426 1426                  wp->stateid = dsp->rds_delegid.stateid;
1427 1427                  wp->recall = (bool_t)recall;
1428 1428                  spl = &wp->space_limit;
1429 1429                  spl->limitby = NFS_LIMIT_SIZE;
1430 1430                  spl->nfs_space_limit4_u.filesize = 0;
1431 1431                  wp->permissions = nace;
1432 1432                  break;
1433 1433          }
1434 1434  }
1435 1435  
1436 1436  /*
1437 1437   * Check if the file is delegated via the provided file struct.
1438 1438   * Return TRUE if it is delegated.  This is intended for use by
1439 1439   * the v4 server.  The v2/v3 server code should use rfs4_check_delegated().
1440 1440   *
1441 1441   * Note that if the file is found to have a delegation, it is
  
1442 1442   * recalled, unless the clientid of the caller matches the clientid of the
1443 1443   * delegation.  If the caller has asked for a delay (do_delay), a slight
1444 1444   * delay is inserted in the hope that the delegation will be returned quickly.
1445 1445   */
1446 1446  bool_t
1447 1447  rfs4_check_delegated_byfp(int mode, rfs4_file_t *fp,
1448 1448      bool_t trunc, bool_t do_delay, bool_t is_rm, clientid4 *cp)
1449 1449  {
1450 1450          rfs4_deleg_state_t *dsp;
1451 1451  
1452      -        nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
     1452 +        nfs4_srv_t *nsrv4 = nfs4_get_srv();
1453 1453  
1454 1454          /* Is delegation enabled? */
1455 1455          if (nsrv4->nfs4_deleg_policy == SRV_NEVER_DELEGATE)
1456 1456                  return (FALSE);
1457 1457  
1458 1458          /* do we have a delegation on this file? */
1459 1459          rfs4_dbe_lock(fp->rf_dbe);
1460 1460          if (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
1461 1461                  if (is_rm)
1462 1462                          fp->rf_dinfo.rd_hold_grant++;
1463 1463                  rfs4_dbe_unlock(fp->rf_dbe);
1464 1464                  return (FALSE);
1465 1465          }
1466 1466          /*
1467 1467           * do we have a write delegation on this file or are we
1468 1468           * requesting write access to a file with any type of existing
1469 1469           * delegation?
1470 1470           */
1471 1471          if (mode == FWRITE || fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE) {
1472 1472                  if (cp != NULL) {
1473 1473                          dsp = list_head(&fp->rf_delegstatelist);
1474 1474                          if (dsp == NULL) {
1475 1475                                  rfs4_dbe_unlock(fp->rf_dbe);
1476 1476                                  return (FALSE);
1477 1477                          }
1478 1478                          /*
1479 1479                           * Does the requestor already own the delegation?
1480 1480                           */
1481 1481                          if (dsp->rds_client->rc_clientid == *(cp)) {
1482 1482                                  rfs4_dbe_unlock(fp->rf_dbe);
1483 1483                                  return (FALSE);
1484 1484                          }
1485 1485                  }
1486 1486  
1487 1487                  rfs4_dbe_unlock(fp->rf_dbe);
1488 1488                  rfs4_recall_deleg(fp, trunc, NULL);
1489 1489  
1490 1490                  if (!do_delay) {
1491 1491                          rfs4_dbe_lock(fp->rf_dbe);
1492 1492                          fp->rf_dinfo.rd_time_rm_delayed = gethrestime_sec();
1493 1493                          rfs4_dbe_unlock(fp->rf_dbe);
1494 1494                          return (TRUE);
1495 1495                  }
1496 1496  
1497 1497                  delay(NFS4_DELEGATION_CONFLICT_DELAY);
1498 1498  
1499 1499                  rfs4_dbe_lock(fp->rf_dbe);
1500 1500                  if (fp->rf_dinfo.rd_dtype != OPEN_DELEGATE_NONE) {
1501 1501                          fp->rf_dinfo.rd_time_rm_delayed = gethrestime_sec();
1502 1502                          rfs4_dbe_unlock(fp->rf_dbe);
1503 1503                          return (TRUE);
1504 1504                  }
1505 1505          }
1506 1506          if (is_rm)
1507 1507                  fp->rf_dinfo.rd_hold_grant++;
1508 1508          rfs4_dbe_unlock(fp->rf_dbe);
1509 1509          return (FALSE);
1510 1510  }
1511 1511  
1512 1512  /*
1513 1513   * Check if the file is delegated in the case of a v2 or v3 access.
1514 1514   * Return TRUE if it is delegated which in turn means that v2 should
  
1515 1515   * drop the request and in the case of v3 JUKEBOX should be returned.
1516 1516   */
1517 1517  bool_t
1518 1518  rfs4_check_delegated(int mode, vnode_t *vp, bool_t trunc)
1519 1519  {
1520 1520          nfs4_srv_t *nsrv4;
1521 1521          rfs4_file_t *fp;
1522 1522          bool_t create = FALSE;
1523 1523          bool_t rc = FALSE;
1524 1524  
1525      -        nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
     1525 +        nsrv4 = nfs4_get_srv();
1526 1526          rfs4_hold_deleg_policy(nsrv4);
1527 1527  
1528 1528          /* Is delegation enabled? */
1529 1529          if (nsrv4->nfs4_deleg_policy != SRV_NEVER_DELEGATE) {
1530 1530                  fp = rfs4_findfile(vp, NULL, &create);
1531 1531                  if (fp != NULL) {
1532 1532                          if (rfs4_check_delegated_byfp(mode, fp, trunc,
1533 1533                              TRUE, FALSE, NULL)) {
1534 1534                                  rc = TRUE;
1535 1535                          }
1536 1536                          rfs4_file_rele(fp);
1537 1537                  }
1538 1538          }
1539 1539          rfs4_rele_deleg_policy(nsrv4);
1540 1540          return (rc);
  
1541 1541  }
1542 1542  
1543 1543  /*
1544 1544   * Release a hold on the hold_grant counter which
1545 1545   * prevents delegation from being granted while a remove
1546 1546   * or a rename is in progress.
1547 1547   */
1548 1548  void
1549 1549  rfs4_clear_dont_grant(rfs4_file_t *fp)
1550 1550  {
1551      -        nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
     1551 +        nfs4_srv_t *nsrv4 = nfs4_get_srv();
1552 1552  
1553 1553          if (nsrv4->nfs4_deleg_policy == SRV_NEVER_DELEGATE)
1554 1554                  return;
1555 1555          rfs4_dbe_lock(fp->rf_dbe);
1556 1556          ASSERT(fp->rf_dinfo.rd_hold_grant > 0);
1557 1557          fp->rf_dinfo.rd_hold_grant--;
1558 1558          fp->rf_dinfo.rd_time_rm_delayed = 0;
1559 1559          rfs4_dbe_unlock(fp->rf_dbe);
1560 1560  }
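
rfs4_check_delegated_byfp() with is_rm == TRUE and rfs4_clear_dont_grant() form a matched pair around a remove or rename: the former takes the hold on rd_hold_grant when it returns FALSE, the latter releases it. A hypothetical caller is sketched below; example_remove() and its error handling are illustrative and not part of this changeset:

        /* Hypothetical remove path, showing only the hold_grant bracketing. */
        static int
        example_remove(rfs4_file_t *fp)
        {
                /* is_rm == TRUE: a hold is taken only when FALSE is returned */
                if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE, NULL))
                        return (EAGAIN);        /* still delegated; caller retries */

                /* No new delegation can be granted while the hold is in place. */
                /* ... perform the actual remove or rename here ... */

                rfs4_clear_dont_grant(fp);      /* releases the hold taken above */
                return (0);
        }
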
1561 1561  
1562 1562  /*
1563 1563   * State support for delegation.
1564 1564   * Set the state delegation type for this state;
1565 1565   * This routine is called from open via rfs4_grant_delegation and the entry
1566 1566   * locks on sp and sp->rs_finfo are assumed.
1567 1567   */
1568 1568  static rfs4_deleg_state_t *
1569 1569  rfs4_deleg_state(rfs4_state_t *sp, open_delegation_type4 dtype, int *recall)
1570 1570  {
1571 1571          rfs4_file_t *fp = sp->rs_finfo;
1572 1572          bool_t create = TRUE;
1573 1573          rfs4_deleg_state_t *dsp;
1574 1574          vnode_t *vp;
1575 1575          int open_prev = *recall;
1576 1576          int ret;
1577 1577          int fflags = 0;
1578 1578  
1579 1579          ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
1580 1580          ASSERT(rfs4_dbe_islocked(fp->rf_dbe));
1581 1581  
1582 1582          /* Shouldn't happen */
1583 1583          if (fp->rf_dinfo.rd_recall_count != 0 ||
1584 1584              (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_READ &&
1585 1585              dtype != OPEN_DELEGATE_READ)) {
1586 1586                  return (NULL);
1587 1587          }
1588 1588  
1589 1589          /* Unlock to avoid deadlock */
1590 1590          rfs4_dbe_unlock(fp->rf_dbe);
1591 1591          rfs4_dbe_unlock(sp->rs_dbe);
1592 1592  
1593 1593          dsp = rfs4_finddeleg(sp, &create);
1594 1594  
1595 1595          rfs4_dbe_lock(sp->rs_dbe);
1596 1596          rfs4_dbe_lock(fp->rf_dbe);
1597 1597  
1598 1598          if (dsp == NULL)
1599 1599                  return (NULL);
1600 1600  
1601 1601          /*
1602 1602           * It is possible that since we dropped the lock
1603 1603           * in order to call finddeleg, the rfs4_file_t
1604 1604           * was marked such that we should not grant a
1605 1605           * delegation; if so, bail out.
1606 1606           */
1607 1607          if (fp->rf_dinfo.rd_hold_grant > 0) {
1608 1608                  rfs4_deleg_state_rele(dsp);
1609 1609                  return (NULL);
1610 1610          }
1611 1611  
1612 1612          if (create == FALSE) {
1613 1613                  if (sp->rs_owner->ro_client == dsp->rds_client &&
1614 1614                      dsp->rds_dtype == dtype) {
1615 1615                          return (dsp);
1616 1616                  } else {
1617 1617                          rfs4_deleg_state_rele(dsp);
1618 1618                          return (NULL);
1619 1619                  }
1620 1620          }
1621 1621  
1622 1622          /*
1623 1623           * Check that this file has not been delegated to another
1624 1624           * client
1625 1625           */
1626 1626          if (fp->rf_dinfo.rd_recall_count != 0 ||
1627 1627              fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE ||
1628 1628              (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_READ &&
1629 1629              dtype != OPEN_DELEGATE_READ)) {
1630 1630                  rfs4_deleg_state_rele(dsp);
1631 1631                  return (NULL);
1632 1632          }
1633 1633  
1634 1634          vp = fp->rf_vp;
1635 1635          /* vnevent_support returns 0 if file system supports vnevents */
1636 1636          if (vnevent_support(vp, NULL)) {
1637 1637                  rfs4_deleg_state_rele(dsp);
1638 1638                  return (NULL);
1639 1639          }
1640 1640  
1641 1641          /* Calculate the fflags for this OPEN. */
1642 1642          if (sp->rs_share_access & OPEN4_SHARE_ACCESS_READ)
1643 1643                  fflags |= FREAD;
1644 1644          if (sp->rs_share_access & OPEN4_SHARE_ACCESS_WRITE)
1645 1645                  fflags |= FWRITE;
1646 1646  
1647 1647          *recall = 0;
1648 1648          /*
1649 1649           * Before granting a delegation we need to know if anyone else has
1650 1650           * opened the file in a conflicting mode.  However, first we need to
1651 1651           * know how we opened the file to check the counts properly.
1652 1652           */
1653 1653          if (dtype == OPEN_DELEGATE_READ) {
1654 1654                  if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) ||
1655 1655                      (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) ||
1656 1656                      vn_is_mapped(vp, V_WRITE)) {
1657 1657                          if (open_prev) {
1658 1658                                  *recall = 1;
1659 1659                          } else {
1660 1660                                  rfs4_deleg_state_rele(dsp);
1661 1661                                  return (NULL);
1662 1662                          }
1663 1663                  }
1664 1664                  ret = fem_install(vp, deleg_rdops, (void *)fp, OPUNIQ,
1665 1665                      rfs4_mon_hold, rfs4_mon_rele);
1666 1666                  if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) ||
1667 1667                      (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) ||
1668 1668                      vn_is_mapped(vp, V_WRITE)) {
1669 1669                          if (open_prev) {
1670 1670                                  *recall = 1;
1671 1671                          } else {
1672 1672                                  (void) fem_uninstall(vp, deleg_rdops,
1673 1673                                      (void *)fp);
1674 1674                                  rfs4_deleg_state_rele(dsp);
1675 1675                                  return (NULL);
1676 1676                          }
1677 1677                  }
1678 1678                  /*
1679 1679                   * Because a client can hold onto a delegation after the
1680 1680                   * file has been closed, we need to keep track of the
1681 1681                   * access to this file.  Otherwise the CIFS server would
1682 1682                   * not know about the client accessing the file and could
1683 1683                   * inappropriately grant an OPLOCK.
1684 1684                   * fem_install() returns EBUSY when asked to install a
1685 1685                   * OPUNIQ monitor more than once.  Therefore, check the
1686 1686                   * return code because we only want this done once.
1687 1687                   */
1688 1688                  if (ret == 0)
1689 1689                          vn_open_upgrade(vp, FREAD);
1690 1690          } else { /* WRITE */
1691 1691                  if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) ||
1692 1692                      (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) ||
1693 1693                      ((fflags & FREAD) && vn_has_other_opens(vp, V_READ)) ||
1694 1694                      (((fflags & FREAD) == 0) && vn_is_opened(vp, V_READ)) ||
1695 1695                      vn_is_mapped(vp, V_RDORWR)) {
1696 1696                          if (open_prev) {
1697 1697                                  *recall = 1;
1698 1698                          } else {
1699 1699                                  rfs4_deleg_state_rele(dsp);
1700 1700                                  return (NULL);
1701 1701                          }
1702 1702                  }
1703 1703                  ret = fem_install(vp, deleg_wrops, (void *)fp, OPUNIQ,
1704 1704                      rfs4_mon_hold, rfs4_mon_rele);
1705 1705                  if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) ||
1706 1706                      (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) ||
1707 1707                      ((fflags & FREAD) && vn_has_other_opens(vp, V_READ)) ||
1708 1708                      (((fflags & FREAD) == 0) && vn_is_opened(vp, V_READ)) ||
1709 1709                      vn_is_mapped(vp, V_RDORWR)) {
1710 1710                          if (open_prev) {
1711 1711                                  *recall = 1;
1712 1712                          } else {
1713 1713                                  (void) fem_uninstall(vp, deleg_wrops,
1714 1714                                      (void *)fp);
1715 1715                                  rfs4_deleg_state_rele(dsp);
1716 1716                                  return (NULL);
1717 1717                          }
1718 1718                  }
1719 1719                  /*
1720 1720                   * Because a client can hold onto a delegation after the
1721 1721                   * file has been closed, we need to keep track of the
1722 1722                   * access to this file.  Otherwise the CIFS server would
1723 1723                   * not know about the client accessing the file and could
1724 1724                   * inappropriately grant an OPLOCK.
1725 1725                   * fem_install() returns EBUSY when asked to install a
1726 1726                   * OPUNIQ monitor more than once.  Therefore, check the
1727 1727                   * return code because we only want this done once.
1728 1728                   */
1729 1729                  if (ret == 0)
1730 1730                          vn_open_upgrade(vp, FREAD|FWRITE);
1731 1731          }
1732 1732          /* Place on delegation list for file */
1733 1733          ASSERT(!list_link_active(&dsp->rds_node));
1734 1734          list_insert_tail(&fp->rf_delegstatelist, dsp);
1735 1735  
1736 1736          dsp->rds_dtype = fp->rf_dinfo.rd_dtype = dtype;
1737 1737  
1738 1738          /* Update delegation stats for this file */
1739 1739          fp->rf_dinfo.rd_time_lastgrant = gethrestime_sec();
1740 1740  
1741 1741          /* reset since this is a new delegation */
1742 1742          fp->rf_dinfo.rd_conflicted_client = 0;
1743 1743          fp->rf_dinfo.rd_ever_recalled = FALSE;
1744 1744  
1745 1745          if (dtype == OPEN_DELEGATE_READ)
1746 1746                  fp->rf_dinfo.rd_rdgrants++;
1747 1747          else
1748 1748                  fp->rf_dinfo.rd_wrgrants++;
1749 1749  
1750 1750          return (dsp);
1751 1751  }
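
Both arms of rfs4_deleg_state() rely on the same race closure: check the open/mmap counts, install the FEM monitor, then check again, so a conflicting open is caught either by the second check or by the monitor itself. A stripped-down sketch of the read case follows; conflicting_opens() is a hypothetical stand-in for the vn_has_other_opens()/vn_is_opened()/vn_is_mapped() tests, and the fem_install() return is ignored here for brevity:

        /* Illustrative only: the check / install / re-check pattern. */
        static int
        install_read_monitor(vnode_t *vp, rfs4_file_t *fp)
        {
                if (conflicting_opens(vp))              /* 1: cheap early bail */
                        return (-1);

                (void) fem_install(vp, deleg_rdops, (void *)fp, OPUNIQ,
                    rfs4_mon_hold, rfs4_mon_rele);      /* 2: start watching */

                if (conflicting_opens(vp)) {            /* 3: close the race window */
                        (void) fem_uninstall(vp, deleg_rdops, (void *)fp);
                        return (-1);
                }
                return (0);     /* any later conflicting open hits the monitor */
        }
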
1752 1752  
1753 1753  /*
1754 1754   * State routine for the server when a delegation is returned.
1755 1755   */
1756 1756  void
1757 1757  rfs4_return_deleg(rfs4_deleg_state_t *dsp, bool_t revoked)
1758 1758  {
1759 1759          rfs4_file_t *fp = dsp->rds_finfo;
1760 1760          open_delegation_type4 dtypewas;
1761 1761  
1762 1762          rfs4_dbe_lock(fp->rf_dbe);
1763 1763  
1764 1764          /* nothing to do if no longer on list */
1765 1765          if (!list_link_active(&dsp->rds_node)) {
1766 1766                  rfs4_dbe_unlock(fp->rf_dbe);
1767 1767                  return;
1768 1768          }
1769 1769  
1770 1770          /* Remove state from the file's delegation list */
1771 1771          list_remove(&fp->rf_delegstatelist, dsp);
1772 1772  
1773 1773          if (list_is_empty(&fp->rf_delegstatelist)) {
1774 1774                  dtypewas = fp->rf_dinfo.rd_dtype;
1775 1775                  fp->rf_dinfo.rd_dtype = OPEN_DELEGATE_NONE;
1776 1776                  rfs4_dbe_cv_broadcast(fp->rf_dbe);
1777 1777  
1778 1778                  /* if file system was unshared, the vp will be NULL */
1779 1779                  if (fp->rf_vp != NULL) {
1780 1780                          /*
1781 1781                           * Once a delegation is no longer held by any client,
1782 1782                           * the monitor is uninstalled.  At this point, the
1783 1783                           * client must send OPEN otw, so we don't need the
1784 1784                           * reference on the vnode anymore.  The open
1785 1785                           * downgrade removes the reference put on earlier.
1786 1786                           */
1787 1787                          if (dtypewas == OPEN_DELEGATE_READ) {
1788 1788                                  (void) fem_uninstall(fp->rf_vp, deleg_rdops,
1789 1789                                      (void *)fp);
1790 1790                                  vn_open_downgrade(fp->rf_vp, FREAD);
1791 1791                          } else if (dtypewas == OPEN_DELEGATE_WRITE) {
1792 1792                                  (void) fem_uninstall(fp->rf_vp, deleg_wrops,
1793 1793                                      (void *)fp);
1794 1794                                  vn_open_downgrade(fp->rf_vp, FREAD|FWRITE);
1795 1795                          }
1796 1796                  }
1797 1797          }
1798 1798  
1799 1799          switch (dsp->rds_dtype) {
1800 1800          case OPEN_DELEGATE_READ:
1801 1801                  fp->rf_dinfo.rd_rdgrants--;
1802 1802                  break;
1803 1803          case OPEN_DELEGATE_WRITE:
1804 1804                  fp->rf_dinfo.rd_wrgrants--;
1805 1805                  break;
1806 1806          default:
1807 1807                  break;
1808 1808          }
1809 1809  
1810 1810          /* used in the policy decision */
1811 1811          fp->rf_dinfo.rd_time_returned = gethrestime_sec();
1812 1812  
1813 1813          /*
1814 1814           * reset the time_recalled field so future delegations are not
1815 1815           * accidentally revoked
1816 1816           */
1817 1817          if ((fp->rf_dinfo.rd_rdgrants + fp->rf_dinfo.rd_wrgrants) == 0)
1818 1818                  fp->rf_dinfo.rd_time_recalled = 0;
1819 1819  
1820 1820          rfs4_dbe_unlock(fp->rf_dbe);
1821 1821  
1822 1822          rfs4_dbe_lock(dsp->rds_dbe);
1823 1823  
1824 1824          dsp->rds_dtype = OPEN_DELEGATE_NONE;
1825 1825  
1826 1826          if (revoked == TRUE)
1827 1827                  dsp->rds_time_revoked = gethrestime_sec();
1828 1828  
1829 1829          rfs4_dbe_invalidate(dsp->rds_dbe);
1830 1830  
1831 1831          rfs4_dbe_unlock(dsp->rds_dbe);
1832 1832  
1833 1833          if (revoked == TRUE) {
1834 1834                  rfs4_dbe_lock(dsp->rds_client->rc_dbe);
1835 1835                  dsp->rds_client->rc_deleg_revoked++;    /* observability */
1836 1836                  rfs4_dbe_unlock(dsp->rds_client->rc_dbe);
1837 1837          }
1838 1838  }
1839 1839  
1840 1840  static void
1841 1841  rfs4_revoke_file(rfs4_file_t *fp)
1842 1842  {
1843 1843          rfs4_deleg_state_t *dsp;
1844 1844  
1845 1845          /*
1846 1846           * The lock for rfs4_file_t must be held when traversing the
1847 1847           * delegation list but that lock needs to be released to call
1848 1848           * rfs4_return_deleg()
1849 1849           */
1850 1850          rfs4_dbe_lock(fp->rf_dbe);
1851 1851          while (dsp = list_head(&fp->rf_delegstatelist)) {
1852 1852                  rfs4_dbe_hold(dsp->rds_dbe);
1853 1853                  rfs4_dbe_unlock(fp->rf_dbe);
1854 1854                  rfs4_return_deleg(dsp, TRUE);
1855 1855                  rfs4_deleg_state_rele(dsp);
1856 1856                  rfs4_dbe_lock(fp->rf_dbe);
1857 1857          }
1858 1858          rfs4_dbe_unlock(fp->rf_dbe);
1859 1859  }
1860 1860  
1861 1861  /*
1862 1862   * A delegation is assumed to be present on the file associated with
1863 1863   * "sp".  Check to see if the delegation is associated with
1864 1864   * the same client as referenced by "sp".  If it is not, TRUE is
1865 1865   * returned.  If the delegation DOES match the client (or no
1866 1866   * delegation is present), return FALSE.
1867 1867   * Assume the state entry and file entry are locked.
1868 1868   */
1869 1869  bool_t
1870 1870  rfs4_is_deleg(rfs4_state_t *sp)
1871 1871  {
1872 1872          rfs4_deleg_state_t *dsp;
1873 1873          rfs4_file_t *fp = sp->rs_finfo;
1874 1874          rfs4_client_t *cp = sp->rs_owner->ro_client;
1875 1875  
1876 1876          ASSERT(rfs4_dbe_islocked(fp->rf_dbe));
1877 1877          for (dsp = list_head(&fp->rf_delegstatelist); dsp != NULL;
1878 1878              dsp = list_next(&fp->rf_delegstatelist, dsp)) {
1879 1879                  if (cp != dsp->rds_client) {
1880 1880                          return (TRUE);
  
1881 1881                  }
1882 1882          }
1883 1883          return (FALSE);
1884 1884  }
1885 1885  
1886 1886  void
1887 1887  rfs4_disable_delegation(void)
1888 1888  {
1889 1889          nfs4_srv_t *nsrv4;
1890 1890  
1891      -        nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
     1891 +        nsrv4 = nfs4_get_srv();
1892 1892          mutex_enter(&nsrv4->deleg_lock);
1893 1893          rfs4_deleg_disabled++;
1894 1894          mutex_exit(&nsrv4->deleg_lock);
1895 1895  }
1896 1896  
1897 1897  void
1898 1898  rfs4_enable_delegation(void)
1899 1899  {
1900 1900          nfs4_srv_t *nsrv4;
1901 1901  
1902      -        nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
     1902 +        nsrv4 = nfs4_get_srv();
1903 1903          mutex_enter(&nsrv4->deleg_lock);
1904 1904          ASSERT(rfs4_deleg_disabled > 0);
1905 1905          rfs4_deleg_disabled--;
1906 1906          mutex_exit(&nsrv4->deleg_lock);
1907 1907  }
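
rfs4_disable_delegation() and rfs4_enable_delegation() count nested disables under deleg_lock and must be called in matched pairs; the ASSERT above fires on an unbalanced enable. A minimal, hypothetical usage bracket:

                rfs4_disable_delegation();      /* no new grants from here on */
                /* ... work that must not race with a fresh delegation grant ... */
                rfs4_enable_delegation();       /* matched with the disable above */
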
1908 1908  
1909 1909  void
1910 1910  rfs4_mon_hold(void *arg)
1911 1911  {
1912 1912          rfs4_file_t *fp = arg;
1913 1913  
1914 1914          rfs4_dbe_hold(fp->rf_dbe);
1915 1915  }
1916 1916  
1917 1917  void
1918 1918  rfs4_mon_rele(void *arg)
1919 1919  {
1920 1920          rfs4_file_t *fp = arg;
1921 1921  
1922 1922          rfs4_dbe_rele_nolock(fp->rf_dbe);
1923 1923  }
  