Print this page
    
11083 support NFS server in zone
Portions contributed by: Dan Kruchinin <dan.kruchinin@nexenta.com>
Portions contributed by: Stepan Zastupov <stepan.zastupov@gmail.com>
Portions contributed by: Joyce McIntosh <joyce.mcintosh@nexenta.com>
Portions contributed by: Mike Zeller <mike@mikezeller.net>
Portions contributed by: Dan McDonald <danmcd@joyent.com>
Portions contributed by: Gordon Ross <gordon.w.ross@gmail.com>
Portions contributed by: Vitaliy Gusev <gusev.vitaliy@gmail.com>
Reviewed by: Rick McNeal <rick.mcneal@nexenta.com>
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Jason King <jbk@joyent.com>
Reviewed by: C Fraire <cfraire@me.com>
Change-Id: I22f289d357503f9b48a0bc2482cc4328a6d43d16
    
      
        | Split | 
	Close | 
      
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/uts/common/fs/nfs/nfs4_srv_deleg.c
          +++ new/usr/src/uts/common/fs/nfs/nfs4_srv_deleg.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  
    | 
      ↓ open down ↓ | 
    14 lines elided | 
    
      ↑ open up ↑ | 
  
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  24   24   * Use is subject to license terms.
  25      - * Copyright 2014 Nexenta Systems, Inc.  All rights reserved.
  26   25   */
  27   26  
       27 +/*
       28 + * Copyright 2018 Nexenta Systems, Inc.
       29 + */
       30 +
  28   31  #include <sys/systm.h>
  29   32  #include <rpc/auth.h>
  30   33  #include <rpc/clnt.h>
  31   34  #include <nfs/nfs4_kprot.h>
  32   35  #include <nfs/nfs4.h>
  33   36  #include <nfs/lm.h>
  34   37  #include <sys/cmn_err.h>
  35   38  #include <sys/disp.h>
  36   39  #include <sys/sdt.h>
  37   40  
  38   41  #include <sys/pathname.h>
  39   42  
  40   43  #include <sys/strsubr.h>
  
    | 
      ↓ open down ↓ | 
    3 lines elided | 
    
      ↑ open up ↑ | 
  
  41   44  #include <sys/ddi.h>
  42   45  
  43   46  #include <sys/vnode.h>
  44   47  #include <sys/sdt.h>
  45   48  #include <inet/common.h>
  46   49  #include <inet/ip.h>
  47   50  #include <inet/ip6.h>
  48   51  
  49   52  #define MAX_READ_DELEGATIONS 5
  50   53  
  51      -krwlock_t rfs4_deleg_policy_lock;
  52      -srv_deleg_policy_t rfs4_deleg_policy = SRV_NEVER_DELEGATE;
  53   54  static int rfs4_deleg_wlp = 5;
  54      -kmutex_t rfs4_deleg_lock;
  55   55  static int rfs4_deleg_disabled;
  56   56  static int rfs4_max_setup_cb_tries = 5;
  57   57  
  58   58  #ifdef DEBUG
  59   59  
  60   60  static int rfs4_test_cbgetattr_fail = 0;
  61   61  int rfs4_cb_null;
  62   62  int rfs4_cb_debug;
  63   63  int rfs4_deleg_debug;
  64   64  
  65   65  #endif
  66   66  
  67   67  static void rfs4_recall_file(rfs4_file_t *,
  68   68      void (*recall)(rfs4_deleg_state_t *, bool_t),
  69   69      bool_t, rfs4_client_t *);
  70   70  static  void            rfs4_revoke_file(rfs4_file_t *);
  71   71  static  void            rfs4_cb_chflush(rfs4_cbinfo_t *);
  72   72  static  CLIENT          *rfs4_cb_getch(rfs4_cbinfo_t *);
  73   73  static  void            rfs4_cb_freech(rfs4_cbinfo_t *, CLIENT *, bool_t);
  74   74  static rfs4_deleg_state_t *rfs4_deleg_state(rfs4_state_t *,
  75   75      open_delegation_type4, int *);
  76   76  
  77   77  /*
  78   78   * Convert a universal address to a transport-specific
  79   79   * address using inet_pton.
  80   80   */
  81   81  static int
  82   82  uaddr2sockaddr(int af, char *ua, void *ap, in_port_t *pp)
  83   83  {
  84   84          int dots = 0, i, j, len, k;
  85   85          unsigned char c;
  86   86          in_port_t port = 0;
  87   87  
  88   88          len = strlen(ua);
  89   89  
  90   90          for (i = len-1; i >= 0; i--) {
  91   91  
  92   92                  if (ua[i] == '.')
  93   93                          dots++;
  94   94  
  95   95                  if (dots == 2) {
  96   96  
  97   97                          ua[i] = '\0';
  98   98                          /*
  99   99                           * We use k to remember where to stick '.' back, since
 100  100                           * ua was kmem_allocated from the pool with size len+1.
 101  101                           */
 102  102                          k = i;
 103  103                          if (inet_pton(af, ua, ap) == 1) {
 104  104  
 105  105                                  c = 0;
 106  106  
 107  107                                  for (j = i+1; j < len; j++) {
 108  108                                          if (ua[j] == '.') {
 109  109                                                  port = c << 8;
 110  110                                                  c = 0;
 111  111                                          } else if (ua[j] >= '0' &&
 112  112                                              ua[j] <= '9') {
 113  113                                                  c *= 10;
 114  114                                                  c += ua[j] - '0';
 115  115                                          } else {
 116  116                                                  ua[k] = '.';
 117  117                                                  return (EINVAL);
 118  118                                          }
 119  119                                  }
 120  120                                  port += c;
 121  121  
 122  122                                  *pp = htons(port);
 123  123  
 124  124                                  ua[k] = '.';
 125  125                                  return (0);
 126  126                          } else {
 127  127                                  ua[k] = '.';
 128  128                                  return (EINVAL);
 129  129                          }
 130  130                  }
  
    | 
      ↓ open down ↓ | 
    66 lines elided | 
    
      ↑ open up ↑ | 
  
 131  131          }
 132  132  
 133  133          return (EINVAL);
 134  134  }
 135  135  
 136  136  /*
 137  137   * Update the delegation policy with the
 138  138   * value of "new_policy"
 139  139   */
 140  140  void
 141      -rfs4_set_deleg_policy(srv_deleg_policy_t new_policy)
      141 +rfs4_set_deleg_policy(nfs4_srv_t *nsrv4, srv_deleg_policy_t new_policy)
 142  142  {
 143      -        rw_enter(&rfs4_deleg_policy_lock, RW_WRITER);
 144      -        rfs4_deleg_policy = new_policy;
 145      -        rw_exit(&rfs4_deleg_policy_lock);
      143 +        rw_enter(&nsrv4->deleg_policy_lock, RW_WRITER);
      144 +        nsrv4->nfs4_deleg_policy = new_policy;
      145 +        rw_exit(&nsrv4->deleg_policy_lock);
 146  146  }
 147  147  
 148  148  void
 149      -rfs4_hold_deleg_policy(void)
      149 +rfs4_hold_deleg_policy(nfs4_srv_t *nsrv4)
 150  150  {
 151      -        rw_enter(&rfs4_deleg_policy_lock, RW_READER);
      151 +        rw_enter(&nsrv4->deleg_policy_lock, RW_READER);
 152  152  }
 153  153  
 154  154  void
 155      -rfs4_rele_deleg_policy(void)
      155 +rfs4_rele_deleg_policy(nfs4_srv_t *nsrv4)
 156  156  {
 157      -        rw_exit(&rfs4_deleg_policy_lock);
      157 +        rw_exit(&nsrv4->deleg_policy_lock);
 158  158  }
 159  159  
      160 +srv_deleg_policy_t
      161 +nfs4_get_deleg_policy()
      162 +{
      163 +        nfs4_srv_t *nsrv4 = nfs4_get_srv();
      164 +        return (nsrv4->nfs4_deleg_policy);
      165 +}
 160  166  
      167 +
 161  168  /*
 162  169   * This free function is to be used when the client struct is being
 163  170   * released and nothing at all is needed of the callback info any
 164  171   * longer.
 165  172   */
 166  173  void
 167  174  rfs4_cbinfo_free(rfs4_cbinfo_t *cbp)
 168  175  {
 169  176          char *addr = cbp->cb_callback.cb_location.r_addr;
 170  177          char *netid = cbp->cb_callback.cb_location.r_netid;
 171  178  
 172  179          /* Free old address if any */
 173  180  
 174  181          if (addr)
 175  182                  kmem_free(addr, strlen(addr) + 1);
 176  183          if (netid)
 177  184                  kmem_free(netid, strlen(netid) + 1);
 178  185  
 179  186          addr = cbp->cb_newer.cb_callback.cb_location.r_addr;
 180  187          netid = cbp->cb_newer.cb_callback.cb_location.r_netid;
 181  188  
 182  189          if (addr)
 183  190                  kmem_free(addr, strlen(addr) + 1);
 184  191          if (netid)
 185  192                  kmem_free(netid, strlen(netid) + 1);
 186  193  
 187  194          if (cbp->cb_chc_free) {
 188  195                  rfs4_cb_chflush(cbp);
 189  196          }
 190  197  }
 191  198  
 192  199  /*
 193  200   * The server uses this to check the callback path supplied by the
 194  201   * client.  The callback connection is marked "in progress" while this
 195  202   * work is going on and then eventually marked either OK or FAILED.
 196  203   * This work can be done as part of a separate thread and at the end
 197  204   * of this the thread will exit or it may be done such that the caller
 198  205   * will continue with other work.
 199  206   */
 200  207  static void
 201  208  rfs4_do_cb_null(rfs4_client_t *cp)
 202  209  {
  
    | 
      ↓ open down ↓ | 
    32 lines elided | 
    
      ↑ open up ↑ | 
  
 203  210          struct timeval tv;
 204  211          CLIENT *ch;
 205  212          rfs4_cbstate_t newstate;
 206  213          rfs4_cbinfo_t *cbp = &cp->rc_cbinfo;
 207  214  
 208  215          mutex_enter(cbp->cb_lock);
 209  216          /* If another thread is doing CB_NULL RPC then return */
 210  217          if (cbp->cb_nullcaller == TRUE) {
 211  218                  mutex_exit(cbp->cb_lock);
 212  219                  rfs4_client_rele(cp);
 213      -                return;
      220 +                zthread_exit();
 214  221          }
 215  222  
 216  223          /* Mark the cbinfo as having a thread in the NULL callback */
 217  224          cbp->cb_nullcaller = TRUE;
 218  225  
 219  226          /*
 220  227           * Are there other threads still using the cbinfo client
 221  228           * handles?  If so, this thread must wait before going and
 222  229           * mucking around with the callback information
 223  230           */
 224  231          while (cbp->cb_refcnt != 0)
 225  232                  cv_wait(cbp->cb_cv_nullcaller, cbp->cb_lock);
 226  233  
 227  234          /*
 228  235           * This thread itself may find that new callback info has
 229  236           * arrived and is set up to handle this case and redrive the
 230  237           * call to the client's callback server.
 231  238           */
 232  239  retry:
 233  240          if (cbp->cb_newer.cb_new == TRUE &&
 234  241              cbp->cb_newer.cb_confirmed == TRUE) {
 235  242                  char *addr = cbp->cb_callback.cb_location.r_addr;
 236  243                  char *netid = cbp->cb_callback.cb_location.r_netid;
 237  244  
 238  245                  /*
 239  246                   * Free the old stuff if it exists; may be the first
 240  247                   * time through this path
 241  248                   */
 242  249                  if (addr)
 243  250                          kmem_free(addr, strlen(addr) + 1);
 244  251                  if (netid)
 245  252                          kmem_free(netid, strlen(netid) + 1);
 246  253  
 247  254                  /* Move over the addr/netid */
 248  255                  cbp->cb_callback.cb_location.r_addr =
 249  256                      cbp->cb_newer.cb_callback.cb_location.r_addr;
 250  257                  cbp->cb_newer.cb_callback.cb_location.r_addr = NULL;
 251  258                  cbp->cb_callback.cb_location.r_netid =
 252  259                      cbp->cb_newer.cb_callback.cb_location.r_netid;
 253  260                  cbp->cb_newer.cb_callback.cb_location.r_netid = NULL;
 254  261  
 255  262                  /* Get the program number */
 256  263                  cbp->cb_callback.cb_program =
 257  264                      cbp->cb_newer.cb_callback.cb_program;
 258  265                  cbp->cb_newer.cb_callback.cb_program = 0;
 259  266  
 260  267                  /* Don't forget the protocol's "cb_ident" field */
 261  268                  cbp->cb_ident = cbp->cb_newer.cb_ident;
 262  269                  cbp->cb_newer.cb_ident = 0;
 263  270  
 264  271                  /* no longer new */
 265  272                  cbp->cb_newer.cb_new = FALSE;
 266  273                  cbp->cb_newer.cb_confirmed = FALSE;
 267  274  
 268  275                  /* get rid of the old client handles that may exist */
 269  276                  rfs4_cb_chflush(cbp);
 270  277  
  
    | 
      ↓ open down ↓ | 
    47 lines elided | 
    
      ↑ open up ↑ | 
  
 271  278                  cbp->cb_state = CB_NONE;
 272  279                  cbp->cb_timefailed = 0; /* reset the clock */
 273  280                  cbp->cb_notified_of_cb_path_down = TRUE;
 274  281          }
 275  282  
 276  283          if (cbp->cb_state != CB_NONE) {
 277  284                  cv_broadcast(cbp->cb_cv);       /* let the others know */
 278  285                  cbp->cb_nullcaller = FALSE;
 279  286                  mutex_exit(cbp->cb_lock);
 280  287                  rfs4_client_rele(cp);
 281      -                return;
      288 +                zthread_exit();
 282  289          }
 283  290  
 284  291          /* mark rfs4_client_t as CALLBACK NULL in progress */
 285  292          cbp->cb_state = CB_INPROG;
 286  293          mutex_exit(cbp->cb_lock);
 287  294  
 288  295          /* get/generate a client handle */
 289  296          if ((ch = rfs4_cb_getch(cbp)) == NULL) {
 290  297                  mutex_enter(cbp->cb_lock);
 291  298                  cbp->cb_state = CB_BAD;
 292  299                  cbp->cb_timefailed = gethrestime_sec(); /* observability */
 293  300                  goto retry;
 294  301          }
 295  302  
 296  303  
 297  304          tv.tv_sec = 30;
 298  305          tv.tv_usec = 0;
 299  306          if (clnt_call(ch, CB_NULL, xdr_void, NULL, xdr_void, NULL, tv) != 0) {
 300  307                  newstate = CB_BAD;
 301  308          } else {
 302  309                  newstate = CB_OK;
 303  310  #ifdef  DEBUG
 304  311                  rfs4_cb_null++;
 305  312  #endif
 306  313          }
 307  314  
 308  315          /* Check to see if the client has specified new callback info */
 309  316          mutex_enter(cbp->cb_lock);
 310  317          rfs4_cb_freech(cbp, ch, TRUE);
 311  318          if (cbp->cb_newer.cb_new == TRUE &&
 312  319              cbp->cb_newer.cb_confirmed == TRUE) {
  
    | 
      ↓ open down ↓ | 
    21 lines elided | 
    
      ↑ open up ↑ | 
  
 313  320                  goto retry;     /* give the CB_NULL another chance */
 314  321          }
 315  322  
 316  323          cbp->cb_state = newstate;
 317  324          if (cbp->cb_state == CB_BAD)
 318  325                  cbp->cb_timefailed = gethrestime_sec(); /* observability */
 319  326  
 320  327          cv_broadcast(cbp->cb_cv);       /* start up the other threads */
 321  328          cbp->cb_nullcaller = FALSE;
 322  329          mutex_exit(cbp->cb_lock);
 323      -
 324  330          rfs4_client_rele(cp);
      331 +        zthread_exit();
 325  332  }
 326  333  
 327  334  /*
 328  335   * Given a client struct, inspect the callback info to see if the
 329  336   * callback path is up and available.
 330  337   *
 331  338   * If new callback path is available and no one has set it up then
 332  339   * try to set it up. If setup is not successful after 5 tries (5 secs)
 333  340   * then gives up and returns NULL.
 334  341   *
 335  342   * If callback path is being initialized, then wait for the CB_NULL RPC
 336  343   * call to occur.
 337  344   */
 338  345  static rfs4_cbinfo_t *
 339  346  rfs4_cbinfo_hold(rfs4_client_t *cp)
 340  347  {
 341  348          rfs4_cbinfo_t *cbp = &cp->rc_cbinfo;
 342  349          int retries = 0;
 343  350  
 344  351          mutex_enter(cbp->cb_lock);
 345  352  
 346  353          while (cbp->cb_newer.cb_new == TRUE && cbp->cb_nullcaller == FALSE) {
 347  354                  /*
 348  355                   * Looks like a new callback path may be available and
 349  356                   * no one has set it up.
 350  357                   */
 351  358                  mutex_exit(cbp->cb_lock);
 352  359                  rfs4_dbe_hold(cp->rc_dbe);
 353  360                  rfs4_do_cb_null(cp); /* callee will release client hold */
 354  361  
 355  362                  mutex_enter(cbp->cb_lock);
 356  363                  /*
 357  364                   * If callback path is no longer new, or it's being setup
 358  365                   * then stop and wait for it to be done.
 359  366                   */
 360  367                  if (cbp->cb_newer.cb_new == FALSE || cbp->cb_nullcaller == TRUE)
 361  368                          break;
 362  369                  mutex_exit(cbp->cb_lock);
 363  370  
 364  371                  if (++retries >= rfs4_max_setup_cb_tries)
 365  372                          return (NULL);
 366  373                  delay(hz);
 367  374                  mutex_enter(cbp->cb_lock);
 368  375          }
 369  376  
 370  377          /* Is there a thread working on doing the CB_NULL RPC? */
 371  378          if (cbp->cb_nullcaller == TRUE)
 372  379                  cv_wait(cbp->cb_cv, cbp->cb_lock);  /* if so, wait on it */
 373  380  
 374  381          /* If the callback path is not okay (up and running), just quit */
 375  382          if (cbp->cb_state != CB_OK) {
 376  383                  mutex_exit(cbp->cb_lock);
 377  384                  return (NULL);
 378  385          }
 379  386  
 380  387          /* Let someone know we are using the current callback info */
 381  388          cbp->cb_refcnt++;
 382  389          mutex_exit(cbp->cb_lock);
 383  390          return (cbp);
 384  391  }
 385  392  
 386  393  /*
 387  394   * The caller is done with the callback info.  It may be that the
 388  395   * caller's RPC failed and the NFSv4 client has actually provided new
 389  396   * callback information.  If so, let the caller know so they can
 390  397   * take advantage of this and maybe retry the RPC that originally failed.
 391  398   */
 392  399  static int
 393  400  rfs4_cbinfo_rele(rfs4_cbinfo_t *cbp, rfs4_cbstate_t newstate)
 394  401  {
 395  402          int cb_new = FALSE;
 396  403  
 397  404          mutex_enter(cbp->cb_lock);
 398  405  
 399  406          /* The caller gets a chance to mark the callback info as bad */
 400  407          if (newstate != CB_NOCHANGE)
 401  408                  cbp->cb_state = newstate;
 402  409          if (newstate == CB_FAILED) {
 403  410                  cbp->cb_timefailed = gethrestime_sec(); /* observability */
 404  411                  cbp->cb_notified_of_cb_path_down = FALSE;
 405  412          }
 406  413  
 407  414          cbp->cb_refcnt--;       /* no longer using the information */
 408  415  
 409  416          /*
 410  417           * A thread may be waiting on this one to finish and if so,
 411  418           * let it know that it is okay to do the CB_NULL to the
 412  419           * client's callback server.
 413  420           */
 414  421          if (cbp->cb_refcnt == 0 && cbp->cb_nullcaller)
 415  422                  cv_broadcast(cbp->cb_cv_nullcaller);
 416  423  
 417  424          /*
 418  425           * If this is the last thread to use the callback info and
 419  426           * there is new callback information to try and no thread is
 420  427           * there ready to do the CB_NULL, then return true to the
 421  428           * caller so they can do the CB_NULL
 422  429           */
 423  430          if (cbp->cb_refcnt == 0 &&
 424  431              cbp->cb_nullcaller == FALSE &&
 425  432              cbp->cb_newer.cb_new == TRUE &&
 426  433              cbp->cb_newer.cb_confirmed == TRUE)
 427  434                  cb_new = TRUE;
 428  435  
 429  436          mutex_exit(cbp->cb_lock);
 430  437  
 431  438          return (cb_new);
 432  439  }
 433  440  
 434  441  /*
 435  442   * Given the information in the callback info struct, create a client
 436  443   * handle that can be used by the server for its callback path.
 437  444   */
 438  445  static CLIENT *
 439  446  rfs4_cbch_init(rfs4_cbinfo_t *cbp)
 440  447  {
 441  448          struct knetconfig knc;
 442  449          vnode_t *vp;
 443  450          struct sockaddr_in addr4;
 444  451          struct sockaddr_in6 addr6;
 445  452          void *addr, *taddr;
 446  453          in_port_t *pp;
 447  454          int af;
 448  455          char *devnam;
 449  456          struct netbuf nb;
 450  457          int size;
 451  458          CLIENT *ch = NULL;
 452  459          int useresvport = 0;
 453  460  
 454  461          mutex_enter(cbp->cb_lock);
 455  462  
 456  463          if (cbp->cb_callback.cb_location.r_netid == NULL ||
 457  464              cbp->cb_callback.cb_location.r_addr == NULL) {
 458  465                  goto cb_init_out;
 459  466          }
 460  467  
 461  468          if (strcmp(cbp->cb_callback.cb_location.r_netid, "tcp") == 0) {
 462  469                  knc.knc_semantics = NC_TPI_COTS;
 463  470                  knc.knc_protofmly = "inet";
 464  471                  knc.knc_proto = "tcp";
 465  472                  devnam = "/dev/tcp";
 466  473                  af = AF_INET;
 467  474          } else if (strcmp(cbp->cb_callback.cb_location.r_netid, "udp")
 468  475              == 0) {
 469  476                  knc.knc_semantics = NC_TPI_CLTS;
 470  477                  knc.knc_protofmly = "inet";
 471  478                  knc.knc_proto = "udp";
 472  479                  devnam = "/dev/udp";
 473  480                  af = AF_INET;
 474  481          } else if (strcmp(cbp->cb_callback.cb_location.r_netid, "tcp6")
 475  482              == 0) {
 476  483                  knc.knc_semantics = NC_TPI_COTS;
 477  484                  knc.knc_protofmly = "inet6";
 478  485                  knc.knc_proto = "tcp";
 479  486                  devnam = "/dev/tcp6";
 480  487                  af = AF_INET6;
 481  488          } else if (strcmp(cbp->cb_callback.cb_location.r_netid, "udp6")
 482  489              == 0) {
 483  490                  knc.knc_semantics = NC_TPI_CLTS;
 484  491                  knc.knc_protofmly = "inet6";
 485  492                  knc.knc_proto = "udp";
 486  493                  devnam = "/dev/udp6";
 487  494                  af = AF_INET6;
 488  495          } else {
 489  496                  goto cb_init_out;
 490  497          }
 491  498  
 492  499          if (lookupname(devnam, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp) != 0) {
 493  500  
 494  501                  goto cb_init_out;
 495  502          }
 496  503  
 497  504          if (vp->v_type != VCHR) {
 498  505                  VN_RELE(vp);
 499  506                  goto cb_init_out;
 500  507          }
 501  508  
 502  509          knc.knc_rdev = vp->v_rdev;
 503  510  
 504  511          VN_RELE(vp);
 505  512  
 506  513          if (af == AF_INET) {
 507  514                  size = sizeof (addr4);
 508  515                  bzero(&addr4, size);
 509  516                  addr4.sin_family = (sa_family_t)af;
 510  517                  addr = &addr4.sin_addr;
 511  518                  pp = &addr4.sin_port;
 512  519                  taddr = &addr4;
 513  520          } else /* AF_INET6 */ {
 514  521                  size = sizeof (addr6);
 515  522                  bzero(&addr6, size);
 516  523                  addr6.sin6_family = (sa_family_t)af;
 517  524                  addr = &addr6.sin6_addr;
 518  525                  pp = &addr6.sin6_port;
 519  526                  taddr = &addr6;
 520  527          }
 521  528  
 522  529          if (uaddr2sockaddr(af,
 523  530              cbp->cb_callback.cb_location.r_addr, addr, pp)) {
 524  531  
 525  532                  goto cb_init_out;
 526  533          }
 527  534  
 528  535  
 529  536          nb.maxlen = nb.len = size;
 530  537          nb.buf = (char *)taddr;
 531  538  
 532  539          if (clnt_tli_kcreate(&knc, &nb, cbp->cb_callback.cb_program,
 533  540              NFS_CB, 0, 0, curthread->t_cred, &ch)) {
 534  541  
 535  542                  ch = NULL;
 536  543          }
 537  544  
 538  545          /* turn off reserved port usage */
 539  546          (void) CLNT_CONTROL(ch, CLSET_BINDRESVPORT, (char *)&useresvport);
 540  547  
 541  548  cb_init_out:
 542  549          mutex_exit(cbp->cb_lock);
 543  550          return (ch);
 544  551  }
 545  552  
 546  553  /*
 547  554   * Iterate over the client handle cache and
 548  555   * destroy it.
 549  556   */
 550  557  static void
 551  558  rfs4_cb_chflush(rfs4_cbinfo_t *cbp)
 552  559  {
 553  560          CLIENT *ch;
 554  561  
 555  562          while (cbp->cb_chc_free) {
 556  563                  cbp->cb_chc_free--;
 557  564                  ch = cbp->cb_chc[cbp->cb_chc_free];
 558  565                  cbp->cb_chc[cbp->cb_chc_free] = NULL;
 559  566                  if (ch) {
 560  567                          if (ch->cl_auth)
 561  568                                  auth_destroy(ch->cl_auth);
 562  569                          clnt_destroy(ch);
 563  570                  }
 564  571          }
 565  572  }
 566  573  
 567  574  /*
 568  575   * Return a client handle, either from the small
 569  576   * rfs4_client_t cache or one that we just created.
 570  577   */
 571  578  static CLIENT *
 572  579  rfs4_cb_getch(rfs4_cbinfo_t *cbp)
 573  580  {
 574  581          CLIENT *cbch = NULL;
 575  582          uint32_t zilch = 0;
 576  583  
 577  584          mutex_enter(cbp->cb_lock);
 578  585  
 579  586          if (cbp->cb_chc_free) {
 580  587                  cbp->cb_chc_free--;
 581  588                  cbch = cbp->cb_chc[ cbp->cb_chc_free ];
 582  589                  mutex_exit(cbp->cb_lock);
 583  590                  (void) CLNT_CONTROL(cbch, CLSET_XID, (char *)&zilch);
 584  591                  return (cbch);
 585  592          }
 586  593  
 587  594          mutex_exit(cbp->cb_lock);
 588  595  
 589  596          /* none free so make it now */
 590  597          cbch = rfs4_cbch_init(cbp);
 591  598  
 592  599          return (cbch);
 593  600  }
 594  601  
 595  602  /*
 596  603   * Return the client handle to the small cache or
 597  604   * destroy it.
 598  605   */
 599  606  static void
 600  607  rfs4_cb_freech(rfs4_cbinfo_t *cbp, CLIENT *ch, bool_t lockheld)
 601  608  {
 602  609          if (lockheld == FALSE)
 603  610                  mutex_enter(cbp->cb_lock);
 604  611  
 605  612          if (cbp->cb_chc_free < RFS4_CBCH_MAX) {
 606  613                  cbp->cb_chc[ cbp->cb_chc_free++ ] = ch;
 607  614                  if (lockheld == FALSE)
 608  615                          mutex_exit(cbp->cb_lock);
 609  616                  return;
 610  617          }
 611  618          if (lockheld == FALSE)
 612  619                  mutex_exit(cbp->cb_lock);
 613  620  
 614  621          /*
 615  622           * cache maxed out of free entries, obliterate
 616  623           * this client handle, destroy it, throw it away.
 617  624           */
 618  625          if (ch->cl_auth)
 619  626                  auth_destroy(ch->cl_auth);
 620  627          clnt_destroy(ch);
 621  628  }
 622  629  
 623  630  /*
 624  631   * With the supplied callback information - initialize the client
 625  632   * callback data.  If there is a callback in progress, save the
 626  633   * callback info so that a thread can pick it up in the future.
 627  634   */
 628  635  void
 629  636  rfs4_client_setcb(rfs4_client_t *cp, cb_client4 *cb, uint32_t cb_ident)
 630  637  {
 631  638          char *addr = NULL;
 632  639          char *netid = NULL;
 633  640          rfs4_cbinfo_t *cbp = &cp->rc_cbinfo;
 634  641          size_t len;
 635  642  
 636  643          /* Set the call back for the client */
 637  644          if (cb->cb_location.r_addr && cb->cb_location.r_addr[0] != '\0' &&
 638  645              cb->cb_location.r_netid && cb->cb_location.r_netid[0] != '\0') {
 639  646                  len = strlen(cb->cb_location.r_addr) + 1;
 640  647                  addr = kmem_alloc(len, KM_SLEEP);
 641  648                  bcopy(cb->cb_location.r_addr, addr, len);
 642  649                  len = strlen(cb->cb_location.r_netid) + 1;
 643  650                  netid = kmem_alloc(len, KM_SLEEP);
 644  651                  bcopy(cb->cb_location.r_netid, netid, len);
 645  652          }
 646  653          /* ready to save the new information but first free old, if exists */
 647  654          mutex_enter(cbp->cb_lock);
 648  655  
 649  656          cbp->cb_newer.cb_callback.cb_program = cb->cb_program;
 650  657  
 651  658          if (cbp->cb_newer.cb_callback.cb_location.r_addr != NULL)
 652  659                  kmem_free(cbp->cb_newer.cb_callback.cb_location.r_addr,
 653  660                      strlen(cbp->cb_newer.cb_callback.cb_location.r_addr) + 1);
 654  661          cbp->cb_newer.cb_callback.cb_location.r_addr = addr;
 655  662  
 656  663          if (cbp->cb_newer.cb_callback.cb_location.r_netid != NULL)
 657  664                  kmem_free(cbp->cb_newer.cb_callback.cb_location.r_netid,
 658  665                      strlen(cbp->cb_newer.cb_callback.cb_location.r_netid) + 1);
 659  666          cbp->cb_newer.cb_callback.cb_location.r_netid = netid;
 660  667  
 661  668          cbp->cb_newer.cb_ident = cb_ident;
 662  669  
 663  670          if (addr && *addr && netid && *netid) {
 664  671                  cbp->cb_newer.cb_new = TRUE;
 665  672                  cbp->cb_newer.cb_confirmed = FALSE;
 666  673          } else {
 667  674                  cbp->cb_newer.cb_new = FALSE;
 668  675                  cbp->cb_newer.cb_confirmed = FALSE;
 669  676          }
 670  677  
 671  678          mutex_exit(cbp->cb_lock);
 672  679  }
 673  680  
 674  681  /*
 675  682   * The server uses this when processing SETCLIENTID_CONFIRM.  Callback
 676  683   * information may have been provided on SETCLIENTID and this call
 677  684   * marks that information as confirmed and then starts a thread to
 678  685   * test the callback path.
 679  686   */
  
    | 
      ↓ open down ↓ | 
    345 lines elided | 
    
      ↑ open up ↑ | 
  
 680  687  void
 681  688  rfs4_deleg_cb_check(rfs4_client_t *cp)
 682  689  {
 683  690          if (cp->rc_cbinfo.cb_newer.cb_new == FALSE)
 684  691                  return;
 685  692  
 686  693          cp->rc_cbinfo.cb_newer.cb_confirmed = TRUE;
 687  694  
 688  695          rfs4_dbe_hold(cp->rc_dbe); /* hold the client struct for thread */
 689  696  
 690      -        (void) thread_create(NULL, 0, rfs4_do_cb_null, cp, 0, &p0, TS_RUN,
      697 +        (void) zthread_create(NULL, 0, rfs4_do_cb_null, cp, 0,
 691  698              minclsyspri);
 692  699  }
 693  700  
 694  701  static void
 695  702  rfs4args_cb_recall_free(nfs_cb_argop4 *argop)
 696  703  {
 697  704          CB_RECALL4args  *rec_argp;
 698  705  
 699  706          rec_argp = &argop->nfs_cb_argop4_u.opcbrecall;
 700  707          if (rec_argp->fh.nfs_fh4_val)
 701  708                  kmem_free(rec_argp->fh.nfs_fh4_val, rec_argp->fh.nfs_fh4_len);
 702  709  }
 703  710  
 704  711  /* ARGSUSED */
 705  712  static void
 706  713  rfs4args_cb_getattr_free(nfs_cb_argop4 *argop)
 707  714  {
 708  715          CB_GETATTR4args *argp;
 709  716  
 710  717          argp = &argop->nfs_cb_argop4_u.opcbgetattr;
 711  718          if (argp->fh.nfs_fh4_val)
 712  719                  kmem_free(argp->fh.nfs_fh4_val, argp->fh.nfs_fh4_len);
 713  720  }
 714  721  
 715  722  static void
 716  723  rfs4freeargres(CB_COMPOUND4args *args, CB_COMPOUND4res *resp)
 717  724  {
 718  725          int i, arglen;
 719  726          nfs_cb_argop4 *argop;
 720  727  
 721  728          /*
 722  729           * First free any special args alloc'd for specific ops.
 723  730           */
 724  731          arglen = args->array_len;
 725  732          argop = args->array;
 726  733          for (i = 0; i < arglen; i++, argop++) {
 727  734  
 728  735                  switch (argop->argop) {
 729  736                  case OP_CB_RECALL:
 730  737                          rfs4args_cb_recall_free(argop);
 731  738                          break;
 732  739  
 733  740                  case OP_CB_GETATTR:
 734  741                          rfs4args_cb_getattr_free(argop);
 735  742                          break;
 736  743  
 737  744                  default:
 738  745                          return;
 739  746                  }
 740  747          }
 741  748  
 742  749          if (args->tag.utf8string_len > 0)
 743  750                  UTF8STRING_FREE(args->tag)
 744  751  
 745  752          kmem_free(args->array, arglen * sizeof (nfs_cb_argop4));
 746  753          if (resp)
 747  754                  xdr_free(xdr_CB_COMPOUND4res, (caddr_t)resp);
 748  755  }
 749  756  
 750  757  /*
 751  758   * General callback routine for the server to the client.
 752  759   */
 753  760  static enum clnt_stat
 754  761  rfs4_do_callback(rfs4_client_t *cp, CB_COMPOUND4args *args,
 755  762      CB_COMPOUND4res *res, struct timeval timeout)
 756  763  {
 757  764          rfs4_cbinfo_t *cbp;
 758  765          CLIENT *ch;
 759  766          /* start with this in case cb_getch() fails */
 760  767          enum clnt_stat  stat = RPC_FAILED;
 761  768  
 762  769          res->tag.utf8string_val = NULL;
 763  770          res->array = NULL;
 764  771  
 765  772  retry:
 766  773          cbp = rfs4_cbinfo_hold(cp);
 767  774          if (cbp == NULL)
 768  775                  return (stat);
 769  776  
 770  777          /* get a client handle */
 771  778          if ((ch = rfs4_cb_getch(cbp)) != NULL) {
 772  779                  /*
 773  780                   * reset the cb_ident since it may have changed in
 774  781                   * rfs4_cbinfo_hold()
 775  782                   */
 776  783                  args->callback_ident = cbp->cb_ident;
 777  784  
 778  785                  stat = clnt_call(ch, CB_COMPOUND, xdr_CB_COMPOUND4args_srv,
 779  786                      (caddr_t)args, xdr_CB_COMPOUND4res,
 780  787                      (caddr_t)res, timeout);
 781  788  
 782  789                  /* free client handle */
 783  790                  rfs4_cb_freech(cbp, ch, FALSE);
 784  791          }
 785  792  
 786  793          /*
 787  794           * If the rele says that there may be new callback info then
 788  795           * retry this sequence and it may succeed as a result of the
 789  796           * new callback path
 790  797           */
 791  798          if (rfs4_cbinfo_rele(cbp,
 792  799              (stat == RPC_SUCCESS ? CB_NOCHANGE : CB_FAILED)) == TRUE)
 793  800                  goto retry;
 794  801  
 795  802          return (stat);
 796  803  }
 797  804  
 798  805  /*
 799  806   * Used by the NFSv4 server to get attributes for a file while
 800  807   * handling the case where a file has been write delegated.  For the
 801  808   * time being, VOP_GETATTR() is called and CB_GETATTR processing is
 802  809   * not undertaken.  This call site is maintained in case the server is
 803  810   * updated in the future to handle write delegation space guarantees.
 804  811   */
 805  812  nfsstat4
 806  813  rfs4_vop_getattr(vnode_t *vp, vattr_t *vap, int flag, cred_t *cr)
 807  814  {
 808  815  
 809  816          int error;
 810  817  
 811  818          error = VOP_GETATTR(vp, vap, flag, cr, NULL);
 812  819          return (puterrno4(error));
 813  820  }
 814  821  
 815  822  /*
 816  823   * This is used everywhere in the v2/v3 server to allow the
 817  824   * integration of all NFS versions and the support of delegation.  For
 818  825   * now, just call the VOP_GETATTR().  If the NFSv4 server is enhanced
 819  826   * in the future to provide space guarantees for write delegations
 820  827   * then this call site should be expanded to interact with the client.
 821  828   */
 822  829  int
 823  830  rfs4_delegated_getattr(vnode_t *vp, vattr_t *vap, int flag, cred_t *cr)
 824  831  {
 825  832          return (VOP_GETATTR(vp, vap, flag, cr, NULL));
 826  833  }
 827  834  
 828  835  /*
 829  836   * Place the actual cb_recall otw call to client.
 830  837   */
 831  838  static void
 832  839  rfs4_do_cb_recall(rfs4_deleg_state_t *dsp, bool_t trunc)
 833  840  {
 834  841          CB_COMPOUND4args        cb4_args;
 835  842          CB_COMPOUND4res         cb4_res;
 836  843          CB_RECALL4args          *rec_argp;
 837  844          CB_RECALL4res           *rec_resp;
 838  845          nfs_cb_argop4           *argop;
 839  846          int                     numops;
 840  847          int                     argoplist_size;
 841  848          struct timeval          timeout;
 842  849          nfs_fh4                 *fhp;
 843  850          enum clnt_stat          call_stat;
 844  851  
 845  852          /*
 846  853           * set up the compound args
 847  854           */
 848  855          numops = 1;     /* CB_RECALL only */
 849  856  
 850  857          argoplist_size = numops * sizeof (nfs_cb_argop4);
 851  858          argop = kmem_zalloc(argoplist_size, KM_SLEEP);
 852  859          argop->argop = OP_CB_RECALL;
 853  860          rec_argp = &argop->nfs_cb_argop4_u.opcbrecall;
 854  861  
 855  862          (void) str_to_utf8("cb_recall", &cb4_args.tag);
 856  863          cb4_args.minorversion = CB4_MINORVERSION;
 857  864          /* cb4_args.callback_ident is set in rfs4_do_callback() */
 858  865          cb4_args.array_len = numops;
 859  866          cb4_args.array = argop;
 860  867  
 861  868          /*
 862  869           * fill in the args struct
 863  870           */
 864  871          bcopy(&dsp->rds_delegid.stateid, &rec_argp->stateid, sizeof (stateid4));
 865  872          rec_argp->truncate = trunc;
 866  873  
 867  874          fhp = &dsp->rds_finfo->rf_filehandle;
 868  875          rec_argp->fh.nfs_fh4_val = kmem_alloc(sizeof (char) *
 869  876              fhp->nfs_fh4_len, KM_SLEEP);
 870  877          nfs_fh4_copy(fhp, &rec_argp->fh);
 871  878  
 872  879          /* Keep track of when we did this for observability */
 873  880          dsp->rds_time_recalled = gethrestime_sec();
 874  881  
 875  882          /*
 876  883           * Set up the timeout for the callback and make the actual call.
 877  884           * Timeout will be 80% of the lease period for this server.
 878  885           */
 879  886          timeout.tv_sec = (rfs4_lease_time * 80) / 100;
 880  887          timeout.tv_usec = 0;
 881  888  
 882  889          DTRACE_NFSV4_3(cb__recall__start, rfs4_client_t *, dsp->rds_client,
 883  890              rfs4_deleg_state_t *, dsp, CB_RECALL4args *, rec_argp);
 884  891  
 885  892          call_stat = rfs4_do_callback(dsp->rds_client, &cb4_args, &cb4_res,
 886  893              timeout);
 887  894  
 888  895          rec_resp = (cb4_res.array_len == 0) ? NULL :
 889  896              &cb4_res.array[0].nfs_cb_resop4_u.opcbrecall;
 890  897  
 891  898          DTRACE_NFSV4_3(cb__recall__done, rfs4_client_t *, dsp->rds_client,
 892  899              rfs4_deleg_state_t *, dsp, CB_RECALL4res *, rec_resp);
 893  900  
 894  901          if (call_stat != RPC_SUCCESS || cb4_res.status != NFS4_OK) {
 895  902                  rfs4_return_deleg(dsp, TRUE);
 896  903          }
 897  904  
 898  905          rfs4freeargres(&cb4_args, &cb4_res);
 899  906  }
 900  907  
 901  908  struct recall_arg {
 902  909          rfs4_deleg_state_t *dsp;
 903  910          void (*recall)(rfs4_deleg_state_t *, bool_t trunc);
 904  911          bool_t trunc;
 905  912  };
 906  913  
 907  914  static void
 908  915  do_recall(struct recall_arg *arg)
 909  916  {
 910  917          rfs4_deleg_state_t *dsp = arg->dsp;
 911  918          rfs4_file_t *fp = dsp->rds_finfo;
 912  919          callb_cpr_t cpr_info;
 913  920          kmutex_t cpr_lock;
 914  921  
 915  922          mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL);
 916  923          CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr, "nfsv4Recall");
 917  924  
 918  925          /*
 919  926           * It is possible that before this thread starts
 920  927           * the client has sent us a return_delegation, and
 921  928           * if that is the case we do not need to send the
 922  929           * recall callback.
 923  930           */
 924  931          if (dsp->rds_dtype != OPEN_DELEGATE_NONE) {
 925  932                  DTRACE_PROBE3(nfss__i__recall,
 926  933                      struct recall_arg *, arg,
 927  934                      struct rfs4_deleg_state_t *, dsp,
 928  935                      struct rfs4_file_t *, fp);
 929  936  
 930  937                  if (arg->recall)
 931  938                          (void) (*arg->recall)(dsp, arg->trunc);
 932  939          }
 933  940  
 934  941          mutex_enter(fp->rf_dinfo.rd_recall_lock);
 935  942          /*
 936  943           * Recall count may go negative if the parent thread that is
 937  944           * creating the individual callback threads does not modify
 938  945           * the recall_count field before the callback thread actually
 939  946           * gets a response from the CB_RECALL
 940  947           */
  
    | 
      ↓ open down ↓ | 
    240 lines elided | 
    
      ↑ open up ↑ | 
  
 941  948          fp->rf_dinfo.rd_recall_count--;
 942  949          if (fp->rf_dinfo.rd_recall_count == 0)
 943  950                  cv_signal(fp->rf_dinfo.rd_recall_cv);
 944  951          mutex_exit(fp->rf_dinfo.rd_recall_lock);
 945  952  
 946  953          mutex_enter(&cpr_lock);
 947  954          CALLB_CPR_EXIT(&cpr_info);
 948  955          mutex_destroy(&cpr_lock);
 949  956  
 950  957          rfs4_deleg_state_rele(dsp); /* release the hold for this thread */
 951      -
 952  958          kmem_free(arg, sizeof (struct recall_arg));
      959 +        zthread_exit();
 953  960  }
 954  961  
 955  962  struct master_recall_args {
 956  963      rfs4_file_t *fp;
 957  964      void (*recall)(rfs4_deleg_state_t *, bool_t);
 958  965      bool_t trunc;
 959  966  };
 960  967  
 961  968  static void
 962  969  do_recall_file(struct master_recall_args *map)
 963  970  {
 964  971          rfs4_file_t *fp = map->fp;
 965  972          rfs4_deleg_state_t *dsp;
 966  973          struct recall_arg *arg;
 967  974          callb_cpr_t cpr_info;
 968  975          kmutex_t cpr_lock;
 969  976          int32_t recall_count;
  
    | 
      ↓ open down ↓ | 
    7 lines elided | 
    
      ↑ open up ↑ | 
  
 970  977  
 971  978          rfs4_dbe_lock(fp->rf_dbe);
 972  979  
 973  980          /* Recall already in progress ? */
 974  981          mutex_enter(fp->rf_dinfo.rd_recall_lock);
 975  982          if (fp->rf_dinfo.rd_recall_count != 0) {
 976  983                  mutex_exit(fp->rf_dinfo.rd_recall_lock);
 977  984                  rfs4_dbe_rele_nolock(fp->rf_dbe);
 978  985                  rfs4_dbe_unlock(fp->rf_dbe);
 979  986                  kmem_free(map, sizeof (struct master_recall_args));
 980      -                return;
      987 +                zthread_exit();
 981  988          }
 982  989  
 983  990          mutex_exit(fp->rf_dinfo.rd_recall_lock);
 984  991  
 985  992          mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL);
 986  993          CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr, "v4RecallFile");
 987  994  
 988  995          recall_count = 0;
 989  996          for (dsp = list_head(&fp->rf_delegstatelist); dsp != NULL;
 990  997              dsp = list_next(&fp->rf_delegstatelist, dsp)) {
 991  998  
 992  999                  rfs4_dbe_lock(dsp->rds_dbe);
 993 1000                  /*
 994 1001                   * if this delegation state
 995 1002                   * is being reaped skip it
 996 1003                   */
 997 1004                  if (rfs4_dbe_is_invalid(dsp->rds_dbe)) {
 998 1005                          rfs4_dbe_unlock(dsp->rds_dbe);
 999 1006                          continue;
1000 1007                  }
1001 1008  
1002 1009                  /* hold for receiving thread */
  
    | 
      ↓ open down ↓ | 
    12 lines elided | 
    
      ↑ open up ↑ | 
  
1003 1010                  rfs4_dbe_hold(dsp->rds_dbe);
1004 1011                  rfs4_dbe_unlock(dsp->rds_dbe);
1005 1012  
1006 1013                  arg = kmem_alloc(sizeof (struct recall_arg), KM_SLEEP);
1007 1014                  arg->recall = map->recall;
1008 1015                  arg->trunc = map->trunc;
1009 1016                  arg->dsp = dsp;
1010 1017  
1011 1018                  recall_count++;
1012 1019  
1013      -                (void) thread_create(NULL, 0, do_recall, arg, 0, &p0, TS_RUN,
     1020 +                (void) zthread_create(NULL, 0, do_recall, arg, 0,
1014 1021                      minclsyspri);
1015 1022          }
1016 1023  
1017 1024          rfs4_dbe_unlock(fp->rf_dbe);
1018 1025  
1019 1026          mutex_enter(fp->rf_dinfo.rd_recall_lock);
1020 1027          /*
1021 1028           * Recall count may go negative if the parent thread that is
1022 1029           * creating the individual callback threads does not modify
1023 1030           * the recall_count field before the callback thread actually
1024 1031           * gets a response from the CB_RECALL
1025 1032           */
1026 1033          fp->rf_dinfo.rd_recall_count += recall_count;
1027 1034          while (fp->rf_dinfo.rd_recall_count)
  
    | 
      ↓ open down ↓ | 
    4 lines elided | 
    
      ↑ open up ↑ | 
  
1028 1035                  cv_wait(fp->rf_dinfo.rd_recall_cv, fp->rf_dinfo.rd_recall_lock);
1029 1036  
1030 1037          mutex_exit(fp->rf_dinfo.rd_recall_lock);
1031 1038  
1032 1039          DTRACE_PROBE1(nfss__i__recall_done, rfs4_file_t *, fp);
1033 1040          rfs4_file_rele(fp);
1034 1041          kmem_free(map, sizeof (struct master_recall_args));
1035 1042          mutex_enter(&cpr_lock);
1036 1043          CALLB_CPR_EXIT(&cpr_info);
1037 1044          mutex_destroy(&cpr_lock);
     1045 +        zthread_exit();
1038 1046  }
1039 1047  
1040 1048  static void
1041 1049  rfs4_recall_file(rfs4_file_t *fp,
1042 1050      void (*recall)(rfs4_deleg_state_t *, bool_t trunc),
1043 1051      bool_t trunc, rfs4_client_t *cp)
1044 1052  {
1045 1053          struct master_recall_args *args;
1046 1054  
1047 1055          rfs4_dbe_lock(fp->rf_dbe);
1048 1056          if (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
1049 1057                  rfs4_dbe_unlock(fp->rf_dbe);
1050 1058                  return;
1051 1059          }
1052 1060          rfs4_dbe_hold(fp->rf_dbe);      /* hold for new thread */
1053 1061  
1054 1062          /*
1055 1063           * Mark the time we started the recall processing.
1056 1064           * If it has been previously recalled, do not reset the
1057 1065           * timer since this is used for the revocation decision.
1058 1066           */
1059 1067          if (fp->rf_dinfo.rd_time_recalled == 0)
1060 1068                  fp->rf_dinfo.rd_time_recalled = gethrestime_sec();
1061 1069          fp->rf_dinfo.rd_ever_recalled = TRUE; /* used for policy decision */
1062 1070          /* Client causing recall not always available */
  
    | 
      ↓ open down ↓ | 
    15 lines elided | 
    
      ↑ open up ↑ | 
  
1063 1071          if (cp)
1064 1072                  fp->rf_dinfo.rd_conflicted_client = cp->rc_clientid;
1065 1073  
1066 1074          rfs4_dbe_unlock(fp->rf_dbe);
1067 1075  
1068 1076          args = kmem_alloc(sizeof (struct master_recall_args), KM_SLEEP);
1069 1077          args->fp = fp;
1070 1078          args->recall = recall;
1071 1079          args->trunc = trunc;
1072 1080  
1073      -        (void) thread_create(NULL, 0, do_recall_file, args, 0, &p0, TS_RUN,
     1081 +        (void) zthread_create(NULL, 0, do_recall_file, args, 0,
1074 1082              minclsyspri);
1075 1083  }
1076 1084  
1077 1085  void
1078 1086  rfs4_recall_deleg(rfs4_file_t *fp, bool_t trunc, rfs4_client_t *cp)
1079 1087  {
1080 1088          time_t elapsed1, elapsed2;
1081 1089  
1082 1090          if (fp->rf_dinfo.rd_time_recalled != 0) {
1083 1091                  elapsed1 = gethrestime_sec() - fp->rf_dinfo.rd_time_recalled;
1084 1092                  elapsed2 = gethrestime_sec() - fp->rf_dinfo.rd_time_lastwrite;
1085 1093                  /* First check to see if a revocation should occur */
1086 1094                  if (elapsed1 > rfs4_lease_time &&
1087 1095                      elapsed2 > rfs4_lease_time) {
1088 1096                          rfs4_revoke_file(fp);
1089 1097                          return;
1090 1098                  }
1091 1099                  /*
1092 1100                   * Next check to see if a recall should be done again
1093 1101                   * so quickly.
1094 1102                   */
1095 1103                  if (elapsed1 <= ((rfs4_lease_time * 20) / 100))
1096 1104                          return;
1097 1105          }
1098 1106          rfs4_recall_file(fp, rfs4_do_cb_recall, trunc, cp);
1099 1107  }
1100 1108  
1101 1109  /*
1102 1110   * rfs4_check_recall is called from rfs4_do_open to determine if the current
1103 1111   * open conflicts with the delegation.
1104 1112   * Return true if we need recall otherwise false.
1105 1113   * Assumes entry locks for sp and sp->rs_finfo are held.
1106 1114   */
1107 1115  bool_t
1108 1116  rfs4_check_recall(rfs4_state_t *sp, uint32_t access)
1109 1117  {
1110 1118          open_delegation_type4 dtype = sp->rs_finfo->rf_dinfo.rd_dtype;
1111 1119  
1112 1120          switch (dtype) {
1113 1121          case OPEN_DELEGATE_NONE:
1114 1122                  /* Not currently delegated so there is nothing to do */
1115 1123                  return (FALSE);
1116 1124          case OPEN_DELEGATE_READ:
1117 1125                  /*
1118 1126                   * If the access is only asking for READ then there is
1119 1127                   * no conflict and nothing to do.  If it is asking
1120 1128                   * for write, then there will be conflict and the read
1121 1129                   * delegation should be recalled.
1122 1130                   */
1123 1131                  if (access == OPEN4_SHARE_ACCESS_READ)
1124 1132                          return (FALSE);
1125 1133                  else
1126 1134                          return (TRUE);
1127 1135          case OPEN_DELEGATE_WRITE:
1128 1136                  /* Check to see if this client has the delegation */
1129 1137                  return (rfs4_is_deleg(sp));
1130 1138          }
1131 1139  
1132 1140          return (FALSE);
1133 1141  }
1134 1142  
1135 1143  /*
1136 1144   * Return the "best" allowable delegation available given the current
1137 1145   * delegation type and the desired access and deny modes on the file.
1138 1146   * At the point that this routine is called we know that the access and
1139 1147   * deny modes are consistent with the file modes.
1140 1148   */
1141 1149  static open_delegation_type4
1142 1150  rfs4_check_delegation(rfs4_state_t *sp, rfs4_file_t *fp)
1143 1151  {
1144 1152          open_delegation_type4 dtype = fp->rf_dinfo.rd_dtype;
1145 1153          uint32_t access = sp->rs_share_access;
1146 1154          uint32_t deny = sp->rs_share_deny;
1147 1155          int readcnt = 0;
1148 1156          int writecnt = 0;
1149 1157  
1150 1158          switch (dtype) {
1151 1159          case OPEN_DELEGATE_NONE:
1152 1160                  /*
1153 1161                   * Determine if more than just this OPEN have the file
1154 1162                   * open and if so, no delegation may be provided to
1155 1163                   * the client.
1156 1164                   */
1157 1165                  if (access & OPEN4_SHARE_ACCESS_WRITE)
1158 1166                          writecnt++;
1159 1167                  if (access & OPEN4_SHARE_ACCESS_READ)
1160 1168                          readcnt++;
1161 1169  
1162 1170                  if (fp->rf_access_read > readcnt ||
1163 1171                      fp->rf_access_write > writecnt)
1164 1172                          return (OPEN_DELEGATE_NONE);
1165 1173  
1166 1174                  /*
1167 1175                   * If the client is going to write, or if the client
1168 1176                   * has exclusive access, return a write delegation.
1169 1177                   */
1170 1178                  if ((access & OPEN4_SHARE_ACCESS_WRITE) ||
1171 1179                      (deny & (OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE)))
1172 1180                          return (OPEN_DELEGATE_WRITE);
1173 1181                  /*
1174 1182                   * If we don't want to write or we've haven't denied read
1175 1183                   * access to others, return a read delegation.
1176 1184                   */
1177 1185                  if ((access & ~OPEN4_SHARE_ACCESS_WRITE) ||
1178 1186                      (deny & ~OPEN4_SHARE_DENY_READ))
1179 1187                          return (OPEN_DELEGATE_READ);
1180 1188  
1181 1189                  /* Shouldn't get here */
1182 1190                  return (OPEN_DELEGATE_NONE);
1183 1191  
1184 1192          case OPEN_DELEGATE_READ:
1185 1193                  /*
1186 1194                   * If the file is delegated for read but we want to
1187 1195                   * write or deny others to read then we can't delegate
1188 1196                   * the file. We shouldn't get here since the delegation should
1189 1197                   * have been recalled already.
1190 1198                   */
1191 1199                  if ((access & OPEN4_SHARE_ACCESS_WRITE) ||
1192 1200                      (deny & OPEN4_SHARE_DENY_READ))
1193 1201                          return (OPEN_DELEGATE_NONE);
1194 1202                  return (OPEN_DELEGATE_READ);
1195 1203  
1196 1204          case OPEN_DELEGATE_WRITE:
1197 1205                  return (OPEN_DELEGATE_WRITE);
1198 1206          }
  
    | 
      ↓ open down ↓ | 
    115 lines elided | 
    
      ↑ open up ↑ | 
  
1199 1207  
1200 1208          /* Shouldn't get here */
1201 1209          return (OPEN_DELEGATE_NONE);
1202 1210  }
1203 1211  
1204 1212  /*
1205 1213   * Given the desired delegation type and the "history" of the file
1206 1214   * determine the actual delegation type to return.
1207 1215   */
1208 1216  static open_delegation_type4
1209      -rfs4_delegation_policy(open_delegation_type4 dtype,
     1217 +rfs4_delegation_policy(nfs4_srv_t *nsrv4, open_delegation_type4 dtype,
1210 1218      rfs4_dinfo_t *dinfo, clientid4 cid)
1211 1219  {
1212 1220          time_t elapsed;
1213 1221  
1214      -        if (rfs4_deleg_policy != SRV_NORMAL_DELEGATE)
     1222 +        if (nsrv4->nfs4_deleg_policy != SRV_NORMAL_DELEGATE)
1215 1223                  return (OPEN_DELEGATE_NONE);
1216 1224  
1217 1225          /*
1218 1226           * Has this file/delegation ever been recalled?  If not then
1219 1227           * no further checks for a delegation race need to be done.
1220 1228           * However if a recall has occurred, then check to see if a
1221 1229           * client has caused its own delegation recall to occur.  If
1222 1230           * not, then has a delegation for this file been returned
1223 1231           * recently?  If so, then do not assign a new delegation to
1224 1232           * avoid a "delegation race" between the original client and
1225 1233           * the new/conflicting client.
1226 1234           */
1227 1235          if (dinfo->rd_ever_recalled == TRUE) {
1228 1236                  if (dinfo->rd_conflicted_client != cid) {
1229 1237                          elapsed = gethrestime_sec() - dinfo->rd_time_returned;
1230 1238                          if (elapsed < rfs4_lease_time)
1231 1239                                  return (OPEN_DELEGATE_NONE);
1232 1240                  }
1233 1241          }
1234 1242  
1235 1243          /* Limit the number of read grants */
1236 1244          if (dtype == OPEN_DELEGATE_READ &&
1237 1245              dinfo->rd_rdgrants > MAX_READ_DELEGATIONS)
1238 1246                  return (OPEN_DELEGATE_NONE);
1239 1247  
1240 1248          /*
1241 1249           * Should consider limiting total number of read/write
1242 1250           * delegations the server will permit.
1243 1251           */
1244 1252  
1245 1253          return (dtype);
1246 1254  }
  
    | 
      ↓ open down ↓ | 
    22 lines elided | 
    
      ↑ open up ↑ | 
  
1247 1255  
1248 1256  /*
1249 1257   * Try and grant a delegation for an open given the state. The routine
1250 1258   * returns the delegation type granted. This could be OPEN_DELEGATE_NONE.
1251 1259   *
1252 1260   * The state and associated file entry must be locked
1253 1261   */
1254 1262  rfs4_deleg_state_t *
1255 1263  rfs4_grant_delegation(delegreq_t dreq, rfs4_state_t *sp, int *recall)
1256 1264  {
     1265 +        nfs4_srv_t *nsrv4;
1257 1266          rfs4_file_t *fp = sp->rs_finfo;
1258 1267          open_delegation_type4 dtype;
1259 1268          int no_delegation;
1260 1269  
1261 1270          ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
1262 1271          ASSERT(rfs4_dbe_islocked(fp->rf_dbe));
1263 1272  
     1273 +        nsrv4 = nfs4_get_srv();
     1274 +
1264 1275          /* Is the server even providing delegations? */
1265      -        if (rfs4_deleg_policy == SRV_NEVER_DELEGATE || dreq == DELEG_NONE)
     1276 +        if (nsrv4->nfs4_deleg_policy == SRV_NEVER_DELEGATE ||
     1277 +            dreq == DELEG_NONE) {
1266 1278                  return (NULL);
     1279 +        }
1267 1280  
1268 1281          /* Check to see if delegations have been temporarily disabled */
1269      -        mutex_enter(&rfs4_deleg_lock);
     1282 +        mutex_enter(&nsrv4->deleg_lock);
1270 1283          no_delegation = rfs4_deleg_disabled;
1271      -        mutex_exit(&rfs4_deleg_lock);
     1284 +        mutex_exit(&nsrv4->deleg_lock);
1272 1285  
1273 1286          if (no_delegation)
1274 1287                  return (NULL);
1275 1288  
1276 1289          /* Don't grant a delegation if a deletion is impending. */
1277 1290          if (fp->rf_dinfo.rd_hold_grant > 0) {
1278 1291                  return (NULL);
1279 1292          }
1280 1293  
1281 1294          /*
1282 1295           * Don't grant a delegation if there are any lock manager
1283 1296           * (NFSv2/v3) locks for the file.  This is a bit of a hack (e.g.,
1284 1297           * if there are only read locks we should be able to grant a
1285 1298           * read-only delegation), but it's good enough for now.
1286 1299           *
1287 1300           * MT safety: the lock manager checks for conflicting delegations
1288 1301           * before processing a lock request.  That check will block until
1289 1302           * we are done here.  So if the lock manager acquires a lock after
1290 1303           * we decide to grant the delegation, the delegation will get
1291 1304           * immediately recalled (if there's a conflict), so we're safe.
1292 1305           */
1293 1306          if (lm_vp_active(fp->rf_vp)) {
1294 1307                  return (NULL);
1295 1308          }
1296 1309  
1297 1310          /*
1298 1311           * Based on the type of delegation request passed in, take the
1299 1312           * appropriate action (DELEG_NONE is handled above)
1300 1313           */
1301 1314          switch (dreq) {
1302 1315  
1303 1316          case DELEG_READ:
1304 1317          case DELEG_WRITE:
1305 1318                  /*
1306 1319                   * The server "must" grant the delegation in this case.
1307 1320                   * Client is using open previous
1308 1321                   */
1309 1322                  dtype = (open_delegation_type4)dreq;
1310 1323                  *recall = 1;
1311 1324                  break;
1312 1325          case DELEG_ANY:
1313 1326                  /*
1314 1327                   * If a valid callback path does not exist, no delegation may
1315 1328                   * be granted.
1316 1329                   */
1317 1330                  if (sp->rs_owner->ro_client->rc_cbinfo.cb_state != CB_OK)
1318 1331                          return (NULL);
1319 1332  
1320 1333                  /*
1321 1334                   * If the original operation which caused time_rm_delayed
1322 1335                   * to be set hasn't been retried and completed for one
1323 1336                   * full lease period, clear it and allow delegations to
1324 1337                   * get granted again.
1325 1338                   */
1326 1339                  if (fp->rf_dinfo.rd_time_rm_delayed > 0 &&
1327 1340                      gethrestime_sec() >
1328 1341                      fp->rf_dinfo.rd_time_rm_delayed + rfs4_lease_time)
1329 1342                          fp->rf_dinfo.rd_time_rm_delayed = 0;
1330 1343  
1331 1344                  /*
1332 1345                   * If we are waiting for a delegation to be returned then
1333 1346                   * don't delegate this file. We do this for correctness as
1334 1347                   * well as if the file is being recalled we would likely
1335 1348                   * recall this file again.
1336 1349                   */
1337 1350  
1338 1351                  if (fp->rf_dinfo.rd_time_recalled != 0 ||
1339 1352                      fp->rf_dinfo.rd_time_rm_delayed != 0)
1340 1353                          return (NULL);
1341 1354  
  
    | 
      ↓ open down ↓ | 
    60 lines elided | 
    
      ↑ open up ↑ | 
  
1342 1355                  /* Get the "best" delegation candidate */
1343 1356                  dtype = rfs4_check_delegation(sp, fp);
1344 1357  
1345 1358                  if (dtype == OPEN_DELEGATE_NONE)
1346 1359                          return (NULL);
1347 1360  
1348 1361                  /*
1349 1362                   * Based on policy and the history of the file get the
1350 1363                   * actual delegation.
1351 1364                   */
1352      -                dtype = rfs4_delegation_policy(dtype, &fp->rf_dinfo,
     1365 +                dtype = rfs4_delegation_policy(nsrv4, dtype, &fp->rf_dinfo,
1353 1366                      sp->rs_owner->ro_client->rc_clientid);
1354 1367  
1355 1368                  if (dtype == OPEN_DELEGATE_NONE)
1356 1369                          return (NULL);
1357 1370                  break;
1358 1371          default:
1359 1372                  return (NULL);
1360 1373          }
1361 1374  
1362 1375          /* set the delegation for the state */
1363 1376          return (rfs4_deleg_state(sp, dtype, recall));
1364 1377  }
1365 1378  
           /*
            * Fill in the OPEN delegation response "dp" from the delegation state
            * "dsp".  A private copy of the ACE is built first: the caller's "ace"
            * if one is supplied, otherwise a default ACE that denies everyone.
            * The stateid, the "recall" flag and that ACE are then stored in the
            * read or write arm of the response, selected by dsp->rds_dtype.  A
            * write delegation additionally gets a space limit of NFS_LIMIT_SIZE
            * with a filesize of 0.
            *
            * NOTE(review): the "who" string in nace is set up before the switch,
            * but for OPEN_DELEGATE_NONE (and any type the switch does not list)
            * nace is never stored in "dp".  That looks like it could leak the
            * string -- confirm whether this routine can be reached with such a
            * type.
            */
1366 1379  void
1367 1380  rfs4_set_deleg_response(rfs4_deleg_state_t *dsp, open_delegation4 *dp,
1368 1381      nfsace4 *ace,  int recall)
1369 1382  {
1370 1383          open_write_delegation4 *wp;
1371 1384          open_read_delegation4 *rp;
1372 1385          nfs_space_limit4 *spl;
1373 1386          nfsace4 nace;
1374 1387  
1375 1388          /*
1376 1389           * We need to allocate a new copy of the who string.
1377 1390           * This string will be freed by the rfs4_op_open dis_resfree
1378 1391           * routine. We need to do this allocation since replays will
1379 1392           * be allocated and rfs4_compound can't tell the difference between
1380 1393           * a replay and an initial open. N.B. if an ace is passed in, it
1381 1394           * is the caller's responsibility to free it.
1382 1395           */
1383 1396  
1384 1397          if (ace == NULL) {
1385 1398                  /*
1386 1399                   * Default is to deny all access, the client will have
1387 1400                   * to contact the server.  XXX Do we want to actually
1388 1401                   * set a deny for every one, or do we simply want to
1389 1402                   * construct an entity that will match no one?
1390 1403                   */
1391 1404                  nace.type = ACE4_ACCESS_DENIED_ACE_TYPE;
1392 1405                  nace.flag = 0;
1393 1406                  nace.access_mask = ACE4_VALID_MASK_BITS;
1394 1407                  (void) str_to_utf8(ACE4_WHO_EVERYONE, &nace.who);
1395 1408          } else {
                           /* Field-by-field copy of the caller's ACE. */
1396 1409                  nace.type = ace->type;
1397 1410                  nace.flag = ace->flag;
1398 1411                  nace.access_mask = ace->access_mask;
1399 1412                  (void) utf8_copy(&ace->who, &nace.who);
1400 1413          }
1401 1414  
1402 1415          dp->delegation_type = dsp->rds_dtype;
1403 1416  
1404 1417          switch (dsp->rds_dtype) {
1405 1418          case OPEN_DELEGATE_NONE:
1406 1419                  break;
1407 1420          case OPEN_DELEGATE_READ:
1408 1421                  rp = &dp->open_delegation4_u.read;
1409 1422                  rp->stateid = dsp->rds_delegid.stateid;
1410 1423                  rp->recall = (bool_t)recall;
1411 1424                  rp->permissions = nace;
1412 1425                  break;
1413 1426          case OPEN_DELEGATE_WRITE:
1414 1427                  wp = &dp->open_delegation4_u.write;
1415 1428                  wp->stateid = dsp->rds_delegid.stateid;
1416 1429                  wp->recall = (bool_t)recall;
1417 1430                  spl = &wp->space_limit;
1418 1431                  spl->limitby = NFS_LIMIT_SIZE;
1419 1432                  spl->nfs_space_limit4_u.filesize = 0;
1420 1433                  wp->permissions = nace;
1421 1434                  break;
1422 1435          }
1423 1436  }
1424 1437  
1425 1438  /*
1426 1439   * Check if the file is delegated via the provided file struct.
1427 1440   * Return TRUE if it is delegated.  This is intended for use by
1428 1441   * the v4 server.  The v2/v3 server code should use rfs4_check_delegated().
1429 1442   *
1430 1443   * Note that if the file is found to have a delegation, it is
  
    | 
      ↓ open down ↓ | 
    68 lines elided | 
    
      ↑ open up ↑ | 
  
1431 1444   * recalled, unless the clientid of the caller matches the clientid of the
1432 1445   * delegation. If the caller has specified, there is a slight delay
1433 1446   * inserted in the hopes that the delegation will be returned quickly.
1434 1447   */
1435 1448  bool_t
1436 1449  rfs4_check_delegated_byfp(int mode, rfs4_file_t *fp,
1437 1450      bool_t trunc, bool_t do_delay, bool_t is_rm, clientid4 *cp)
1438 1451  {
1439 1452          rfs4_deleg_state_t *dsp;
1440 1453  
     1454 +        nfs4_srv_t *nsrv4 = nfs4_get_srv();
     1455 +
1441 1456          /* Is delegation enabled? */
1442      -        if (rfs4_deleg_policy == SRV_NEVER_DELEGATE)
     1457 +        if (nsrv4->nfs4_deleg_policy == SRV_NEVER_DELEGATE)
1443 1458                  return (FALSE);
1444 1459  
1445 1460          /* do we have a delegation on this file? */
1446 1461          rfs4_dbe_lock(fp->rf_dbe);
1447 1462          if (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
                           /*
                            * Nothing to conflict with.  When is_rm is set, take a
                            * hold on rd_hold_grant; rfs4_clear_dont_grant() is the
                            * routine that drops it again.
                            */
1448 1463                  if (is_rm)
1449 1464                          fp->rf_dinfo.rd_hold_grant++;
1450 1465                  rfs4_dbe_unlock(fp->rf_dbe);
1451 1466                  return (FALSE);
1452 1467          }
1453 1468          /*
1454 1469           * do we have a write delegation on this file or are we
1455 1470           * requesting write access to a file with any type of existing
1456 1471           * delegation?
1457 1472           */
1458 1473          if (mode == FWRITE || fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE) {
1459 1474                  if (cp != NULL) {
                                   /* Only the head of the delegation list is checked. */
1460 1475                          dsp = list_head(&fp->rf_delegstatelist);
1461 1476                          if (dsp == NULL) {
1462 1477                                  rfs4_dbe_unlock(fp->rf_dbe);
1463 1478                                  return (FALSE);
1464 1479                          }
1465 1480                          /*
1466 1481                           * Does the requestor already own the delegation?
1467 1482                           */
1468 1483                          if (dsp->rds_client->rc_clientid == *(cp)) {
1469 1484                                  rfs4_dbe_unlock(fp->rf_dbe);
1470 1485                                  return (FALSE);
1471 1486                          }
1472 1487                  }
1473 1488  
                           /* Conflict: the recall is issued with fp unlocked. */
1474 1489                  rfs4_dbe_unlock(fp->rf_dbe);
1475 1490                  rfs4_recall_deleg(fp, trunc, NULL);
1476 1491  
                           /*
                            * Caller does not want to wait: record when the conflict
                            * was seen and report the file as delegated right away.
                            */
1477 1492                  if (!do_delay) {
1478 1493                          rfs4_dbe_lock(fp->rf_dbe);
1479 1494                          fp->rf_dinfo.rd_time_rm_delayed = gethrestime_sec();
1480 1495                          rfs4_dbe_unlock(fp->rf_dbe);
1481 1496                          return (TRUE);
1482 1497                  }
1483 1498  
                           /*
                            * Pause in the hope that the delegation is returned, then
                            * look at the file's delegation type again.
                            */
1484 1499                  delay(NFS4_DELEGATION_CONFLICT_DELAY);
1485 1500  
1486 1501                  rfs4_dbe_lock(fp->rf_dbe);
1487 1502                  if (fp->rf_dinfo.rd_dtype != OPEN_DELEGATE_NONE) {
1488 1503                          fp->rf_dinfo.rd_time_rm_delayed = gethrestime_sec();
1489 1504                          rfs4_dbe_unlock(fp->rf_dbe);
1490 1505                          return (TRUE);
1491 1506                  }
1492 1507          }
                   /*
                    * No conflicting delegation (any longer).  fp->rf_dbe is locked on
                    * every path that reaches this point.
                    */
1493 1508          if (is_rm)
1494 1509                  fp->rf_dinfo.rd_hold_grant++;
1495 1510          rfs4_dbe_unlock(fp->rf_dbe);
1496 1511          return (FALSE);
  
    | 
      ↓ open down ↓ | 
    44 lines elided | 
    
      ↑ open up ↑ | 
  
1497 1512  }
1498 1513  
1499 1514  /*
1500 1515   * Check if the file is delegated in the case of a v2 or v3 access.
1501 1516   * Return TRUE if it is delegated which in turn means that v2 should
1502 1517   * drop the request and in the case of v3 JUKEBOX should be returned.
            *
            * The delegation policy is held for the duration of the check.  The
            * file entry for "vp" is looked up without creating one; if it exists,
            * the work is done by rfs4_check_delegated_byfp() with do_delay TRUE,
            * is_rm FALSE and no clientid, and with "mode" and "trunc" passed
            * through unchanged.  FALSE is returned when delegation is disabled by
            * policy or no file entry is found.
1503 1518   */
1504 1519  bool_t
1505 1520  rfs4_check_delegated(int mode, vnode_t *vp, bool_t trunc)
1506 1521  {
     1522 +        nfs4_srv_t *nsrv4;
1507 1523          rfs4_file_t *fp;
1508 1524          bool_t create = FALSE;
1509 1525          bool_t rc = FALSE;
1510 1526  
1511      -        rfs4_hold_deleg_policy();
     1527 +        nsrv4 = nfs4_get_srv();
     1528 +        rfs4_hold_deleg_policy(nsrv4);
1512 1529  
1513 1530          /* Is delegation enabled? */
1514      -        if (rfs4_deleg_policy != SRV_NEVER_DELEGATE) {
     1531 +        if (nsrv4->nfs4_deleg_policy != SRV_NEVER_DELEGATE) {
1515 1532                  fp = rfs4_findfile(vp, NULL, &create);
1516 1533                  if (fp != NULL) {
1517 1534                          if (rfs4_check_delegated_byfp(mode, fp, trunc,
1518 1535                              TRUE, FALSE, NULL)) {
1519 1536                                  rc = TRUE;
1520 1537                          }
                                   /* Drop the reference obtained from rfs4_findfile(). */
1521 1538                          rfs4_file_rele(fp);
1522 1539                  }
1523 1540          }
1524      -        rfs4_rele_deleg_policy();
     1541 +        rfs4_rele_deleg_policy(nsrv4);
1525 1542          return (rc);
1526 1543  }
1527 1544  
1528 1545  /*
1529 1546   * Release a hold on the hold_grant counter which
1530 1547   * prevents delegation from being granted while a remove
1531 1548   * or a rename is in progress.
            *
            * Nothing is done when the policy is SRV_NEVER_DELEGATE;
            * rfs4_check_delegated_byfp() returns before taking the hold under
            * that policy.  Otherwise rd_time_rm_delayed is cleared as well as
            * the counter being decremented, both under the file entry lock.
1532 1549   */
1533 1550  void
1534 1551  rfs4_clear_dont_grant(rfs4_file_t *fp)
1535 1552  {
1536      -        if (rfs4_deleg_policy == SRV_NEVER_DELEGATE)
     1553 +        nfs4_srv_t *nsrv4 = nfs4_get_srv();
     1554 +
     1555 +        if (nsrv4->nfs4_deleg_policy == SRV_NEVER_DELEGATE)
1537 1556                  return;
1538 1557          rfs4_dbe_lock(fp->rf_dbe);
                   /* A release without a matching hold is a caller bug. */
1539 1558          ASSERT(fp->rf_dinfo.rd_hold_grant > 0);
1540 1559          fp->rf_dinfo.rd_hold_grant--;
1541 1560          fp->rf_dinfo.rd_time_rm_delayed = 0;
1542 1561          rfs4_dbe_unlock(fp->rf_dbe);
1543 1562  }
1544 1563  
1545 1564  /*
1546 1565   * State support for delegation.
1547 1566   * Set the state delegation type for this state;
1548 1567   * This routine is called from open via rfs4_grant_delegation and the entry
1549 1568   * locks on sp and sp->rs_finfo are assumed.
            *
            * Both entry locks are dropped around the rfs4_finddeleg() call and
            * re-taken afterwards, so the file's delegation info is examined again
            * once they are held.
            *
            * On entry *recall is saved as "open_prev".  Once the open/mapping
            * conflict checks are reached, *recall is reset to 0 and set to 1 only
            * if a conflict is seen while open_prev is non-zero; a conflict with
            * open_prev zero makes the routine give up instead.
            *
            * Returns the delegation state found or created by rfs4_finddeleg(),
            * or NULL if no delegation is granted (the state is released on those
            * paths).
1550 1569   */
1551 1570  static rfs4_deleg_state_t *
1552 1571  rfs4_deleg_state(rfs4_state_t *sp, open_delegation_type4 dtype, int *recall)
1553 1572  {
1554 1573          rfs4_file_t *fp = sp->rs_finfo;
1555 1574          bool_t create = TRUE;
1556 1575          rfs4_deleg_state_t *dsp;
1557 1576          vnode_t *vp;
1558 1577          int open_prev = *recall;
1559 1578          int ret;
1560 1579          int fflags = 0;
1561 1580  
1562 1581          ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
1563 1582          ASSERT(rfs4_dbe_islocked(fp->rf_dbe));
1564 1583  
1565 1584          /* Shouldn't happen */
1566 1585          if (fp->rf_dinfo.rd_recall_count != 0 ||
1567 1586              (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_READ &&
1568 1587              dtype != OPEN_DELEGATE_READ)) {
1569 1588                  return (NULL);
1570 1589          }
1571 1590  
1572 1591          /* Unlock to avoid deadlock */
1573 1592          rfs4_dbe_unlock(fp->rf_dbe);
1574 1593          rfs4_dbe_unlock(sp->rs_dbe);
1575 1594  
1576 1595          dsp = rfs4_finddeleg(sp, &create);
1577 1596  
1578 1597          rfs4_dbe_lock(sp->rs_dbe);
1579 1598          rfs4_dbe_lock(fp->rf_dbe);
1580 1599  
1581 1600          if (dsp == NULL)
1582 1601                  return (NULL);
1583 1602  
1584 1603          /*
1585 1604           * It is possible that since we dropped the lock
1586 1605           * in order to call finddeleg, the rfs4_file_t
1587 1606           * was marked such that we should not grant a
1588 1607           * delegation, if so bail out.
1589 1608           */
1590 1609          if (fp->rf_dinfo.rd_hold_grant > 0) {
1591 1610                  rfs4_deleg_state_rele(dsp);
1592 1611                  return (NULL);
1593 1612          }
1594 1613  
                   /*
                    * An existing delegation state was found rather than created: it
                    * is only usable if it belongs to this client and has the type
                    * being asked for.
                    */
1595 1614          if (create == FALSE) {
1596 1615                  if (sp->rs_owner->ro_client == dsp->rds_client &&
1597 1616                      dsp->rds_dtype == dtype) {
1598 1617                          return (dsp);
1599 1618                  } else {
1600 1619                          rfs4_deleg_state_rele(dsp);
1601 1620                          return (NULL);
1602 1621                  }
1603 1622          }
1604 1623  
1605 1624          /*
1606 1625           * Check that this file has not been delegated to another
1607 1626           * client
1608 1627           */
1609 1628          if (fp->rf_dinfo.rd_recall_count != 0 ||
1610 1629              fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE ||
1611 1630              (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_READ &&
1612 1631              dtype != OPEN_DELEGATE_READ)) {
1613 1632                  rfs4_deleg_state_rele(dsp);
1614 1633                  return (NULL);
1615 1634          }
1616 1635  
1617 1636          vp = fp->rf_vp;
1618 1637          /* vnevent_support returns 0 if file system supports vnevents */
1619 1638          if (vnevent_support(vp, NULL)) {
1620 1639                  rfs4_deleg_state_rele(dsp);
1621 1640                  return (NULL);
1622 1641          }
1623 1642  
1624 1643          /* Calculate the fflags for this OPEN. */
1625 1644          if (sp->rs_share_access & OPEN4_SHARE_ACCESS_READ)
1626 1645                  fflags |= FREAD;
1627 1646          if (sp->rs_share_access & OPEN4_SHARE_ACCESS_WRITE)
1628 1647                  fflags |= FWRITE;
1629 1648  
1630 1649          *recall = 0;
1631 1650          /*
1632 1651           * Before granting a delegation we need to know if anyone else has
1633 1652           * opened the file in a conflicting mode.  However, first we need to
1634 1653           * know how we opened the file to check the counts properly.
1635 1654           */
1636 1655          if (dtype == OPEN_DELEGATE_READ) {
1637 1656                  if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) ||
1638 1657                      (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) ||
1639 1658                      vn_is_mapped(vp, V_WRITE)) {
1640 1659                          if (open_prev) {
1641 1660                                  *recall = 1;
1642 1661                          } else {
1643 1662                                  rfs4_deleg_state_rele(dsp);
1644 1663                                  return (NULL);
1645 1664                          }
1646 1665                  }
1647 1666                  ret = fem_install(vp, deleg_rdops, (void *)fp, OPUNIQ,
1648 1667                      rfs4_mon_hold, rfs4_mon_rele);
                           /*
                            * Same conflict test again now that the monitor has been
                            * installed; on failure the monitor is removed again.
                            * NOTE(review): presumably this covers a conflicting open
                            * arriving between the first test and fem_install() --
                            * confirm.
                            */
1649 1668                  if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) ||
1650 1669                      (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) ||
1651 1670                      vn_is_mapped(vp, V_WRITE)) {
1652 1671                          if (open_prev) {
1653 1672                                  *recall = 1;
1654 1673                          } else {
1655 1674                                  (void) fem_uninstall(vp, deleg_rdops,
1656 1675                                      (void *)fp);
1657 1676                                  rfs4_deleg_state_rele(dsp);
1658 1677                                  return (NULL);
1659 1678                          }
1660 1679                  }
1661 1680                  /*
1662 1681                   * Because a client can hold onto a delegation after the
1663 1682                   * file has been closed, we need to keep track of the
1664 1683                   * access to this file.  Otherwise the CIFS server would
1665 1684                   * not know about the client accessing the file and could
1666 1685                   * inappropriately grant an OPLOCK.
1667 1686                   * fem_install() returns EBUSY when asked to install a
1668 1687                   * OPUNIQ monitor more than once.  Therefore, check the
1669 1688                   * return code because we only want this done once.
1670 1689                   */
1671 1690                  if (ret == 0)
1672 1691                          vn_open_upgrade(vp, FREAD);
1673 1692          } else { /* WRITE */
1674 1693                  if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) ||
1675 1694                      (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) ||
1676 1695                      ((fflags & FREAD) && vn_has_other_opens(vp, V_READ)) ||
1677 1696                      (((fflags & FREAD) == 0) && vn_is_opened(vp, V_READ)) ||
1678 1697                      vn_is_mapped(vp, V_RDORWR)) {
1679 1698                          if (open_prev) {
1680 1699                                  *recall = 1;
1681 1700                          } else {
1682 1701                                  rfs4_deleg_state_rele(dsp);
1683 1702                                  return (NULL);
1684 1703                          }
1685 1704                  }
1686 1705                  ret = fem_install(vp, deleg_wrops, (void *)fp, OPUNIQ,
1687 1706                      rfs4_mon_hold, rfs4_mon_rele);
                           /*
                            * As in the read case, the conflict test is repeated
                            * after the monitor has been installed.
                            */
1688 1707                  if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) ||
1689 1708                      (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) ||
1690 1709                      ((fflags & FREAD) && vn_has_other_opens(vp, V_READ)) ||
1691 1710                      (((fflags & FREAD) == 0) && vn_is_opened(vp, V_READ)) ||
1692 1711                      vn_is_mapped(vp, V_RDORWR)) {
1693 1712                          if (open_prev) {
1694 1713                                  *recall = 1;
1695 1714                          } else {
1696 1715                                  (void) fem_uninstall(vp, deleg_wrops,
1697 1716                                      (void *)fp);
1698 1717                                  rfs4_deleg_state_rele(dsp);
1699 1718                                  return (NULL);
1700 1719                          }
1701 1720                  }
1702 1721                  /*
1703 1722                   * Because a client can hold onto a delegation after the
1704 1723                   * file has been closed, we need to keep track of the
1705 1724                   * access to this file.  Otherwise the CIFS server would
1706 1725                   * not know about the client accessing the file and could
1707 1726                   * inappropriately grant an OPLOCK.
1708 1727                   * fem_install() returns EBUSY when asked to install a
1709 1728                   * OPUNIQ monitor more than once.  Therefore, check the
1710 1729                   * return code because we only want this done once.
1711 1730                   */
1712 1731                  if (ret == 0)
1713 1732                          vn_open_upgrade(vp, FREAD|FWRITE);
1714 1733          }
1715 1734          /* Place on delegation list for file */
1716 1735          ASSERT(!list_link_active(&dsp->rds_node));
1717 1736          list_insert_tail(&fp->rf_delegstatelist, dsp);
1718 1737  
1719 1738          dsp->rds_dtype = fp->rf_dinfo.rd_dtype = dtype;
1720 1739  
1721 1740          /* Update delegation stats for this file */
1722 1741          fp->rf_dinfo.rd_time_lastgrant = gethrestime_sec();
1723 1742  
1724 1743          /* reset since this is a new delegation */
1725 1744          fp->rf_dinfo.rd_conflicted_client = 0;
1726 1745          fp->rf_dinfo.rd_ever_recalled = FALSE;
1727 1746  
1728 1747          if (dtype == OPEN_DELEGATE_READ)
1729 1748                  fp->rf_dinfo.rd_rdgrants++;
1730 1749          else
1731 1750                  fp->rf_dinfo.rd_wrgrants++;
1732 1751  
1733 1752          return (dsp);
1734 1753  }
1735 1754  
1736 1755  /*
1737 1756   * State routine for the server when a delegation is returned.
            *
            * Takes "dsp" off its file's delegation list; nothing is done if it is
            * no longer linked.  When the list becomes empty the file's delegation
            * type is cleared, the file entry's cv is broadcast and, if the vnode
            * is still present, the monitor is uninstalled and the open is
            * downgraded.  The per-type grant count and rd_time_returned are then
            * updated, and "dsp" itself is set to OPEN_DELEGATE_NONE and
            * invalidated.  When "revoked" is TRUE, rds_time_revoked is stamped
            * and the client's rc_deleg_revoked counter is incremented.
1738 1757   */
1739 1758  void
1740 1759  rfs4_return_deleg(rfs4_deleg_state_t *dsp, bool_t revoked)
1741 1760  {
1742 1761          rfs4_file_t *fp = dsp->rds_finfo;
1743 1762          open_delegation_type4 dtypewas;
1744 1763  
1745 1764          rfs4_dbe_lock(fp->rf_dbe);
1746 1765  
1747 1766          /* nothing to do if no longer on list */
1748 1767          if (!list_link_active(&dsp->rds_node)) {
1749 1768                  rfs4_dbe_unlock(fp->rf_dbe);
1750 1769                  return;
1751 1770          }
1752 1771  
1753 1772          /* Remove state from recall list */
1754 1773          list_remove(&fp->rf_delegstatelist, dsp);
1755 1774  
1756 1775          if (list_is_empty(&fp->rf_delegstatelist)) {
                           /* Remember the old type; it selects which monitor to remove. */
1757 1776                  dtypewas = fp->rf_dinfo.rd_dtype;
1758 1777                  fp->rf_dinfo.rd_dtype = OPEN_DELEGATE_NONE;
1759 1778                  rfs4_dbe_cv_broadcast(fp->rf_dbe);
1760 1779  
1761 1780                  /* if file system was unshared, the vp will be NULL */
1762 1781                  if (fp->rf_vp != NULL) {
1763 1782                          /*
1764 1783                           * Once a delegation is no longer held by any client,
1765 1784                           * the monitor is uninstalled.  At this point, the
1766 1785                           * client must send OPEN otw, so we don't need the
1767 1786                           * reference on the vnode anymore.  The open
1768 1787                           * downgrade removes the reference put on earlier.
1769 1788                           */
1770 1789                          if (dtypewas == OPEN_DELEGATE_READ) {
1771 1790                                  (void) fem_uninstall(fp->rf_vp, deleg_rdops,
1772 1791                                      (void *)fp);
1773 1792                                  vn_open_downgrade(fp->rf_vp, FREAD);
1774 1793                          } else if (dtypewas == OPEN_DELEGATE_WRITE) {
1775 1794                                  (void) fem_uninstall(fp->rf_vp, deleg_wrops,
1776 1795                                      (void *)fp);
1777 1796                                  vn_open_downgrade(fp->rf_vp, FREAD|FWRITE);
1778 1797                          }
1779 1798                  }
1780 1799          }
1781 1800  
                   /* Undo the grant count taken for this state's delegation type. */
1782 1801          switch (dsp->rds_dtype) {
1783 1802          case OPEN_DELEGATE_READ:
1784 1803                  fp->rf_dinfo.rd_rdgrants--;
1785 1804                  break;
1786 1805          case OPEN_DELEGATE_WRITE:
1787 1806                  fp->rf_dinfo.rd_wrgrants--;
1788 1807                  break;
1789 1808          default:
1790 1809                  break;
1791 1810          }
1792 1811  
1793 1812          /* used in the policy decision */
1794 1813          fp->rf_dinfo.rd_time_returned = gethrestime_sec();
1795 1814  
1796 1815          /*
1797 1816           * reset the time_recalled field so future delegations are not
1798 1817           * accidentally revoked
1799 1818           */
1800 1819          if ((fp->rf_dinfo.rd_rdgrants + fp->rf_dinfo.rd_wrgrants) == 0)
1801 1820                  fp->rf_dinfo.rd_time_recalled = 0;
1802 1821  
1803 1822          rfs4_dbe_unlock(fp->rf_dbe);
1804 1823  
                   /* The file entry is done with; the rest updates dsp under its own lock. */
1805 1824          rfs4_dbe_lock(dsp->rds_dbe);
1806 1825  
1807 1826          dsp->rds_dtype = OPEN_DELEGATE_NONE;
1808 1827  
1809 1828          if (revoked == TRUE)
1810 1829                  dsp->rds_time_revoked = gethrestime_sec();
1811 1830  
1812 1831          rfs4_dbe_invalidate(dsp->rds_dbe);
1813 1832  
1814 1833          rfs4_dbe_unlock(dsp->rds_dbe);
1815 1834  
1816 1835          if (revoked == TRUE) {
1817 1836                  rfs4_dbe_lock(dsp->rds_client->rc_dbe);
1818 1837                  dsp->rds_client->rc_deleg_revoked++;    /* observability */
1819 1838                  rfs4_dbe_unlock(dsp->rds_client->rc_dbe);
1820 1839          }
1821 1840  }
1822 1841  
           /*
            * Revoke every delegation on "fp": the head of the file's delegation
            * list is handed to rfs4_return_deleg() with revoked TRUE, over and
            * over, until the list is empty.
            */
1823 1842  static void
1824 1843  rfs4_revoke_file(rfs4_file_t *fp)
1825 1844  {
1826 1845          rfs4_deleg_state_t *dsp;
1827 1846  
1828 1847          /*
1829 1848           * The lock for rfs4_file_t must be held when traversing the
1830 1849           * delegation list but that lock needs to be released to call
1831 1850           * rfs4_return_deleg()
1832 1851           */
1833 1852          rfs4_dbe_lock(fp->rf_dbe);
1834 1853          while (dsp = list_head(&fp->rf_delegstatelist)) {
                           /*
                            * A hold is taken on dsp before the file lock is dropped
                            * and released after the return -- presumably so the
                            * entry cannot go away in between.
                            */
1835 1854                  rfs4_dbe_hold(dsp->rds_dbe);
1836 1855                  rfs4_dbe_unlock(fp->rf_dbe);
1837 1856                  rfs4_return_deleg(dsp, TRUE);
1838 1857                  rfs4_deleg_state_rele(dsp);
1839 1858                  rfs4_dbe_lock(fp->rf_dbe);
1840 1859          }
1841 1860          rfs4_dbe_unlock(fp->rf_dbe);
1842 1861  }
1843 1862  
1844 1863  /*
1845 1864   * A delegation is assumed to be present on the file associated with
1846 1865   * "sp".  Check to see if the delegation is associated with
1847 1866   * the same client as referenced by "sp".  If it is not, TRUE is
1848 1867   * returned.  If the delegation DOES match the client (or no
1849 1868   * delegation is present), return FALSE.
1850 1869   * Assume the state entry and file entry are locked.
            *
            * Every entry on the file's delegation list is examined; the first
            * one that belongs to a different client makes the result TRUE.
1851 1870   */
1852 1871  bool_t
1853 1872  rfs4_is_deleg(rfs4_state_t *sp)
1854 1873  {
1855 1874          rfs4_deleg_state_t *dsp;
1856 1875          rfs4_file_t *fp = sp->rs_finfo;
1857 1876          rfs4_client_t *cp = sp->rs_owner->ro_client;
1858 1877  
1859 1878          ASSERT(rfs4_dbe_islocked(fp->rf_dbe));
1860 1879          for (dsp = list_head(&fp->rf_delegstatelist); dsp != NULL;
1861 1880              dsp = list_next(&fp->rf_delegstatelist, dsp)) {
  
    | 
      ↓ open down ↓ | 
    315 lines elided | 
    
      ↑ open up ↑ | 
  
1862 1881                  if (cp != dsp->rds_client) {
1863 1882                          return (TRUE);
1864 1883                  }
1865 1884          }
1866 1885          return (FALSE);
1867 1886  }
1868 1887  
           /*
            * Increment the rfs4_deleg_disabled count while holding the deleg_lock
            * of the nfs4_srv_t returned by nfs4_get_srv().
            *
            * NOTE(review): the lock is now reached through nsrv4 but the counter
            * is still referenced without it -- confirm that rfs4_deleg_disabled
            * is meant to be guarded by this particular lock.
            */
1869 1888  void
1870 1889  rfs4_disable_delegation(void)
1871 1890  {
1872      -        mutex_enter(&rfs4_deleg_lock);
     1891 +        nfs4_srv_t *nsrv4;
     1892 +
     1893 +        nsrv4 = nfs4_get_srv();
     1894 +        mutex_enter(&nsrv4->deleg_lock);
1873 1895          rfs4_deleg_disabled++;
1874      -        mutex_exit(&rfs4_deleg_lock);
     1896 +        mutex_exit(&nsrv4->deleg_lock);
1875 1897  }
1876 1898  
           /*
            * Decrement the rfs4_deleg_disabled count while holding the deleg_lock
            * of the nfs4_srv_t returned by nfs4_get_srv().  The count is asserted
            * to be positive first, i.e. each call is expected to pair with an
            * earlier rfs4_disable_delegation().
            */
1877 1899  void
1878 1900  rfs4_enable_delegation(void)
1879 1901  {
1880      -        mutex_enter(&rfs4_deleg_lock);
     1902 +        nfs4_srv_t *nsrv4;
     1903 +
     1904 +        nsrv4 = nfs4_get_srv();
     1905 +        mutex_enter(&nsrv4->deleg_lock);
1881 1906          ASSERT(rfs4_deleg_disabled > 0);
1882 1907          rfs4_deleg_disabled--;
1883      -        mutex_exit(&rfs4_deleg_lock);
     1908 +        mutex_exit(&nsrv4->deleg_lock);
1884 1909  }
1885 1910  
           /*
            * Hold callback handed to fem_install() by rfs4_deleg_state().  "arg"
            * is the rfs4_file_t that was registered with the monitor; a hold is
            * taken on its database entry.
            */
1886 1911  void
1887 1912  rfs4_mon_hold(void *arg)
1888 1913  {
1889 1914          rfs4_file_t *fp = arg;
1890 1915  
1891 1916          rfs4_dbe_hold(fp->rf_dbe);
1892 1917  }
1893 1918  
           /*
            * Release callback handed to fem_install() by rfs4_deleg_state().
            * "arg" is the rfs4_file_t that was registered with the monitor; its
            * database entry is released with rfs4_dbe_rele_nolock().
            */
1894 1919  void
1895 1920  rfs4_mon_rele(void *arg)
1896 1921  {
1897 1922          rfs4_file_t *fp = arg;
1898 1923  
1899 1924          rfs4_dbe_rele_nolock(fp->rf_dbe);
1900 1925  }
    
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX