11083 support NFS server in zone
Portions contributed by: Dan Kruchinin <dan.kruchinin@nexenta.com>
Portions contributed by: Stepan Zastupov <stepan.zastupov@gmail.com>
Portions contributed by: Joyce McIntosh <joyce.mcintosh@nexenta.com>
Portions contributed by: Mike Zeller <mike@mikezeller.net>
Portions contributed by: Dan McDonald <danmcd@joyent.com>
Portions contributed by: Gordon Ross <gordon.w.ross@gmail.com>
Portions contributed by: Vitaliy Gusev <gusev.vitaliy@gmail.com>
Reviewed by: Rick McNeal <rick.mcneal@nexenta.com>
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Jason King <jbk@joyent.com>
Reviewed by: C Fraire <cfraire@me.com>
Change-Id: I22f289d357503f9b48a0bc2482cc4328a6d43d16
    
      
    
          --- old/usr/src/uts/common/fs/nfs/nfs4_callback.c
          +++ new/usr/src/uts/common/fs/nfs/nfs4_callback.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23   23   * Use is subject to license terms.
  24   24   */
  25   25  
  26   26  /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
  27   27  /* All Rights Reserved */
  28   28  
  29   29  #include <sys/param.h>
  30   30  #include <sys/types.h>
  31   31  #include <sys/systm.h>
  32   32  #include <sys/cred.h>
  33   33  #include <sys/vfs.h>
  34   34  #include <sys/vnode.h>
  35   35  #include <sys/pathname.h>
  36   36  #include <sys/sysmacros.h>
  37   37  #include <sys/kmem.h>
  38   38  #include <sys/kstat.h>
  39   39  #include <sys/mkdev.h>
  40   40  #include <sys/mount.h>
  41   41  #include <sys/statvfs.h>
  42   42  #include <sys/errno.h>
  43   43  #include <sys/debug.h>
  44   44  #include <sys/cmn_err.h>
  45   45  #include <sys/utsname.h>
  46   46  #include <sys/bootconf.h>
  47   47  #include <sys/modctl.h>
  48   48  #include <sys/acl.h>
  49   49  #include <sys/flock.h>
  50   50  #include <sys/kstr.h>
  51   51  #include <sys/stropts.h>
  52   52  #include <sys/strsubr.h>
  53   53  #include <sys/atomic.h>
  54   54  #include <sys/disp.h>
  55   55  #include <sys/policy.h>
  56   56  #include <sys/list.h>
  57   57  #include <sys/zone.h>
  58   58  
  59   59  #include <rpc/types.h>
  60   60  #include <rpc/auth.h>
  61   61  #include <rpc/rpcsec_gss.h>
  62   62  #include <rpc/clnt.h>
  63   63  #include <rpc/xdr.h>
  64   64  
  65   65  #include <nfs/nfs.h>
  66   66  #include <nfs/nfs_clnt.h>
  67   67  #include <nfs/mount.h>
  68   68  #include <nfs/nfs_acl.h>
  69   69  
  70   70  #include <fs/fs_subr.h>
  71   71  
  72   72  #include <nfs/nfs4.h>
  73   73  #include <nfs/rnode4.h>
  74   74  #include <nfs/nfs4_clnt.h>
  75   75  #include <nfs/nfssys.h>
  76   76  
  77   77  #ifdef  DEBUG
  78   78  /*
  79   79   * These are "special" state IDs and file handles that
   80   80   * match any delegation state ID or file handle.  This
  81   81   * is for testing purposes only.
  82   82   */
  83   83  
  84   84  stateid4 nfs4_deleg_any = { 0x7FFFFFF0 };
  85   85  char nfs4_deleg_fh[] = "\0377\0376\0375\0374";
  86   86  nfs_fh4 nfs4_deleg_anyfh = { sizeof (nfs4_deleg_fh)-1, nfs4_deleg_fh };
  87   87  nfsstat4 cb4_getattr_fail = NFS4_OK;
  88   88  nfsstat4 cb4_recall_fail = NFS4_OK;
  89   89  
  90   90  int nfs4_callback_debug;
  91   91  int nfs4_recall_debug;
  92   92  int nfs4_drat_debug;
  93   93  
  94   94  #endif
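
On a DEBUG kernel the globals above form a runtime error-injection hook: cb_getattr() and cb_recall() check cb4_getattr_fail and cb4_recall_fail before doing any real work, and nfs4_deleg_any/nfs4_deleg_anyfh act as wildcards in the delegation-matching loops further down. A hedged usage sketch with mdb (0t is mdb's decimal prefix; the NFS4ERR_DELAY value of 10008 is an assumption of this note, check nfs4_prot.h on your build):

    # force every CB_RECALL to fail with NFS4ERR_DELAY, then undo it
    echo 'cb4_recall_fail/W 0t10008' | mdb -kw
    echo 'cb4_recall_fail/W 0' | mdb -kw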
  95   95  
  96   96  #define CB_NOTE(x)      NFS4_DEBUG(nfs4_callback_debug, (CE_NOTE, x))
  97   97  #define CB_WARN(x)      NFS4_DEBUG(nfs4_callback_debug, (CE_WARN, x))
  98   98  #define CB_WARN1(x, y)  NFS4_DEBUG(nfs4_callback_debug, (CE_WARN, x, y))
  99   99  
 100  100  enum nfs4_delegreturn_policy nfs4_delegreturn_policy = INACTIVE;
 101  101  
 102  102  static zone_key_t nfs4_callback_zone_key;
 103  103  
 104  104  /*
 105  105   * NFS4_MAPSIZE is the number of bytes we are willing to consume
 106  106   * for the block allocation map when the server grants a NFS_LIMIT_BLOCK
 107  107   * style delegation.
 108  108   */
 109  109  
 110  110  #define NFS4_MAPSIZE    8192
 111  111  #define NFS4_MAPWORDS   NFS4_MAPSIZE/sizeof (uint_t)
 112  112  #define NbPW            (NBBY*sizeof (uint_t))
 113  113  
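
The three constants above size the bitmap kept for NFS_LIMIT_BLOCK delegations: 8 KB of map is 2048 uint_t words of 32 bits each, so 65536 blocks can be tracked. A self-contained sketch of the word/bit arithmetic (illustration only, not code from this diff; the macros are parenthesized here for safety):

    #include <sys/param.h>      /* NBBY */
    #include <sys/types.h>

    #define MAPSIZE     8192
    #define MAPWORDS    (MAPSIZE / sizeof (uint_t))
    #define BITSPW      (NBBY * sizeof (uint_t))

    /* Test whether the bit for a given block number is set in the map. */
    static int
    deleg_map_test(const uint_t *map, size_t blkno)
    {
            if (blkno >= MAPWORDS * BITSPW)
                    return (0);     /* beyond the tracked range */
            return ((map[blkno / BITSPW] & (1U << (blkno % BITSPW))) != 0);
    }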
 114  114  static int nfs4_num_prognums = 1024;
 115  115  static SVC_CALLOUT_TABLE nfs4_cb_sct;
 116  116  
 117  117  struct nfs4_dnode {
 118  118          list_node_t     linkage;
 119  119          rnode4_t        *rnodep;
 120  120          int             flags;          /* Flags for nfs4delegreturn_impl() */
 121  121  };
 122  122  
 123  123  static const struct nfs4_callback_stats nfs4_callback_stats_tmpl = {
 124  124          { "delegations",        KSTAT_DATA_UINT64 },
 125  125          { "cb_getattr",         KSTAT_DATA_UINT64 },
 126  126          { "cb_recall",          KSTAT_DATA_UINT64 },
 127  127          { "cb_null",            KSTAT_DATA_UINT64 },
 128  128          { "cb_dispatch",        KSTAT_DATA_UINT64 },
 129  129          { "delegaccept_r",      KSTAT_DATA_UINT64 },
 130  130          { "delegaccept_rw",     KSTAT_DATA_UINT64 },
 131  131          { "delegreturn",        KSTAT_DATA_UINT64 },
 132  132          { "callbacks",          KSTAT_DATA_UINT64 },
 133  133          { "claim_cur",          KSTAT_DATA_UINT64 },
 134  134          { "claim_cur_ok",       KSTAT_DATA_UINT64 },
 135  135          { "recall_trunc",       KSTAT_DATA_UINT64 },
 136  136          { "recall_failed",      KSTAT_DATA_UINT64 },
 137  137          { "return_limit_write", KSTAT_DATA_UINT64 },
 138  138          { "return_limit_addmap", KSTAT_DATA_UINT64 },
 139  139          { "deleg_recover",      KSTAT_DATA_UINT64 },
 140  140          { "cb_illegal",         KSTAT_DATA_UINT64 }
 141  141  };
 142  142  
 143  143  struct nfs4_cb_port {
 144  144          list_node_t             linkage; /* linkage into per-zone port list */
 145  145          char                    netid[KNC_STRSIZE];
 146  146          char                    uaddr[KNC_STRSIZE];
 147  147          char                    protofmly[KNC_STRSIZE];
 148  148          char                    proto[KNC_STRSIZE];
 149  149  };
 150  150  
 151  151  static int cb_getattr_bytes;
 152  152  
 153  153  struct cb_recall_pass {
 154  154          rnode4_t        *rp;
 155  155          int             flags;          /* Flags for nfs4delegreturn_impl() */
 156  156          bool_t          truncate;
 157  157  };
 158  158  
 159  159  static nfs4_open_stream_t *get_next_deleg_stream(rnode4_t *, int);
 160  160  static void nfs4delegreturn_thread(struct cb_recall_pass *);
 161  161  static int deleg_reopen(vnode_t *, bool_t *, struct nfs4_callback_globals *,
 162  162      int);
 163  163  static void nfs4_dlistadd(rnode4_t *, struct nfs4_callback_globals *, int);
 164  164  static void nfs4_dlistclean_impl(struct nfs4_callback_globals *, int);
 165  165  static int nfs4delegreturn_impl(rnode4_t *, int,
 166  166      struct nfs4_callback_globals *);
 167  167  static void nfs4delegreturn_cleanup_impl(rnode4_t *, nfs4_server_t *,
 168  168      struct nfs4_callback_globals *);
 169  169  
 170  170  static void
 171  171  cb_getattr(nfs_cb_argop4 *argop, nfs_cb_resop4 *resop, struct svc_req *req,
 172  172      struct compound_state *cs, struct nfs4_callback_globals *ncg)
 173  173  {
 174  174          CB_GETATTR4args *args = &argop->nfs_cb_argop4_u.opcbgetattr;
 175  175          CB_GETATTR4res *resp = &resop->nfs_cb_resop4_u.opcbgetattr;
 176  176          rnode4_t *rp;
 177  177          vnode_t *vp;
 178  178          bool_t found = FALSE;
 179  179          struct nfs4_server *sp;
 180  180          struct fattr4 *fap;
 181  181          rpc_inline_t *fdata;
 182  182          long mapcnt;
 183  183          fattr4_change change;
 184  184          fattr4_size size;
 185  185          uint_t rflag;
 186  186  
 187  187          ncg->nfs4_callback_stats.cb_getattr.value.ui64++;
 188  188  
 189  189  #ifdef DEBUG
 190  190          /*
 191  191           * error injection hook: set cb_getattr_fail global to
  192  192           * NFS4 protocol error to be returned
 193  193           */
 194  194          if (cb4_getattr_fail != NFS4_OK) {
 195  195                  *cs->statusp = resp->status = cb4_getattr_fail;
 196  196                  return;
 197  197          }
 198  198  #endif
 199  199  
 200  200          resp->obj_attributes.attrmask = 0;
 201  201  
 202  202          mutex_enter(&ncg->nfs4_cb_lock);
 203  203          sp = ncg->nfs4prog2server[req->rq_prog - NFS4_CALLBACK];
 204  204          mutex_exit(&ncg->nfs4_cb_lock);
 205  205  
 206  206          if (nfs4_server_vlock(sp, 0) == FALSE) {
 207  207  
 208  208                  CB_WARN("cb_getattr: cannot find server\n");
 209  209  
 210  210                  *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
 211  211                  return;
 212  212          }
 213  213  
 214  214          /*
 215  215           * In cb_compound, callback_ident was validated against rq_prog,
 216  216           * but we couldn't verify that it was set to the value we provided
 217  217           * at setclientid time (because we didn't have server struct yet).
 218  218           * Now we have the server struct, but don't have callback_ident
 219  219           * handy.  So, validate server struct program number against req
 220  220           * RPC's prog number.  At this point, we know the RPC prog num
 221  221           * is valid (else we wouldn't be here); however, we don't know
 222  222           * that it was the prog number we supplied to this server at
 223  223           * setclientid time.  If the prog numbers aren't equivalent, then
  224  224   * log the problem and fail the request because cbserv
 225  225           * and/or cbclient are confused.  This will probably never happen.
 226  226           */
 227  227          if (sp->s_program != req->rq_prog) {
 228  228  #ifdef DEBUG
 229  229                  zcmn_err(getzoneid(), CE_WARN,
 230  230                      "cb_getattr: wrong server program number srv=%d req=%d\n",
 231  231                      sp->s_program, req->rq_prog);
 232  232  #else
 233  233                  zcmn_err(getzoneid(), CE_WARN,
 234  234                      "cb_getattr: wrong server program number\n");
 235  235  #endif
 236  236                  mutex_exit(&sp->s_lock);
 237  237                  nfs4_server_rele(sp);
 238  238                  *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
 239  239                  return;
 240  240          }
 241  241  
 242  242          /*
 243  243           * Search the delegation list for a matching file handle;
 244  244           * mutex on sp prevents the list from changing.
 245  245           */
 246  246  
 247  247          rp = list_head(&sp->s_deleg_list);
 248  248          for (; rp != NULL; rp = list_next(&sp->s_deleg_list, rp)) {
 249  249                  nfs4_fhandle_t fhandle;
 250  250  
 251  251                  sfh4_copyval(rp->r_fh, &fhandle);
 252  252  
 253  253                  if ((fhandle.fh_len == args->fh.nfs_fh4_len &&
 254  254                      bcmp(fhandle.fh_buf, args->fh.nfs_fh4_val,
 255  255                      fhandle.fh_len) == 0)) {
 256  256  
 257  257                          found = TRUE;
 258  258                          break;
 259  259                  }
 260  260  #ifdef  DEBUG
 261  261                  if (nfs4_deleg_anyfh.nfs_fh4_len == args->fh.nfs_fh4_len &&
 262  262                      bcmp(nfs4_deleg_anyfh.nfs_fh4_val, args->fh.nfs_fh4_val,
 263  263                      args->fh.nfs_fh4_len) == 0) {
 264  264  
 265  265                          found = TRUE;
 266  266                          break;
 267  267                  }
 268  268  #endif
 269  269          }
 270  270  
 271  271          /*
 272  272           * VN_HOLD the vnode before releasing s_lock to guarantee
 273  273           * we have a valid vnode reference.
 274  274           */
 275  275          if (found == TRUE) {
 276  276                  vp = RTOV4(rp);
 277  277                  VN_HOLD(vp);
 278  278          }
 279  279  
 280  280          mutex_exit(&sp->s_lock);
 281  281          nfs4_server_rele(sp);
 282  282  
 283  283          if (found == FALSE) {
 284  284  
 285  285                  CB_WARN("cb_getattr: bad fhandle\n");
 286  286  
 287  287                  *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
 288  288                  return;
 289  289          }
 290  290  
 291  291          /*
 292  292           * Figure out which attributes the server wants.  We only
 293  293           * offer FATTR4_CHANGE & FATTR4_SIZE; ignore the rest.
 294  294           */
 295  295          fdata = kmem_alloc(cb_getattr_bytes, KM_SLEEP);
 296  296  
 297  297          /*
 298  298           * Don't actually need to create XDR to encode these
 299  299           * simple data structures.
 300  300           * xdrmem_create(&xdr, fdata, cb_getattr_bytes, XDR_ENCODE);
 301  301           */
 302  302          fap = &resp->obj_attributes;
 303  303  
 304  304          fap->attrmask = 0;
 305  305          /* attrlist4_len starts at 0 and increases as attrs are processed */
 306  306          fap->attrlist4 = (char *)fdata;
 307  307          fap->attrlist4_len = 0;
 308  308  
 309  309          /* don't supply attrs if request was zero */
 310  310          if (args->attr_request != 0) {
 311  311                  if (args->attr_request & FATTR4_CHANGE_MASK) {
 312  312                          /*
 313  313                           * If the file is mmapped, then increment the change
 314  314                           * attribute and return it.  This will guarantee that
 315  315                           * the server will perceive that the file has changed
 316  316                           * if there is any chance that the client application
 317  317                           * has changed it.  Otherwise, just return the change
 318  318                           * attribute as it has been updated by nfs4write_deleg.
 319  319                           */
 320  320  
 321  321                          mutex_enter(&rp->r_statelock);
 322  322                          mapcnt = rp->r_mapcnt;
 323  323                          rflag = rp->r_flags;
 324  324                          mutex_exit(&rp->r_statelock);
 325  325  
 326  326                          mutex_enter(&rp->r_statev4_lock);
 327  327                          /*
 328  328                           * If object mapped, then always return new change.
 329  329                           * Otherwise, return change if object has dirty
 330  330                           * pages.  If object doesn't have any dirty pages,
 331  331                           * then all changes have been pushed to server, so
 332  332                           * reset change to grant change.
 333  333                           */
 334  334                          if (mapcnt)
 335  335                                  rp->r_deleg_change++;
 336  336                          else if (! (rflag & R4DIRTY))
 337  337                                  rp->r_deleg_change = rp->r_deleg_change_grant;
 338  338                          change = rp->r_deleg_change;
 339  339                          mutex_exit(&rp->r_statev4_lock);
 340  340  
 341  341                          /*
  342  342           * Use inline XDR code directly; we know that we are
  343  343           * going to a memory buffer and it has enough
 344  344                           * space so it cannot fail.
 345  345                           */
 346  346                          IXDR_PUT_U_HYPER(fdata, change);
 347  347                          fap->attrlist4_len += 2 * BYTES_PER_XDR_UNIT;
 348  348                          fap->attrmask |= FATTR4_CHANGE_MASK;
 349  349                  }
 350  350  
 351  351                  if (args->attr_request & FATTR4_SIZE_MASK) {
 352  352                          /*
 353  353                           * Use an atomic add of 0 to fetch a consistent view
 354  354                           * of r_size; this avoids having to take rw_lock
 355  355                           * which could cause a deadlock.
 356  356                           */
 357  357                          size = atomic_add_64_nv((uint64_t *)&rp->r_size, 0);
 358  358  
 359  359                          /*
  360  360           * Use inline XDR code directly; we know that we are
  361  361           * going to a memory buffer and it has enough
 362  362                           * space so it cannot fail.
 363  363                           */
 364  364                          IXDR_PUT_U_HYPER(fdata, size);
 365  365                          fap->attrlist4_len += 2 * BYTES_PER_XDR_UNIT;
 366  366                          fap->attrmask |= FATTR4_SIZE_MASK;
 367  367                  }
 368  368          }
 369  369  
 370  370          VN_RELE(vp);
 371  371  
 372  372          *cs->statusp = resp->status = NFS4_OK;
 373  373  }
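
The "don't actually need to create XDR" comment in cb_getattr() refers to the stream API; the inline IXDR_PUT_U_HYPER() macros store the same big-endian words with no stream setup. For comparison, a hedged sketch of what the equivalent xdrmem path would look like (the helper name is ours):

    #include <rpc/types.h>
    #include <rpc/xdr.h>

    /* Encode two unsigned hypers (change, size) into a preallocated buffer. */
    static bool_t
    encode_change_and_size(caddr_t buf, u_int len, u_longlong_t change,
        u_longlong_t size)
    {
            XDR xdr;

            xdrmem_create(&xdr, buf, len, XDR_ENCODE);
            return (xdr_u_longlong_t(&xdr, &change) &&
                xdr_u_longlong_t(&xdr, &size));
    }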
 374  374  
 375  375  static void
 376  376  cb_getattr_free(nfs_cb_resop4 *resop)
 377  377  {
 378  378          if (resop->nfs_cb_resop4_u.opcbgetattr.obj_attributes.attrlist4)
 379  379                  kmem_free(resop->nfs_cb_resop4_u.opcbgetattr.
 380  380                      obj_attributes.attrlist4, cb_getattr_bytes);
 381  381  }
 382  382  
 383  383  static void
 384  384  cb_recall(nfs_cb_argop4 *argop, nfs_cb_resop4 *resop, struct svc_req *req,
 385  385      struct compound_state *cs, struct nfs4_callback_globals *ncg)
 386  386  {
 387  387          CB_RECALL4args * args = &argop->nfs_cb_argop4_u.opcbrecall;
 388  388          CB_RECALL4res *resp = &resop->nfs_cb_resop4_u.opcbrecall;
 389  389          rnode4_t *rp;
 390  390          vnode_t *vp;
 391  391          struct nfs4_server *sp;
 392  392          bool_t found = FALSE;
 393  393  
 394  394          ncg->nfs4_callback_stats.cb_recall.value.ui64++;
 395  395  
 396  396          ASSERT(req->rq_prog >= NFS4_CALLBACK);
 397  397          ASSERT(req->rq_prog < NFS4_CALLBACK+nfs4_num_prognums);
 398  398  
 399  399  #ifdef DEBUG
 400  400          /*
 401  401           * error injection hook: set cb_recall_fail global to
  402  402           * NFS4 protocol error to be returned
 403  403           */
 404  404          if (cb4_recall_fail != NFS4_OK) {
 405  405                  *cs->statusp = resp->status = cb4_recall_fail;
 406  406                  return;
 407  407          }
 408  408  #endif
 409  409  
 410  410          mutex_enter(&ncg->nfs4_cb_lock);
 411  411          sp = ncg->nfs4prog2server[req->rq_prog - NFS4_CALLBACK];
 412  412          mutex_exit(&ncg->nfs4_cb_lock);
 413  413  
 414  414          if (nfs4_server_vlock(sp, 0) == FALSE) {
 415  415  
 416  416                  CB_WARN("cb_recall: cannot find server\n");
 417  417  
 418  418                  *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
 419  419                  return;
 420  420          }
 421  421  
 422  422          /*
 423  423           * Search the delegation list for a matching file handle
 424  424           * AND stateid; mutex on sp prevents the list from changing.
 425  425           */
 426  426  
 427  427          rp = list_head(&sp->s_deleg_list);
 428  428          for (; rp != NULL; rp = list_next(&sp->s_deleg_list, rp)) {
 429  429                  mutex_enter(&rp->r_statev4_lock);
 430  430  
 431  431                  /* check both state id and file handle! */
 432  432  
 433  433                  if ((bcmp(&rp->r_deleg_stateid, &args->stateid,
 434  434                      sizeof (stateid4)) == 0)) {
 435  435                          nfs4_fhandle_t fhandle;
 436  436  
 437  437                          sfh4_copyval(rp->r_fh, &fhandle);
 438  438                          if ((fhandle.fh_len == args->fh.nfs_fh4_len &&
 439  439                              bcmp(fhandle.fh_buf, args->fh.nfs_fh4_val,
 440  440                              fhandle.fh_len) == 0)) {
 441  441  
 442  442                                  found = TRUE;
 443  443                                  break;
 444  444                          } else {
 445  445  #ifdef  DEBUG
 446  446                                  CB_WARN("cb_recall: stateid OK, bad fh");
 447  447  #endif
 448  448                          }
 449  449                  }
 450  450  #ifdef  DEBUG
 451  451                  if (bcmp(&args->stateid, &nfs4_deleg_any,
 452  452                      sizeof (stateid4)) == 0) {
 453  453  
 454  454                          found = TRUE;
 455  455                          break;
 456  456                  }
 457  457  #endif
 458  458                  mutex_exit(&rp->r_statev4_lock);
 459  459          }
 460  460  
 461  461          /*
 462  462           * VN_HOLD the vnode before releasing s_lock to guarantee
 463  463           * we have a valid vnode reference.  The async thread will
 464  464           * release the hold when it's done.
 465  465           */
 466  466          if (found == TRUE) {
 467  467                  mutex_exit(&rp->r_statev4_lock);
 468  468                  vp = RTOV4(rp);
 469  469                  VN_HOLD(vp);
 470  470          }
 471  471          mutex_exit(&sp->s_lock);
 472  472          nfs4_server_rele(sp);
 473  473  
 474  474          if (found == FALSE) {
 475  475  
 476  476                  CB_WARN("cb_recall: bad stateid\n");
 477  477  
 478  478                  *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
 479  479                  return;
 480  480          }
 481  481  
 482  482          /* Fire up a thread to do the delegreturn */
 483  483          nfs4delegreturn_async(rp, NFS4_DR_RECALL|NFS4_DR_REOPEN,
 484  484              args->truncate);
 485  485  
 486  486          *cs->statusp = resp->status = 0;
 487  487  }
 488  488  
 489  489  /* ARGSUSED */
 490  490  static void
 491  491  cb_recall_free(nfs_cb_resop4 *resop)
 492  492  {
 493  493          /* nothing to do here, cb_recall doesn't kmem_alloc */
 494  494  }
 495  495  
 496  496  /*
 497  497   * This function handles the CB_NULL proc call from an NFSv4 Server.
 498  498   *
 499  499   * We take note that the server has sent a CB_NULL for later processing
 500  500   * in the recovery logic. It is noted so we may pause slightly after the
 501  501   * setclientid and before reopening files. The pause is to allow the
 502  502   * NFSv4 Server time to receive the CB_NULL reply and adjust any of
 503  503   * its internal structures such that it has the opportunity to grant
 504  504   * delegations to reopened files.
 505  505   *
 506  506   */
 507  507  
 508  508  /* ARGSUSED */
 509  509  static void
 510  510  cb_null(CB_COMPOUND4args *args, CB_COMPOUND4res *resp, struct svc_req *req,
 511  511      struct nfs4_callback_globals *ncg)
 512  512  {
 513  513          struct nfs4_server *sp;
 514  514  
 515  515          ncg->nfs4_callback_stats.cb_null.value.ui64++;
 516  516  
 517  517          ASSERT(req->rq_prog >= NFS4_CALLBACK);
 518  518          ASSERT(req->rq_prog < NFS4_CALLBACK+nfs4_num_prognums);
 519  519  
 520  520          mutex_enter(&ncg->nfs4_cb_lock);
 521  521          sp = ncg->nfs4prog2server[req->rq_prog - NFS4_CALLBACK];
 522  522          mutex_exit(&ncg->nfs4_cb_lock);
 523  523  
 524  524          if (nfs4_server_vlock(sp, 0) != FALSE) {
 525  525                  sp->s_flags |= N4S_CB_PINGED;
 526  526                  cv_broadcast(&sp->wait_cb_null);
 527  527                  mutex_exit(&sp->s_lock);
 528  528                  nfs4_server_rele(sp);
 529  529          }
 530  530  }
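
cb_null() only records the ping; the consumer is the recovery path, which waits on wait_cb_null after setclientid so the server can finish its callback-path test before files are reopened. A rough sketch of such a wait, in the spirit of the comment above (assumes the caller holds sp->s_lock; cv_reltimedwait() per condvar(9F)):

    static void
    wait_for_cb_ping(nfs4_server_t *sp, clock_t timeout_ticks)
    {
            ASSERT(MUTEX_HELD(&sp->s_lock));

            while (!(sp->s_flags & N4S_CB_PINGED)) {
                    /* -1 means the wait timed out; give up and proceed */
                    if (cv_reltimedwait(&sp->wait_cb_null, &sp->s_lock,
                        timeout_ticks, TR_CLOCK_TICK) == -1)
                            break;
            }
    }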
 531  531  
 532  532  /*
 533  533   * cb_illegal   args: void
 534  534   *              res : status (NFS4ERR_OP_CB_ILLEGAL)
 535  535   */
 536  536  /* ARGSUSED */
 537  537  static void
 538  538  cb_illegal(nfs_cb_argop4 *argop, nfs_cb_resop4 *resop, struct svc_req *req,
 539  539      struct compound_state *cs, struct nfs4_callback_globals *ncg)
 540  540  {
 541  541          CB_ILLEGAL4res *resp = &resop->nfs_cb_resop4_u.opcbillegal;
 542  542  
 543  543          ncg->nfs4_callback_stats.cb_illegal.value.ui64++;
 544  544          resop->resop = OP_CB_ILLEGAL;
 545  545          *cs->statusp = resp->status = NFS4ERR_OP_ILLEGAL;
 546  546  }
 547  547  
 548  548  static void
 549  549  cb_compound(CB_COMPOUND4args *args, CB_COMPOUND4res *resp, struct svc_req *req,
 550  550      struct nfs4_callback_globals *ncg)
 551  551  {
 552  552          uint_t i;
 553  553          struct compound_state cs;
 554  554          nfs_cb_argop4 *argop;
 555  555          nfs_cb_resop4 *resop, *new_res;
 556  556          uint_t op;
 557  557  
 558  558          bzero(&cs, sizeof (cs));
 559  559          cs.statusp = &resp->status;
 560  560          cs.cont = TRUE;
 561  561  
 562  562          /*
 563      -         * Form a reply tag by copying over the reqeuest tag.
      563 +         * Form a reply tag by copying over the request tag.
 564  564           */
 565  565          resp->tag.utf8string_len = args->tag.utf8string_len;
 566      -        resp->tag.utf8string_val = kmem_alloc(resp->tag.utf8string_len,
 567      -            KM_SLEEP);
 568      -        bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
 569      -            args->tag.utf8string_len);
      566 +        if (args->tag.utf8string_len != 0) {
      567 +                resp->tag.utf8string_val =
      568 +                    kmem_alloc(resp->tag.utf8string_len, KM_SLEEP);
      569 +                bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
      570 +                    args->tag.utf8string_len);
      571 +        } else {
      572 +                resp->tag.utf8string_val = NULL;
      573 +        }
 570  574  
 571  575          /*
 572  576           * XXX for now, minorversion should be zero
 573  577           */
 574  578          if (args->minorversion != CB4_MINORVERSION) {
 575  579                  resp->array_len = 0;
 576  580                  resp->array = NULL;
 577  581                  resp->status = NFS4ERR_MINOR_VERS_MISMATCH;
 578  582                  return;
 579  583          }
 580  584  
 581  585  #ifdef DEBUG
 582  586          /*
 583  587           * Verify callback_ident.  It doesn't really matter if it's wrong
 584  588           * because we don't really use callback_ident -- we use prog number
 585  589           * of the RPC request instead.  In this case, just print a DEBUG
 586  590           * console message to reveal brokenness of cbclient (at bkoff/cthon).
 587  591           */
 588  592          if (args->callback_ident != req->rq_prog)
 589  593                  zcmn_err(getzoneid(), CE_WARN,
 590  594                      "cb_compound: cb_client using wrong "
 591  595                      "callback_ident(%d), should be %d",
 592  596                      args->callback_ident, req->rq_prog);
 593  597  #endif
 594  598  
 595  599          resp->array_len = args->array_len;
 596  600          resp->array = kmem_zalloc(args->array_len * sizeof (nfs_cb_resop4),
 597  601              KM_SLEEP);
 598  602  
 599  603          for (i = 0; i < args->array_len && cs.cont; i++) {
 600  604  
 601  605                  argop = &args->array[i];
 602  606                  resop = &resp->array[i];
 603  607                  resop->resop = argop->argop;
 604  608                  op = (uint_t)resop->resop;
 605  609  
 606  610                  switch (op) {
 607  611  
 608  612                  case OP_CB_GETATTR:
 609  613  
 610  614                          cb_getattr(argop, resop, req, &cs, ncg);
 611  615                          break;
 612  616  
 613  617                  case OP_CB_RECALL:
 614  618  
 615  619                          cb_recall(argop, resop, req, &cs, ncg);
 616  620                          break;
 617  621  
 618  622                  case OP_CB_ILLEGAL:
 619  623  
 620  624                          /* fall through */
 621  625  
 622  626                  default:
 623  627                          /*
 624  628                           * Handle OP_CB_ILLEGAL and any undefined opcode.
 625  629                           * Currently, the XDR code will return BADXDR
 626  630                           * if cb op doesn't decode to legal value, so
 627  631                           * it really only handles OP_CB_ILLEGAL.
 628  632                           */
 629  633                          op = OP_CB_ILLEGAL;
 630  634                          cb_illegal(argop, resop, req, &cs, ncg);
 631  635                  }
 632  636  
 633  637                  if (*cs.statusp != NFS4_OK)
 634  638                          cs.cont = FALSE;
 635  639  
 636  640                  /*
 637  641                   * If not at last op, and if we are to stop, then
 638  642                   * compact the results array.
 639  643                   */
 640  644                  if ((i + 1) < args->array_len && !cs.cont) {
 641  645  
 642  646                          new_res = kmem_alloc(
 643  647                              (i+1) * sizeof (nfs_cb_resop4), KM_SLEEP);
 644  648                          bcopy(resp->array,
 645  649                              new_res, (i+1) * sizeof (nfs_cb_resop4));
 646  650                          kmem_free(resp->array,
 647  651                              args->array_len * sizeof (nfs_cb_resop4));
 648  652  
 649  653                          resp->array_len =  i + 1;
 650  654                          resp->array = new_res;
 651  655                  }
 652  656          }
 653  657  
 654  658  }
 655  659  
 656  660  static void
 657  661  cb_compound_free(CB_COMPOUND4res *resp)
 658  662  {
 659  663          uint_t i, op;
 660  664          nfs_cb_resop4 *resop;
 661  665  
 662  666          if (resp->tag.utf8string_val) {
 663  667                  UTF8STRING_FREE(resp->tag)
 664  668          }
 665  669  
 666  670          for (i = 0; i < resp->array_len; i++) {
 667  671  
 668  672                  resop = &resp->array[i];
 669  673                  op = (uint_t)resop->resop;
 670  674  
 671  675                  switch (op) {
 672  676  
 673  677                  case OP_CB_GETATTR:
 674  678  
 675  679                          cb_getattr_free(resop);
 676  680                          break;
 677  681  
 678  682                  case OP_CB_RECALL:
 679  683  
 680  684                          cb_recall_free(resop);
 681  685                          break;
 682  686  
 683  687                  default:
 684  688                          break;
 685  689                  }
 686  690          }
 687  691  
 688  692          if (resp->array != NULL) {
 689  693                  kmem_free(resp->array,
 690  694                      resp->array_len * sizeof (nfs_cb_resop4));
 691  695          }
 692  696  }
 693  697  
 694  698  static void
 695  699  cb_dispatch(struct svc_req *req, SVCXPRT *xprt)
 696  700  {
 697  701          CB_COMPOUND4args args;
 698  702          CB_COMPOUND4res res;
 699  703          struct nfs4_callback_globals *ncg;
 700  704  
 701  705          bool_t (*xdr_args)(), (*xdr_res)();
 702  706          void (*proc)(CB_COMPOUND4args *, CB_COMPOUND4res *, struct svc_req *,
 703  707              struct nfs4_callback_globals *);
 704  708          void (*freeproc)(CB_COMPOUND4res *);
 705  709  
 706  710          ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone());
 707  711          ASSERT(ncg != NULL);
 708  712  
 709  713          ncg->nfs4_callback_stats.cb_dispatch.value.ui64++;
 710  714  
 711  715          switch (req->rq_proc) {
 712  716          case CB_NULL:
 713  717                  xdr_args = xdr_void;
 714  718                  xdr_res = xdr_void;
 715  719                  proc = cb_null;
 716  720                  freeproc = NULL;
 717  721                  break;
 718  722  
 719  723          case CB_COMPOUND:
 720  724                  xdr_args = xdr_CB_COMPOUND4args_clnt;
 721  725                  xdr_res = xdr_CB_COMPOUND4res;
 722  726                  proc = cb_compound;
 723  727                  freeproc = cb_compound_free;
 724  728                  break;
 725  729  
 726  730          default:
 727  731                  CB_WARN("cb_dispatch: no proc\n");
 728  732                  svcerr_noproc(xprt);
 729  733                  return;
 730  734          }
 731  735  
 732  736          args.tag.utf8string_val = NULL;
 733  737          args.array = NULL;
 734  738  
 735  739          if (!SVC_GETARGS(xprt, xdr_args, (caddr_t)&args)) {
 736  740  
 737  741                  CB_WARN("cb_dispatch: cannot getargs\n");
 738  742                  svcerr_decode(xprt);
 739  743                  return;
 740  744          }
 741  745  
 742  746          (*proc)(&args, &res, req, ncg);
 743  747  
 744  748          if (svc_sendreply(xprt, xdr_res, (caddr_t)&res) == FALSE) {
 745  749  
 746  750                  CB_WARN("cb_dispatch: bad sendreply\n");
 747  751                  svcerr_systemerr(xprt);
 748  752          }
 749  753  
 750  754          if (freeproc)
 751  755                  (*freeproc)(&res);
 752  756  
 753  757          if (!SVC_FREEARGS(xprt, xdr_args, (caddr_t)&args)) {
 754  758  
 755  759                  CB_WARN("cb_dispatch: bad freeargs\n");
 756  760          }
 757  761  }
 758  762  
 759  763  static rpcprog_t
 760  764  nfs4_getnextprogram(struct nfs4_callback_globals *ncg)
 761  765  {
 762  766          int i, j;
 763  767  
 764  768          j = ncg->nfs4_program_hint;
 765  769          for (i = 0; i < nfs4_num_prognums; i++, j++) {
 766  770  
 767  771                  if (j >= nfs4_num_prognums)
 768  772                          j = 0;
 769  773  
 770  774                  if (ncg->nfs4prog2server[j] == NULL) {
 771  775                          ncg->nfs4_program_hint = j+1;
 772  776                          return (j+NFS4_CALLBACK);
 773  777                  }
 774  778          }
 775  779  
 776  780          return (0);
 777  781  }
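
nfs4_getnextprogram() is a plain round robin over the nfs4prog2server table: the scan starts at nfs4_program_hint, wraps once, and nfs4callback_destroy() pulls the hint back when it frees a lower slot so freed entries are found quickly. The same logic in a self-contained form, for illustration:

    /* Find a free slot in tab[0..n-1], starting at *hintp and wrapping. */
    static int
    next_free_slot(void **tab, int n, int *hintp)
    {
            int i, j = *hintp;

            for (i = 0; i < n; i++, j++) {
                    if (j >= n)
                            j = 0;
                    if (tab[j] == NULL) {
                            *hintp = j + 1;
                            return (j);
                    }
            }
            return (-1);        /* table is full */
    }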
 778  782  
 779  783  void
 780  784  nfs4callback_destroy(nfs4_server_t *np)
 781  785  {
 782  786          struct nfs4_callback_globals *ncg;
 783  787          int i;
 784  788  
 785  789          if (np->s_program == 0)
 786  790                  return;
 787  791  
 788  792          ncg = np->zone_globals;
 789  793          i = np->s_program - NFS4_CALLBACK;
 790  794  
 791  795          mutex_enter(&ncg->nfs4_cb_lock);
 792  796  
 793  797          ASSERT(ncg->nfs4prog2server[i] == np);
 794  798  
 795  799          ncg->nfs4prog2server[i] = NULL;
 796  800  
 797  801          if (i < ncg->nfs4_program_hint)
 798  802                  ncg->nfs4_program_hint = i;
 799  803  
 800  804          mutex_exit(&ncg->nfs4_cb_lock);
 801  805  }
 802  806  
 803  807  /*
  804  808   * nfs4_setport - This function saves a netid and universal address for
 805  809   * the callback program.  These values will be used during setclientid.
 806  810   */
 807  811  static void
 808  812  nfs4_setport(char *netid, char *uaddr, char *protofmly, char *proto,
 809  813      struct nfs4_callback_globals *ncg)
 810  814  {
 811  815          struct nfs4_cb_port *p;
 812  816          bool_t found = FALSE;
 813  817  
 814  818          ASSERT(MUTEX_HELD(&ncg->nfs4_cb_lock));
 815  819  
 816  820          p = list_head(&ncg->nfs4_cb_ports);
 817  821          for (; p != NULL; p = list_next(&ncg->nfs4_cb_ports, p)) {
 818  822                  if (strcmp(p->netid, netid) == 0) {
 819  823                          found = TRUE;
 820  824                          break;
 821  825                  }
 822  826          }
 823  827          if (found == TRUE)
 824  828                  (void) strcpy(p->uaddr, uaddr);
 825  829          else {
 826  830                  p = kmem_alloc(sizeof (*p), KM_SLEEP);
 827  831  
 828  832                  (void) strcpy(p->uaddr, uaddr);
 829  833                  (void) strcpy(p->netid, netid);
 830  834                  (void) strcpy(p->protofmly, protofmly);
 831  835                  (void) strcpy(p->proto, proto);
 832  836                  list_insert_head(&ncg->nfs4_cb_ports, p);
 833  837          }
 834  838  }
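
The uaddr stored here is an RPC "universal address": for IPv4, the dotted address followed by the port number split into its high and low octets, so port 2049 appends ".8.1". A hedged userland illustration of that encoding (the helper is ours, not from this code):

    #include <stdio.h>
    #include <netinet/in.h>
    #include <arpa/inet.h>

    /* Render a sockaddr_in as a universal address, e.g. "10.0.0.5.8.1". */
    static void
    format_uaddr(const struct sockaddr_in *sin, char *buf, size_t len)
    {
            unsigned int port = ntohs(sin->sin_port);

            (void) snprintf(buf, len, "%s.%u.%u",
                inet_ntoa(sin->sin_addr), port >> 8, port & 0xff);
    }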
 835  839  
 836  840  /*
 837  841   * nfs4_cb_args - This function is used to construct the callback
 838  842   * portion of the arguments needed for setclientid.
 839  843   */
 840  844  
 841  845  void
 842  846  nfs4_cb_args(nfs4_server_t *np, struct knetconfig *knc, SETCLIENTID4args *args)
 843  847  {
 844  848          struct nfs4_cb_port *p;
 845  849          bool_t found = FALSE;
 846  850          rpcprog_t pgm;
 847  851          struct nfs4_callback_globals *ncg = np->zone_globals;
 848  852  
 849  853          /*
 850  854           * This server structure may already have a program number
 851  855           * assigned to it.  This happens when the client has to
 852  856           * re-issue SETCLIENTID.  Just re-use the information.
 853  857           */
 854  858          if (np->s_program >= NFS4_CALLBACK &&
 855  859              np->s_program < NFS4_CALLBACK + nfs4_num_prognums)
 856  860                  nfs4callback_destroy(np);
 857  861  
 858  862          mutex_enter(&ncg->nfs4_cb_lock);
 859  863  
 860  864          p = list_head(&ncg->nfs4_cb_ports);
 861  865          for (; p != NULL; p = list_next(&ncg->nfs4_cb_ports, p)) {
 862  866                  if (strcmp(p->protofmly, knc->knc_protofmly) == 0 &&
 863  867                      strcmp(p->proto, knc->knc_proto) == 0) {
 864  868                          found = TRUE;
 865  869                          break;
 866  870                  }
 867  871          }
 868  872  
 869  873          if (found == FALSE) {
 870  874  
 871  875                  NFS4_DEBUG(nfs4_callback_debug,
 872  876                      (CE_WARN, "nfs4_cb_args: could not find netid for %s/%s\n",
 873  877                      knc->knc_protofmly, knc->knc_proto));
 874  878  
 875  879                  args->callback.cb_program = 0;
 876  880                  args->callback.cb_location.r_netid = NULL;
 877  881                  args->callback.cb_location.r_addr = NULL;
 878  882                  args->callback_ident = 0;
 879  883                  mutex_exit(&ncg->nfs4_cb_lock);
 880  884                  return;
 881  885          }
 882  886  
 883  887          if ((pgm = nfs4_getnextprogram(ncg)) == 0) {
 884  888                  CB_WARN("nfs4_cb_args: out of program numbers\n");
 885  889  
 886  890                  args->callback.cb_program = 0;
 887  891                  args->callback.cb_location.r_netid = NULL;
 888  892                  args->callback.cb_location.r_addr = NULL;
 889  893                  args->callback_ident = 0;
 890  894                  mutex_exit(&ncg->nfs4_cb_lock);
 891  895                  return;
 892  896          }
 893  897  
 894  898          ncg->nfs4prog2server[pgm-NFS4_CALLBACK] = np;
 895  899          args->callback.cb_program = pgm;
 896  900          args->callback.cb_location.r_netid = p->netid;
 897  901          args->callback.cb_location.r_addr = p->uaddr;
 898  902          args->callback_ident = pgm;
 899  903  
 900  904          np->s_program = pgm;
 901  905  
 902  906          mutex_exit(&ncg->nfs4_cb_lock);
 903  907  }
 904  908  
 905  909  static int
 906  910  nfs4_dquery(struct nfs4_svc_args *arg, model_t model)
 907  911  {
 908  912          file_t *fp;
 909  913          vnode_t *vp;
 910  914          rnode4_t *rp;
 911  915          int error;
 912  916          STRUCT_HANDLE(nfs4_svc_args, uap);
 913  917  
 914  918          STRUCT_SET_HANDLE(uap, model, arg);
 915  919  
 916  920          if ((fp = getf(STRUCT_FGET(uap, fd))) == NULL)
 917  921                  return (EBADF);
 918  922  
 919  923          vp = fp->f_vnode;
 920  924  
 921  925          if (vp == NULL || vp->v_type != VREG ||
 922  926              !vn_matchops(vp, nfs4_vnodeops)) {
 923  927                  releasef(STRUCT_FGET(uap, fd));
 924  928                  return (EBADF);
 925  929          }
 926  930  
 927  931          rp = VTOR4(vp);
 928  932  
 929  933          /*
 930  934           * I can't convince myself that we need locking here.  The
 931  935           * rnode cannot disappear and the value returned is instantly
  932  936           * stale anyway, so why bother?
 933  937           */
 934  938  
 935  939          error = suword32(STRUCT_FGETP(uap, netid), rp->r_deleg_type);
 936  940          releasef(STRUCT_FGET(uap, fd));
 937  941          return (error);
 938  942  }
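
nfs4_dquery() answers the NFS4_DQUERY subcommand by suword32()ing the file's current delegation type back through the user-supplied netid pointer. A heavily hedged userland sketch of the interface shape (assumptions: the private _nfssys() entry point and struct nfs4_svc_args from <nfs/nfssys.h>; this is not a stable API):

    #include <sys/types.h>
    #include <fcntl.h>
    #include <unistd.h>
    #include <nfs/nfssys.h>

    extern int _nfssys(int, void *);    /* private syscall stub */

    /* Ask the kernel what delegation type is held on an open file. */
    static int
    query_deleg_type(const char *path, uint32_t *typep)
    {
            struct nfs4_svc_args nsa = { 0 };
            int fd, err;

            if ((fd = open(path, O_RDONLY)) < 0)
                    return (-1);
            nsa.fd = fd;
            nsa.cmd = NFS4_DQUERY;
            nsa.netid = (char *)typep;  /* nfs4_dquery() writes the type here */
            err = _nfssys(NFS4_SVC, &nsa);
            (void) close(fd);
            return (err);
    }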
 939  943  
 940  944  
 941  945  /*
 942  946   * NFS4 client system call.  This service does the
 943  947   * necessary initialization for the callback program.
 944  948   * This is fashioned after the server side interaction
 945  949   * between nfsd and the kernel.  On the client, the
 946  950   * mount command forks and the child process does the
 947  951   * necessary interaction with the kernel.
 948  952   *
 949  953   * uap->fd is the fd of an open transport provider
 950  954   */
 951  955  int
 952  956  nfs4_svc(struct nfs4_svc_args *arg, model_t model)
 953  957  {
 954  958          file_t *fp;
 955  959          int error;
 956  960          int readsize;
 957  961          char buf[KNC_STRSIZE], uaddr[KNC_STRSIZE];
 958  962          char protofmly[KNC_STRSIZE], proto[KNC_STRSIZE];
 959  963          size_t len;
 960  964          STRUCT_HANDLE(nfs4_svc_args, uap);
 961  965          struct netbuf addrmask;
 962  966          int cmd;
 963  967          SVCMASTERXPRT *cb_xprt;
 964  968          struct nfs4_callback_globals *ncg;
 965  969  
 966  970  #ifdef lint
 967  971          model = model;          /* STRUCT macros don't always refer to it */
 968  972  #endif
 969  973  
 970  974          STRUCT_SET_HANDLE(uap, model, arg);
 971  975  
 972  976          if (STRUCT_FGET(uap, cmd) == NFS4_DQUERY)
 973  977                  return (nfs4_dquery(arg, model));
 974  978  
 975  979          if (secpolicy_nfs(CRED()) != 0)
 976  980                  return (EPERM);
 977  981  
 978  982          if ((fp = getf(STRUCT_FGET(uap, fd))) == NULL)
 979  983                  return (EBADF);
 980  984  
 981  985          /*
 982  986           * Set read buffer size to rsize
 983  987           * and add room for RPC headers.
 984  988           */
 985  989          readsize = nfs3tsize() + (RPC_MAXDATASIZE - NFS_MAXDATA);
 986  990          if (readsize < RPC_MAXDATASIZE)
 987  991                  readsize = RPC_MAXDATASIZE;
 988  992  
 989  993          error = copyinstr((const char *)STRUCT_FGETP(uap, netid), buf,
 990  994              KNC_STRSIZE, &len);
 991  995          if (error) {
 992  996                  releasef(STRUCT_FGET(uap, fd));
 993  997                  return (error);
 994  998          }
 995  999  
 996 1000          cmd = STRUCT_FGET(uap, cmd);
 997 1001  
 998 1002          if (cmd & NFS4_KRPC_START) {
 999 1003                  addrmask.len = STRUCT_FGET(uap, addrmask.len);
1000 1004                  addrmask.maxlen = STRUCT_FGET(uap, addrmask.maxlen);
1001 1005                  addrmask.buf = kmem_alloc(addrmask.maxlen, KM_SLEEP);
1002 1006                  error = copyin(STRUCT_FGETP(uap, addrmask.buf), addrmask.buf,
1003 1007                      addrmask.len);
1004 1008                  if (error) {
1005 1009                          releasef(STRUCT_FGET(uap, fd));
1006 1010                          kmem_free(addrmask.buf, addrmask.maxlen);
1007 1011                          return (error);
1008 1012                  }
1009 1013          }
1010 1014          else
1011 1015                  addrmask.buf = NULL;
1012 1016  
1013 1017          error = copyinstr((const char *)STRUCT_FGETP(uap, addr), uaddr,
1014 1018              sizeof (uaddr), &len);
1015 1019          if (error) {
1016 1020                  releasef(STRUCT_FGET(uap, fd));
1017 1021                  if (addrmask.buf)
1018 1022                          kmem_free(addrmask.buf, addrmask.maxlen);
1019 1023                  return (error);
1020 1024          }
1021 1025  
1022 1026          error = copyinstr((const char *)STRUCT_FGETP(uap, protofmly), protofmly,
1023 1027              sizeof (protofmly), &len);
1024 1028          if (error) {
1025 1029                  releasef(STRUCT_FGET(uap, fd));
1026 1030                  if (addrmask.buf)
1027 1031                          kmem_free(addrmask.buf, addrmask.maxlen);
1028 1032                  return (error);
1029 1033          }
1030 1034  
1031 1035          error = copyinstr((const char *)STRUCT_FGETP(uap, proto), proto,
1032 1036              sizeof (proto), &len);
1033 1037          if (error) {
1034 1038                  releasef(STRUCT_FGET(uap, fd));
1035 1039                  if (addrmask.buf)
1036 1040                          kmem_free(addrmask.buf, addrmask.maxlen);
1037 1041                  return (error);
1038 1042          }
1039 1043  
1040 1044          ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone());
1041 1045          ASSERT(ncg != NULL);
1042 1046  
1043 1047          mutex_enter(&ncg->nfs4_cb_lock);
1044 1048          if (cmd & NFS4_SETPORT)
1045 1049                  nfs4_setport(buf, uaddr, protofmly, proto, ncg);
1046 1050  
1047 1051          if (cmd & NFS4_KRPC_START) {
1048 1052                  error = svc_tli_kcreate(fp, readsize, buf, &addrmask, &cb_xprt,
1049 1053                      &nfs4_cb_sct, NULL, NFS_CB_SVCPOOL_ID, FALSE);
1050 1054                  if (error) {
1051 1055                          CB_WARN1("nfs4_svc: svc_tli_kcreate failed %d\n",
1052 1056                              error);
1053 1057                          kmem_free(addrmask.buf, addrmask.maxlen);
1054 1058                  }
1055 1059          }
1056 1060  
1057 1061          mutex_exit(&ncg->nfs4_cb_lock);
1058 1062          releasef(STRUCT_FGET(uap, fd));
1059 1063          return (error);
1060 1064  }
1061 1065  
1062 1066  struct nfs4_callback_globals *
1063 1067  nfs4_get_callback_globals(void)
1064 1068  {
1065 1069          return (zone_getspecific(nfs4_callback_zone_key, nfs_zone()));
1066 1070  }
1067 1071  
1068 1072  static void *
1069 1073  nfs4_callback_init_zone(zoneid_t zoneid)
1070 1074  {
1071 1075          kstat_t *nfs4_callback_kstat;
1072 1076          struct nfs4_callback_globals *ncg;
1073 1077  
1074 1078          ncg = kmem_zalloc(sizeof (*ncg), KM_SLEEP);
1075 1079  
1076 1080          ncg->nfs4prog2server = kmem_zalloc(nfs4_num_prognums *
1077 1081              sizeof (struct nfs4_server *), KM_SLEEP);
1078 1082  
1079 1083          /* initialize the dlist */
1080 1084          mutex_init(&ncg->nfs4_dlist_lock, NULL, MUTEX_DEFAULT, NULL);
1081 1085          list_create(&ncg->nfs4_dlist, sizeof (struct nfs4_dnode),
1082 1086              offsetof(struct nfs4_dnode, linkage));
1083 1087  
1084 1088          /* initialize cb_port list */
1085 1089          mutex_init(&ncg->nfs4_cb_lock, NULL, MUTEX_DEFAULT, NULL);
1086 1090          list_create(&ncg->nfs4_cb_ports, sizeof (struct nfs4_cb_port),
1087 1091              offsetof(struct nfs4_cb_port, linkage));
1088 1092  
1089 1093          /* get our own copy of the kstats */
1090 1094          bcopy(&nfs4_callback_stats_tmpl, &ncg->nfs4_callback_stats,
1091 1095              sizeof (nfs4_callback_stats_tmpl));
1092 1096          /* register "nfs:0:nfs4_callback_stats" for this zone */
1093 1097          if ((nfs4_callback_kstat =
1094 1098              kstat_create_zone("nfs", 0, "nfs4_callback_stats", "misc",
1095 1099              KSTAT_TYPE_NAMED,
1096 1100              sizeof (ncg->nfs4_callback_stats) / sizeof (kstat_named_t),
1097 1101              KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE,
1098 1102              zoneid)) != NULL) {
1099 1103                  nfs4_callback_kstat->ks_data = &ncg->nfs4_callback_stats;
1100 1104                  kstat_install(nfs4_callback_kstat);
1101 1105          }
1102 1106          return (ncg);
1103 1107  }
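
The per-zone kstat registered above is readable from userland with libkstat. A minimal sketch (compile with -lkstat; run it in the zone whose counters you want):

    #include <stdio.h>
    #include <kstat.h>

    int
    main(void)
    {
            kstat_ctl_t *kc;
            kstat_t *ksp;
            kstat_named_t *kn;

            if ((kc = kstat_open()) == NULL)
                    return (1);
            ksp = kstat_lookup(kc, "nfs", 0, "nfs4_callback_stats");
            if (ksp != NULL && kstat_read(kc, ksp, NULL) != -1 &&
                (kn = kstat_data_lookup(ksp, "delegations")) != NULL)
                    (void) printf("delegations: %llu\n",
                        (unsigned long long)kn->value.ui64);
            (void) kstat_close(kc);
            return (0);
    }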
1104 1108  
1105 1109  static void
1106 1110  nfs4_discard_delegations(struct nfs4_callback_globals *ncg)
1107 1111  {
1108 1112          nfs4_server_t *sp;
1109 1113          int i, num_removed;
1110 1114  
1111 1115          /*
1112 1116           * It's OK here to just run through the registered "programs", as
1113 1117           * servers without programs won't have any delegations to handle.
1114 1118           */
1115 1119          for (i = 0; i < nfs4_num_prognums; i++) {
1116 1120                  rnode4_t *rp;
1117 1121  
1118 1122                  mutex_enter(&ncg->nfs4_cb_lock);
1119 1123                  sp = ncg->nfs4prog2server[i];
1120 1124                  mutex_exit(&ncg->nfs4_cb_lock);
1121 1125  
1122 1126                  if (nfs4_server_vlock(sp, 1) == FALSE)
1123 1127                          continue;
1124 1128                  num_removed = 0;
1125 1129                  while ((rp = list_head(&sp->s_deleg_list)) != NULL) {
1126 1130                          mutex_enter(&rp->r_statev4_lock);
1127 1131                          if (rp->r_deleg_type == OPEN_DELEGATE_NONE) {
1128 1132                                  /*
1129 1133                                   * We need to take matters into our own hands,
1130 1134                                   * as nfs4delegreturn_cleanup_impl() won't
1131 1135                                   * remove this from the list.
1132 1136                                   */
1133 1137                                  list_remove(&sp->s_deleg_list, rp);
1134 1138                                  mutex_exit(&rp->r_statev4_lock);
1135 1139                                  nfs4_dec_state_ref_count_nolock(sp,
1136 1140                                      VTOMI4(RTOV4(rp)));
1137 1141                                  num_removed++;
1138 1142                                  continue;
1139 1143                          }
1140 1144                          mutex_exit(&rp->r_statev4_lock);
1141 1145                          VN_HOLD(RTOV4(rp));
1142 1146                          mutex_exit(&sp->s_lock);
1143 1147                          /*
1144 1148                           * The following will remove the node from the list.
1145 1149                           */
1146 1150                          nfs4delegreturn_cleanup_impl(rp, sp, ncg);
1147 1151                          VN_RELE(RTOV4(rp));
1148 1152                          mutex_enter(&sp->s_lock);
1149 1153                  }
1150 1154                  mutex_exit(&sp->s_lock);
1151 1155                  /* each removed list node reles a reference */
1152 1156                  while (num_removed-- > 0)
1153 1157                          nfs4_server_rele(sp);
1154 1158                  /* remove our reference for nfs4_server_vlock */
1155 1159                  nfs4_server_rele(sp);
1156 1160          }
1157 1161  }
1158 1162  
1159 1163  /* ARGSUSED */
1160 1164  static void
1161 1165  nfs4_callback_shutdown_zone(zoneid_t zoneid, void *data)
1162 1166  {
1163 1167          struct nfs4_callback_globals *ncg = data;
1164 1168  
1165 1169          /*
1166 1170           * Clean pending delegation return list.
1167 1171           */
1168 1172          nfs4_dlistclean_impl(ncg, NFS4_DR_DISCARD);
1169 1173  
1170 1174          /*
1171 1175           * Discard all delegations.
1172 1176           */
1173 1177          nfs4_discard_delegations(ncg);
1174 1178  }
1175 1179  
1176 1180  static void
1177 1181  nfs4_callback_fini_zone(zoneid_t zoneid, void *data)
1178 1182  {
1179 1183          struct nfs4_callback_globals *ncg = data;
1180 1184          struct nfs4_cb_port *p;
1181 1185          nfs4_server_t *sp, *next;
1182 1186          nfs4_server_t freelist;
1183 1187          int i;
1184 1188  
1185 1189          kstat_delete_byname_zone("nfs", 0, "nfs4_callback_stats", zoneid);
1186 1190  
1187 1191          /*
1188 1192           * Discard all delegations that may have crept in since we did the
1189 1193           * _shutdown.
1190 1194           */
1191 1195          nfs4_discard_delegations(ncg);
1192 1196          /*
1193 1197           * We're completely done with this zone and all associated
1194 1198           * nfs4_server_t's.  Any remaining nfs4_server_ts should only have one
1195 1199           * more reference outstanding -- the reference we didn't release in
1196 1200           * nfs4_renew_lease_thread().
1197 1201           *
1198 1202           * Here we need to run through the global nfs4_server_lst as we need to
1199 1203           * deal with nfs4_server_ts without programs, as they also have threads
1200 1204           * created for them, and so have outstanding references that we need to
1201 1205           * release.
1202 1206           */
1203 1207          freelist.forw = &freelist;
1204 1208          freelist.back = &freelist;
1205 1209          mutex_enter(&nfs4_server_lst_lock);
1206 1210          sp = nfs4_server_lst.forw;
1207 1211          while (sp != &nfs4_server_lst) {
1208 1212                  next = sp->forw;
1209 1213                  if (sp->zoneid == zoneid) {
1210 1214                          remque(sp);
1211 1215                          insque(sp, &freelist);
1212 1216                  }
1213 1217                  sp = next;
1214 1218          }
1215 1219          mutex_exit(&nfs4_server_lst_lock);
1216 1220  
1217 1221          sp = freelist.forw;
1218 1222          while (sp != &freelist) {
1219 1223                  next = sp->forw;
1220 1224                  nfs4_server_rele(sp);   /* free the list's reference */
1221 1225                  sp = next;
1222 1226          }
1223 1227  
1224 1228  #ifdef DEBUG
1225 1229          for (i = 0; i < nfs4_num_prognums; i++) {
1226 1230                  ASSERT(ncg->nfs4prog2server[i] == NULL);
1227 1231          }
1228 1232  #endif
1229 1233          kmem_free(ncg->nfs4prog2server, nfs4_num_prognums *
1230 1234              sizeof (struct nfs4_server *));
1231 1235  
1232 1236          mutex_enter(&ncg->nfs4_cb_lock);
1233 1237          while ((p = list_head(&ncg->nfs4_cb_ports)) != NULL) {
1234 1238                  list_remove(&ncg->nfs4_cb_ports, p);
1235 1239                  kmem_free(p, sizeof (*p));
1236 1240          }
1237 1241          list_destroy(&ncg->nfs4_cb_ports);
1238 1242          mutex_destroy(&ncg->nfs4_cb_lock);
1239 1243          list_destroy(&ncg->nfs4_dlist);
1240 1244          mutex_destroy(&ncg->nfs4_dlist_lock);
1241 1245          kmem_free(ncg, sizeof (*ncg));
1242 1246  }
1243 1247  
1244 1248  void
1245 1249  nfs4_callback_init(void)
1246 1250  {
1247 1251          int i;
1248 1252          SVC_CALLOUT *nfs4_cb_sc;
1249 1253  
1250 1254          /* initialize the callback table */
1251 1255          nfs4_cb_sc = kmem_alloc(nfs4_num_prognums *
1252 1256              sizeof (SVC_CALLOUT), KM_SLEEP);
1253 1257  
1254 1258          for (i = 0; i < nfs4_num_prognums; i++) {
1255 1259                  nfs4_cb_sc[i].sc_prog = NFS4_CALLBACK+i;
1256 1260                  nfs4_cb_sc[i].sc_versmin = NFS_CB;
1257 1261                  nfs4_cb_sc[i].sc_versmax = NFS_CB;
1258 1262                  nfs4_cb_sc[i].sc_dispatch = cb_dispatch;
1259 1263          }
1260 1264  
1261 1265          nfs4_cb_sct.sct_size = nfs4_num_prognums;
1262 1266          nfs4_cb_sct.sct_free = FALSE;
1263 1267          nfs4_cb_sct.sct_sc = nfs4_cb_sc;
1264 1268  
1265 1269          /*
1266 1270           * Compute max bytes required for dynamically allocated parts
1267 1271           * of cb_getattr reply.  Only size and change are supported now.
1268 1272           * If CB_GETATTR is changed to reply with additional attrs,
1269 1273           * additional sizes must be added below.
1270 1274           *
1271 1275           * fattr4_change + fattr4_size == uint64_t + uint64_t
1272 1276           */
1273 1277          cb_getattr_bytes = 2 * BYTES_PER_XDR_UNIT + 2 * BYTES_PER_XDR_UNIT;
1274 1278  
1275 1279          zone_key_create(&nfs4_callback_zone_key, nfs4_callback_init_zone,
1276 1280              nfs4_callback_shutdown_zone, nfs4_callback_fini_zone);
1277 1281  }
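
/*
 * Editorial note, spelling out the cb_getattr_bytes arithmetic above:
 * BYTES_PER_XDR_UNIT is 4 and each uint64_t attribute encodes as two
 * XDR units, so:
 *
 *	fattr4_change:	2 units * 4 bytes =  8 bytes
 *	fattr4_size:	2 units * 4 bytes =  8 bytes
 *	total                             = 16 bytes
 */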
1278 1282  
1279 1283  void
1280 1284  nfs4_callback_fini(void)
1281 1285  {
1282 1286  }
1283 1287  
1284 1288  /*
1285 1289   * NB: This function can be called from the *wrong* zone (ie, the zone that
1286 1290   * 'rp' belongs to and the caller's zone may not be the same).  This can happen
1287 1291   * if the zone is going away and we get called from nfs4_async_inactive().  In
1288 1292   * this case the globals will be NULL and we won't update the counters, which
1289 1293   * doesn't matter as the zone is going away anyhow.
1290 1294   */
1291 1295  static void
1292 1296  nfs4delegreturn_cleanup_impl(rnode4_t *rp, nfs4_server_t *np,
1293 1297      struct nfs4_callback_globals *ncg)
1294 1298  {
1295 1299          mntinfo4_t *mi = VTOMI4(RTOV4(rp));
1296 1300          boolean_t need_rele = B_FALSE;
1297 1301  
1298 1302          /*
1299 1303           * Caller must be holding mi_recovlock in read mode
1300 1304           * to call here.  This is provided by start_op.
1301 1305           * Delegation management requires grabbing s_lock
1302 1306           * first and then r_statev4_lock.
1303 1307           */
1304 1308  
1305 1309          if (np == NULL) {
1306 1310                  np = find_nfs4_server_all(mi, 1);
1307 1311                  if (np == NULL)
1308 1312                          return;
1309 1313                  need_rele = B_TRUE;
1310 1314          } else {
1311 1315                  mutex_enter(&np->s_lock);
1312 1316          }
1313 1317  
1314 1318          mutex_enter(&rp->r_statev4_lock);
1315 1319  
1316 1320          if (rp->r_deleg_type == OPEN_DELEGATE_NONE) {
1317 1321                  mutex_exit(&rp->r_statev4_lock);
1318 1322                  mutex_exit(&np->s_lock);
1319 1323                  if (need_rele)
1320 1324                          nfs4_server_rele(np);
1321 1325                  return;
1322 1326          }
1323 1327  
1324 1328          /*
1325 1329           * Free the cred originally held when
1326 1330           * the delegation was granted.  Caller must
1327 1331           * hold this cred if it wants to use it after
1328 1332           * this call.
1329 1333           */
1330 1334          crfree(rp->r_deleg_cred);
1331 1335          rp->r_deleg_cred = NULL;
1332 1336          rp->r_deleg_type = OPEN_DELEGATE_NONE;
1333 1337          rp->r_deleg_needs_recovery = OPEN_DELEGATE_NONE;
1334 1338          rp->r_deleg_needs_recall = FALSE;
1335 1339          rp->r_deleg_return_pending = FALSE;
1336 1340  
1337 1341          /*
1338 1342           * Remove the rnode from the server's list and
1339 1343           * update the ref counts.
1340 1344           */
1341 1345          list_remove(&np->s_deleg_list, rp);
1342 1346          mutex_exit(&rp->r_statev4_lock);
1343 1347          nfs4_dec_state_ref_count_nolock(np, mi);
1344 1348          mutex_exit(&np->s_lock);
1345 1349          /* removed list node removes a reference */
1346 1350          nfs4_server_rele(np);
1347 1351          if (need_rele)
1348 1352                  nfs4_server_rele(np);
1349 1353          if (ncg != NULL)
1350 1354                  ncg->nfs4_callback_stats.delegations.value.ui64--;
1351 1355  }
1352 1356  
1353 1357  void
1354 1358  nfs4delegreturn_cleanup(rnode4_t *rp, nfs4_server_t *np)
1355 1359  {
1356 1360          struct nfs4_callback_globals *ncg;
1357 1361  
1358 1362          if (np != NULL) {
1359 1363                  ncg = np->zone_globals;
1360 1364          } else if (nfs_zone() == VTOMI4(RTOV4(rp))->mi_zone) {
1361 1365                  ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone());
1362 1366                  ASSERT(ncg != NULL);
1363 1367          } else {
1364 1368                  /*
1365 1369                   * Request coming from the wrong zone.
1366 1370                   */
1367 1371                  ASSERT(getzoneid() == GLOBAL_ZONEID);
1368 1372                  ncg = NULL;
1369 1373          }
1370 1374  
1371 1375          nfs4delegreturn_cleanup_impl(rp, np, ncg);
1372 1376  }
1373 1377  
1374 1378  static void
1375 1379  nfs4delegreturn_save_lost_rqst(int error, nfs4_lost_rqst_t *lost_rqstp,
1376 1380      cred_t *cr, vnode_t *vp)
1377 1381  {
1378 1382          if (error != ETIMEDOUT && error != EINTR &&
1379 1383              !NFS4_FRC_UNMT_ERR(error, vp->v_vfsp)) {
1380 1384                  lost_rqstp->lr_op = 0;
1381 1385                  return;
1382 1386          }
1383 1387  
1384 1388          NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE,
1385 1389              "nfs4delegreturn_save_lost_rqst: error %d", error));
1386 1390  
1387 1391          lost_rqstp->lr_op = OP_DELEGRETURN;
1388 1392          /*
1389 1393           * The vp is held and rele'd via the recovery code.
1390 1394           * See nfs4_save_lost_rqst.
1391 1395           */
1392 1396          lost_rqstp->lr_vp = vp;
1393 1397          lost_rqstp->lr_dvp = NULL;
1394 1398          lost_rqstp->lr_oop = NULL;
1395 1399          lost_rqstp->lr_osp = NULL;
1396 1400          lost_rqstp->lr_lop = NULL;
1397 1401          lost_rqstp->lr_cr = cr;
1398 1402          lost_rqstp->lr_flk = NULL;
1399 1403          lost_rqstp->lr_putfirst = FALSE;
1400 1404  }
1401 1405  
1402 1406  static void
1403 1407  nfs4delegreturn_otw(rnode4_t *rp, cred_t *cr, nfs4_error_t *ep)
1404 1408  {
1405 1409          COMPOUND4args_clnt args;
1406 1410          COMPOUND4res_clnt res;
1407 1411          nfs_argop4 argops[3];
1408 1412          nfs4_ga_res_t *garp = NULL;
1409 1413          hrtime_t t;
1410 1414          int numops;
1411 1415          int doqueue = 1;
1412 1416  
1413 1417          args.ctag = TAG_DELEGRETURN;
1414 1418  
1415 1419          numops = 3;             /* PUTFH, GETATTR, DELEGRETURN */
1416 1420  
1417 1421          args.array = argops;
1418 1422          args.array_len = numops;
1419 1423  
1420 1424          argops[0].argop = OP_CPUTFH;
1421 1425          argops[0].nfs_argop4_u.opcputfh.sfh = rp->r_fh;
1422 1426  
1423 1427          argops[1].argop = OP_GETATTR;
1424 1428          argops[1].nfs_argop4_u.opgetattr.attr_request = NFS4_VATTR_MASK;
1425 1429          argops[1].nfs_argop4_u.opgetattr.mi = VTOMI4(RTOV4(rp));
1426 1430  
1427 1431          argops[2].argop = OP_DELEGRETURN;
1428 1432          argops[2].nfs_argop4_u.opdelegreturn.deleg_stateid =
1429 1433              rp->r_deleg_stateid;
1430 1434  
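        /*
         * Editorial note: GETATTR is sequenced before DELEGRETURN in
         * this compound so the attributes are fetched while the
         * delegation is still held; on success the reply, stamped with
         * the call time below, feeds the attribute cache.
         */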
1431 1435          t = gethrtime();
1432 1436          rfs4call(VTOMI4(RTOV4(rp)), &args, &res, cr, &doqueue, 0, ep);
1433 1437  
1434 1438          if (ep->error)
1435 1439                  return;
1436 1440  
1437 1441          if (res.status == NFS4_OK) {
1438 1442                  garp = &res.array[1].nfs_resop4_u.opgetattr.ga_res;
1439 1443                  nfs4_attr_cache(RTOV4(rp), garp, t, cr, TRUE, NULL);
1441 1445          }
1442 1446          xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
1443 1447  }
1444 1448  
1445 1449  int
1446 1450  nfs4_do_delegreturn(rnode4_t *rp, int flags, cred_t *cr,
1447 1451      struct nfs4_callback_globals *ncg)
1448 1452  {
1449 1453          vnode_t *vp = RTOV4(rp);
1450 1454          mntinfo4_t *mi = VTOMI4(vp);
1451 1455          nfs4_lost_rqst_t lost_rqst;
1452 1456          nfs4_recov_state_t recov_state;
1453 1457          bool_t needrecov = FALSE, recovonly, done = FALSE;
1454 1458          nfs4_error_t e = { 0, NFS4_OK, RPC_SUCCESS };
1455 1459  
1456 1460          ncg->nfs4_callback_stats.delegreturn.value.ui64++;
1457 1461  
1458 1462          while (!done) {
1459 1463                  e.error = nfs4_start_fop(mi, vp, NULL, OH_DELEGRETURN,
1460 1464                      &recov_state, &recovonly);
1461 1465  
1462 1466                  if (e.error) {
1463 1467                          if (flags & NFS4_DR_FORCE) {
1464 1468                                  (void) nfs_rw_enter_sig(&mi->mi_recovlock,
1465 1469                                      RW_READER, 0);
1466 1470                                  nfs4delegreturn_cleanup_impl(rp, NULL, ncg);
1467 1471                                  nfs_rw_exit(&mi->mi_recovlock);
1468 1472                          }
1469 1473                          break;
1470 1474                  }
1471 1475  
1472 1476                  /*
1473 1477                   * Check to see if the delegation has already been
1474 1478                   * returned by the recovery thread.   The state of
1475 1479                   * the delegation cannot change at this point due
1476 1480                   * to start_fop and the r_deleg_recall_lock.
1477 1481                   */
1478 1482                  if (rp->r_deleg_type == OPEN_DELEGATE_NONE) {
1479 1483                          e.error = 0;
1480 1484                          nfs4_end_op(mi, vp, NULL, &recov_state, needrecov);
1481 1485                          break;
1482 1486                  }
1483 1487  
1484 1488                  if (recovonly) {
1485 1489                          /*
1486 1490                           * Delegation will be returned via the
1487 1491                           * recovery framework.  Build a lost request
1488 1492                           * structure, start recovery and get out.
1489 1493                           */
1490 1494                          nfs4_error_init(&e, EINTR);
1491 1495                          nfs4delegreturn_save_lost_rqst(e.error, &lost_rqst,
1492 1496                              cr, vp);
1493 1497                          (void) nfs4_start_recovery(&e, mi, vp,
1494 1498                              NULL, &rp->r_deleg_stateid,
1495 1499                              lost_rqst.lr_op == OP_DELEGRETURN ?
1496 1500                              &lost_rqst : NULL, OP_DELEGRETURN, NULL,
1497 1501                              NULL, NULL);
1498 1502                          nfs4_end_op(mi, vp, NULL, &recov_state, needrecov);
1499 1503                          break;
1500 1504                  }
1501 1505  
1502 1506                  nfs4delegreturn_otw(rp, cr, &e);
1503 1507  
1504 1508                  /*
1505 1509                   * Ignore some errors on delegreturn; no point in marking
1506 1510                   * the file dead on a state destroying operation.
1507 1511                   */
1508 1512                  if (e.error == 0 && (nfs4_recov_marks_dead(e.stat) ||
1509 1513                      e.stat == NFS4ERR_BADHANDLE ||
1510 1514                      e.stat == NFS4ERR_STALE))
1511 1515                          needrecov = FALSE;
1512 1516                  else
1513 1517                          needrecov = nfs4_needs_recovery(&e, TRUE, vp->v_vfsp);
1514 1518  
1515 1519                  if (needrecov) {
1516 1520                          nfs4delegreturn_save_lost_rqst(e.error, &lost_rqst,
1517 1521                              cr, vp);
1518 1522                          (void) nfs4_start_recovery(&e, mi, vp,
1519 1523                              NULL, &rp->r_deleg_stateid,
1520 1524                              lost_rqst.lr_op == OP_DELEGRETURN ?
1521 1525                              &lost_rqst : NULL, OP_DELEGRETURN, NULL,
1522 1526                              NULL, NULL);
1523 1527                  } else {
1524 1528                          nfs4delegreturn_cleanup_impl(rp, NULL, ncg);
1525 1529                          done = TRUE;
1526 1530                  }
1527 1531  
1528 1532                  nfs4_end_op(mi, vp, NULL, &recov_state, needrecov);
1529 1533          }
1530 1534          return (e.error);
1531 1535  }
1532 1536  
1533 1537  /*
1534 1538   * nfs4_resend_delegreturn - used to drive the delegreturn
1535 1539   * operation via the recovery thread.
1536 1540   */
1537 1541  void
1538 1542  nfs4_resend_delegreturn(nfs4_lost_rqst_t *lorp, nfs4_error_t *ep,
1539 1543      nfs4_server_t *np)
1540 1544  {
1541 1545          rnode4_t *rp = VTOR4(lorp->lr_vp);
1542 1546  
1543 1547          /* If the file failed recovery, just quit. */
1544 1548          mutex_enter(&rp->r_statelock);
1545 1549          if (rp->r_flags & R4RECOVERR) {
1546 1550                  ep->error = EIO;
1547 1551          }
1548 1552          mutex_exit(&rp->r_statelock);
1549 1553  
1550 1554          if (!ep->error)
1551 1555                  nfs4delegreturn_otw(rp, lorp->lr_cr, ep);
1552 1556  
1553 1557          /*
1554 1558           * If recovery is now needed, then return the error
1555 1559           * and status and let the recovery thread handle it,
1556 1560           * including re-driving another delegreturn.  Otherwise,
1557 1561           * just give up and clean up the delegation.
1558 1562           */
1559 1563          if (nfs4_needs_recovery(ep, TRUE, lorp->lr_vp->v_vfsp))
1560 1564                  return;
1561 1565  
1562 1566          if (rp->r_deleg_type != OPEN_DELEGATE_NONE)
1563 1567                  nfs4delegreturn_cleanup(rp, np);
1564 1568  
1565 1569          nfs4_error_zinit(ep);
1566 1570  }
1567 1571  
1568 1572  /*
1569 1573   * nfs4delegreturn - general function to return a delegation.
1570 1574   *
1571 1575   * NFS4_DR_FORCE - return the delegation even if start_op fails
1572 1576   * NFS4_DR_PUSH - push modified data back to the server via VOP_PUTPAGE
1573 1577   * NFS4_DR_DISCARD - discard the delegation w/o delegreturn
1574 1578   * NFS4_DR_DID_OP - calling function already did nfs4_start_op
1575 1579   * NFS4_DR_RECALL - delegreturn initiated via CB_RECALL
1576 1580   * NFS4_DR_REOPEN - do file reopens, if applicable
1577 1581   */
1578 1582  static int
1579 1583  nfs4delegreturn_impl(rnode4_t *rp, int flags, struct nfs4_callback_globals *ncg)
1580 1584  {
1581 1585          int error = 0;
1582 1586          cred_t *cr = NULL;
1583 1587          vnode_t *vp;
1584 1588          bool_t needrecov = FALSE;
1585 1589          bool_t rw_entered = FALSE;
1586 1590          bool_t do_reopen;
1587 1591  
1588 1592          vp = RTOV4(rp);
1589 1593  
1590 1594          /*
1591 1595           * If NFS4_DR_DISCARD is set by itself, take a short-cut and
1592 1596           * discard without doing an otw DELEGRETURN.  This may only be used
1593 1597           * by the recovery thread because it bypasses the synchronization
1594 1598           * with r_deleg_recall_lock and mi->mi_recovlock.
1595 1599           */
1596 1600          if (flags == NFS4_DR_DISCARD) {
1597 1601                  nfs4delegreturn_cleanup_impl(rp, NULL, ncg);
1598 1602                  return (0);
1599 1603          }
1600 1604  
1601 1605          if (flags & NFS4_DR_DID_OP) {
1602 1606                  /*
1603 1607                   * Caller had already done start_op, which means the
1604 1608                   * r_deleg_recall_lock is already held in READ mode
1605 1609                   * so we cannot take it in write mode.  Return the
1606 1610                   * delegation asynchronously.
1607 1611                   *
1608 1612                   * Remove the NFS4_DR_DID_OP flag so we don't
1609 1613                   * get stuck looping through here.
1610 1614                   */
1611 1615                  VN_HOLD(vp);
1612 1616                  nfs4delegreturn_async(rp, (flags & ~NFS4_DR_DID_OP), FALSE);
1613 1617                  return (0);
1614 1618          }
1615 1619  
1616 1620          /*
1617 1621           * Verify we still have a delegation and crhold the credential.
1618 1622           */
1619 1623          mutex_enter(&rp->r_statev4_lock);
1620 1624          if (rp->r_deleg_type == OPEN_DELEGATE_NONE) {
1621 1625                  mutex_exit(&rp->r_statev4_lock);
1622 1626                  goto out;
1623 1627          }
1624 1628          cr = rp->r_deleg_cred;
1625 1629          ASSERT(cr != NULL);
1626 1630          crhold(cr);
1627 1631          mutex_exit(&rp->r_statev4_lock);
1628 1632  
1629 1633          /*
1630 1634           * Push the modified data back to the server synchronously
1631 1635           * before doing DELEGRETURN.
1632 1636           */
1633 1637          if (flags & NFS4_DR_PUSH)
1634 1638                  (void) VOP_PUTPAGE(vp, 0, 0, 0, cr, NULL);
1635 1639  
1636 1640          /*
1637 1641           * Take r_deleg_recall_lock in WRITE mode, this will prevent
1638 1642           * nfs4_is_otw_open_necessary from trying to use the delegation
1639 1643           * while the DELEGRETURN is in progress.
1640 1644           */
1641 1645          (void) nfs_rw_enter_sig(&rp->r_deleg_recall_lock, RW_WRITER, FALSE);
1642 1646  
1643 1647          rw_entered = TRUE;
1644 1648  
1645 1649          if (rp->r_deleg_type == OPEN_DELEGATE_NONE)
1646 1650                  goto out;
1647 1651  
1648 1652          if (flags & NFS4_DR_REOPEN) {
1649 1653                  /*
1650 1654                   * If R4RECOVERRP is already set, then skip re-opening
1651 1655                   * the delegation open streams and go straight to doing
1652 1656                   * delegreturn.  (XXX if the file has failed recovery, then the
1653 1657                   * delegreturn attempt is likely to be futile.)
1654 1658                   */
1655 1659                  mutex_enter(&rp->r_statelock);
1656 1660                  do_reopen = !(rp->r_flags & R4RECOVERRP);
1657 1661                  mutex_exit(&rp->r_statelock);
1658 1662  
1659 1663                  if (do_reopen) {
1660 1664                          error = deleg_reopen(vp, &needrecov, ncg, flags);
1661 1665                          if (error != 0) {
1662 1666                                  if ((flags & (NFS4_DR_FORCE | NFS4_DR_RECALL))
1663 1667                                      == 0)
1664 1668                                          goto out;
1665 1669                          } else if (needrecov) {
1666 1670                                  if ((flags & NFS4_DR_FORCE) == 0)
1667 1671                                          goto out;
1668 1672                          }
1669 1673                  }
1670 1674          }
1671 1675  
1672 1676          if (flags & NFS4_DR_DISCARD) {
1673 1677                  mntinfo4_t *mi = VTOMI4(RTOV4(rp));
1674 1678  
1675 1679                  mutex_enter(&rp->r_statelock);
1676 1680                  /*
1677 1681                   * deleg_return_pending is cleared inside of delegation_accept
1678 1682                   * when a delegation is accepted.  If this flag has been
1679 1683                   * cleared, then a new delegation has overwritten the one we
1680 1684                   * were about to throw away.
1681 1685                   */
1682 1686                  if (!rp->r_deleg_return_pending) {
1683 1687                          mutex_exit(&rp->r_statelock);
1684 1688                          goto out;
1685 1689                  }
1686 1690                  mutex_exit(&rp->r_statelock);
1687 1691                  (void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, FALSE);
1688 1692                  nfs4delegreturn_cleanup_impl(rp, NULL, ncg);
1689 1693                  nfs_rw_exit(&mi->mi_recovlock);
1690 1694          } else {
1691 1695                  error = nfs4_do_delegreturn(rp, flags, cr, ncg);
1692 1696          }
1693 1697  
1694 1698  out:
1695 1699          if (cr)
1696 1700                  crfree(cr);
1697 1701          if (rw_entered)
1698 1702                  nfs_rw_exit(&rp->r_deleg_recall_lock);
1699 1703          return (error);
1700 1704  }
1701 1705  
1702 1706  int
1703 1707  nfs4delegreturn(rnode4_t *rp, int flags)
1704 1708  {
1705 1709          struct nfs4_callback_globals *ncg;
1706 1710  
1707 1711          ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone());
1708 1712          ASSERT(ncg != NULL);
1709 1713  
1710 1714          return (nfs4delegreturn_impl(rp, flags, ncg));
1711 1715  }
1712 1716  
1713 1717  void
1714 1718  nfs4delegreturn_async(rnode4_t *rp, int flags, bool_t trunc)
1715 1719  {
1716 1720          struct cb_recall_pass *pp;
1717 1721  
1718 1722          pp = kmem_alloc(sizeof (struct cb_recall_pass), KM_SLEEP);
1719 1723          pp->rp = rp;
1720 1724          pp->flags = flags;
1721 1725          pp->truncate = trunc;
1722 1726  
1723 1727          /*
1724 1728           * Fire up a thread to do the actual delegreturn
1725 1729           * Caller must guarantee that the rnode doesn't
1726 1730           * vanish (by calling VN_HOLD).
1727 1731           */
1728 1732  
1729 1733          (void) zthread_create(NULL, 0, nfs4delegreturn_thread, pp, 0,
1730 1734              minclsyspri);
1731 1735  }
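
/*
 * Editorial sketch: the two entry points above as their callers in this
 * file use them (hypothetical caller, shown for shape only).
 *
 * Synchronous return, flushing dirty pages and reopening delegation
 * open streams first:
 *
 *	error = nfs4delegreturn(rp, NFS4_DR_PUSH|NFS4_DR_REOPEN);
 *
 * Asynchronous return; the caller must VN_HOLD the vnode so the rnode
 * cannot vanish before the worker thread runs:
 *
 *	VN_HOLD(vp);
 *	nfs4delegreturn_async(rp, NFS4_DR_PUSH|NFS4_DR_REOPEN, FALSE);
 */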
1732 1736  
1733 1737  static void
1734 1738  delegreturn_all_thread(rpcprog_t *pp)
1735 1739  {
1736 1740          nfs4_server_t *np;
1737 1741          bool_t found = FALSE;
1738 1742          rpcprog_t prog;
1739 1743          rnode4_t *rp;
1740 1744          vnode_t *vp;
1741 1745          zoneid_t zoneid = getzoneid();
1742 1746          struct nfs4_callback_globals *ncg;
1743 1747  
1744 1748          NFS4_DEBUG(nfs4_drat_debug,
1745 1749              (CE_NOTE, "delegreturn_all_thread: prog %d\n", *pp));
1746 1750  
1747 1751          prog = *pp;
1748 1752          kmem_free(pp, sizeof (*pp));
1749 1753          pp = NULL;
1750 1754  
1751 1755          mutex_enter(&nfs4_server_lst_lock);
1752 1756          for (np = nfs4_server_lst.forw; np != &nfs4_server_lst; np = np->forw) {
1753 1757                  if (np->zoneid == zoneid && np->s_program == prog) {
1754 1758                          mutex_enter(&np->s_lock);
1755 1759                          found = TRUE;
1756 1760                          break;
1757 1761                  }
1758 1762          }
1759 1763          mutex_exit(&nfs4_server_lst_lock);
1760 1764  
1761 1765          /*
1762 1766           * It's possible that the nfs4_server which was using this
1763 1767           * program number has vanished since this thread is async.
1764 1768           * If so, just return.  Your work here is finished, my friend.
1765 1769           */
1766 1770          if (!found)
1767 1771                  goto out;
1768 1772  
1769 1773          ncg = np->zone_globals;
1770 1774          while ((rp = list_head(&np->s_deleg_list)) != NULL) {
1771 1775                  vp = RTOV4(rp);
1772 1776                  VN_HOLD(vp);
1773 1777                  mutex_exit(&np->s_lock);
1774 1778                  (void) nfs4delegreturn_impl(rp, NFS4_DR_PUSH|NFS4_DR_REOPEN,
1775 1779                      ncg);
1776 1780                  VN_RELE(vp);
1777 1781  
1778 1782                  /* retake the s_lock for next trip through the loop */
1779 1783                  mutex_enter(&np->s_lock);
1780 1784          }
1781 1785          mutex_exit(&np->s_lock);
1782 1786  out:
1783 1787          NFS4_DEBUG(nfs4_drat_debug,
1784 1788              (CE_NOTE, "delegreturn_all_thread: complete\n"));
1785 1789          zthread_exit();
1786 1790  }
1787 1791  
1788 1792  void
1789 1793  nfs4_delegreturn_all(nfs4_server_t *sp)
1790 1794  {
1791 1795          rpcprog_t pro, *pp;
1792 1796  
1793 1797          mutex_enter(&sp->s_lock);
1794 1798  
1795 1799          /* Check to see if the delegation list is empty */
1796 1800  
1797 1801          if (list_head(&sp->s_deleg_list) == NULL) {
1798 1802                  mutex_exit(&sp->s_lock);
1799 1803                  return;
1800 1804          }
1801 1805          /*
1802 1806           * Grab the program number; the async thread will use this
1803 1807           * to find the nfs4_server.
1804 1808           */
1805 1809          pro = sp->s_program;
1806 1810          mutex_exit(&sp->s_lock);
1807 1811          pp = kmem_alloc(sizeof (rpcprog_t), KM_SLEEP);
1808 1812          *pp = pro;
1809 1813          (void) zthread_create(NULL, 0, delegreturn_all_thread, pp, 0,
1810 1814              minclsyspri);
1811 1815  }
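
/*
 * Editorial note: nfs4_delegreturn_all() hands the async thread the RPC
 * program number rather than the nfs4_server_t pointer.  The pointer
 * could dangle once s_lock is dropped, while the program number lets
 * delegreturn_all_thread() re-find the server (or discover that it has
 * vanished) under nfs4_server_lst_lock.
 */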
1812 1816  
1813 1817  
1814 1818  /*
1815 1819   * Discard any delegations
1816 1820   *
1817 1821   * Iterate over the servers s_deleg_list and
1818 1822   * for matching mount-point rnodes discard
1819 1823   * the delegation.
1820 1824   */
1821 1825  void
1822 1826  nfs4_deleg_discard(mntinfo4_t *mi, nfs4_server_t *sp)
1823 1827  {
1824 1828          rnode4_t *rp, *next;
1825 1829          mntinfo4_t *r_mi;
1826 1830          struct nfs4_callback_globals *ncg;
1827 1831  
1828 1832          ASSERT(mutex_owned(&sp->s_lock));
1829 1833          ncg = sp->zone_globals;
1830 1834  
1831 1835          for (rp = list_head(&sp->s_deleg_list); rp != NULL; rp = next) {
1832 1836                  r_mi = VTOMI4(RTOV4(rp));
1833 1837                  next = list_next(&sp->s_deleg_list, rp);
1834 1838  
1835 1839                  if (r_mi != mi) {
1836 1840                          /*
1837 1841                           * Skip if this rnode is not on the
1838 1842                           * same mount-point
1839 1843                           */
1840 1844                          continue;
1841 1845                  }
1842 1846  
1843 1847                  ASSERT(rp->r_deleg_type == OPEN_DELEGATE_READ);
1844 1848  
1845 1849  #ifdef DEBUG
1846 1850                  if (nfs4_client_recov_debug) {
1847 1851                          zprintf(getzoneid(),
1848 1852                              "nfs4_deleg_discard: matched rnode %p "
1849 1853                              "-- discarding delegation\n", (void *)rp);
1850 1854                  }
1851 1855  #endif
1852 1856                  mutex_enter(&rp->r_statev4_lock);
1853 1857                  /*
1854 1858                   * Free the cred originally held when the delegation
1855 1859                   * was granted. Also need to decrement the refcnt
1856 1860                   * on this server for each delegation we discard
1857 1861                   */
1858 1862                  if (rp->r_deleg_cred)
1859 1863                          crfree(rp->r_deleg_cred);
1860 1864                  rp->r_deleg_cred = NULL;
1861 1865                  rp->r_deleg_type = OPEN_DELEGATE_NONE;
1862 1866                  rp->r_deleg_needs_recovery = OPEN_DELEGATE_NONE;
1863 1867                  rp->r_deleg_needs_recall = FALSE;
1864 1868                  ASSERT(sp->s_refcnt > 1);
1865 1869                  sp->s_refcnt--;
1866 1870                  list_remove(&sp->s_deleg_list, rp);
1867 1871                  mutex_exit(&rp->r_statev4_lock);
1868 1872                  nfs4_dec_state_ref_count_nolock(sp, mi);
1869 1873                  ncg->nfs4_callback_stats.delegations.value.ui64--;
1870 1874          }
1871 1875  }
1872 1876  
1873 1877  /*
1874 1878   * Reopen any open streams that were covered by the given file's
1875 1879   * delegation.
1876 1880   * Returns zero or an errno value.  If there was no error, *recovp
1877 1881   * indicates whether recovery was initiated.
1878 1882   */
1879 1883  
1880 1884  static int
1881 1885  deleg_reopen(vnode_t *vp, bool_t *recovp, struct nfs4_callback_globals *ncg,
1882 1886      int flags)
1883 1887  {
1884 1888          nfs4_open_stream_t *osp;
1885 1889          nfs4_recov_state_t recov_state;
1886 1890          bool_t needrecov = FALSE;
1887 1891          mntinfo4_t *mi;
1888 1892          rnode4_t *rp;
1889 1893          nfs4_error_t e = { 0, NFS4_OK, RPC_SUCCESS };
1890 1894          int claimnull;
1891 1895  
1892 1896          mi = VTOMI4(vp);
1893 1897          rp = VTOR4(vp);
1894 1898  
1895 1899          recov_state.rs_flags = 0;
1896 1900          recov_state.rs_num_retry_despite_err = 0;
1897 1901  
1898 1902  retry:
1899 1903          if ((e.error = nfs4_start_op(mi, vp, NULL, &recov_state)) != 0) {
1900 1904                  return (e.error);
1901 1905          }
1902 1906  
1903 1907          /*
1904 1908           * If we mean to discard the delegation, it must be BAD, so don't
1905 1909           * use it when doing the reopen or it will fail too.
1906 1910           */
1907 1911          claimnull = (flags & NFS4_DR_DISCARD);
1908 1912          /*
1909 1913           * Loop through the open streams for this rnode to find
1910 1914           * all of the ones created using the delegation state ID.
1911 1915           * Each of these needs to be re-opened.
1912 1916           */
1913 1917  
1914 1918          while ((osp = get_next_deleg_stream(rp, claimnull)) != NULL) {
1915 1919  
1916 1920                  if (claimnull) {
1917 1921                          nfs4_reopen(vp, osp, &e, CLAIM_NULL, FALSE, FALSE);
1918 1922                  } else {
1919 1923                          ncg->nfs4_callback_stats.claim_cur.value.ui64++;
1920 1924  
1921 1925                          nfs4_reopen(vp, osp, &e, CLAIM_DELEGATE_CUR, FALSE,
1922 1926                              FALSE);
1923 1927                          if (e.error == 0 && e.stat == NFS4_OK)
1924 1928                                  ncg->nfs4_callback_stats.
1925 1929                                      claim_cur_ok.value.ui64++;
1926 1930                  }
1927 1931  
1928 1932                  if (e.error == EAGAIN) {
1929 1933                          open_stream_rele(osp, rp);
1930 1934                          nfs4_end_op(mi, vp, NULL, &recov_state, TRUE);
1931 1935                          goto retry;
1932 1936                  }
1933 1937  
1934 1938                  /*
1935 1939                   * If error is EINTR, ETIMEDOUT, or NFS4_FRC_UNMT_ERR, then
1936 1940                   * recovery has already been started inside of nfs4_reopen.
1937 1941                   */
1938 1942                  if (e.error == EINTR || e.error == ETIMEDOUT ||
1939 1943                      NFS4_FRC_UNMT_ERR(e.error, vp->v_vfsp)) {
1940 1944                          open_stream_rele(osp, rp);
1941 1945                          break;
1942 1946                  }
1943 1947  
1944 1948                  needrecov = nfs4_needs_recovery(&e, TRUE, vp->v_vfsp);
1945 1949  
1946 1950                  if (e.error != 0 && !needrecov) {
1947 1951                          /*
1948 1952                           * Recovery is not possible, but don't give up yet;
1949 1953                           * we'd still like to do delegreturn after
1950 1954                           * reopening as many streams as possible.
1951 1955                           * Continue processing the open streams.
1952 1956                           */
1953 1957  
1954 1958                          ncg->nfs4_callback_stats.recall_failed.value.ui64++;
1955 1959  
1956 1960                  } else if (needrecov) {
1957 1961                          /*
1958 1962                           * Start recovery and bail out.  The recovery
1959 1963                           * thread will take it from here.
1960 1964                           */
1961 1965                          (void) nfs4_start_recovery(&e, mi, vp, NULL, NULL,
1962 1966                              NULL, OP_OPEN, NULL, NULL, NULL);
1963 1967                          open_stream_rele(osp, rp);
1964 1968                          *recovp = TRUE;
1965 1969                          break;
1966 1970                  }
1967 1971  
1968 1972                  open_stream_rele(osp, rp);
1969 1973          }
1970 1974  
1971 1975          nfs4_end_op(mi, vp, NULL, &recov_state, needrecov);
1972 1976  
1973 1977          return (e.error);
1974 1978  }
1975 1979  
1976 1980  /*
1977 1981   * get_next_deleg_stream - returns the next open stream which
1978 1982   * represents a delegation for this rnode.  In order to assure
1979 1983   * forward progress, the caller must guarantee that each open
1980 1984   * stream returned is changed so that a future call won't return
1981 1985   * it again.
1982 1986   *
1983 1987   * There are several ways for the open stream to change.  If the open
1984 1988   * stream is !os_delegation, then we aren't interested in it.  Also, if
1985 1989   * either os_failed_reopen or !os_valid, then don't return the osp.
1986 1990   *
1987 1991   * If claimnull is false (doing reopen CLAIM_DELEGATE_CUR) then return
1988 1992   * the osp if it is an os_delegation open stream.  Also, if the rnode still
1989 1993   * has r_deleg_return_pending, then return the os_delegation osp.  Lastly,
1990 1994   * if the rnode's r_deleg_stateid is different from the osp's open_stateid,
1991 1995   * then return the osp.
1992 1996   *
1993 1997   * We have already taken the 'r_deleg_recall_lock' as WRITER, which
1994 1998   * prevents new OPENs from going OTW (as start_fop takes this
1995 1999   * lock in READ mode); thus, no new open streams can be created
1996 2000   * (which inherently means no new delegation open streams are
1997 2001   * being created).
1998 2002   */
1999 2003  
2000 2004  static nfs4_open_stream_t *
2001 2005  get_next_deleg_stream(rnode4_t *rp, int claimnull)
2002 2006  {
2003 2007          nfs4_open_stream_t      *osp;
2004 2008  
2005 2009          ASSERT(nfs_rw_lock_held(&rp->r_deleg_recall_lock, RW_WRITER));
2006 2010  
2007 2011          /*
2008 2012           * Search through the list of open streams looking for
2009 2013           * one that was created while holding the delegation.
2010 2014           */
2011 2015          mutex_enter(&rp->r_os_lock);
2012 2016          for (osp = list_head(&rp->r_open_streams); osp != NULL;
2013 2017              osp = list_next(&rp->r_open_streams, osp)) {
2014 2018                  mutex_enter(&osp->os_sync_lock);
2015 2019                  if (!osp->os_delegation || osp->os_failed_reopen ||
2016 2020                      !osp->os_valid) {
2017 2021                          mutex_exit(&osp->os_sync_lock);
2018 2022                          continue;
2019 2023                  }
2020 2024                  if (!claimnull || rp->r_deleg_return_pending ||
2021 2025                      !stateid4_cmp(&osp->open_stateid, &rp->r_deleg_stateid)) {
2022 2026                          osp->os_ref_count++;
2023 2027                          mutex_exit(&osp->os_sync_lock);
2024 2028                          mutex_exit(&rp->r_os_lock);
2025 2029                          return (osp);
2026 2030                  }
2027 2031                  mutex_exit(&osp->os_sync_lock);
2028 2032          }
2029 2033          mutex_exit(&rp->r_os_lock);
2030 2034  
2031 2035          return (NULL);
2032 2036  }
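
/*
 * Editorial note: the forward-progress contract above appears to be met
 * by the consumer in deleg_reopen(): every osp returned is handed to
 * nfs4_reopen(), which either updates the stream's state on success or
 * sets os_failed_reopen on failure, so a subsequent scan skips it.
 */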
2033 2037  
2034 2038  static void
2035 2039  nfs4delegreturn_thread(struct cb_recall_pass *args)
2036 2040  {
2037 2041          rnode4_t *rp;
2038 2042          vnode_t *vp;
2039 2043          cred_t *cr;
2040 2044          int dtype, error, flags;
2041 2045          bool_t rdirty, rip;
2042 2046          kmutex_t cpr_lock;
2043 2047          callb_cpr_t cpr_info;
2044 2048          struct nfs4_callback_globals *ncg;
2045 2049  
2046 2050          ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone());
2047 2051          ASSERT(ncg != NULL);
2048 2052  
2049 2053          mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL);
2050 2054  
2051 2055          CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr,
2052 2056              "nfsv4delegRtn");
2053 2057  
2054 2058          rp = args->rp;
2055 2059          vp = RTOV4(rp);
2056 2060  
2057 2061          mutex_enter(&rp->r_statev4_lock);
2058 2062          if (rp->r_deleg_type == OPEN_DELEGATE_NONE) {
2059 2063                  mutex_exit(&rp->r_statev4_lock);
2060 2064                  goto out;
2061 2065          }
2062 2066          mutex_exit(&rp->r_statev4_lock);
2063 2067  
2064 2068          /*
2065 2069           * Take the read-write lock in read mode to prevent other
2066 2070           * threads from modifying the data during the recall.  This
2067 2071           * doesn't affect mmappers.
2068 2072           */
2069 2073          (void) nfs_rw_enter_sig(&rp->r_rwlock, RW_READER, FALSE);
2070 2074  
2071 2075          /* Proceed with delegreturn */
2072 2076  
2073 2077          mutex_enter(&rp->r_statev4_lock);
2074 2078          if (rp->r_deleg_type == OPEN_DELEGATE_NONE) {
2075 2079                  mutex_exit(&rp->r_statev4_lock);
2076 2080                  nfs_rw_exit(&rp->r_rwlock);
2077 2081                  goto out;
2078 2082          }
2079 2083          dtype = rp->r_deleg_type;
2080 2084          cr = rp->r_deleg_cred;
2081 2085          ASSERT(cr != NULL);
2082 2086          crhold(cr);
2083 2087          mutex_exit(&rp->r_statev4_lock);
2084 2088  
2085 2089          flags = args->flags;
2086 2090  
2087 2091          /*
2088 2092           * If the file is being truncated at the server, then throw
2089 2093           * away all of the pages, it doesn't matter what flavor of
2090 2094           * delegation we have.
2091 2095           */
2092 2096  
2093 2097          if (args->truncate) {
2094 2098                  ncg->nfs4_callback_stats.recall_trunc.value.ui64++;
2095 2099                  nfs4_invalidate_pages(vp, 0, cr);
2096 2100          } else if (dtype == OPEN_DELEGATE_WRITE) {
2097 2101  
2098 2102                  mutex_enter(&rp->r_statelock);
2099 2103                  rdirty = rp->r_flags & R4DIRTY;
2100 2104                  mutex_exit(&rp->r_statelock);
2101 2105  
2102 2106                  if (rdirty) {
2103 2107                          error = VOP_PUTPAGE(vp, 0, 0, 0, cr, NULL);
2104 2108  
2105 2109                          if (error)
2106 2110                                  CB_WARN1("nfs4delegreturn_thread:"
2107 2111                                      " VOP_PUTPAGE: %d\n", error);
2108 2112                  }
2109 2113                  /* turn off NFS4_DR_PUSH because we just did that above. */
2110 2114                  flags &= ~NFS4_DR_PUSH;
2111 2115          }
2112 2116  
2113 2117          mutex_enter(&rp->r_statelock);
2114 2118          rip = rp->r_flags & R4RECOVERRP;
2115 2119          mutex_exit(&rp->r_statelock);
2116 2120  
2117 2121          /* If a failed recovery is indicated, discard the pages */
2118 2122  
2119 2123          if (rip) {
2120 2124  
2121 2125                  error = VOP_PUTPAGE(vp, 0, 0, B_INVAL, cr, NULL);
2122 2126  
2123 2127                  if (error)
2124 2128                          CB_WARN1("nfs4delegreturn_thread: VOP_PUTPAGE: %d\n",
2125 2129                              error);
2126 2130          }
2127 2131  
2128 2132          /*
2129 2133           * Pass the flags to nfs4delegreturn_impl, but be sure not to pass
2130 2134           * NFS4_DR_DID_OP, which just calls nfs4delegreturn_async again.
2131 2135           */
2132 2136          flags &= ~NFS4_DR_DID_OP;
2133 2137  
2134 2138          (void) nfs4delegreturn_impl(rp, flags, ncg);
2135 2139  
2136 2140          nfs_rw_exit(&rp->r_rwlock);
2137 2141          crfree(cr);
2138 2142  out:
2139 2143          kmem_free(args, sizeof (struct cb_recall_pass));
2140 2144          VN_RELE(vp);
2141 2145          mutex_enter(&cpr_lock);
2142 2146          CALLB_CPR_EXIT(&cpr_info);
2143 2147          mutex_destroy(&cpr_lock);
2144 2148          zthread_exit();
2145 2149  }
2146 2150  
2147 2151  /*
2148 2152   * This function has one assumption that the caller of this function is
2149 2153   * either doing recovery (therefore cannot call nfs4_start_op) or has
2150 2154   * already called nfs4_start_op().
2151 2155   */
2152 2156  void
2153 2157  nfs4_delegation_accept(rnode4_t *rp, open_claim_type4 claim, OPEN4res *res,
2154 2158      nfs4_ga_res_t *garp, cred_t *cr)
2155 2159  {
2156 2160          open_read_delegation4 *orp;
2157 2161          open_write_delegation4 *owp;
2158 2162          nfs4_server_t *np;
2159 2163          bool_t already = FALSE;
2160 2164          bool_t recall = FALSE;
2161 2165          bool_t valid_garp = TRUE;
2162 2166          bool_t delegation_granted = FALSE;
2163 2167          bool_t dr_needed = FALSE;
2164 2168          bool_t recov;
2165 2169          int dr_flags = 0;
2166 2170          long mapcnt;
2167 2171          uint_t rflag;
2168 2172          mntinfo4_t *mi;
2169 2173          struct nfs4_callback_globals *ncg;
2170 2174          open_delegation_type4 odt;
2171 2175  
2172 2176          ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone());
2173 2177          ASSERT(ncg != NULL);
2174 2178  
2175 2179          mi = VTOMI4(RTOV4(rp));
2176 2180  
2177 2181          /*
2178 2182           * Accept a delegation granted to the client via an OPEN.
2179 2183           * Set the delegation fields in the rnode and insert the
2180 2184           * rnode onto the list anchored in the nfs4_server_t.  The
2181 2185           * proper locking order requires the nfs4_server_t first,
2182 2186           * even though it may not be needed in all cases.
2183 2187           *
2184 2188           * NB: find_nfs4_server returns with s_lock held.
2185 2189           */
2186 2190  
2187 2191          if ((np = find_nfs4_server(mi)) == NULL)
2188 2192                  return;
2189 2193  
2190 2194          /* grab the statelock too, for examining r_mapcnt */
2191 2195          mutex_enter(&rp->r_statelock);
2192 2196          mutex_enter(&rp->r_statev4_lock);
2193 2197  
2194 2198          if (rp->r_deleg_type == OPEN_DELEGATE_READ ||
2195 2199              rp->r_deleg_type == OPEN_DELEGATE_WRITE)
2196 2200                  already = TRUE;
2197 2201  
2198 2202          odt = res->delegation.delegation_type;
2199 2203  
2200 2204          if (odt == OPEN_DELEGATE_READ) {
2201 2205  
2202 2206                  rp->r_deleg_type = res->delegation.delegation_type;
2203 2207                  orp = &res->delegation.open_delegation4_u.read;
2204 2208                  rp->r_deleg_stateid = orp->stateid;
2205 2209                  rp->r_deleg_perms = orp->permissions;
2206 2210                  if (claim == CLAIM_PREVIOUS)
2207 2211                          if ((recall = orp->recall) != 0)
2208 2212                                  dr_needed = TRUE;
2209 2213  
2210 2214                  delegation_granted = TRUE;
2211 2215  
2212 2216                  ncg->nfs4_callback_stats.delegations.value.ui64++;
2213 2217                  ncg->nfs4_callback_stats.delegaccept_r.value.ui64++;
2214 2218  
2215 2219          } else if (odt == OPEN_DELEGATE_WRITE) {
2216 2220  
2217 2221                  rp->r_deleg_type = res->delegation.delegation_type;
2218 2222                  owp = &res->delegation.open_delegation4_u.write;
2219 2223                  rp->r_deleg_stateid = owp->stateid;
2220 2224                  rp->r_deleg_perms = owp->permissions;
2221 2225                  rp->r_deleg_limit = owp->space_limit;
2222 2226                  if (claim == CLAIM_PREVIOUS)
2223 2227                          if ((recall = owp->recall) != 0)
2224 2228                                  dr_needed = TRUE;
2225 2229  
2226 2230                  delegation_granted = TRUE;
2227 2231  
2228 2232                  if (garp == NULL || !garp->n4g_change_valid) {
2229 2233                          valid_garp = FALSE;
2230 2234                          rp->r_deleg_change = 0;
2231 2235                          rp->r_deleg_change_grant = 0;
2232 2236                  } else {
2233 2237                          rp->r_deleg_change = garp->n4g_change;
2234 2238                          rp->r_deleg_change_grant = garp->n4g_change;
2235 2239                  }
2236 2240                  mapcnt = rp->r_mapcnt;
2237 2241                  rflag = rp->r_flags;
2238 2242  
2239 2243                  /*
2240 2244                   * Update the delegation change attribute if
2241 2245                   * there are mappers or the file is dirty.  This
2242 2246                   * might be the case during recovery after server
2243 2247                   * reboot.
2244 2248                   */
2245 2249                  if (mapcnt > 0 || rflag & R4DIRTY)
2246 2250                          rp->r_deleg_change++;
2247 2251  
2248 2252                  NFS4_DEBUG(nfs4_callback_debug, (CE_NOTE,
2249 2253                      "nfs4_delegation_accept: r_deleg_change: 0x%x\n",
2250 2254                      (int)(rp->r_deleg_change >> 32)));
2251 2255                  NFS4_DEBUG(nfs4_callback_debug, (CE_NOTE,
2252 2256                      "nfs4_delegation_accept: r_deleg_change_grant: 0x%x\n",
2253 2257                      (int)(rp->r_deleg_change_grant >> 32)));
2254 2258  
2256 2260                  ncg->nfs4_callback_stats.delegations.value.ui64++;
2257 2261                  ncg->nfs4_callback_stats.delegaccept_rw.value.ui64++;
2258 2262          } else if (already) {
2259 2263                  /*
2260 2264                   * No delegation granted.  If the rnode currently has
2261 2265                   * one, then consider it tainted and return it.
2262 2266                   */
2263 2267                  dr_needed = TRUE;
2264 2268          }
2265 2269  
2266 2270          if (delegation_granted) {
2267 2271                  /* Add the rnode to the list. */
2268 2272                  if (!already) {
2269 2273                          crhold(cr);
2270 2274                          rp->r_deleg_cred = cr;
2271 2275  
2272 2276                          ASSERT(mutex_owned(&np->s_lock));
2273 2277                          list_insert_head(&np->s_deleg_list, rp);
2274 2278                          /* added list node gets a reference */
2275 2279                          np->s_refcnt++;
2276 2280                          nfs4_inc_state_ref_count_nolock(np, mi);
2277 2281                  }
2278 2282                  rp->r_deleg_needs_recovery = OPEN_DELEGATE_NONE;
2279 2283          }
2280 2284  
2281 2285          /*
2282 2286           * We've now safely accepted the delegation, if any.  Drop the
2283 2287           * locks and figure out what post-processing is needed.  We'd
2284 2288           * like to retain r_statev4_lock, but nfs4_server_rele takes
2285 2289           * s_lock which would be a lock ordering violation.
2286 2290           */
2287 2291          mutex_exit(&rp->r_statev4_lock);
2288 2292          mutex_exit(&rp->r_statelock);
2289 2293          mutex_exit(&np->s_lock);
2290 2294          nfs4_server_rele(np);
2291 2295  
2292 2296          /*
2293 2297           * Check to see if we are in recovery.  Remember that
2294 2298           * this function is protected by start_op, so a recovery
2295 2299           * cannot begin until we are out of here.
2296 2300           */
2297 2301          mutex_enter(&mi->mi_lock);
2298 2302          recov = mi->mi_recovflags & MI4_RECOV_ACTIV;
2299 2303          mutex_exit(&mi->mi_lock);
2300 2304  
2301 2305          mutex_enter(&rp->r_statev4_lock);
2302 2306  
2303 2307          if (nfs4_delegreturn_policy == IMMEDIATE || !valid_garp)
2304 2308                  dr_needed = TRUE;
2305 2309  
2306 2310          if (dr_needed && rp->r_deleg_return_pending == FALSE) {
2307 2311                  if (recov) {
2308 2312                          /*
2309 2313                           * We cannot call delegreturn from inside
2310 2314                           * of recovery or VOP_PUTPAGE will hang
2311 2315                           * due to nfs4_start_fop call in
2312 2316                           * nfs4write.  Use dlistadd to add the
2313 2317                           * rnode to the list of rnodes needing
2314 2318                           * cleaning.  We do not need to do reopen
2315 2319                           * here because recov_openfiles will do it.
2316 2320                           * In the non-recall case, just discard the
2317 2321                           * delegation as it is no longer valid.
2318 2322                           */
2319 2323                          if (recall)
2320 2324                                  dr_flags = NFS4_DR_PUSH;
2321 2325                          else
2322 2326                                  dr_flags = NFS4_DR_PUSH|NFS4_DR_DISCARD;
2323 2327  
2324 2328                          nfs4_dlistadd(rp, ncg, dr_flags);
2325 2329                          dr_flags = 0;
2326 2330                  } else {
2327 2331                          /*
2328 2332                           * Push the modified data back to the server,
2329 2333                           * reopen any delegation open streams, and return
2330 2334                           * the delegation.  Drop the statev4_lock first!
2331 2335                           */
2332 2336                          dr_flags =  NFS4_DR_PUSH|NFS4_DR_DID_OP|NFS4_DR_REOPEN;
2333 2337                  }
2334 2338          }
2335 2339          mutex_exit(&rp->r_statev4_lock);
2336 2340          if (dr_flags)
2337 2341                  (void) nfs4delegreturn_impl(rp, dr_flags, ncg);
2338 2342  }
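
/*
 * Editorial note on the lock ordering in nfs4_delegation_accept() above,
 * outermost first: the nfs4_server_t's s_lock (held on return from
 * find_nfs4_server), then r_statelock, then r_statev4_lock.  Since
 * nfs4_server_rele() itself takes s_lock, all three are dropped before
 * the rele rather than retaining r_statev4_lock across it.
 */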
2339 2343  
2340 2344  /*
2341 2345   * nfs4delegabandon - Abandon the delegation on an rnode4.  This code
2342 2346   * is called when the client receives EXPIRED, BAD_STATEID, OLD_STATEID
2343 2347   * or BADSEQID and the recovery code is unable to recover.  Push any
2344 2348   * dirty data back to the server and return the delegation (if any).
2345 2349   */
2346 2350  
2347 2351  void
2348 2352  nfs4delegabandon(rnode4_t *rp)
2349 2353  {
2350 2354          vnode_t *vp;
2351 2355          struct cb_recall_pass *pp;
2352 2356          open_delegation_type4 dt;
2353 2357  
2354 2358          mutex_enter(&rp->r_statev4_lock);
2355 2359          dt = rp->r_deleg_type;
2356 2360          mutex_exit(&rp->r_statev4_lock);
2357 2361  
2358 2362          if (dt == OPEN_DELEGATE_NONE)
2359 2363                  return;
2360 2364  
2361 2365          vp = RTOV4(rp);
2362 2366          VN_HOLD(vp);
2363 2367  
2364 2368          pp = kmem_alloc(sizeof (struct cb_recall_pass), KM_SLEEP);
2365 2369          pp->rp = rp;
2366 2370          /*
2367 2371           * Recovery on the file has failed and we want to return
2368 2372           * the delegation.  We don't want to reopen files and
2369 2373           * nfs4delegreturn_thread() figures out what to do about
2370 2374           * the data.  The only thing to do is attempt to return
2371 2375           * the delegation.
2372 2376           */
2373 2377          pp->flags = 0;
2374 2378          pp->truncate = FALSE;
2375 2379  
2376 2380          /*
2377 2381           * Fire up a thread to do the delegreturn; this is
2378 2382           * necessary because we could be inside a GETPAGE or
2379 2383           * PUTPAGE and we cannot do another one.
2380 2384           */
2381 2385  
2382 2386          (void) zthread_create(NULL, 0, nfs4delegreturn_thread, pp, 0,
2383 2387              minclsyspri);
2384 2388  }
2385 2389  
2386 2390  static int
2387 2391  wait_for_recall1(vnode_t *vp, nfs4_op_hint_t op, nfs4_recov_state_t *rsp,
2388 2392      int flg)
2389 2393  {
2390 2394          rnode4_t *rp;
2391 2395          int error = 0;
2392 2396  
2393 2397  #ifdef lint
2394 2398          op = op;
2395 2399  #endif
2396 2400  
2397 2401          if (vp && vp->v_type == VREG) {
2398 2402                  rp = VTOR4(vp);
2399 2403  
2400 2404                  /*
2401 2405                   * Take r_deleg_recall_lock in read mode to synchronize
2402 2406                   * with delegreturn.
2403 2407                   */
2404 2408                  error = nfs_rw_enter_sig(&rp->r_deleg_recall_lock,
2405 2409                      RW_READER, INTR4(vp));
2406 2410  
2407 2411                  if (error == 0)
2408 2412                          rsp->rs_flags |= flg;
2409 2413  
2410 2414          }
2411 2415          return (error);
2412 2416  }
2413 2417  
2414 2418  void
2415 2419  nfs4_end_op_recall(vnode_t *vp1, vnode_t *vp2, nfs4_recov_state_t *rsp)
2416 2420  {
2417 2421          NFS4_DEBUG(nfs4_recall_debug,
2418 2422              (CE_NOTE, "nfs4_end_op_recall: 0x%p, 0x%p\n",
2419 2423              (void *)vp1, (void *)vp2));
2420 2424  
2421 2425          if (vp2 && rsp->rs_flags & NFS4_RS_RECALL_HELD2)
2422 2426                  nfs_rw_exit(&VTOR4(vp2)->r_deleg_recall_lock);
2423 2427          if (vp1 && rsp->rs_flags & NFS4_RS_RECALL_HELD1)
2424 2428                  nfs_rw_exit(&VTOR4(vp1)->r_deleg_recall_lock);
2425 2429  }
2426 2430  
2427 2431  int
2428 2432  wait_for_recall(vnode_t *vp1, vnode_t *vp2, nfs4_op_hint_t op,
2429 2433      nfs4_recov_state_t *rsp)
2430 2434  {
2431 2435          int error;
2432 2436  
2433 2437          NFS4_DEBUG(nfs4_recall_debug,
2434 2438              (CE_NOTE, "wait_for_recall:    0x%p, 0x%p\n",
2435 2439              (void *)vp1, (void *)vp2));
2436 2440  
2437 2441          rsp->rs_flags &= ~(NFS4_RS_RECALL_HELD1|NFS4_RS_RECALL_HELD2);
2438 2442  
2439 2443          if ((error = wait_for_recall1(vp1, op, rsp, NFS4_RS_RECALL_HELD1)) != 0)
2440 2444                  return (error);
2441 2445  
2442 2446          if ((error = wait_for_recall1(vp2, op, rsp, NFS4_RS_RECALL_HELD2))
2443 2447              != 0) {
2444 2448                  if (rsp->rs_flags & NFS4_RS_RECALL_HELD1) {
2445 2449                          nfs_rw_exit(&VTOR4(vp1)->r_deleg_recall_lock);
2446 2450                          rsp->rs_flags &= ~NFS4_RS_RECALL_HELD1;
2447 2451                  }
2448 2452  
2449 2453                  return (error);
2450 2454          }
2451 2455  
2452 2456          return (0);
2453 2457  }
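
/*
 * Editorial sketch (not part of the change): wait_for_recall() above is
 * an instance of the acquire-two-locks-with-rollback pattern.  A generic
 * user-space rendering, with pthreads purely for illustration:
 */
#include <pthread.h>

static int
acquire_pair(pthread_mutex_t *a, pthread_mutex_t *b)
{
	int error;

	if ((error = pthread_mutex_lock(a)) != 0)
		return (error);
	if ((error = pthread_mutex_lock(b)) != 0) {
		/* roll back the first lock so no partial state leaks */
		(void) pthread_mutex_unlock(a);
		return (error);
	}
	return (0);		/* caller now holds both locks */
}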
2454 2458  
2455 2459  /*
2456 2460   * nfs4_dlistadd - Add this rnode to a list of rnodes to be
2457 2461   * DELEGRETURN'd at the end of recovery.
2458 2462   */
2459 2463  
2460 2464  static void
2461 2465  nfs4_dlistadd(rnode4_t *rp, struct nfs4_callback_globals *ncg, int flags)
2462 2466  {
2463 2467          struct nfs4_dnode *dp;
2464 2468  
2465 2469          ASSERT(mutex_owned(&rp->r_statev4_lock));
2466 2470          /*
2467 2471           * Mark the delegation as having a return pending.
2468 2472           * This will prevent the use of the delegation stateID
2469 2473           * by read, write, setattr and open.
2470 2474           */
2471 2475          rp->r_deleg_return_pending = TRUE;
2472 2476          dp = kmem_alloc(sizeof (*dp), KM_SLEEP);
2473 2477          VN_HOLD(RTOV4(rp));
2474 2478          dp->rnodep = rp;
2475 2479          dp->flags = flags;
2476 2480          mutex_enter(&ncg->nfs4_dlist_lock);
2477 2481          list_insert_head(&ncg->nfs4_dlist, dp);
2478 2482  #ifdef  DEBUG
2479 2483          ncg->nfs4_dlistadd_c++;
2480 2484  #endif
2481 2485          mutex_exit(&ncg->nfs4_dlist_lock);
2482 2486  }
2483 2487  
2484 2488  /*
2485 2489   * nfs4_dlistclean_impl - Do DELEGRETURN for each rnode on the list
2486 2490   * of files awaiting cleaning.  If the override_flags are non-zero
2487 2491   * then use them rather than the flags that were set when the rnode
2488 2492   * was added to the dlist.
2489 2493   */
2490 2494  static void
2491 2495  nfs4_dlistclean_impl(struct nfs4_callback_globals *ncg, int override_flags)
2492 2496  {
2493 2497          rnode4_t *rp;
2494 2498          struct nfs4_dnode *dp;
2495 2499          int flags;
2496 2500  
2497 2501          ASSERT(override_flags == 0 || override_flags == NFS4_DR_DISCARD);
2498 2502  
2499 2503          mutex_enter(&ncg->nfs4_dlist_lock);
2500 2504          while ((dp = list_head(&ncg->nfs4_dlist)) != NULL) {
2501 2505  #ifdef  DEBUG
2502 2506                  ncg->nfs4_dlistclean_c++;
2503 2507  #endif
2504 2508                  list_remove(&ncg->nfs4_dlist, dp);
2505 2509                  mutex_exit(&ncg->nfs4_dlist_lock);
2506 2510                  rp = dp->rnodep;
2507 2511                  flags = (override_flags != 0) ? override_flags : dp->flags;
2508 2512                  kmem_free(dp, sizeof (*dp));
2509 2513                  (void) nfs4delegreturn_impl(rp, flags, ncg);
2510 2514                  VN_RELE(RTOV4(rp));
2511 2515                  mutex_enter(&ncg->nfs4_dlist_lock);
2512 2516          }
2513 2517          mutex_exit(&ncg->nfs4_dlist_lock);
2514 2518  }
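
/*
 * Editorial note: nfs4_dlistclean_impl() above uses the pop-one,
 * drop-the-lock idiom: nfs4_dlist_lock covers only the list_head and
 * list_remove steps and is released across the potentially blocking
 * delegreturn work, then retaken before the next peek, so the lock is
 * never held over an OTW call.
 */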
2515 2519  
2516 2520  void
2517 2521  nfs4_dlistclean(void)
2518 2522  {
2519 2523          struct nfs4_callback_globals *ncg;
2520 2524  
2521 2525          ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone());
2522 2526          ASSERT(ncg != NULL);
2523 2527  
2524 2528          nfs4_dlistclean_impl(ncg, 0);
2525 2529  }
  