Print this page
NEX-15279 support NFS server in zone
NEX-15520 online NFS shares cause zoneadm halt to hang in nfs_export_zone_fini
Portions contributed by: Dan Kruchinin dan.kruchinin@nexenta.com
Portions contributed by: Stepan Zastupov stepan.zastupov@gmail.com
Reviewed by: Joyce McIntosh <joyce.mcintosh@nexenta.com>
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
Reviewed by: Gordon Ross <gordon.ross@nexenta.com>
NEX-6778 NFS kstats leak and cause system to hang
Revert "NEX-4261 Per-client NFS server IOPS, bandwidth, and latency kstats"
This reverts commit 586c3ab1927647487f01c337ddc011c642575a52.
Revert "NEX-5354 Aggregated IOPS, bandwidth, and latency kstats for NFS server"
This reverts commit c91d7614da8618ef48018102b077f60ecbbac8c2.
Revert "NEX-5667 nfssrv_stats_flags does not work for aggregated kstats"
This reverts commit 3dcf42618be7dd5f408c327f429c81e07ca08e74.
Revert "NEX-5750 Time values for aggregated NFS server kstats should be normalized"
This reverts commit 1f4d4f901153b0191027969fa4a8064f9d3b9ee1.
Revert "NEX-5942 Panic in rfs4_minorvers_mismatch() with NFSv4.1 client"
This reverts commit 40766417094a162f5e4cc8786c0fa0a7e5871cd9.
Revert "NEX-5752 NFS server: namespace collision in kstats"
This reverts commit ae81e668db86050da8e483264acb0cce0444a132.
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
NEX-5942 Panic in rfs4_minorvers_mismatch() with NFSv4.1 client
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
Reviewed by: Rick McNeal <rick.mcneal@nexenta.com>
NEX-4261 Per-client NFS server IOPS, bandwidth, and latency kstats
Reviewed by: Kevin Crowe <kevin.crowe@nexenta.com>
Reviewed by: Roman Strashkin <roman.strashkin@nexenta.com>
NEX-3097 IOPS, bandwidth, and latency kstats for NFS server
Reviewed by: Josef 'Jeff' Sipek <josef.sipek@nexenta.com>


   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 




  27 #include <sys/systm.h>
  28 #include <sys/sdt.h>
  29 #include <rpc/types.h>
  30 #include <rpc/auth.h>
  31 #include <rpc/auth_unix.h>
  32 #include <rpc/auth_des.h>
  33 #include <rpc/svc.h>
  34 #include <rpc/xdr.h>
  35 #include <nfs/nfs4.h>
  36 #include <nfs/nfs_dispatch.h>
  37 #include <nfs/nfs4_drc.h>
  38 
  39 #define NFS4_MAX_MINOR_VERSION  0
  40 
  41 /*
  42  * This is the duplicate request cache for NFSv4
  43  */
  44 rfs4_drc_t *nfs4_drc = NULL;
  45 
  46 /*
  47  * The default size of the duplicate request cache
  48  */
  49 uint32_t nfs4_drc_max = 8 * 1024;
  50 
  51 /*
  52  * The number of buckets we'd like to hash the
  53  * replies into.  Do not change this on the fly.
  54  */
  55 uint32_t nfs4_drc_hash = 541;
  56 
  57 static void rfs4_resource_err(struct svc_req *req, COMPOUND4args *argsp);
  58 


  59 /*
  60  * Initialize a duplicate request cache.
  61  */
  62 rfs4_drc_t *
  63 rfs4_init_drc(uint32_t drc_size, uint32_t drc_hash_size)
  64 {
  65         rfs4_drc_t *drc;
  66         uint32_t   bki;
  67 
  68         ASSERT(drc_size);
  69         ASSERT(drc_hash_size);
  70 
  71         drc = kmem_alloc(sizeof (rfs4_drc_t), KM_SLEEP);
  72 
  73         drc->max_size = drc_size;
  74         drc->in_use = 0;
  75 
  76         mutex_init(&drc->lock, NULL, MUTEX_DEFAULT, NULL);
  77 
  78         drc->dr_hash = drc_hash_size;
  79 
  80         drc->dr_buckets = kmem_alloc(sizeof (list_t)*drc_hash_size, KM_SLEEP);
  81 
  82         for (bki = 0; bki < drc_hash_size; bki++) {
  83                 list_create(&drc->dr_buckets[bki], sizeof (rfs4_dupreq_t),
  84                     offsetof(rfs4_dupreq_t, dr_bkt_next));
  85         }
  86 
  87         list_create(&(drc->dr_cache), sizeof (rfs4_dupreq_t),
  88             offsetof(rfs4_dupreq_t, dr_next));
  89 
  90         return (drc);
  91 }
  92 
  93 /*
  94  * Destroy a duplicate request cache.
  95  */
  96 void
  97 rfs4_fini_drc(rfs4_drc_t *drc)
  98 {


  99         rfs4_dupreq_t *drp, *drp_next;
 100 
 101         ASSERT(drc);
 102 
 103         /* iterate over the dr_cache and free the enties */
 104         for (drp = list_head(&(drc->dr_cache)); drp != NULL; drp = drp_next) {
 105 
 106                 if (drp->dr_state == NFS4_DUP_REPLAY)
 107                         rfs4_compound_free(&(drp->dr_res));
 108 
 109                 if (drp->dr_addr.buf != NULL)
 110                         kmem_free(drp->dr_addr.buf, drp->dr_addr.maxlen);
 111 
 112                 drp_next = list_next(&(drc->dr_cache), drp);
 113 
 114                 kmem_free(drp, sizeof (rfs4_dupreq_t));
 115         }
 116 
 117         mutex_destroy(&drc->lock);
 118         kmem_free(drc->dr_buckets,
 119             sizeof (list_t)*drc->dr_hash);
 120         kmem_free(drc, sizeof (rfs4_drc_t));
 121 }
 122 


 343         list_insert_head(&drc->dr_cache, drp);
 344         list_insert_head(dr_bkt, drp);
 345         mutex_exit(&drc->lock);
 346 
 347         *dup = drp;
 348 
 349         return (NFS4_DUP_NEW);
 350 }
 351 
 352 /*
 353  *
 354  * This function handles the duplicate request cache,
 355  * NULL_PROC and COMPOUND procedure calls for NFSv4;
 356  *
 357  * Passed into this function are:-
 358  *
 359  *      disp    A pointer to our dispatch table entry
 360  *      req     The request to process
 361  *      xprt    The server transport handle
 362  *      ap      A pointer to the arguments

 363  *
 364  *
 365  * When appropriate this function is responsible for inserting
 366  * the reply into the duplicate cache or replaying an existing
 367  * cached reply.
 368  *
 369  * dr_stat      reflects the state of the duplicate request that
 370  *              has been inserted into or retrieved from the cache
 371  *
 372  * drp          is the duplicate request entry
 373  *
 374  */
 375 int
 376 rfs4_dispatch(struct rpcdisp *disp, struct svc_req *req,
 377                 SVCXPRT *xprt, char *ap)
 378 {
 379 
 380         COMPOUND4res     res_buf;
 381         COMPOUND4res    *rbp;
 382         COMPOUND4args   *cap;
 383         cred_t          *cr = NULL;
 384         int              error = 0;
 385         int              dis_flags = 0;
 386         int              dr_stat = NFS4_NOT_DUP;
 387         rfs4_dupreq_t   *drp = NULL;
 388         int              rv;


 389 
 390         ASSERT(disp);
 391 
 392         /*
 393          * Short circuit the RPC_NULL proc.
 394          */
 395         if (disp->dis_proc == rpc_null) {
 396                 DTRACE_NFSV4_1(null__start, struct svc_req *, req);
 397                 if (!svc_sendreply(xprt, xdr_void, NULL)) {
 398                         DTRACE_NFSV4_1(null__done, struct svc_req *, req);
 399                         svcerr_systemerr(xprt);
 400                         return (1);
 401                 }
 402                 DTRACE_NFSV4_1(null__done, struct svc_req *, req);

 403                 return (0);
 404         }
 405 
 406         /* Only NFSv4 Compounds from this point onward */
 407 
 408         rbp = &res_buf;
 409         cap = (COMPOUND4args *)ap;
 410 
 411         /*





 412          * Figure out the disposition of the whole COMPOUND
 413          * and record its IDEMPOTENCY.
 414          */
 415         rfs4_compound_flagproc(cap, &dis_flags);
 416 
 417         /*
 418          * If NON-IDEMPOTENT then we need to figure out if this
 419          * request can be replied from the duplicate cache.
 420          *
 421          * If this is a new request then we need to insert the
 422          * reply into the duplicate cache.
 423          */
 424         if (!(dis_flags & RPC_IDEMPOTENT)) {
 425                 /* look for a replay from the cache or allocate */
 426                 dr_stat = rfs4_find_dr(req, nfs4_drc, &drp);
 427 
 428                 switch (dr_stat) {
 429 
 430                 case NFS4_DUP_ERROR:
 431                         rfs4_resource_err(req, cap);


 481                 curthread->t_flag &= ~T_DONTPEND;
 482 
 483                 if (rv)         /* short ckt sendreply on error */
 484                         return (rv);
 485 
 486                 if (curthread->t_flag & T_WOULDBLOCK) {
 487                         curthread->t_flag &= ~T_WOULDBLOCK;
 488                         return (1);
 489                 }
 490         }
 491 
 492         /*
 493          * Send out the replayed reply or the 'real' one.
 494          */
 495         if (!svc_sendreply(xprt,  xdr_COMPOUND4res_srv, (char *)rbp)) {
 496                 DTRACE_PROBE2(nfss__e__dispatch_sendfail,
 497                     struct svc_req *, xprt,
 498                     char *, rbp);
 499                 svcerr_systemerr(xprt);
 500                 error++;






 501         }
 502 
 503         /*
 504          * If this reply was just inserted into the duplicate cache
 505          * or it was replayed from the dup cache; (re)mark it as
 506          * available for replay
 507          *
 508          * At first glance, this 'if' statement seems a little strange;
 509          * testing for NFS4_DUP_REPLAY, and then calling...
 510          *
 511          *      rfs4_dr_chstate(NFS4_DUP_REPLAY)
 512          *
 513          * ... but notice that we are checking dr_stat, and not the
 514          * state of the entry itself, the entry will be NFS4_DUP_INUSE,
 515          * we do that so that we know not to prematurely reap it whilst
 516          * we resend it to the client.
 517          *
 518          */
 519         if (dr_stat == NFS4_DUP_NEW || dr_stat == NFS4_DUP_REPLAY) {
 520                 mutex_enter(&drp->drc->lock);




   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 /*
  28  * Copyright 2018 Nexenta Systems, Inc.
  29  */
  30 
  31 #include <sys/systm.h>
  32 #include <sys/sdt.h>
  33 #include <rpc/types.h>
  34 #include <rpc/auth.h>
  35 #include <rpc/auth_unix.h>
  36 #include <rpc/auth_des.h>
  37 #include <rpc/svc.h>
  38 #include <rpc/xdr.h>
  39 #include <nfs/nfs4.h>
  40 #include <nfs/nfs_dispatch.h>
  41 #include <nfs/nfs4_drc.h>
  42 
  43 #define NFS4_MAX_MINOR_VERSION  0
  44 
  45 /*





  46  * The default size of the duplicate request cache
  47  */
  48 uint32_t nfs4_drc_max = 8 * 1024;
  49 
  50 /*
  51  * The number of buckets we'd like to hash the
  52  * replies into.  Do not change this on the fly.
  53  */
  54 uint32_t nfs4_drc_hash = 541;
  55 
  56 static void rfs4_resource_err(struct svc_req *req, COMPOUND4args *argsp);
  57 
  58 extern zone_key_t rfs4_zone_key;
  59 
  60 /*
  61  * Initialize a duplicate request cache.
  62  */
  63 rfs4_drc_t *
  64 rfs4_init_drc(uint32_t drc_size, uint32_t drc_hash_size)
  65 {
  66         rfs4_drc_t *drc;
  67         uint32_t   bki;
  68 
  69         ASSERT(drc_size);
  70         ASSERT(drc_hash_size);
  71 
  72         drc = kmem_alloc(sizeof (rfs4_drc_t), KM_SLEEP);
  73 
  74         drc->max_size = drc_size;
  75         drc->in_use = 0;
  76 
  77         mutex_init(&drc->lock, NULL, MUTEX_DEFAULT, NULL);
  78 
  79         drc->dr_hash = drc_hash_size;
  80 
  81         drc->dr_buckets = kmem_alloc(sizeof (list_t)*drc_hash_size, KM_SLEEP);
  82 
  83         for (bki = 0; bki < drc_hash_size; bki++) {
  84                 list_create(&drc->dr_buckets[bki], sizeof (rfs4_dupreq_t),
  85                     offsetof(rfs4_dupreq_t, dr_bkt_next));
  86         }
  87 
  88         list_create(&(drc->dr_cache), sizeof (rfs4_dupreq_t),
  89             offsetof(rfs4_dupreq_t, dr_next));
  90 
  91         return (drc);
  92 }
  93 
  94 /*
  95  * Destroy a duplicate request cache.
  96  */
  97 void
  98 rfs4_fini_drc(void)
  99 {
 100         nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
 101         rfs4_drc_t *drc = nsrv4->nfs4_drc;
 102         rfs4_dupreq_t *drp, *drp_next;
 103 


 104         /* iterate over the dr_cache and free the enties */
 105         for (drp = list_head(&(drc->dr_cache)); drp != NULL; drp = drp_next) {
 106 
 107                 if (drp->dr_state == NFS4_DUP_REPLAY)
 108                         rfs4_compound_free(&(drp->dr_res));
 109 
 110                 if (drp->dr_addr.buf != NULL)
 111                         kmem_free(drp->dr_addr.buf, drp->dr_addr.maxlen);
 112 
 113                 drp_next = list_next(&(drc->dr_cache), drp);
 114 
 115                 kmem_free(drp, sizeof (rfs4_dupreq_t));
 116         }
 117 
 118         mutex_destroy(&drc->lock);
 119         kmem_free(drc->dr_buckets,
 120             sizeof (list_t)*drc->dr_hash);
 121         kmem_free(drc, sizeof (rfs4_drc_t));
 122 }
 123 


 344         list_insert_head(&drc->dr_cache, drp);
 345         list_insert_head(dr_bkt, drp);
 346         mutex_exit(&drc->lock);
 347 
 348         *dup = drp;
 349 
 350         return (NFS4_DUP_NEW);
 351 }
 352 
 353 /*
 354  *
 355  * This function handles the duplicate request cache,
 356  * NULL_PROC and COMPOUND procedure calls for NFSv4;
 357  *
 358  * Passed into this function are:-
 359  *
 360  *      disp    A pointer to our dispatch table entry
 361  *      req     The request to process
 362  *      xprt    The server transport handle
 363  *      ap      A pointer to the arguments
 364  *      rlen    A pointer to the reply length (output)
 365  *
 366  *
 367  * When appropriate this function is responsible for inserting
 368  * the reply into the duplicate cache or replaying an existing
 369  * cached reply.
 370  *
 371  * dr_stat      reflects the state of the duplicate request that
 372  *              has been inserted into or retrieved from the cache
 373  *
 374  * drp          is the duplicate request entry
 375  *
 376  */
 377 int
 378 rfs4_dispatch(struct rpcdisp *disp, struct svc_req *req,
 379     SVCXPRT *xprt, char *ap, size_t *rlen)
 380 {
 381 
 382         COMPOUND4res     res_buf;
 383         COMPOUND4res    *rbp;
 384         COMPOUND4args   *cap;
 385         cred_t          *cr = NULL;
 386         int              error = 0;
 387         int              dis_flags = 0;
 388         int              dr_stat = NFS4_NOT_DUP;
 389         rfs4_dupreq_t   *drp = NULL;
 390         int              rv;
 391         nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
 392         rfs4_drc_t *nfs4_drc = nsrv4->nfs4_drc;
 393 
 394         ASSERT(disp);
 395 
 396         /*
 397          * Short circuit the RPC_NULL proc.
 398          */
 399         if (disp->dis_proc == rpc_null) {
 400                 DTRACE_NFSV4_1(null__start, struct svc_req *, req);
 401                 if (!svc_sendreply(xprt, xdr_void, NULL)) {
 402                         DTRACE_NFSV4_1(null__done, struct svc_req *, req);
 403                         svcerr_systemerr(xprt);
 404                         return (1);
 405                 }
 406                 DTRACE_NFSV4_1(null__done, struct svc_req *, req);
 407                 *rlen = xdr_sizeof(xdr_void, NULL);
 408                 return (0);
 409         }
 410 
 411         /* Only NFSv4 Compounds from this point onward */
 412 
 413         rbp = &res_buf;
 414         cap = (COMPOUND4args *)ap;
 415 
 416         /*
 417          * Update kstats
 418          */
 419         rfs4_compound_kstat_args(cap);
 420 
 421         /*
 422          * Figure out the disposition of the whole COMPOUND
 423          * and record its IDEMPOTENCY.
 424          */
 425         rfs4_compound_flagproc(cap, &dis_flags);
 426 
 427         /*
 428          * If NON-IDEMPOTENT then we need to figure out if this
 429          * request can be replied from the duplicate cache.
 430          *
 431          * If this is a new request then we need to insert the
 432          * reply into the duplicate cache.
 433          */
 434         if (!(dis_flags & RPC_IDEMPOTENT)) {
 435                 /* look for a replay from the cache or allocate */
 436                 dr_stat = rfs4_find_dr(req, nfs4_drc, &drp);
 437 
 438                 switch (dr_stat) {
 439 
 440                 case NFS4_DUP_ERROR:
 441                         rfs4_resource_err(req, cap);


 491                 curthread->t_flag &= ~T_DONTPEND;
 492 
 493                 if (rv)         /* short ckt sendreply on error */
 494                         return (rv);
 495 
 496                 if (curthread->t_flag & T_WOULDBLOCK) {
 497                         curthread->t_flag &= ~T_WOULDBLOCK;
 498                         return (1);
 499                 }
 500         }
 501 
 502         /*
 503          * Send out the replayed reply or the 'real' one.
 504          */
 505         if (!svc_sendreply(xprt, xdr_COMPOUND4res_srv, (char *)rbp)) {
 506                 DTRACE_PROBE2(nfss__e__dispatch_sendfail,
 507                     struct svc_req *, xprt,
 508                     char *, rbp);
 509                 svcerr_systemerr(xprt);
 510                 error++;
 511         } else {
 512                 /*
 513                  * Update kstats
 514                  */
 515                 rfs4_compound_kstat_res(rbp);
 516                 *rlen = xdr_sizeof(xdr_COMPOUND4res_srv, rbp);
 517         }
 518 
 519         /*
 520          * If this reply was just inserted into the duplicate cache
 521          * or it was replayed from the dup cache; (re)mark it as
 522          * available for replay
 523          *
 524          * At first glance, this 'if' statement seems a little strange;
 525          * testing for NFS4_DUP_REPLAY, and then calling...
 526          *
 527          *      rfs4_dr_chstate(NFS4_DUP_REPLAY)
 528          *
 529          * ... but notice that we are checking dr_stat, and not the
 530          * state of the entry itself, the entry will be NFS4_DUP_INUSE,
 531          * we do that so that we know not to prematurely reap it whilst
 532          * we resend it to the client.
 533          *
 534          */
 535         if (dr_stat == NFS4_DUP_NEW || dr_stat == NFS4_DUP_REPLAY) {
 536                 mutex_enter(&drp->drc->lock);