Print this page
11083 support NFS server in zone
Portions contributed by: Dan Kruchinin <dan.kruchinin@nexenta.com>
Portions contributed by: Stepan Zastupov <stepan.zastupov@gmail.com>
Portions contributed by: Joyce McIntosh <joyce.mcintosh@nexenta.com>
Portions contributed by: Mike Zeller <mike@mikezeller.net>
Portions contributed by: Dan McDonald <danmcd@joyent.com>
Portions contributed by: Gordon Ross <gordon.w.ross@gmail.com>
Portions contributed by: Vitaliy Gusev <gusev.vitaliy@gmail.com>
Reviewed by: Rick McNeal <rick.mcneal@nexenta.com>
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Jason King <jbk@joyent.com>
Reviewed by: C Fraire <cfraire@me.com>
Change-Id: I22f289d357503f9b48a0bc2482cc4328a6d43d16

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/fs/nfs/nfs_srv.c
          +++ new/usr/src/uts/common/fs/nfs/nfs_srv.c
↓ open down ↓ 22 lines elided ↑ open up ↑
  23   23   * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
  24   24   * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
  25   25   * Copyright (c) 2016 by Delphix. All rights reserved.
  26   26   */
  27   27  
  28   28  /*
  29   29   *      Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
  30   30   *      All rights reserved.
  31   31   */
  32   32  
       33 +/*
       34 + * Copyright 2018 Nexenta Systems, Inc.
       35 + * Copyright (c) 2016 by Delphix. All rights reserved.
       36 + */
       37 +
  33   38  #include <sys/param.h>
  34   39  #include <sys/types.h>
  35   40  #include <sys/systm.h>
  36   41  #include <sys/cred.h>
  37   42  #include <sys/buf.h>
  38   43  #include <sys/vfs.h>
  39   44  #include <sys/vnode.h>
  40   45  #include <sys/uio.h>
  41   46  #include <sys/stat.h>
  42   47  #include <sys/errno.h>
↓ open down ↓ 20 lines elided ↑ open up ↑
  63   68  #include <nfs/nfs_cmd.h>
  64   69  
  65   70  #include <vm/hat.h>
  66   71  #include <vm/as.h>
  67   72  #include <vm/seg.h>
  68   73  #include <vm/seg_map.h>
  69   74  #include <vm/seg_kmem.h>
  70   75  
  71   76  #include <sys/strsubr.h>
  72   77  
       78 +struct rfs_async_write_list;
       79 +
  73   80  /*
       81 + * Per-zone globals of the NFSv2 server
       82 + */
       83 +typedef struct nfs_srv {
       84 +        kmutex_t                        async_write_lock;
       85 +        struct rfs_async_write_list     *async_write_head;
       86 +
       87 +        /*
       88 +         * enables write clustering if nonzero
       89 +         */
       90 +        int             write_async;
       91 +} nfs_srv_t;
       92 +
       93 +/*
  74   94   * These are the interface routines for the server side of the
  75   95   * Network File System.  See the NFS version 2 protocol specification
  76   96   * for a description of this interface.
  77   97   */
  78   98  
  79   99  static int      sattr_to_vattr(struct nfssattr *, struct vattr *);
  80  100  static void     acl_perm(struct vnode *, struct exportinfo *, struct vattr *,
  81  101                          cred_t *);
  82  102  
      103 +
  83  104  /*
  84  105   * Some "over the wire" UNIX file types.  These are encoded
  85  106   * into the mode.  This needs to be fixed in the next rev.
  86  107   */
  87  108  #define IFMT            0170000         /* type of file */
  88  109  #define IFCHR           0020000         /* character special */
  89  110  #define IFBLK           0060000         /* block special */
  90  111  #define IFSOCK          0140000         /* socket */
  91  112  
  92  113  u_longlong_t nfs2_srv_caller_id;
  93  114  
      115 +static nfs_srv_t *
      116 +nfs_get_srv(void)
      117 +{
      118 +        nfs_globals_t *ng = nfs_srv_getzg();
      119 +        nfs_srv_t *srv = ng->nfs_srv;
      120 +        ASSERT(srv != NULL);
      121 +        return (srv);
      122 +}
      123 +
  94  124  /*
  95  125   * Get file attributes.
  96  126   * Returns the current attributes of the file with the given fhandle.
  97  127   */
  98  128  /* ARGSUSED */
  99  129  void
 100  130  rfs_getattr(fhandle_t *fhp, struct nfsattrstat *ns, struct exportinfo *exi,
 101  131      struct svc_req *req, cred_t *cr, bool_t ro)
 102  132  {
 103  133          int error;
↓ open down ↓ 275 lines elided ↑ open up ↑
 379  409  /*
 380  410   * Given a mounted "dvp" and its "exi", go up to the upper mountpoint,
 381  411   * correcting *dvpp and *exip to match.
 382  412   * Returns 0 on success, -1 if no export is found for the upper vnode.
 383  413   */
 384  414  int
 385  415  rfs_climb_crossmnt(vnode_t **dvpp, struct exportinfo **exip, cred_t *cr)
 386  416  {
 387  417          struct exportinfo *exi;
 388  418          vnode_t *dvp = *dvpp;
      419 +        vnode_t *zone_rootvp;
 389  420  
 390      -        ASSERT(dvp->v_flag & VROOT);
      421 +        zone_rootvp = (*exip)->exi_ne->exi_root->exi_vp;
      422 +        ASSERT((dvp->v_flag & VROOT) || VN_CMP(zone_rootvp, dvp));
 391  423  
 392  424          VN_HOLD(dvp);
 393      -        dvp = untraverse(dvp);
      425 +        dvp = untraverse(dvp, zone_rootvp);
 394  426          exi = nfs_vptoexi(NULL, dvp, cr, NULL, NULL, FALSE);
 395  427          if (exi == NULL) {
 396  428                  VN_RELE(dvp);
 397  429                  return (-1);
 398  430          }
 399  431  
      432 +        ASSERT3U(exi->exi_zoneid, ==, (*exip)->exi_zoneid);
 400  433          exi_rele(*exip);
 401  434          *exip = exi;
 402  435          VN_RELE(*dvpp);
 403  436          *dvpp = dvp;
 404  437  
 405  438          return (0);
 406  439  }
 407  440  /*
 408  441   * Directory lookup.
 409  442   * Returns an fhandle and file attributes for file name in a directory.
↓ open down ↓ 29 lines elided ↑ open up ↑
 439  472          if (da->da_name == NULL || *da->da_name == '\0') {
 440  473                  dr->dr_status = NFSERR_ACCES;
 441  474                  return;
 442  475          }
 443  476  
 444  477          /*
 445  478           * Allow lookups from the root - the default
 446  479           * location of the public filehandle.
 447  480           */
 448  481          if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
 449      -                dvp = rootdir;
      482 +                dvp = ZONE_ROOTVP();
 450  483                  VN_HOLD(dvp);
 451  484          } else {
 452  485                  dvp = nfs_fhtovp(fhp, exi);
 453  486                  if (dvp == NULL) {
 454  487                          dr->dr_status = NFSERR_STALE;
 455  488                          return;
 456  489                  }
 457  490          }
 458  491  
 459  492          exi_hold(exi);
      493 +        ASSERT3U(exi->exi_zoneid, ==, curzone->zone_id);
 460  494  
 461  495          /*
 462  496           * Do not allow lookups beyond the root.
 463  497           * If the filehandle matches a filehandle of the exi,
 464  498           * then the ".." refers beyond the root of an exported filesystem.
 465  499           */
 466  500          if (strcmp(da->da_name, "..") == 0 &&
 467  501              EQFID(&exi->exi_fid, (fid_t *)&fhp->fh_len)) {
 468  502                  if ((exi->exi_export.ex_flags & EX_NOHIDE) &&
 469      -                    (dvp->v_flag & VROOT)) {
      503 +                    ((dvp->v_flag & VROOT) || VN_IS_CURZONEROOT(dvp))) {
 470  504                          /*
 471  505                           * special case for ".." and a 'nohide' exported root
 472  506                           */
 473  507                          if (rfs_climb_crossmnt(&dvp, &exi, cr) != 0) {
 474  508                                  error = NFSERR_ACCES;
 475  509                                  goto out;
 476  510                          }
 477  511                  } else  {
 478  512                          error = NFSERR_NOENT;
 479  513                          goto out;
↓ open down ↓ 15 lines elided ↑ open up ↑
 495  529           * a pathname and follow symbolic links if
 496  530           * necessary.
 497  531           *
 498  532           * This may result in a vnode in another filesystem
 499  533           * which is OK as long as the filesystem is exported.
 500  534           */
 501  535          if (PUBLIC_FH2(fhp)) {
 502  536                  publicfh_flag = TRUE;
 503  537  
 504  538                  exi_rele(exi);
      539 +                exi = NULL;
 505  540  
 506  541                  error = rfs_publicfh_mclookup(name, dvp, cr, &vp, &exi,
 507  542                      &sec);
 508  543          } else {
 509  544                  /*
 510  545                   * Do a normal single component lookup.
 511  546                   */
 512  547                  error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
 513  548                      NULL, NULL, NULL);
 514  549          }
↓ open down ↓ 113 lines elided ↑ open up ↑
 628  663          }
 629  664  
 630  665          /*
 631  666           * Allocate data for pathname.  This will be freed by rfs_rlfree.
 632  667           */
 633  668          rl->rl_data = kmem_alloc(NFS_MAXPATHLEN, KM_SLEEP);
 634  669  
 635  670          if (is_referral) {
 636  671                  char *s;
 637  672                  size_t strsz;
      673 +                kstat_named_t *stat =
      674 +                    exi->exi_ne->ne_globals->svstat[NFS_VERSION];
 638  675  
 639  676                  /* Get an artificial symlink based on a referral */
 640  677                  s = build_symlink(vp, cr, &strsz);
 641      -                global_svstat_ptr[2][NFS_REFERLINKS].value.ui64++;
      678 +                stat[NFS_REFERLINKS].value.ui64++;
 642  679                  DTRACE_PROBE2(nfs2serv__func__referral__reflink,
 643  680                      vnode_t *, vp, char *, s);
 644  681                  if (s == NULL)
 645  682                          error = EINVAL;
 646  683                  else {
 647  684                          error = 0;
 648  685                          (void) strlcpy(rl->rl_data, s, NFS_MAXPATHLEN);
 649  686                          rl->rl_count = (uint32_t)MIN(strsz, NFS_MAXPATHLEN);
 650  687                          kmem_free(s, strsz);
 651  688                  }
↓ open down ↓ 116 lines elided ↑ open up ↑
 768  805                          rr->rr_status = NFSERR_ACCES;
 769  806                          return;
 770  807                  }
 771  808                  in_crit = 1;
 772  809          }
 773  810  
 774  811          error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);
 775  812  
 776  813          /* check if a monitor detected a delegation conflict */
 777  814          if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
      815 +                if (in_crit)
      816 +                        nbl_end_crit(vp);
 778  817                  VN_RELE(vp);
 779  818                  /* mark as wouldblock so response is dropped */
 780  819                  curthread->t_flag |= T_WOULDBLOCK;
 781  820  
 782  821                  rr->rr_data = NULL;
 783  822                  return;
 784  823          }
 785  824  
 786  825          va.va_mask = AT_ALL;
 787  826  
↓ open down ↓ 305 lines elided ↑ open up ↑
1093 1132                      wa->wa_count, 0, NULL)) {
1094 1133                          error = EACCES;
1095 1134                          goto out;
1096 1135                  }
1097 1136          }
1098 1137  
1099 1138          error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
1100 1139  
1101 1140          /* check if a monitor detected a delegation conflict */
1102 1141          if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1103      -                VN_RELE(vp);
1104      -                /* mark as wouldblock so response is dropped */
1105      -                curthread->t_flag |= T_WOULDBLOCK;
1106      -                return;
     1142 +                goto out;
1107 1143          }
1108 1144  
1109 1145          if (wa->wa_data || wa->wa_rlist) {
1110 1146                  /* Do the RDMA thing if necessary */
1111 1147                  if (wa->wa_rlist) {
1112 1148                          iov[0].iov_base = (char *)((wa->wa_rlist)->u.c_daddr3);
1113 1149                          iov[0].iov_len = wa->wa_count;
1114 1150                  } else  {
1115 1151                          iov[0].iov_base = wa->wa_data;
1116 1152                          iov[0].iov_len = wa->wa_count;
↓ open down ↓ 19 lines elided ↑ open up ↑
1136 1172                  /*
1137 1173                   * We're changing creds because VM may fault and we need
1138 1174                   * the cred of the current thread to be used if quota
1139 1175                   * checking is enabled.
1140 1176                   */
1141 1177                  savecred = curthread->t_cred;
1142 1178                  curthread->t_cred = cr;
1143 1179                  error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct);
1144 1180                  curthread->t_cred = savecred;
1145 1181          } else {
     1182 +
1146 1183                  iovcnt = 0;
1147 1184                  for (m = wa->wa_mblk; m != NULL; m = m->b_cont)
1148 1185                          iovcnt++;
1149 1186                  if (iovcnt <= MAX_IOVECS) {
1150 1187  #ifdef DEBUG
1151 1188                          rfs_write_sync_hits++;
1152 1189  #endif
1153 1190                          iovp = iov;
1154 1191                  } else {
1155 1192  #ifdef DEBUG
↓ open down ↓ 123 lines elided ↑ open up ↑
1279 1316          int count;
1280 1317          int rcount;
1281 1318          uint_t off;
1282 1319          uint_t len;
1283 1320          struct rfs_async_write nrpsp;
1284 1321          struct rfs_async_write_list nlpsp;
1285 1322          ushort_t t_flag;
1286 1323          cred_t *savecred;
1287 1324          int in_crit = 0;
1288 1325          caller_context_t ct;
     1326 +        nfs_srv_t *nsrv;
1289 1327  
1290      -        if (!rfs_write_async) {
     1328 +        ASSERT(exi == NULL || exi->exi_zoneid == curzone->zone_id);
     1329 +        nsrv = nfs_get_srv();
     1330 +        if (!nsrv->write_async) {
1291 1331                  rfs_write_sync(wa, ns, exi, req, cr, ro);
1292 1332                  return;
1293 1333          }
1294 1334  
1295 1335          /*
1296 1336           * Initialize status to RFSWRITE_INITVAL instead of 0, since value of 0
1297 1337           * is considered an OK.
1298 1338           */
1299 1339          ns->ns_status = RFSWRITE_INITVAL;
1300 1340  
↓ open down ↓ 4 lines elided ↑ open up ↑
1305 1345          nrp->cr = cr;
1306 1346          nrp->ro = ro;
1307 1347          nrp->thread = curthread;
1308 1348  
1309 1349          ASSERT(curthread->t_schedflag & TS_DONT_SWAP);
1310 1350  
1311 1351          /*
1312 1352           * Look to see if there is already a cluster started
1313 1353           * for this file.
1314 1354           */
1315      -        mutex_enter(&rfs_async_write_lock);
1316      -        for (lp = rfs_async_write_head; lp != NULL; lp = lp->next) {
     1355 +        mutex_enter(&nsrv->async_write_lock);
     1356 +        for (lp = nsrv->async_write_head; lp != NULL; lp = lp->next) {
1317 1357                  if (bcmp(&wa->wa_fhandle, lp->fhp,
1318 1358                      sizeof (fhandle_t)) == 0)
1319 1359                          break;
1320 1360          }
1321 1361  
1322 1362          /*
1323 1363           * If lp is non-NULL, then there is already a cluster
1324 1364           * started.  We need to place ourselves in the cluster
1325 1365           * list in the right place as determined by starting
1326 1366           * offset.  Conflicts with non-blocking mandatory locked
↓ open down ↓ 5 lines elided ↑ open up ↑
1332 1372                  while (rp != NULL && rp->wa->wa_offset < wa->wa_offset) {
1333 1373                          trp = rp;
1334 1374                          rp = rp->list;
1335 1375                  }
1336 1376                  nrp->list = rp;
1337 1377                  if (trp == NULL)
1338 1378                          lp->list = nrp;
1339 1379                  else
1340 1380                          trp->list = nrp;
1341 1381                  while (nrp->ns->ns_status == RFSWRITE_INITVAL)
1342      -                        cv_wait(&lp->cv, &rfs_async_write_lock);
1343      -                mutex_exit(&rfs_async_write_lock);
     1382 +                        cv_wait(&lp->cv, &nsrv->async_write_lock);
     1383 +                mutex_exit(&nsrv->async_write_lock);
1344 1384  
1345 1385                  return;
1346 1386          }
1347 1387  
1348 1388          /*
1349 1389           * No cluster started yet, start one and add ourselves
1350 1390           * to the list of clusters.
1351 1391           */
1352 1392          nrp->list = NULL;
1353 1393  
1354 1394          nlp = &nlpsp;
1355 1395          nlp->fhp = &wa->wa_fhandle;
1356 1396          cv_init(&nlp->cv, NULL, CV_DEFAULT, NULL);
1357 1397          nlp->list = nrp;
1358 1398          nlp->next = NULL;
1359 1399  
1360      -        if (rfs_async_write_head == NULL) {
1361      -                rfs_async_write_head = nlp;
     1400 +        if (nsrv->async_write_head == NULL) {
     1401 +                nsrv->async_write_head = nlp;
1362 1402          } else {
1363      -                lp = rfs_async_write_head;
     1403 +                lp = nsrv->async_write_head;
1364 1404                  while (lp->next != NULL)
1365 1405                          lp = lp->next;
1366 1406                  lp->next = nlp;
1367 1407          }
1368      -        mutex_exit(&rfs_async_write_lock);
     1408 +        mutex_exit(&nsrv->async_write_lock);
1369 1409  
1370 1410          /*
1371 1411           * Convert the file handle common to all of the requests
1372 1412           * in this cluster to a vnode.
1373 1413           */
1374 1414          vp = nfs_fhtovp(&wa->wa_fhandle, exi);
1375 1415          if (vp == NULL) {
1376      -                mutex_enter(&rfs_async_write_lock);
1377      -                if (rfs_async_write_head == nlp)
1378      -                        rfs_async_write_head = nlp->next;
     1416 +                mutex_enter(&nsrv->async_write_lock);
     1417 +                if (nsrv->async_write_head == nlp)
     1418 +                        nsrv->async_write_head = nlp->next;
1379 1419                  else {
1380      -                        lp = rfs_async_write_head;
     1420 +                        lp = nsrv->async_write_head;
1381 1421                          while (lp->next != nlp)
1382 1422                                  lp = lp->next;
1383 1423                          lp->next = nlp->next;
1384 1424                  }
1385 1425                  t_flag = curthread->t_flag & T_WOULDBLOCK;
1386 1426                  for (rp = nlp->list; rp != NULL; rp = rp->list) {
1387 1427                          rp->ns->ns_status = NFSERR_STALE;
1388 1428                          rp->thread->t_flag |= t_flag;
1389 1429                  }
1390 1430                  cv_broadcast(&nlp->cv);
1391      -                mutex_exit(&rfs_async_write_lock);
     1431 +                mutex_exit(&nsrv->async_write_lock);
1392 1432  
1393 1433                  return;
1394 1434          }
1395 1435  
1396 1436          /*
1397 1437           * Can only write regular files.  Attempts to write any
1398 1438           * other file type fail with NFSERR_ISDIR.
1399 1439           */
1400 1440          if (vp->v_type != VREG) {
1401 1441                  VN_RELE(vp);
1402      -                mutex_enter(&rfs_async_write_lock);
1403      -                if (rfs_async_write_head == nlp)
1404      -                        rfs_async_write_head = nlp->next;
     1442 +                mutex_enter(&nsrv->async_write_lock);
     1443 +                if (nsrv->async_write_head == nlp)
     1444 +                        nsrv->async_write_head = nlp->next;
1405 1445                  else {
1406      -                        lp = rfs_async_write_head;
     1446 +                        lp = nsrv->async_write_head;
1407 1447                          while (lp->next != nlp)
1408 1448                                  lp = lp->next;
1409 1449                          lp->next = nlp->next;
1410 1450                  }
1411 1451                  t_flag = curthread->t_flag & T_WOULDBLOCK;
1412 1452                  for (rp = nlp->list; rp != NULL; rp = rp->list) {
1413 1453                          rp->ns->ns_status = NFSERR_ISDIR;
1414 1454                          rp->thread->t_flag |= t_flag;
1415 1455                  }
1416 1456                  cv_broadcast(&nlp->cv);
1417      -                mutex_exit(&rfs_async_write_lock);
     1457 +                mutex_exit(&nsrv->async_write_lock);
1418 1458  
1419 1459                  return;
1420 1460          }
1421 1461  
1422 1462          /*
1423 1463           * Enter the critical region before calling VOP_RWLOCK, to avoid a
1424 1464           * deadlock with ufs.
1425 1465           */
1426 1466          if (nbl_need_check(vp)) {
1427 1467                  nbl_start_crit(vp, RW_READER);
↓ open down ↓ 11 lines elided ↑ open up ↑
1439 1479           */
1440 1480          error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
1441 1481  
1442 1482          /* check if a monitor detected a delegation conflict */
1443 1483          if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1444 1484                  if (in_crit)
1445 1485                          nbl_end_crit(vp);
1446 1486                  VN_RELE(vp);
1447 1487                  /* mark as wouldblock so response is dropped */
1448 1488                  curthread->t_flag |= T_WOULDBLOCK;
1449      -                mutex_enter(&rfs_async_write_lock);
1450      -                if (rfs_async_write_head == nlp)
1451      -                        rfs_async_write_head = nlp->next;
     1489 +                mutex_enter(&nsrv->async_write_lock);
     1490 +                if (nsrv->async_write_head == nlp)
     1491 +                        nsrv->async_write_head = nlp->next;
1452 1492                  else {
1453      -                        lp = rfs_async_write_head;
     1493 +                        lp = nsrv->async_write_head;
1454 1494                          while (lp->next != nlp)
1455 1495                                  lp = lp->next;
1456 1496                          lp->next = nlp->next;
1457 1497                  }
1458 1498                  for (rp = nlp->list; rp != NULL; rp = rp->list) {
1459 1499                          if (rp->ns->ns_status == RFSWRITE_INITVAL) {
1460 1500                                  rp->ns->ns_status = puterrno(error);
1461 1501                                  rp->thread->t_flag |= T_WOULDBLOCK;
1462 1502                          }
1463 1503                  }
1464 1504                  cv_broadcast(&nlp->cv);
1465      -                mutex_exit(&rfs_async_write_lock);
     1505 +                mutex_exit(&nsrv->async_write_lock);
1466 1506  
1467 1507                  return;
1468 1508          }
1469 1509  
1470 1510          /*
1471 1511           * Disconnect this cluster from the list of clusters.
1472 1512           * The cluster that is being dealt with must be fixed
1473 1513           * in size after this point, so there is no reason
1474 1514           * to leave it on the list so that new requests can
1475 1515           * find it.
↓ open down ↓ 1 lines elided ↑ open up ↑
1477 1517           * The algorithm is that the first write request will
1478 1518           * create a cluster, convert the file handle to a
1479 1519           * vnode pointer, and then lock the file for writing.
1480 1520           * This request is not likely to be clustered with
1481 1521           * any others.  However, the next request will create
1482 1522           * a new cluster and be blocked in VOP_RWLOCK while
1483 1523           * the first request is being processed.  This delay
1484 1524           * will allow more requests to be clustered in this
1485 1525           * second cluster.
1486 1526           */
1487      -        mutex_enter(&rfs_async_write_lock);
1488      -        if (rfs_async_write_head == nlp)
1489      -                rfs_async_write_head = nlp->next;
     1527 +        mutex_enter(&nsrv->async_write_lock);
     1528 +        if (nsrv->async_write_head == nlp)
     1529 +                nsrv->async_write_head = nlp->next;
1490 1530          else {
1491      -                lp = rfs_async_write_head;
     1531 +                lp = nsrv->async_write_head;
1492 1532                  while (lp->next != nlp)
1493 1533                          lp = lp->next;
1494 1534                  lp->next = nlp->next;
1495 1535          }
1496      -        mutex_exit(&rfs_async_write_lock);
     1536 +        mutex_exit(&nsrv->async_write_lock);
1497 1537  
1498 1538          /*
1499 1539           * Step through the list of requests in this cluster.
1500 1540           * We need to check permissions to make sure that all
1501 1541           * of the requests have sufficient permission to write
1502 1542           * the file.  A cluster can be composed of requests
1503 1543           * from different clients and different users on each
1504 1544           * client.
1505 1545           *
1506 1546           * As a side effect, we also calculate the size of the
↓ open down ↓ 224 lines elided ↑ open up ↑
1731 1771                  }
1732 1772          }
1733 1773  
1734 1774          VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
1735 1775  
1736 1776          if (in_crit)
1737 1777                  nbl_end_crit(vp);
1738 1778          VN_RELE(vp);
1739 1779  
1740 1780          t_flag = curthread->t_flag & T_WOULDBLOCK;
1741      -        mutex_enter(&rfs_async_write_lock);
     1781 +        mutex_enter(&nsrv->async_write_lock);
1742 1782          for (rp = nlp->list; rp != NULL; rp = rp->list) {
1743 1783                  if (rp->ns->ns_status == RFSWRITE_INITVAL) {
1744 1784                          rp->ns->ns_status = puterrno(error);
1745 1785                          rp->thread->t_flag |= t_flag;
1746 1786                  }
1747 1787          }
1748 1788          cv_broadcast(&nlp->cv);
1749      -        mutex_exit(&rfs_async_write_lock);
     1789 +        mutex_exit(&nsrv->async_write_lock);
1750 1790  
1751 1791  }
1752 1792  
1753 1793  void *
1754 1794  rfs_write_getfh(struct nfswriteargs *wa)
1755 1795  {
1756 1796          return (&wa->wa_fhandle);
1757 1797  }
1758 1798  
1759 1799  /*
↓ open down ↓ 444 lines elided ↑ open up ↑
2204 2244          if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
2205 2245                  VN_RELE(tovp);
2206 2246                  VN_RELE(fromvp);
2207 2247                  VN_RELE(srcvp);
2208 2248                  curthread->t_flag |= T_WOULDBLOCK;
2209 2249                  return;
2210 2250          }
2211 2251  
2212 2252          /* Check for delegation on the file being renamed over, if it exists */
2213 2253  
2214      -        if (rfs4_deleg_policy != SRV_NEVER_DELEGATE &&
     2254 +        if (nfs4_get_deleg_policy() != SRV_NEVER_DELEGATE &&
2215 2255              VOP_LOOKUP(tovp, args->rna_to.da_name, &targvp, NULL, 0, NULL, cr,
2216 2256              NULL, NULL, NULL) == 0) {
2217 2257  
2218 2258                  if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2219 2259                          VN_RELE(tovp);
2220 2260                          VN_RELE(fromvp);
2221 2261                          VN_RELE(srcvp);
2222 2262                          VN_RELE(targvp);
2223 2263                          curthread->t_flag |= T_WOULDBLOCK;
2224 2264                          return;
↓ open down ↓ 346 lines elided ↑ open up ↑
2571 2611  
2572 2612          /*
2573 2613           * VOP_RMDIR takes a third argument (the current
2574 2614           * directory of the process).  That's because someone
2575 2615           * wants to return EINVAL if one tries to remove ".".
2576 2616           * Of course, NFS servers have no idea what their
2577 2617           * clients' current directories are.  We fake it by
2578 2618           * supplying a vnode known to exist and illegal to
2579 2619           * remove.
2580 2620           */
2581      -        error = VOP_RMDIR(vp, da->da_name, rootdir, cr, NULL, 0);
     2621 +        error = VOP_RMDIR(vp, da->da_name, ZONE_ROOTVP(), cr, NULL, 0);
2582 2622  
2583 2623          /*
2584 2624           * Force modified data and metadata out to stable storage.
2585 2625           */
2586 2626          (void) VOP_FSYNC(vp, 0, cr, NULL);
2587 2627  
2588 2628          VN_RELE(vp);
2589 2629  
2590 2630          /*
2591 2631           * System V defines rmdir to return EEXIST, not ENOTEMPTY,
↓ open down ↓ 254 lines elided ↑ open up ↑
2846 2886                  /*
2847 2887                   * nfs protocol defines times as unsigned so don't extend sign,
2848 2888                   * unless sysadmin set nfs_allow_preepoch_time.
2849 2889                   */
2850 2890                  NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec, sa->sa_mtime.tv_sec);
2851 2891                  vap->va_mtime.tv_nsec = (uint32_t)(sa->sa_mtime.tv_usec * 1000);
2852 2892          }
2853 2893          return (0);
2854 2894  }
2855 2895  
2856      -static enum nfsftype vt_to_nf[] = {
     2896 +static const enum nfsftype vt_to_nf[] = {
2857 2897          0, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, 0, 0, 0, NFSOC, 0
2858 2898  };
2859 2899  
2860 2900  /*
2861 2901   * check the following fields for overflow: nodeid, size, and time.
2862 2902   * There could be a problem when converting 64-bit LP64 fields
2863 2903   * into 32-bit ones.  Return an error if there is an overflow.
2864 2904   */
2865 2905  int
2866 2906  vattr_to_nattr(struct vattr *vap, struct nfsfattr *na)
↓ open down ↓ 198 lines elided ↑ open up ↑
3065 3105                  }
3066 3106                  if (vsa.vsa_aclcnt)
3067 3107                          kmem_free(vsa.vsa_aclentp,
3068 3108                              vsa.vsa_aclcnt * sizeof (aclent_t));
3069 3109          }
3070 3110  }
3071 3111  
3072 3112  void
3073 3113  rfs_srvrinit(void)
3074 3114  {
3075      -        mutex_init(&rfs_async_write_lock, NULL, MUTEX_DEFAULT, NULL);
3076 3115          nfs2_srv_caller_id = fs_new_caller_id();
3077 3116  }
3078 3117  
3079 3118  void
3080 3119  rfs_srvrfini(void)
3081 3120  {
3082      -        mutex_destroy(&rfs_async_write_lock);
3083 3121  }
3084 3122  
     3123 +/* ARGSUSED */
     3124 +void
     3125 +rfs_srv_zone_init(nfs_globals_t *ng)
     3126 +{
     3127 +        nfs_srv_t *ns;
     3128 +
     3129 +        ns = kmem_zalloc(sizeof (*ns), KM_SLEEP);
     3130 +
     3131 +        mutex_init(&ns->async_write_lock, NULL, MUTEX_DEFAULT, NULL);
     3132 +        ns->write_async = 1;
     3133 +
     3134 +        ng->nfs_srv = ns;
     3135 +}
     3136 +
     3137 +/* ARGSUSED */
     3138 +void
     3139 +rfs_srv_zone_fini(nfs_globals_t *ng)
     3140 +{
     3141 +        nfs_srv_t *ns = ng->nfs_srv;
     3142 +
     3143 +        ng->nfs_srv = NULL;
     3144 +
     3145 +        mutex_destroy(&ns->async_write_lock);
     3146 +        kmem_free(ns, sizeof (*ns));
     3147 +}
     3148 +
3085 3149  static int
3086 3150  rdma_setup_read_data2(struct nfsreadargs *ra, struct nfsrdresult *rr)
3087 3151  {
3088 3152          struct clist    *wcl;
3089 3153          int             wlist_len;
3090 3154          uint32_t        count = rr->rr_count;
3091 3155  
3092 3156          wcl = ra->ra_wlist;
3093 3157  
3094 3158          if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
3095 3159                  return (FALSE);
3096 3160          }
3097 3161  
3098 3162          wcl = ra->ra_wlist;
3099 3163          rr->rr_ok.rrok_wlist_len = wlist_len;
3100 3164          rr->rr_ok.rrok_wlist = wcl;
3101 3165  
3102 3166          return (TRUE);
3103 3167  }
    
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX