Print this page
11083 support NFS server in zone
Portions contributed by: Dan Kruchinin <dan.kruchinin@nexenta.com>
Portions contributed by: Stepan Zastupov <stepan.zastupov@gmail.com>
Portions contributed by: Joyce McIntosh <joyce.mcintosh@nexenta.com>
Portions contributed by: Mike Zeller <mike@mikezeller.net>
Portions contributed by: Dan McDonald <danmcd@joyent.com>
Portions contributed by: Gordon Ross <gordon.w.ross@gmail.com>
Portions contributed by: Vitaliy Gusev <gusev.vitaliy@gmail.com>
Reviewed by: Rick McNeal <rick.mcneal@nexenta.com>
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Jason King <jbk@joyent.com>
Reviewed by: C Fraire <cfraire@me.com>
Change-Id: I22f289d357503f9b48a0bc2482cc4328a6d43d16


   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  * Copyright 2014 Nexenta Systems, Inc.  All rights reserved.
  26  */
  27 




  28 #include <sys/systm.h>
  29 #include <rpc/auth.h>
  30 #include <rpc/clnt.h>
  31 #include <nfs/nfs4_kprot.h>
  32 #include <nfs/nfs4.h>
  33 #include <nfs/lm.h>
  34 #include <sys/cmn_err.h>
  35 #include <sys/disp.h>
  36 #include <sys/sdt.h>
  37 
  38 #include <sys/pathname.h>
  39 
  40 #include <sys/strsubr.h>
  41 #include <sys/ddi.h>
  42 
  43 #include <sys/vnode.h>
  44 #include <sys/sdt.h>
  45 #include <inet/common.h>
  46 #include <inet/ip.h>
  47 #include <inet/ip6.h>
  48 
  49 #define MAX_READ_DELEGATIONS 5
  50 
     /* Entered as RW_WRITER to store rfs4_deleg_policy, RW_READER to hold it. */
  51 krwlock_t rfs4_deleg_policy_lock;
     /* Current delegation policy; initialised to SRV_NEVER_DELEGATE. */
  52 srv_deleg_policy_t rfs4_deleg_policy = SRV_NEVER_DELEGATE;
  53 static int rfs4_deleg_wlp = 5;
     /* Held around every access to rfs4_deleg_disabled. */
  54 kmutex_t rfs4_deleg_lock;
     /*
      * Incremented by rfs4_disable_delegation() and decremented by
      * rfs4_enable_delegation(); while it is non-zero
      * rfs4_grant_delegation() returns NULL.
      */
  55 static int rfs4_deleg_disabled;
     /*
      * NOTE(review): presumably the retry limit mentioned in the header
      * comment of rfs4_cbinfo_hold() ("5 tries") -- confirm at the use site.
      */
  56 static int rfs4_max_setup_cb_tries = 5;
  57 
  58 #ifdef DEBUG
  59 
  60 static int rfs4_test_cbgetattr_fail = 0;
  61 int rfs4_cb_null;
  62 int rfs4_cb_debug;
  63 int rfs4_deleg_debug;
  64 
  65 #endif
  66 
  67 static void rfs4_recall_file(rfs4_file_t *,
  68     void (*recall)(rfs4_deleg_state_t *, bool_t),
  69     bool_t, rfs4_client_t *);
  70 static  void            rfs4_revoke_file(rfs4_file_t *);
  71 static  void            rfs4_cb_chflush(rfs4_cbinfo_t *);
  72 static  CLIENT          *rfs4_cb_getch(rfs4_cbinfo_t *);
  73 static  void            rfs4_cb_freech(rfs4_cbinfo_t *, CLIENT *, bool_t);
  74 static rfs4_deleg_state_t *rfs4_deleg_state(rfs4_state_t *,


 121 
 122                                 *pp = htons(port);
 123 
 124                                 ua[k] = '.';
 125                                 return (0);
 126                         } else {
 127                                 ua[k] = '.';
 128                                 return (EINVAL);
 129                         }
 130                 }
 131         }
 132 
 133         return (EINVAL);
 134 }
 135 
 136 /*
 137  * Update the delegation policy with the
 138  * value of "new_policy"
      *
      * The store to the global rfs4_deleg_policy is made while
      * rfs4_deleg_policy_lock is held as a writer.  Nothing is returned.
 139  */
 140 void
 141 rfs4_set_deleg_policy(srv_deleg_policy_t new_policy)
 142 {
 143         rw_enter(&rfs4_deleg_policy_lock, RW_WRITER);
 144         rfs4_deleg_policy = new_policy;
 145         rw_exit(&rfs4_deleg_policy_lock);
 146 }
 147 
 148 void
 149 rfs4_hold_deleg_policy(void)
 150 {
             /*
              * Enter rfs4_deleg_policy_lock as a reader and return with it
              * still held; rfs4_rele_deleg_policy() performs the matching
              * rw_exit().
              */
 151         rw_enter(&rfs4_deleg_policy_lock, RW_READER);
 152 }
 153 
 154 void
 155 rfs4_rele_deleg_policy(void)
 156 {
             /*
              * Exit rfs4_deleg_policy_lock.  rfs4_check_delegated() pairs
              * this with an earlier rfs4_hold_deleg_policy() call.
              */
 157         rw_exit(&rfs4_deleg_policy_lock);
 158 }
 159 






 160 

 161 /*
 162  * This free function is to be used when the client struct is being
 163  * released and nothing at all is needed of the callback info any
 164  * longer.
 165  */
 166 void
 167 rfs4_cbinfo_free(rfs4_cbinfo_t *cbp)
 168 {
 169         char *addr = cbp->cb_callback.cb_location.r_addr;
 170         char *netid = cbp->cb_callback.cb_location.r_netid;
 171 
 172         /* Free old address if any */
 173 
 174         if (addr)
 175                 kmem_free(addr, strlen(addr) + 1);
 176         if (netid)
 177                 kmem_free(netid, strlen(netid) + 1);
 178 
 179         addr = cbp->cb_newer.cb_callback.cb_location.r_addr;
 180         netid = cbp->cb_newer.cb_callback.cb_location.r_netid;


 193  * The server uses this to check the callback path supplied by the
 194  * client.  The callback connection is marked "in progress" while this
 195  * work is going on and then eventually marked either OK or FAILED.
 196  * This work can be done as part of a separate thread and at the end
 197  * of this the thread will exit or it may be done such that the caller
 198  * will continue with other work.
 199  */
 200 static void
 201 rfs4_do_cb_null(rfs4_client_t *cp)
 202 {
 203         struct timeval tv;
 204         CLIENT *ch;
 205         rfs4_cbstate_t newstate;
 206         rfs4_cbinfo_t *cbp = &cp->rc_cbinfo;
 207 
 208         mutex_enter(cbp->cb_lock);
 209         /* If another thread is doing CB_NULL RPC then return */
 210         if (cbp->cb_nullcaller == TRUE) {
 211                 mutex_exit(cbp->cb_lock);
 212                 rfs4_client_rele(cp);
 213                 return;
 214         }
 215 
 216         /* Mark the cbinfo as having a thread in the NULL callback */
 217         cbp->cb_nullcaller = TRUE;
 218 
 219         /*
 220          * Are there other threads still using the cbinfo client
 221          * handles?  If so, this thread must wait before going and
 222          * mucking around with the callback information
 223          */
 224         while (cbp->cb_refcnt != 0)
 225                 cv_wait(cbp->cb_cv_nullcaller, cbp->cb_lock);
 226 
 227         /*
 228          * This thread itself may find that new callback info has
 229          * arrived and is set up to handle this case and redrive the
 230          * call to the client's callback server.
 231          */
 232 retry:
 233         if (cbp->cb_newer.cb_new == TRUE &&


 261                 cbp->cb_ident = cbp->cb_newer.cb_ident;
 262                 cbp->cb_newer.cb_ident = 0;
 263 
 264                 /* no longer new */
 265                 cbp->cb_newer.cb_new = FALSE;
 266                 cbp->cb_newer.cb_confirmed = FALSE;
 267 
 268                 /* get rid of the old client handles that may exist */
 269                 rfs4_cb_chflush(cbp);
 270 
 271                 cbp->cb_state = CB_NONE;
 272                 cbp->cb_timefailed = 0; /* reset the clock */
 273                 cbp->cb_notified_of_cb_path_down = TRUE;
 274         }
 275 
 276         if (cbp->cb_state != CB_NONE) {
 277                 cv_broadcast(cbp->cb_cv);    /* let the others know */
 278                 cbp->cb_nullcaller = FALSE;
 279                 mutex_exit(cbp->cb_lock);
 280                 rfs4_client_rele(cp);
 281                 return;
 282         }
 283 
 284         /* mark rfs4_client_t as CALLBACK NULL in progress */
 285         cbp->cb_state = CB_INPROG;
 286         mutex_exit(cbp->cb_lock);
 287 
 288         /* get/generate a client handle */
 289         if ((ch = rfs4_cb_getch(cbp)) == NULL) {
 290                 mutex_enter(cbp->cb_lock);
 291                 cbp->cb_state = CB_BAD;
 292                 cbp->cb_timefailed = gethrestime_sec(); /* observability */
 293                 goto retry;
 294         }
 295 
 296 
 297         tv.tv_sec = 30;
 298         tv.tv_usec = 0;
 299         if (clnt_call(ch, CB_NULL, xdr_void, NULL, xdr_void, NULL, tv) != 0) {
 300                 newstate = CB_BAD;
 301         } else {


 303 #ifdef  DEBUG
 304                 rfs4_cb_null++;
 305 #endif
 306         }
 307 
 308         /* Check to see if the client has specified new callback info */
 309         mutex_enter(cbp->cb_lock);
 310         rfs4_cb_freech(cbp, ch, TRUE);
 311         if (cbp->cb_newer.cb_new == TRUE &&
 312             cbp->cb_newer.cb_confirmed == TRUE) {
 313                 goto retry;     /* give the CB_NULL another chance */
 314         }
 315 
 316         cbp->cb_state = newstate;
 317         if (cbp->cb_state == CB_BAD)
 318                 cbp->cb_timefailed = gethrestime_sec(); /* observability */
 319 
 320         cv_broadcast(cbp->cb_cv);    /* start up the other threads */
 321         cbp->cb_nullcaller = FALSE;
 322         mutex_exit(cbp->cb_lock);
 323 
 324         rfs4_client_rele(cp);

 325 }
 326 
 327 /*
 328  * Given a client struct, inspect the callback info to see if the
 329  * callback path is up and available.
 330  *
 331  * If new callback path is available and no one has set it up then
 332  * try to set it up. If setup is not successful after 5 tries (5 secs)
 333  * then gives up and returns NULL.
 334  *
 335  * If callback path is being initialized, then wait for the CB_NULL RPC
 336  * call to occur.
 337  */
 338 static rfs4_cbinfo_t *
 339 rfs4_cbinfo_hold(rfs4_client_t *cp)
 340 {
 341         rfs4_cbinfo_t *cbp = &cp->rc_cbinfo;
 342         int retries = 0;
 343 
 344         mutex_enter(cbp->cb_lock);


 670 
 671         mutex_exit(cbp->cb_lock);
 672 }
 673 
 674 /*
 675  * The server uses this when processing SETCLIENTID_CONFIRM.  Callback
 676  * information may have been provided on SETCLIENTID and this call
 677  * marks that information as confirmed and then starts a thread to
 678  * test the callback path.
      *
      * Nothing is done when cb_newer.cb_new is FALSE.  Otherwise a hold is
      * taken on the client's dbe before the thread is created; the thread
      * runs rfs4_do_cb_null(), which calls rfs4_client_rele() on the
      * client before it returns.
 679  */
 680 void
 681 rfs4_deleg_cb_check(rfs4_client_t *cp)
 682 {
             /* no new callback info was supplied: nothing to confirm */
 683         if (cp->rc_cbinfo.cb_newer.cb_new == FALSE)
 684                 return;
 685 
 686         cp->rc_cbinfo.cb_newer.cb_confirmed = TRUE;
 687 
 688         rfs4_dbe_hold(cp->rc_dbe); /* hold the client struct for thread */
 689 
             /* the value returned by thread_create() is explicitly discarded */
 690         (void) thread_create(NULL, 0, rfs4_do_cb_null, cp, 0, &p0, TS_RUN,
 691             minclsyspri);
 692 }
 693 
 694 static void
 695 rfs4args_cb_recall_free(nfs_cb_argop4 *argop)
 696 {
             /*
              * Release the file handle buffer carried in the CB_RECALL
              * arguments of "argop", if there is one.  The size handed to
              * kmem_free() is the recorded nfs_fh4_len.  "argop" itself is
              * not freed here.
              */
 697         CB_RECALL4args  *rec_argp;
 698 
 699         rec_argp = &argop->nfs_cb_argop4_u.opcbrecall;
 700         if (rec_argp->fh.nfs_fh4_val)
 701                 kmem_free(rec_argp->fh.nfs_fh4_val, rec_argp->fh.nfs_fh4_len);
 702 }
 703 
 704 /* ARGSUSED */
 705 static void
 706 rfs4args_cb_getattr_free(nfs_cb_argop4 *argop)
 707 {
 708         CB_GETATTR4args *argp;
 709 
 710         argp = &argop->nfs_cb_argop4_u.opcbgetattr;


 931                         (void) (*arg->recall)(dsp, arg->trunc);
 932         }
 933 
 934         mutex_enter(fp->rf_dinfo.rd_recall_lock);
 935         /*
 936          * Recall count may go negative if the parent thread that is
 937          * creating the individual callback threads does not modify
 938          * the recall_count field before the callback thread actually
 939          * gets a response from the CB_RECALL
 940          */
 941         fp->rf_dinfo.rd_recall_count--;
 942         if (fp->rf_dinfo.rd_recall_count == 0)
 943                 cv_signal(fp->rf_dinfo.rd_recall_cv);
 944         mutex_exit(fp->rf_dinfo.rd_recall_lock);
 945 
 946         mutex_enter(&cpr_lock);
 947         CALLB_CPR_EXIT(&cpr_info);
 948         mutex_destroy(&cpr_lock);
 949 
 950         rfs4_deleg_state_rele(dsp); /* release the hold for this thread */
 951 
 952         kmem_free(arg, sizeof (struct recall_arg));

 953 }
 954 
 955 struct master_recall_args {
         /*
          * Argument block for the do_recall_file() thread.  It is allocated
          * and filled in by rfs4_recall_file() and freed by do_recall_file().
          */
         /* file being recalled; a dbe hold is taken on it for the thread */
 956     rfs4_file_t *fp;
         /* callback copied into each per-delegation do_recall() argument */
 957     void (*recall)(rfs4_deleg_state_t *, bool_t);
         /* copied unchanged into each per-delegation do_recall() argument */
 958     bool_t trunc;
 959 };
 960 
 961 static void
 962 do_recall_file(struct master_recall_args *map)
 963 {
             /*
              * Thread body started by rfs4_recall_file().  It starts one
              * do_recall() thread for every valid delegation state on the
              * file, waits until rd_recall_count is back to zero, then
              * releases the file and frees "map".  If a recall is already
              * in progress it only drops its hold and frees "map".
              */
 964         rfs4_file_t *fp = map->fp;
 965         rfs4_deleg_state_t *dsp;
 966         struct recall_arg *arg;
 967         callb_cpr_t cpr_info;
 968         kmutex_t cpr_lock;
 969         int32_t recall_count;
 970 
 971         rfs4_dbe_lock(fp->rf_dbe);
 972 
 973         /* Recall already in progress ? */
 974         mutex_enter(fp->rf_dinfo.rd_recall_lock);
 975         if (fp->rf_dinfo.rd_recall_count != 0) {
 976                 mutex_exit(fp->rf_dinfo.rd_recall_lock);
                     /* drop the hold rfs4_recall_file() took for this thread */
 977                 rfs4_dbe_rele_nolock(fp->rf_dbe);
 978                 rfs4_dbe_unlock(fp->rf_dbe);
 979                 kmem_free(map, sizeof (struct master_recall_args));
 980                 return;
 981         }
 982 
 983         mutex_exit(fp->rf_dinfo.rd_recall_lock);
 984 
             /*
              * The CPR state is set up only after the early return above,
              * so that path has nothing to tear down.
              */
 985         mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL);
 986         CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr, "v4RecallFile");
 987 
             /* walk the file's delegation states with the file's dbe lock held */
 988         recall_count = 0;
 989         for (dsp = list_head(&fp->rf_delegstatelist); dsp != NULL;
 990             dsp = list_next(&fp->rf_delegstatelist, dsp)) {
 991 
 992                 rfs4_dbe_lock(dsp->rds_dbe);
 993                 /*
 994                  * if this delegation state
 995                  * is being reaped skip it
 996                  */
 997                 if (rfs4_dbe_is_invalid(dsp->rds_dbe)) {
 998                         rfs4_dbe_unlock(dsp->rds_dbe);
 999                         continue;
1000                 }
1001 
1002                 /* hold for receiving thread */
1003                 rfs4_dbe_hold(dsp->rds_dbe);
1004                 rfs4_dbe_unlock(dsp->rds_dbe);
1005 
                     /* per-thread argument block; do_recall() frees it */
1006                 arg = kmem_alloc(sizeof (struct recall_arg), KM_SLEEP);
1007                 arg->recall = map->recall;
1008                 arg->trunc = map->trunc;
1009                 arg->dsp = dsp;
1010 
                     /* counted locally; added to rd_recall_count after the loop */
1011                 recall_count++;
1012 
1013                 (void) thread_create(NULL, 0, do_recall, arg, 0, &p0, TS_RUN,
1014                     minclsyspri);
1015         }
1016 
1017         rfs4_dbe_unlock(fp->rf_dbe);
1018 
1019         mutex_enter(fp->rf_dinfo.rd_recall_lock);
1020         /*
1021          * Recall count may go negative if the parent thread that is
1022          * creating the individual callback threads does not modify
1023          * the recall_count field before the callback thread actually
1024          * gets a response from the CB_RECALL
1025          */
1026         fp->rf_dinfo.rd_recall_count += recall_count;
             /* do_recall() signals rd_recall_cv when the count reaches zero */
1027         while (fp->rf_dinfo.rd_recall_count)
1028                 cv_wait(fp->rf_dinfo.rd_recall_cv, fp->rf_dinfo.rd_recall_lock);
1029 
1030         mutex_exit(fp->rf_dinfo.rd_recall_lock);
1031 
1032         DTRACE_PROBE1(nfss__i__recall_done, rfs4_file_t *, fp);
1033         rfs4_file_rele(fp);
1034         kmem_free(map, sizeof (struct master_recall_args));
             /*
              * NOTE(review): CALLB_CPR_EXIT() is invoked with cpr_lock held
              * and presumably drops it before mutex_destroy() -- confirm
              * against sys/callb.h.
              */
1035         mutex_enter(&cpr_lock);
1036         CALLB_CPR_EXIT(&cpr_info);
1037         mutex_destroy(&cpr_lock);

1038 }
1039 
1040 static void
1041 rfs4_recall_file(rfs4_file_t *fp,
1042     void (*recall)(rfs4_deleg_state_t *, bool_t trunc),
1043     bool_t trunc, rfs4_client_t *cp)
1044 {
             /*
              * Start an asynchronous recall of the delegations on "fp".
              * Returns without doing anything when the file has no
              * delegation (rd_dtype is OPEN_DELEGATE_NONE).  Otherwise the
              * recall bookkeeping is updated under the file's dbe lock and
              * a do_recall_file() thread is created, with a hold on the
              * file taken on the thread's behalf.
              *
              * "cp" may be NULL, in which case rd_conflicted_client is left
              * unchanged.
              */
1045         struct master_recall_args *args;
1046 
1047         rfs4_dbe_lock(fp->rf_dbe);
1048         if (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
1049                 rfs4_dbe_unlock(fp->rf_dbe);
1050                 return;
1051         }
1052         rfs4_dbe_hold(fp->rf_dbe);   /* hold for new thread */
1053 
1054         /*
1055          * Mark the time we started the recall processing.
1056          * If it has been previously recalled, do not reset the
1057          * timer since this is used for the revocation decision.
1058          */
1059         if (fp->rf_dinfo.rd_time_recalled == 0)
1060                 fp->rf_dinfo.rd_time_recalled = gethrestime_sec();
1061         fp->rf_dinfo.rd_ever_recalled = TRUE; /* used for policy decision */
1062         /* Client causing recall not always available */
1063         if (cp)
1064                 fp->rf_dinfo.rd_conflicted_client = cp->rc_clientid;
1065 
1066         rfs4_dbe_unlock(fp->rf_dbe);
1067 
             /* argument block is freed by do_recall_file() */
1068         args = kmem_alloc(sizeof (struct master_recall_args), KM_SLEEP);
1069         args->fp = fp;
1070         args->recall = recall;
1071         args->trunc = trunc;
1072 
1073         (void) thread_create(NULL, 0, do_recall_file, args, 0, &p0, TS_RUN,
1074             minclsyspri);
1075 }
1076 
1077 void
1078 rfs4_recall_deleg(rfs4_file_t *fp, bool_t trunc, rfs4_client_t *cp)
1079 {
1080         time_t elapsed1, elapsed2;
1081 
1082         if (fp->rf_dinfo.rd_time_recalled != 0) {
1083                 elapsed1 = gethrestime_sec() - fp->rf_dinfo.rd_time_recalled;
1084                 elapsed2 = gethrestime_sec() - fp->rf_dinfo.rd_time_lastwrite;
1085                 /* First check to see if a revocation should occur */
1086                 if (elapsed1 > rfs4_lease_time &&
1087                     elapsed2 > rfs4_lease_time) {
1088                         rfs4_revoke_file(fp);
1089                         return;
1090                 }
1091                 /*
1092                  * Next check to see if a recall should be done again
1093                  * so quickly.


1189                  * have been recalled already.
1190                  */
1191                 if ((access & OPEN4_SHARE_ACCESS_WRITE) ||
1192                     (deny & OPEN4_SHARE_DENY_READ))
1193                         return (OPEN_DELEGATE_NONE);
1194                 return (OPEN_DELEGATE_READ);
1195 
1196         case OPEN_DELEGATE_WRITE:
1197                 return (OPEN_DELEGATE_WRITE);
1198         }
1199 
1200         /* Shouldn't get here */
1201         return (OPEN_DELEGATE_NONE);
1202 }
1203 
1204 /*
1205  * Given the desired delegation type and the "history" of the file
1206  * determine the actual delegation type to return.
1207  */
1208 static open_delegation_type4
1209 rfs4_delegation_policy(open_delegation_type4 dtype,
1210     rfs4_dinfo_t *dinfo, clientid4 cid)
1211 {
1212         time_t elapsed;
1213 
1214         if (rfs4_deleg_policy != SRV_NORMAL_DELEGATE)
1215                 return (OPEN_DELEGATE_NONE);
1216 
1217         /*
1218          * Has this file/delegation ever been recalled?  If not then
1219          * no further checks for a delegation race need to be done.
1220          * However if a recall has occurred, then check to see if a
1221          * client has caused its own delegation recall to occur.  If
1222          * not, then has a delegation for this file been returned
1223          * recently?  If so, then do not assign a new delegation to
1224          * avoid a "delegation race" between the original client and
1225          * the new/conflicting client.
1226          */
1227         if (dinfo->rd_ever_recalled == TRUE) {
1228                 if (dinfo->rd_conflicted_client != cid) {
1229                         elapsed = gethrestime_sec() - dinfo->rd_time_returned;
1230                         if (elapsed < rfs4_lease_time)
1231                                 return (OPEN_DELEGATE_NONE);
1232                 }
1233         }
1234 


1237             dinfo->rd_rdgrants > MAX_READ_DELEGATIONS)
1238                 return (OPEN_DELEGATE_NONE);
1239 
1240         /*
1241          * Should consider limiting total number of read/write
1242          * delegations the server will permit.
1243          */
1244 
1245         return (dtype);
1246 }
1247 
1248 /*
1249  * Try to grant a delegation for an open, given the state. The routine
1250  * returns the delegation state granted, or NULL if none was granted.
1251  *
1252  * The state and associated file entry must be locked
1253  */
1254 rfs4_deleg_state_t *
1255 rfs4_grant_delegation(delegreq_t dreq, rfs4_state_t *sp, int *recall)
1256 {

1257         rfs4_file_t *fp = sp->rs_finfo;
1258         open_delegation_type4 dtype;
1259         int no_delegation;
1260 
1261         ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
1262         ASSERT(rfs4_dbe_islocked(fp->rf_dbe));
1263 


1264         /* Is the server even providing delegations? */
1265         if (rfs4_deleg_policy == SRV_NEVER_DELEGATE || dreq == DELEG_NONE)

1266                 return (NULL);

1267 
1268         /* Check to see if delegations have been temporarily disabled */
1269         mutex_enter(&rfs4_deleg_lock);
1270         no_delegation = rfs4_deleg_disabled;
1271         mutex_exit(&rfs4_deleg_lock);
1272 
1273         if (no_delegation)
1274                 return (NULL);
1275 
1276         /* Don't grant a delegation if a deletion is impending. */
1277         if (fp->rf_dinfo.rd_hold_grant > 0) {
1278                 return (NULL);
1279         }
1280 
1281         /*
1282          * Don't grant a delegation if there are any lock manager
1283          * (NFSv2/v3) locks for the file.  This is a bit of a hack (e.g.,
1284          * if there are only read locks we should be able to grant a
1285          * read-only delegation), but it's good enough for now.
1286          *
1287          * MT safety: the lock manager checks for conflicting delegations
1288          * before processing a lock request.  That check will block until
1289          * we are done here.  So if the lock manager acquires a lock after
1290          * we decide to grant the delegation, the delegation will get
1291          * immediately recalled (if there's a conflict), so we're safe.


1332                  * If we are waiting for a delegation to be returned then
1333                  * don't delegate this file. We do this for correctness as
1334                  * well as if the file is being recalled we would likely
1335                  * recall this file again.
1336                  */
1337 
1338                 if (fp->rf_dinfo.rd_time_recalled != 0 ||
1339                     fp->rf_dinfo.rd_time_rm_delayed != 0)
1340                         return (NULL);
1341 
1342                 /* Get the "best" delegation candidate */
1343                 dtype = rfs4_check_delegation(sp, fp);
1344 
1345                 if (dtype == OPEN_DELEGATE_NONE)
1346                         return (NULL);
1347 
1348                 /*
1349                  * Based on policy and the history of the file get the
1350                  * actual delegation.
1351                  */
1352                 dtype = rfs4_delegation_policy(dtype, &fp->rf_dinfo,
1353                     sp->rs_owner->ro_client->rc_clientid);
1354 
1355                 if (dtype == OPEN_DELEGATE_NONE)
1356                         return (NULL);
1357                 break;
1358         default:
1359                 return (NULL);
1360         }
1361 
1362         /* set the delegation for the state */
1363         return (rfs4_deleg_state(sp, dtype, recall));
1364 }
1365 
1366 void
1367 rfs4_set_deleg_response(rfs4_deleg_state_t *dsp, open_delegation4 *dp,
1368     nfsace4 *ace,  int recall)
1369 {
1370         open_write_delegation4 *wp;
1371         open_read_delegation4 *rp;
1372         nfs_space_limit4 *spl;


1421                 break;
1422         }
1423 }
1424 
1425 /*
1426  * Check if the file is delegated via the provided file struct.
1427  * Return TRUE if it is delegated.  This is intended for use by
1428  * the v4 server.  The v2/v3 server code should use rfs4_check_delegated().
1429  *
1430  * Note that if the file is found to have a delegation, it is
1431  * recalled, unless the clientid of the caller matches the clientid of the
1432  * delegation. If the caller has specified, there is a slight delay
1433  * inserted in the hopes that the delegation will be returned quickly.
1434  */
1435 bool_t
1436 rfs4_check_delegated_byfp(int mode, rfs4_file_t *fp,
1437     bool_t trunc, bool_t do_delay, bool_t is_rm, clientid4 *cp)
1438 {
1439         rfs4_deleg_state_t *dsp;
1440 


1441         /* Is delegation enabled? */
1442         if (rfs4_deleg_policy == SRV_NEVER_DELEGATE)
1443                 return (FALSE);
1444 
1445         /* do we have a delegation on this file? */
1446         rfs4_dbe_lock(fp->rf_dbe);
1447         if (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
1448                 if (is_rm)
1449                         fp->rf_dinfo.rd_hold_grant++;
1450                 rfs4_dbe_unlock(fp->rf_dbe);
1451                 return (FALSE);
1452         }
1453         /*
1454          * do we have a write delegation on this file or are we
1455          * requesting write access to a file with any type of existing
1456          * delegation?
1457          */
1458         if (mode == FWRITE || fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE) {
1459                 if (cp != NULL) {
1460                         dsp = list_head(&fp->rf_delegstatelist);
1461                         if (dsp == NULL) {
1462                                 rfs4_dbe_unlock(fp->rf_dbe);


1487                 if (fp->rf_dinfo.rd_dtype != OPEN_DELEGATE_NONE) {
1488                         fp->rf_dinfo.rd_time_rm_delayed = gethrestime_sec();
1489                         rfs4_dbe_unlock(fp->rf_dbe);
1490                         return (TRUE);
1491                 }
1492         }
1493         if (is_rm)
1494                 fp->rf_dinfo.rd_hold_grant++;
1495         rfs4_dbe_unlock(fp->rf_dbe);
1496         return (FALSE);
1497 }
1498 
1499 /*
1500  * Check if the file is delegated in the case of a v2 or v3 access.
1501  * Return TRUE if it is delegated which in turn means that v2 should
1502  * drop the request and in the case of v3 JUKEBOX should be returned.
      *
      * The delegation policy is held (reader) for the duration of the
      * check.  When the policy is SRV_NEVER_DELEGATE, or no rfs4_file_t is
      * found for "vp", FALSE is returned.  Otherwise the answer comes from
      * rfs4_check_delegated_byfp(), called with do_delay TRUE, is_rm FALSE
      * and no client id.
1503  */
1504 bool_t
1505 rfs4_check_delegated(int mode, vnode_t *vp, bool_t trunc)
1506 {

1507         rfs4_file_t *fp;
             /* NOTE(review): FALSE presumably means lookup only -- confirm */
1508         bool_t create = FALSE;
1509         bool_t rc = FALSE;
1510 
1511         rfs4_hold_deleg_policy();

1512 
1513         /* Is delegation enabled? */
1514         if (rfs4_deleg_policy != SRV_NEVER_DELEGATE) {
1515                 fp = rfs4_findfile(vp, NULL, &create);
1516                 if (fp != NULL) {
1517                         if (rfs4_check_delegated_byfp(mode, fp, trunc,
1518                             TRUE, FALSE, NULL)) {
1519                                 rc = TRUE;
1520                         }
                             /* release the file returned by rfs4_findfile() */
1521                         rfs4_file_rele(fp);
1522                 }
1523         }
1524         rfs4_rele_deleg_policy();
1525         return (rc);
1526 }
1527 
1528 /*
1529  * Release a hold on the hold_grant counter which
1530  * prevents delegation from being granted while a remove
1531  * or a rename is in progress.
      *
      * Does nothing when the policy is SRV_NEVER_DELEGATE.  Otherwise,
      * under the file's dbe lock, rd_hold_grant is decremented (it is
      * asserted to be positive) and rd_time_rm_delayed is reset to 0.
1532  */
1533 void
1534 rfs4_clear_dont_grant(rfs4_file_t *fp)
1535 {
1536         if (rfs4_deleg_policy == SRV_NEVER_DELEGATE)


1537                 return;
1538         rfs4_dbe_lock(fp->rf_dbe);
1539         ASSERT(fp->rf_dinfo.rd_hold_grant > 0);
1540         fp->rf_dinfo.rd_hold_grant--;
1541         fp->rf_dinfo.rd_time_rm_delayed = 0;
1542         rfs4_dbe_unlock(fp->rf_dbe);
1543 }
1544 
1545 /*
1546  * State support for delegation.
1547  * Set the state delegation type for this state;
1548  * This routine is called from open via rfs4_grant_delegation and the entry
1549  * locks on sp and sp->rs_finfo are assumed.
1550  */
1551 static rfs4_deleg_state_t *
1552 rfs4_deleg_state(rfs4_state_t *sp, open_delegation_type4 dtype, int *recall)
1553 {
1554         rfs4_file_t *fp = sp->rs_finfo;
1555         bool_t create = TRUE;
1556         rfs4_deleg_state_t *dsp;


1852 bool_t
1853 rfs4_is_deleg(rfs4_state_t *sp)
1854 {
             /*
              * Return TRUE if any delegation state on the file of "sp"
              * belongs to a client other than the client owning "sp";
              * return FALSE otherwise, including when the list is empty.
              * The file's dbe lock must be held by the caller (asserted).
              */
1855         rfs4_deleg_state_t *dsp;
1856         rfs4_file_t *fp = sp->rs_finfo;
1857         rfs4_client_t *cp = sp->rs_owner->ro_client;
1858 
1859         ASSERT(rfs4_dbe_islocked(fp->rf_dbe));
1860         for (dsp = list_head(&fp->rf_delegstatelist); dsp != NULL;
1861             dsp = list_next(&fp->rf_delegstatelist, dsp)) {
                     /* a delegation held by some other client */
1862                 if (cp != dsp->rds_client) {
1863                         return (TRUE);
1864                 }
1865         }
1866         return (FALSE);
1867 }
1868 
1869 void
1870 rfs4_disable_delegation(void)
1871 {
             /*
              * Increment rfs4_deleg_disabled under rfs4_deleg_lock.  While
              * the counter is non-zero rfs4_grant_delegation() returns NULL.
              * rfs4_enable_delegation() performs the matching decrement.
              */
1872         mutex_enter(&rfs4_deleg_lock);



1873         rfs4_deleg_disabled++;
1874         mutex_exit(&rfs4_deleg_lock);
1875 }
1876 
1877 void
1878 rfs4_enable_delegation(void)
1879 {
             /*
              * Decrement rfs4_deleg_disabled under rfs4_deleg_lock.  The
              * counter is asserted to be positive, i.e. an earlier
              * rfs4_disable_delegation() call is expected.
              */
1880         mutex_enter(&rfs4_deleg_lock);



1881         ASSERT(rfs4_deleg_disabled > 0);
1882         rfs4_deleg_disabled--;
1883         mutex_exit(&rfs4_deleg_lock);
1884 }
1885 
1886 void
1887 rfs4_mon_hold(void *arg)
1888 {
             /* "arg" is treated as an rfs4_file_t *; take a hold on its dbe */
1889         rfs4_file_t *fp = arg;
1890 
1891         rfs4_dbe_hold(fp->rf_dbe);
1892 }
1893 
1894 void
1895 rfs4_mon_rele(void *arg)
1896 {
             /*
              * "arg" is treated as an rfs4_file_t *; its dbe is released
              * through rfs4_dbe_rele_nolock().
              */
1897         rfs4_file_t *fp = arg;
1898 
1899         rfs4_dbe_rele_nolock(fp->rf_dbe);
1900 }


   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.

  25  */
  26 
  27 /*
  28  * Copyright 2018 Nexenta Systems, Inc.
  29  */
  30 
  31 #include <sys/systm.h>
  32 #include <rpc/auth.h>
  33 #include <rpc/clnt.h>
  34 #include <nfs/nfs4_kprot.h>
  35 #include <nfs/nfs4.h>
  36 #include <nfs/lm.h>
  37 #include <sys/cmn_err.h>
  38 #include <sys/disp.h>
  39 #include <sys/sdt.h>
  40 
  41 #include <sys/pathname.h>
  42 
  43 #include <sys/strsubr.h>
  44 #include <sys/ddi.h>
  45 
  46 #include <sys/vnode.h>
  47 #include <sys/sdt.h>
  48 #include <inet/common.h>
  49 #include <inet/ip.h>
  50 #include <inet/ip6.h>
  51 
  52 #define MAX_READ_DELEGATIONS 5
  53 


  54 static int rfs4_deleg_wlp = 5;

  55 static int rfs4_deleg_disabled;
  56 static int rfs4_max_setup_cb_tries = 5;
  57 
  58 #ifdef DEBUG
  59 
  60 static int rfs4_test_cbgetattr_fail = 0;
  61 int rfs4_cb_null;
  62 int rfs4_cb_debug;
  63 int rfs4_deleg_debug;
  64 
  65 #endif
  66 
  67 static void rfs4_recall_file(rfs4_file_t *,
  68     void (*recall)(rfs4_deleg_state_t *, bool_t),
  69     bool_t, rfs4_client_t *);
  70 static  void            rfs4_revoke_file(rfs4_file_t *);
  71 static  void            rfs4_cb_chflush(rfs4_cbinfo_t *);
  72 static  CLIENT          *rfs4_cb_getch(rfs4_cbinfo_t *);
  73 static  void            rfs4_cb_freech(rfs4_cbinfo_t *, CLIENT *, bool_t);
  74 static rfs4_deleg_state_t *rfs4_deleg_state(rfs4_state_t *,


 121 
 122                                 *pp = htons(port);
 123 
 124                                 ua[k] = '.';
 125                                 return (0);
 126                         } else {
 127                                 ua[k] = '.';
 128                                 return (EINVAL);
 129                         }
 130                 }
 131         }
 132 
 133         return (EINVAL);
 134 }
 135 
 136 /*
 137  * Update the delegation policy with the
 138  * value of "new_policy"
      *
      * The policy is stored in nsrv4->nfs4_deleg_policy while
      * nsrv4->deleg_policy_lock is held as a writer.  Nothing is returned.
 139  */
 140 void
 141 rfs4_set_deleg_policy(nfs4_srv_t *nsrv4, srv_deleg_policy_t new_policy)
 142 {
 143         rw_enter(&nsrv4->deleg_policy_lock, RW_WRITER);
 144         nsrv4->nfs4_deleg_policy = new_policy;
 145         rw_exit(&nsrv4->deleg_policy_lock);
 146 }
 147 
 148 void
 149 rfs4_hold_deleg_policy(nfs4_srv_t *nsrv4)
 150 {
             /*
              * Enter nsrv4->deleg_policy_lock as a reader and return with it
              * still held; rfs4_rele_deleg_policy() performs the rw_exit().
              */
 151         rw_enter(&nsrv4->deleg_policy_lock, RW_READER);
 152 }
 153 
 154 void
 155 rfs4_rele_deleg_policy(nfs4_srv_t *nsrv4)
 156 {
 157         rw_exit(&nsrv4->deleg_policy_lock);
 158 }
 159 
 160 srv_deleg_policy_t
 161 nfs4_get_deleg_policy()
 162 {
 163         nfs4_srv_t *nsrv4 = nfs4_get_srv();
 164         return (nsrv4->nfs4_deleg_policy);
 165 }
 166 
 167 
 168 /*
 169  * This free function is to be used when the client struct is being
 170  * released and nothing at all is needed of the callback info any
 171  * longer.
 172  */
 173 void
 174 rfs4_cbinfo_free(rfs4_cbinfo_t *cbp)
 175 {
 176         char *addr = cbp->cb_callback.cb_location.r_addr;
 177         char *netid = cbp->cb_callback.cb_location.r_netid;
 178 
 179         /* Free old address if any */
 180 
 181         if (addr)
 182                 kmem_free(addr, strlen(addr) + 1);
 183         if (netid)
 184                 kmem_free(netid, strlen(netid) + 1);
 185 
 186         addr = cbp->cb_newer.cb_callback.cb_location.r_addr;
 187         netid = cbp->cb_newer.cb_callback.cb_location.r_netid;


 200  * The server uses this to check the callback path supplied by the
 201  * client.  The callback connection is marked "in progress" while this
 202  * work is going on and then eventually marked either OK or FAILED.
 203  * This work can be done as part of a separate thread and at the end
 204  * of this the thread will exit or it may be done such that the caller
 205  * will continue with other work.
 206  */
 207 static void
 208 rfs4_do_cb_null(rfs4_client_t *cp)
 209 {
 210         struct timeval tv;
 211         CLIENT *ch;
 212         rfs4_cbstate_t newstate;
 213         rfs4_cbinfo_t *cbp = &cp->rc_cbinfo;
 214 
 215         mutex_enter(cbp->cb_lock);
 216         /* If another thread is doing CB_NULL RPC then return */
 217         if (cbp->cb_nullcaller == TRUE) {
 218                 mutex_exit(cbp->cb_lock);
 219                 rfs4_client_rele(cp);
 220                 zthread_exit();
 221         }
 222 
 223         /* Mark the cbinfo as having a thread in the NULL callback */
 224         cbp->cb_nullcaller = TRUE;
 225 
 226         /*
 227          * Are there other threads still using the cbinfo client
 228          * handles?  If so, this thread must wait before going and
 229          * mucking around with the callback information
 230          */
 231         while (cbp->cb_refcnt != 0)
 232                 cv_wait(cbp->cb_cv_nullcaller, cbp->cb_lock);
 233 
 234         /*
 235          * This thread itself may find that new callback info has
 236          * arrived and is set up to handle this case and redrive the
 237          * call to the client's callback server.
 238          */
 239 retry:
 240         if (cbp->cb_newer.cb_new == TRUE &&


 268                 cbp->cb_ident = cbp->cb_newer.cb_ident;
 269                 cbp->cb_newer.cb_ident = 0;
 270 
 271                 /* no longer new */
 272                 cbp->cb_newer.cb_new = FALSE;
 273                 cbp->cb_newer.cb_confirmed = FALSE;
 274 
 275                 /* get rid of the old client handles that may exist */
 276                 rfs4_cb_chflush(cbp);
 277 
 278                 cbp->cb_state = CB_NONE;
 279                 cbp->cb_timefailed = 0; /* reset the clock */
 280                 cbp->cb_notified_of_cb_path_down = TRUE;
 281         }
 282 
 283         if (cbp->cb_state != CB_NONE) {
 284                 cv_broadcast(cbp->cb_cv);    /* let the others know */
 285                 cbp->cb_nullcaller = FALSE;
 286                 mutex_exit(cbp->cb_lock);
 287                 rfs4_client_rele(cp);
 288                 zthread_exit();
 289         }
 290 
 291         /* mark rfs4_client_t as CALLBACK NULL in progress */
 292         cbp->cb_state = CB_INPROG;
 293         mutex_exit(cbp->cb_lock);
 294 
 295         /* get/generate a client handle */
 296         if ((ch = rfs4_cb_getch(cbp)) == NULL) {
 297                 mutex_enter(cbp->cb_lock);
 298                 cbp->cb_state = CB_BAD;
 299                 cbp->cb_timefailed = gethrestime_sec(); /* observability */
 300                 goto retry;
 301         }
 302 
 303 
 304         tv.tv_sec = 30;
 305         tv.tv_usec = 0;
 306         if (clnt_call(ch, CB_NULL, xdr_void, NULL, xdr_void, NULL, tv) != 0) {
 307                 newstate = CB_BAD;
 308         } else {


 310 #ifdef  DEBUG
 311                 rfs4_cb_null++;
 312 #endif
 313         }
 314 
 315         /* Check to see if the client has specified new callback info */
 316         mutex_enter(cbp->cb_lock);
 317         rfs4_cb_freech(cbp, ch, TRUE);
 318         if (cbp->cb_newer.cb_new == TRUE &&
 319             cbp->cb_newer.cb_confirmed == TRUE) {
 320                 goto retry;     /* give the CB_NULL another chance */
 321         }
 322 
 323         cbp->cb_state = newstate;
 324         if (cbp->cb_state == CB_BAD)
 325                 cbp->cb_timefailed = gethrestime_sec(); /* observability */
 326 
 327         cv_broadcast(cbp->cb_cv);    /* start up the other threads */
 328         cbp->cb_nullcaller = FALSE;
 329         mutex_exit(cbp->cb_lock);

 330         rfs4_client_rele(cp);
 331         zthread_exit();
 332 }
 333 
 334 /*
 335  * Given a client struct, inspect the callback info to see if the
 336  * callback path is up and available.
 337  *
 338  * If new callback path is available and no one has set it up then
 339  * try to set it up. If setup is not successful after 5 tries (5 secs)
 340  * then gives up and returns NULL.
 341  *
 342  * If callback path is being initialized, then wait for the CB_NULL RPC
 343  * call to occur.
 344  */
 345 static rfs4_cbinfo_t *
 346 rfs4_cbinfo_hold(rfs4_client_t *cp)
 347 {
 348         rfs4_cbinfo_t *cbp = &cp->rc_cbinfo;
 349         int retries = 0;
 350 
 351         mutex_enter(cbp->cb_lock);


 677 
 678         mutex_exit(cbp->cb_lock);
 679 }
 680 
 681 /*
 682  * The server uses this when processing SETCLIENTID_CONFIRM.  Callback
 683  * information may have been provided on SETCLIENTID and this call
 684  * marks that information as confirmed and then starts a thread to
 685  * test the callback path.
 686  */
 687 void
 688 rfs4_deleg_cb_check(rfs4_client_t *cp)
 689 {
 690         if (cp->rc_cbinfo.cb_newer.cb_new == FALSE)
 691                 return;
 692 
 693         cp->rc_cbinfo.cb_newer.cb_confirmed = TRUE;
 694 
 695         rfs4_dbe_hold(cp->rc_dbe); /* hold the client struct for thread */
 696 
 697         (void) zthread_create(NULL, 0, rfs4_do_cb_null, cp, 0,
 698             minclsyspri);
 699 }
 700 
 701 static void
 702 rfs4args_cb_recall_free(nfs_cb_argop4 *argop)
 703 {
 704         CB_RECALL4args  *rec_argp;
 705 
 706         rec_argp = &argop->nfs_cb_argop4_u.opcbrecall;
 707         if (rec_argp->fh.nfs_fh4_val)
 708                 kmem_free(rec_argp->fh.nfs_fh4_val, rec_argp->fh.nfs_fh4_len);
 709 }
 710 
 711 /* ARGSUSED */
 712 static void
 713 rfs4args_cb_getattr_free(nfs_cb_argop4 *argop)
 714 {
 715         CB_GETATTR4args *argp;
 716 
 717         argp = &argop->nfs_cb_argop4_u.opcbgetattr;


 938                         (void) (*arg->recall)(dsp, arg->trunc);
 939         }
 940 
 941         mutex_enter(fp->rf_dinfo.rd_recall_lock);
 942         /*
 943          * Recall count may go negative if the parent thread that is
 944          * creating the individual callback threads does not modify
 945          * the recall_count field before the callback thread actually
 946          * gets a response from the CB_RECALL
 947          */
 948         fp->rf_dinfo.rd_recall_count--;
 949         if (fp->rf_dinfo.rd_recall_count == 0)
 950                 cv_signal(fp->rf_dinfo.rd_recall_cv);
 951         mutex_exit(fp->rf_dinfo.rd_recall_lock);
 952 
 953         mutex_enter(&cpr_lock);
 954         CALLB_CPR_EXIT(&cpr_info);
 955         mutex_destroy(&cpr_lock);
 956 
 957         rfs4_deleg_state_rele(dsp); /* release the hold for this thread */

 958         kmem_free(arg, sizeof (struct recall_arg));
 959         zthread_exit();
 960 }
 961 
 962 struct master_recall_args {
 963     rfs4_file_t *fp;
 964     void (*recall)(rfs4_deleg_state_t *, bool_t);
 965     bool_t trunc;
 966 };
 967 
 968 static void
 969 do_recall_file(struct master_recall_args *map)
 970 {
 971         rfs4_file_t *fp = map->fp;
 972         rfs4_deleg_state_t *dsp;
 973         struct recall_arg *arg;
 974         callb_cpr_t cpr_info;
 975         kmutex_t cpr_lock;
 976         int32_t recall_count;
 977 
 978         rfs4_dbe_lock(fp->rf_dbe);
 979 
 980         /* Recall already in progress ? */
 981         mutex_enter(fp->rf_dinfo.rd_recall_lock);
 982         if (fp->rf_dinfo.rd_recall_count != 0) {
 983                 mutex_exit(fp->rf_dinfo.rd_recall_lock);
 984                 rfs4_dbe_rele_nolock(fp->rf_dbe);
 985                 rfs4_dbe_unlock(fp->rf_dbe);
 986                 kmem_free(map, sizeof (struct master_recall_args));
 987                 zthread_exit();
 988         }
 989 
 990         mutex_exit(fp->rf_dinfo.rd_recall_lock);
 991 
 992         mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL);
 993         CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr, "v4RecallFile");
 994 
 995         recall_count = 0;
 996         for (dsp = list_head(&fp->rf_delegstatelist); dsp != NULL;
 997             dsp = list_next(&fp->rf_delegstatelist, dsp)) {
 998 
 999                 rfs4_dbe_lock(dsp->rds_dbe);
1000                 /*
1001                  * if this delegation state
1002                  * is being reaped skip it
1003                  */
1004                 if (rfs4_dbe_is_invalid(dsp->rds_dbe)) {
1005                         rfs4_dbe_unlock(dsp->rds_dbe);
1006                         continue;
1007                 }
1008 
1009                 /* hold for receiving thread */
1010                 rfs4_dbe_hold(dsp->rds_dbe);
1011                 rfs4_dbe_unlock(dsp->rds_dbe);
1012 
1013                 arg = kmem_alloc(sizeof (struct recall_arg), KM_SLEEP);
1014                 arg->recall = map->recall;
1015                 arg->trunc = map->trunc;
1016                 arg->dsp = dsp;
1017 
1018                 recall_count++;
1019 
1020                 (void) zthread_create(NULL, 0, do_recall, arg, 0,
1021                     minclsyspri);
1022         }
1023 
1024         rfs4_dbe_unlock(fp->rf_dbe);
1025 
1026         mutex_enter(fp->rf_dinfo.rd_recall_lock);
1027         /*
1028          * Recall count may go negative if the parent thread that is
1029          * creating the individual callback threads does not modify
1030          * the recall_count field before the callback thread actually
1031          * gets a response from the CB_RECALL
1032          */
1033         fp->rf_dinfo.rd_recall_count += recall_count;
1034         while (fp->rf_dinfo.rd_recall_count)
1035                 cv_wait(fp->rf_dinfo.rd_recall_cv, fp->rf_dinfo.rd_recall_lock);
1036 
1037         mutex_exit(fp->rf_dinfo.rd_recall_lock);
1038 
1039         DTRACE_PROBE1(nfss__i__recall_done, rfs4_file_t *, fp);
1040         rfs4_file_rele(fp);
1041         kmem_free(map, sizeof (struct master_recall_args));
1042         mutex_enter(&cpr_lock);
1043         CALLB_CPR_EXIT(&cpr_info);
1044         mutex_destroy(&cpr_lock);
1045         zthread_exit();
1046 }
1047 
1048 static void
1049 rfs4_recall_file(rfs4_file_t *fp,
1050     void (*recall)(rfs4_deleg_state_t *, bool_t trunc),
1051     bool_t trunc, rfs4_client_t *cp)
1052 {
1053         struct master_recall_args *args;
1054 
1055         rfs4_dbe_lock(fp->rf_dbe);
1056         if (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
1057                 rfs4_dbe_unlock(fp->rf_dbe);
1058                 return;
1059         }
1060         rfs4_dbe_hold(fp->rf_dbe);   /* hold for new thread */
1061 
1062         /*
1063          * Mark the time we started the recall processing.
1064          * If it has been previously recalled, do not reset the
1065          * timer since this is used for the revocation decision.
1066          */
1067         if (fp->rf_dinfo.rd_time_recalled == 0)
1068                 fp->rf_dinfo.rd_time_recalled = gethrestime_sec();
1069         fp->rf_dinfo.rd_ever_recalled = TRUE; /* used for policy decision */
1070         /* Client causing recall not always available */
1071         if (cp)
1072                 fp->rf_dinfo.rd_conflicted_client = cp->rc_clientid;
1073 
1074         rfs4_dbe_unlock(fp->rf_dbe);
1075 
1076         args = kmem_alloc(sizeof (struct master_recall_args), KM_SLEEP);
1077         args->fp = fp;
1078         args->recall = recall;
1079         args->trunc = trunc;
1080 
1081         (void) zthread_create(NULL, 0, do_recall_file, args, 0,
1082             minclsyspri);
1083 }
1084 
1085 void
1086 rfs4_recall_deleg(rfs4_file_t *fp, bool_t trunc, rfs4_client_t *cp)
1087 {
1088         time_t elapsed1, elapsed2;
1089 
1090         if (fp->rf_dinfo.rd_time_recalled != 0) {
1091                 elapsed1 = gethrestime_sec() - fp->rf_dinfo.rd_time_recalled;
1092                 elapsed2 = gethrestime_sec() - fp->rf_dinfo.rd_time_lastwrite;
1093                 /* First check to see if a revocation should occur */
1094                 if (elapsed1 > rfs4_lease_time &&
1095                     elapsed2 > rfs4_lease_time) {
1096                         rfs4_revoke_file(fp);
1097                         return;
1098                 }
1099                 /*
1100                  * Next check to see if a recall should be done again
1101                  * so quickly.


1197                  * have been recalled already.
1198                  */
1199                 if ((access & OPEN4_SHARE_ACCESS_WRITE) ||
1200                     (deny & OPEN4_SHARE_DENY_READ))
1201                         return (OPEN_DELEGATE_NONE);
1202                 return (OPEN_DELEGATE_READ);
1203 
1204         case OPEN_DELEGATE_WRITE:
1205                 return (OPEN_DELEGATE_WRITE);
1206         }
1207 
1208         /* Shouldn't get here */
1209         return (OPEN_DELEGATE_NONE);
1210 }
1211 
1212 /*
1213  * Given the desired delegation type and the "history" of the file
1214  * determine the actual delegation type to return.
1215  */
1216 static open_delegation_type4
1217 rfs4_delegation_policy(nfs4_srv_t *nsrv4, open_delegation_type4 dtype,
1218     rfs4_dinfo_t *dinfo, clientid4 cid)
1219 {
1220         time_t elapsed;
1221 
1222         if (nsrv4->nfs4_deleg_policy != SRV_NORMAL_DELEGATE)
1223                 return (OPEN_DELEGATE_NONE);
1224 
1225         /*
1226          * Has this file/delegation ever been recalled?  If not then
1227          * no further checks for a delegation race need to be done.
1228          * However if a recall has occurred, then check to see if a
1229          * client has caused its own delegation recall to occur.  If
1230          * not, then has a delegation for this file been returned
1231          * recently?  If so, then do not assign a new delegation to
1232          * avoid a "delegation race" between the original client and
1233          * the new/conflicting client.
1234          */
1235         if (dinfo->rd_ever_recalled == TRUE) {
1236                 if (dinfo->rd_conflicted_client != cid) {
1237                         elapsed = gethrestime_sec() - dinfo->rd_time_returned;
1238                         if (elapsed < rfs4_lease_time)
1239                                 return (OPEN_DELEGATE_NONE);
1240                 }
1241         }
1242 


1245             dinfo->rd_rdgrants > MAX_READ_DELEGATIONS)
1246                 return (OPEN_DELEGATE_NONE);
1247 
1248         /*
1249          * Should consider limiting total number of read/write
1250          * delegations the server will permit.
1251          */
1252 
1253         return (dtype);
1254 }
1255 
1256 /*
 1257  * Try to grant a delegation for an open, given the state. The routine
 1258  * returns the delegation state granted, or NULL if none was granted.
 1259  *
 1260  * The state and associated file entry must be locked.
1261  */
1262 rfs4_deleg_state_t *
1263 rfs4_grant_delegation(delegreq_t dreq, rfs4_state_t *sp, int *recall)
1264 {
1265         nfs4_srv_t *nsrv4;
1266         rfs4_file_t *fp = sp->rs_finfo;
1267         open_delegation_type4 dtype;
1268         int no_delegation;
1269 
1270         ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
1271         ASSERT(rfs4_dbe_islocked(fp->rf_dbe));
1272 
1273         nsrv4 = nfs4_get_srv();
1274 
1275         /* Is the server even providing delegations? */
1276         if (nsrv4->nfs4_deleg_policy == SRV_NEVER_DELEGATE ||
1277             dreq == DELEG_NONE) {
1278                 return (NULL);
1279         }
1280 
1281         /* Check to see if delegations have been temporarily disabled */
1282         mutex_enter(&nsrv4->deleg_lock);
1283         no_delegation = rfs4_deleg_disabled;
1284         mutex_exit(&nsrv4->deleg_lock);
1285 
1286         if (no_delegation)
1287                 return (NULL);
1288 
1289         /* Don't grant a delegation if a deletion is impending. */
1290         if (fp->rf_dinfo.rd_hold_grant > 0) {
1291                 return (NULL);
1292         }
1293 
1294         /*
1295          * Don't grant a delegation if there are any lock manager
1296          * (NFSv2/v3) locks for the file.  This is a bit of a hack (e.g.,
1297          * if there are only read locks we should be able to grant a
1298          * read-only delegation), but it's good enough for now.
1299          *
1300          * MT safety: the lock manager checks for conflicting delegations
1301          * before processing a lock request.  That check will block until
1302          * we are done here.  So if the lock manager acquires a lock after
1303          * we decide to grant the delegation, the delegation will get
1304          * immediately recalled (if there's a conflict), so we're safe.


1345                  * If we are waiting for a delegation to be returned then
1346                  * don't delegate this file. We do this for correctness as
1347                  * well as if the file is being recalled we would likely
1348                  * recall this file again.
1349                  */
1350 
1351                 if (fp->rf_dinfo.rd_time_recalled != 0 ||
1352                     fp->rf_dinfo.rd_time_rm_delayed != 0)
1353                         return (NULL);
1354 
1355                 /* Get the "best" delegation candidate */
1356                 dtype = rfs4_check_delegation(sp, fp);
1357 
1358                 if (dtype == OPEN_DELEGATE_NONE)
1359                         return (NULL);
1360 
1361                 /*
1362                  * Based on policy and the history of the file get the
1363                  * actual delegation.
1364                  */
1365                 dtype = rfs4_delegation_policy(nsrv4, dtype, &fp->rf_dinfo,
1366                     sp->rs_owner->ro_client->rc_clientid);
1367 
1368                 if (dtype == OPEN_DELEGATE_NONE)
1369                         return (NULL);
1370                 break;
1371         default:
1372                 return (NULL);
1373         }
1374 
1375         /* set the delegation for the state */
1376         return (rfs4_deleg_state(sp, dtype, recall));
1377 }
1378 
1379 void
1380 rfs4_set_deleg_response(rfs4_deleg_state_t *dsp, open_delegation4 *dp,
1381     nfsace4 *ace,  int recall)
1382 {
1383         open_write_delegation4 *wp;
1384         open_read_delegation4 *rp;
1385         nfs_space_limit4 *spl;


1434                 break;
1435         }
1436 }
1437 
1438 /*
1439  * Check if the file is delegated via the provided file struct.
1440  * Return TRUE if it is delegated.  This is intended for use by
1441  * the v4 server.  The v2/v3 server code should use rfs4_check_delegated().
1442  *
1443  * Note that if the file is found to have a delegation, it is
1444  * recalled, unless the clientid of the caller matches the clientid of the
1445  * delegation. If the caller has specified, there is a slight delay
1446  * inserted in the hopes that the delegation will be returned quickly.
1447  */
1448 bool_t
1449 rfs4_check_delegated_byfp(int mode, rfs4_file_t *fp,
1450     bool_t trunc, bool_t do_delay, bool_t is_rm, clientid4 *cp)
1451 {
1452         rfs4_deleg_state_t *dsp;
1453 
1454         nfs4_srv_t *nsrv4 = nfs4_get_srv();
1455 
1456         /* Is delegation enabled? */
1457         if (nsrv4->nfs4_deleg_policy == SRV_NEVER_DELEGATE)
1458                 return (FALSE);
1459 
1460         /* do we have a delegation on this file? */
1461         rfs4_dbe_lock(fp->rf_dbe);
1462         if (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
1463                 if (is_rm)
1464                         fp->rf_dinfo.rd_hold_grant++;
1465                 rfs4_dbe_unlock(fp->rf_dbe);
1466                 return (FALSE);
1467         }
1468         /*
1469          * do we have a write delegation on this file or are we
1470          * requesting write access to a file with any type of existing
1471          * delegation?
1472          */
1473         if (mode == FWRITE || fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE) {
1474                 if (cp != NULL) {
1475                         dsp = list_head(&fp->rf_delegstatelist);
1476                         if (dsp == NULL) {
1477                                 rfs4_dbe_unlock(fp->rf_dbe);


1502                 if (fp->rf_dinfo.rd_dtype != OPEN_DELEGATE_NONE) {
1503                         fp->rf_dinfo.rd_time_rm_delayed = gethrestime_sec();
1504                         rfs4_dbe_unlock(fp->rf_dbe);
1505                         return (TRUE);
1506                 }
1507         }
1508         if (is_rm)
1509                 fp->rf_dinfo.rd_hold_grant++;
1510         rfs4_dbe_unlock(fp->rf_dbe);
1511         return (FALSE);
1512 }
1513 
1514 /*
1515  * Check if the file is delegated in the case of a v2 or v3 access.
1516  * Return TRUE if it is delegated which in turn means that v2 should
1517  * drop the request and in the case of v3 JUKEBOX should be returned.
1518  */
1519 bool_t
1520 rfs4_check_delegated(int mode, vnode_t *vp, bool_t trunc)
1521 {
1522         nfs4_srv_t *nsrv4;
1523         rfs4_file_t *fp;
1524         bool_t create = FALSE;
1525         bool_t rc = FALSE;
1526 
1527         nsrv4 = nfs4_get_srv();
1528         rfs4_hold_deleg_policy(nsrv4);
1529 
1530         /* Is delegation enabled? */
1531         if (nsrv4->nfs4_deleg_policy != SRV_NEVER_DELEGATE) {
1532                 fp = rfs4_findfile(vp, NULL, &create);
1533                 if (fp != NULL) {
1534                         if (rfs4_check_delegated_byfp(mode, fp, trunc,
1535                             TRUE, FALSE, NULL)) {
1536                                 rc = TRUE;
1537                         }
1538                         rfs4_file_rele(fp);
1539                 }
1540         }
1541         rfs4_rele_deleg_policy(nsrv4);
1542         return (rc);
1543 }
1544 
1545 /*
1546  * Release a hold on the hold_grant counter which
1547  * prevents delegation from being granted while a remove
1548  * or a rename is in progress.
1549  */
1550 void
1551 rfs4_clear_dont_grant(rfs4_file_t *fp)
1552 {
1553         nfs4_srv_t *nsrv4 = nfs4_get_srv();
1554 
1555         if (nsrv4->nfs4_deleg_policy == SRV_NEVER_DELEGATE)
1556                 return;
1557         rfs4_dbe_lock(fp->rf_dbe);
1558         ASSERT(fp->rf_dinfo.rd_hold_grant > 0);
1559         fp->rf_dinfo.rd_hold_grant--;
1560         fp->rf_dinfo.rd_time_rm_delayed = 0;
1561         rfs4_dbe_unlock(fp->rf_dbe);
1562 }
1563 
1564 /*
1565  * State support for delegation.
1566  * Set the state delegation type for this state;
1567  * This routine is called from open via rfs4_grant_delegation and the entry
1568  * locks on sp and sp->rs_finfo are assumed.
1569  */
1570 static rfs4_deleg_state_t *
1571 rfs4_deleg_state(rfs4_state_t *sp, open_delegation_type4 dtype, int *recall)
1572 {
1573         rfs4_file_t *fp = sp->rs_finfo;
1574         bool_t create = TRUE;
1575         rfs4_deleg_state_t *dsp;


1871 bool_t
1872 rfs4_is_deleg(rfs4_state_t *sp)
1873 {
1874         rfs4_deleg_state_t *dsp;
1875         rfs4_file_t *fp = sp->rs_finfo;
1876         rfs4_client_t *cp = sp->rs_owner->ro_client;
1877 
1878         ASSERT(rfs4_dbe_islocked(fp->rf_dbe));
1879         for (dsp = list_head(&fp->rf_delegstatelist); dsp != NULL;
1880             dsp = list_next(&fp->rf_delegstatelist, dsp)) {
1881                 if (cp != dsp->rds_client) {
1882                         return (TRUE);
1883                 }
1884         }
1885         return (FALSE);
1886 }
1887 
1888 void
1889 rfs4_disable_delegation(void)
1890 {
1891         nfs4_srv_t *nsrv4;
1892 
1893         nsrv4 = nfs4_get_srv();
1894         mutex_enter(&nsrv4->deleg_lock);
1895         rfs4_deleg_disabled++;
1896         mutex_exit(&nsrv4->deleg_lock);
1897 }
1898 
1899 void
1900 rfs4_enable_delegation(void)
1901 {
1902         nfs4_srv_t *nsrv4;
1903 
1904         nsrv4 = nfs4_get_srv();
1905         mutex_enter(&nsrv4->deleg_lock);
1906         ASSERT(rfs4_deleg_disabled > 0);
1907         rfs4_deleg_disabled--;
1908         mutex_exit(&nsrv4->deleg_lock);
1909 }
1910 
1911 void
1912 rfs4_mon_hold(void *arg)
1913 {
1914         rfs4_file_t *fp = arg;
1915 
1916         rfs4_dbe_hold(fp->rf_dbe);
1917 }
1918 
1919 void
1920 rfs4_mon_rele(void *arg)
1921 {
1922         rfs4_file_t *fp = arg;
1923 
1924         rfs4_dbe_rele_nolock(fp->rf_dbe);
1925 }