1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
24 * Copyright (c) 2016 by Delphix. All rights reserved.
25 */
26
27 /*
28 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
29 * All rights reserved.
30 */
31
32 #include <sys/param.h>
33 #include <sys/types.h>
34 #include <sys/systm.h>
35 #include <sys/cred.h>
36 #include <sys/buf.h>
37 #include <sys/vfs.h>
38 #include <sys/vnode.h>
39 #include <sys/uio.h>
40 #include <sys/stat.h>
41 #include <sys/errno.h>
42 #include <sys/sysmacros.h>
43 #include <sys/statvfs.h>
44 #include <sys/kmem.h>
45 #include <sys/kstat.h>
46 #include <sys/dirent.h>
47 #include <sys/cmn_err.h>
48 #include <sys/debug.h>
49 #include <sys/vtrace.h>
50 #include <sys/mode.h>
51 #include <sys/acl.h>
52 #include <sys/nbmlock.h>
53 #include <sys/policy.h>
54 #include <sys/sdt.h>
55
56 #include <rpc/types.h>
57 #include <rpc/auth.h>
58 #include <rpc/svc.h>
59
60 #include <nfs/nfs.h>
61 #include <nfs/export.h>
62 #include <nfs/nfs_cmd.h>
63
64 #include <vm/hat.h>
65 #include <vm/as.h>
66 #include <vm/seg.h>
67 #include <vm/seg_map.h>
68 #include <vm/seg_kmem.h>
69
70 #include <sys/strsubr.h>
71
72 /*
73 * These are the interface routines for the server side of the
74 * Network File System. See the NFS version 2 protocol specification
75 * for a description of this interface.
76 */
77
78 static int sattr_to_vattr(struct nfssattr *, struct vattr *);
79 static void acl_perm(struct vnode *, struct exportinfo *, struct vattr *,
80 cred_t *);
81
82 /*
83 * Some "over the wire" UNIX file types. These are encoded
84 * into the mode. This needs to be fixed in the next rev.
85 */
86 #define IFMT 0170000 /* type of file */
87 #define IFCHR 0020000 /* character special */
88 #define IFBLK 0060000 /* block special */
89 #define IFSOCK 0140000 /* socket */
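/*
 * The type bits ride in the high bits of the mode word; for example,
 * rfs_create() below checks (va.va_mode & IFMT) == IFCHR to recognize
 * a device-node create request.
 */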
90
91 u_longlong_t nfs2_srv_caller_id;
92
93 /*
94 * Get file attributes.
95 * Returns the current attributes of the file with the given fhandle.
96 */
97 /* ARGSUSED */
98 void
99 rfs_getattr(fhandle_t *fhp, struct nfsattrstat *ns, struct exportinfo *exi,
100 struct svc_req *req, cred_t *cr, bool_t ro)
101 {
102 int error;
103 vnode_t *vp;
104 struct vattr va;
105
106 vp = nfs_fhtovp(fhp, exi);
107 if (vp == NULL) {
108 ns->ns_status = NFSERR_STALE;
109 return;
110 }
111
312 }
313 }
314
315 ct.cc_flags = 0;
316
317 /*
318 * Force modified metadata out to stable storage.
319 */
320 (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
321
322 VN_RELE(vp);
323
324 ns->ns_status = puterrno(error);
325 }
326 void *
327 rfs_setattr_getfh(struct nfssaargs *args)
328 {
329 return (&args->saa_fh);
330 }
331
332 /*
333 * Directory lookup.
334 * Returns an fhandle and file attributes for file name in a directory.
335 */
336 /* ARGSUSED */
337 void
338 rfs_lookup(struct nfsdiropargs *da, struct nfsdiropres *dr,
339 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
340 {
341 int error;
342 vnode_t *dvp;
343 vnode_t *vp;
344 struct vattr va;
345 fhandle_t *fhp = da->da_fhandle;
346 struct sec_ol sec = {0, 0};
347 bool_t publicfh_flag = FALSE, auth_weak = FALSE;
348 char *name;
349 struct sockaddr *ca;
350
351 /*
352 * Trusted Extension doesn't support NFSv2. MOUNT
354 * access via WebNFS here.
355 */
356 if (is_system_labeled() && req->rq_vers == 2) {
357 dr->dr_status = NFSERR_ACCES;
358 return;
359 }
360
361 /*
362 * Disallow NULL paths
363 */
364 if (da->da_name == NULL || *da->da_name == '\0') {
365 dr->dr_status = NFSERR_ACCES;
366 return;
367 }
368
369 /*
370 * Allow lookups from the root - the default
371 * location of the public filehandle.
372 */
373 if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
374 dvp = rootdir;
375 VN_HOLD(dvp);
376 } else {
377 dvp = nfs_fhtovp(fhp, exi);
378 if (dvp == NULL) {
379 dr->dr_status = NFSERR_STALE;
380 return;
381 }
382 }
383
384 /*
385 * Do not allow lookups beyond the root.
386 * If the filehandle matches a filehandle of the exi,
387 * then the ".." refers beyond the root of an exported filesystem.
388 */
389 if (strcmp(da->da_name, "..") == 0 &&
390 EQFID(&exi->exi_fid, (fid_t *)&fhp->fh_len)) {
391 VN_RELE(dvp);
392 dr->dr_status = NFSERR_NOENT;
393 return;
394 }
395
396 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
397 name = nfscmd_convname(ca, exi, da->da_name, NFSCMD_CONV_INBOUND,
398 MAXPATHLEN);
399
400 if (name == NULL) {
401 dr->dr_status = NFSERR_ACCES;
402 return;
403 }
404
405 /*
406 * If the public filehandle is used then allow
407 * a multi-component lookup, i.e. evaluate
408 * a pathname and follow symbolic links if
409 * necessary.
410 *
411 * This may result in a vnode in another filesystem
412 * which is OK as long as the filesystem is exported.
413 */
414 if (PUBLIC_FH2(fhp)) {
415 publicfh_flag = TRUE;
416 error = rfs_publicfh_mclookup(name, dvp, cr, &vp, &exi,
417 &sec);
418 } else {
419 /*
420 * Do a normal single component lookup.
421 */
422 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
423 NULL, NULL, NULL);
424 }
425
426 if (name != da->da_name)
427 kmem_free(name, MAXPATHLEN);
428
429
430 if (!error) {
431 va.va_mask = AT_ALL; /* we want everything */
432
433 error = rfs4_delegated_getattr(vp, &va, 0, cr);
434
435 /* check for overflows */
436 if (!error) {
437 acl_perm(vp, exi, &va, cr);
438 error = vattr_to_nattr(&va, &dr->dr_attr);
439 if (!error) {
440 if (sec.sec_flags & SEC_QUERY)
441 error = makefh_ol(&dr->dr_fhandle, exi,
442 sec.sec_index);
443 else {
444 error = makefh(&dr->dr_fhandle, vp,
445 exi);
446 if (!error && publicfh_flag &&
447 !chk_clnt_sec(exi, req))
448 auth_weak = TRUE;
449 }
450 }
451 }
452 VN_RELE(vp);
453 }
454
455 VN_RELE(dvp);
456
457 /*
458 * If publicfh_flag is true then we have called rfs_publicfh_mclookup
459 * and have obtained a new exportinfo in exi which needs to be
460 * released. Note that the original exportinfo pointed to by exi
461 * will be released by the caller, common_dispatch.
462 */
463 if (publicfh_flag && exi != NULL)
464 exi_rele(exi);
465
466 /*
467 * If it's public fh, no 0x81, and client's flavor is
468 * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
469 * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
470 */
471 if (auth_weak)
472 dr->dr_status = (enum nfsstat)WNFSERR_CLNT_FLAVOR;
473 else
474 dr->dr_status = puterrno(error);
475 }
476 void *
477 rfs_lookup_getfh(struct nfsdiropargs *da)
478 {
479 return (da->da_fhandle);
480 }
481
482 /*
483 * Read symbolic link.
484 * Returns the string in the symbolic link at the given fhandle.
668 * Enter the critical region before calling VOP_RWLOCK
669 * to avoid a deadlock with write requests.
670 */
671 if (nbl_need_check(vp)) {
672 nbl_start_crit(vp, RW_READER);
673 if (nbl_conflict(vp, NBL_READ, ra->ra_offset, ra->ra_count,
674 0, NULL)) {
675 nbl_end_crit(vp);
676 VN_RELE(vp);
677 rr->rr_data = NULL;
678 rr->rr_status = NFSERR_ACCES;
679 return;
680 }
681 in_crit = 1;
682 }
683
684 error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);
685
686 /* check if a monitor detected a delegation conflict */
687 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
688 VN_RELE(vp);
689 /* mark as wouldblock so response is dropped */
690 curthread->t_flag |= T_WOULDBLOCK;
691
692 rr->rr_data = NULL;
693 return;
694 }
695
696 va.va_mask = AT_ALL;
697
698 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
699
700 if (error) {
701 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
702 if (in_crit)
703 nbl_end_crit(vp);
704
705 VN_RELE(vp);
706 rr->rr_data = NULL;
707 rr->rr_status = puterrno(error);
993 }
994
995 /*
996 * We have to enter the critical region before calling VOP_RWLOCK
997 * to avoid a deadlock with ufs.
998 */
999 if (nbl_need_check(vp)) {
1000 nbl_start_crit(vp, RW_READER);
1001 in_crit = 1;
1002 if (nbl_conflict(vp, NBL_WRITE, wa->wa_offset,
1003 wa->wa_count, 0, NULL)) {
1004 error = EACCES;
1005 goto out;
1006 }
1007 }
1008
1009 error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
1010
1011 /* check if a monitor detected a delegation conflict */
1012 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1013 VN_RELE(vp);
1014 /* mark as wouldblock so response is dropped */
1015 curthread->t_flag |= T_WOULDBLOCK;
1016 return;
1017 }
1018
1019 if (wa->wa_data || wa->wa_rlist) {
1020 /* Do the RDMA thing if necessary */
1021 if (wa->wa_rlist) {
1022 iov[0].iov_base = (char *)((wa->wa_rlist)->u.c_daddr3);
1023 iov[0].iov_len = wa->wa_count;
1024 } else {
1025 iov[0].iov_base = wa->wa_data;
1026 iov[0].iov_len = wa->wa_count;
1027 }
1028 uio.uio_iov = iov;
1029 uio.uio_iovcnt = 1;
1030 uio.uio_segflg = UIO_SYSSPACE;
1031 uio.uio_extflg = UIO_COPY_DEFAULT;
1032 uio.uio_loffset = (offset_t)wa->wa_offset;
1033 uio.uio_resid = wa->wa_count;
1034 /*
1035 * The limit is checked on the client. We
1036 * should allow any size writes here.
1037 */
1038 uio.uio_llimit = curproc->p_fsz_ctl;
1039 rlimit = uio.uio_llimit - wa->wa_offset;
1040 if (rlimit < (rlim64_t)uio.uio_resid)
1041 uio.uio_resid = (uint_t)rlimit;
1042
1043 /*
1044 * for now we assume no append mode
1045 */
1046 /*
1047 * We're changing creds because VM may fault and we need
1048 * the cred of the current thread to be used if quota
1049 * checking is enabled.
1050 */
1051 savecred = curthread->t_cred;
1052 curthread->t_cred = cr;
1053 error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct);
1054 curthread->t_cred = savecred;
1055 } else {
1056 iovcnt = 0;
1057 for (m = wa->wa_mblk; m != NULL; m = m->b_cont)
1058 iovcnt++;
1059 if (iovcnt <= MAX_IOVECS) {
1060 #ifdef DEBUG
1061 rfs_write_sync_hits++;
1062 #endif
1063 iovp = iov;
1064 } else {
1065 #ifdef DEBUG
1066 rfs_write_sync_misses++;
1067 #endif
1068 iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
1069 }
1070 mblk_to_iov(wa->wa_mblk, iovcnt, iovp);
1071 uio.uio_iov = iovp;
1072 uio.uio_iovcnt = iovcnt;
1073 uio.uio_segflg = UIO_SYSSPACE;
1074 uio.uio_extflg = UIO_COPY_DEFAULT;
1075 uio.uio_loffset = (offset_t)wa->wa_offset;
1134
1135 struct rfs_async_write {
1136 struct nfswriteargs *wa;
1137 struct nfsattrstat *ns;
1138 struct svc_req *req;
1139 cred_t *cr;
1140 bool_t ro;
1141 kthread_t *thread;
1142 struct rfs_async_write *list;
1143 };
1144
1145 struct rfs_async_write_list {
1146 fhandle_t *fhp;
1147 kcondvar_t cv;
1148 struct rfs_async_write *list;
1149 struct rfs_async_write_list *next;
1150 };
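
/*
 * A sketch of the layout: rfs_async_write_head points to a chain of
 * clusters, one per file handle being written; each cluster's ->list
 * holds that file's pending requests, kept sorted by starting offset
 * by the insertion loop in rfs_write().
 */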
1151
1152 static struct rfs_async_write_list *rfs_async_write_head = NULL;
1153 static kmutex_t rfs_async_write_lock;
1154 static int rfs_write_async = 1; /* enables write clustering if == 1 */
1155
1156 #define MAXCLIOVECS 42
1157 #define RFSWRITE_INITVAL (enum nfsstat) -1
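/*
 * RFSWRITE_INITVAL is deliberately outside the valid nfsstat range:
 * 0 would read as NFS_OK, so -1 marks a clustered request that has
 * not been processed yet (see the RFSWRITE_INITVAL checks below).
 */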
1158
1159 #ifdef DEBUG
1160 static int rfs_write_hits = 0;
1161 static int rfs_write_misses = 0;
1162 #endif
1163
1164 /*
1165 * Write data to file.
1166 * Returns attributes of a file after writing some data to it.
1167 */
1168 void
1169 rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns,
1170 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
1171 {
1172 int error;
1173 vnode_t *vp;
1174 rlim64_t rlimit;
1179 struct rfs_async_write *rp;
1180 struct rfs_async_write *nrp;
1181 struct rfs_async_write *trp;
1182 struct rfs_async_write *lrp;
1183 int data_written;
1184 int iovcnt;
1185 mblk_t *m;
1186 struct iovec *iovp;
1187 struct iovec *niovp;
1188 struct iovec iov[MAXCLIOVECS];
1189 int count;
1190 int rcount;
1191 uint_t off;
1192 uint_t len;
1193 struct rfs_async_write nrpsp;
1194 struct rfs_async_write_list nlpsp;
1195 ushort_t t_flag;
1196 cred_t *savecred;
1197 int in_crit = 0;
1198 caller_context_t ct;
1199
1200 if (!rfs_write_async) {
1201 rfs_write_sync(wa, ns, exi, req, cr, ro);
1202 return;
1203 }
1204
1205 /*
1206 * Initialize status to RFSWRITE_INITVAL instead of 0, since a value
1207 * of 0 is read as NFS_OK.
1208 */
1209 ns->ns_status = RFSWRITE_INITVAL;
1210
1211 nrp = &nrpsp;
1212 nrp->wa = wa;
1213 nrp->ns = ns;
1214 nrp->req = req;
1215 nrp->cr = cr;
1216 nrp->ro = ro;
1217 nrp->thread = curthread;
1218
1219 ASSERT(curthread->t_schedflag & TS_DONT_SWAP);
1220
1221 /*
1222 * Look to see if there is already a cluster started
1223 * for this file.
1224 */
1225 mutex_enter(&rfs_async_write_lock);
1226 for (lp = rfs_async_write_head; lp != NULL; lp = lp->next) {
1227 if (bcmp(&wa->wa_fhandle, lp->fhp,
1228 sizeof (fhandle_t)) == 0)
1229 break;
1230 }
1231
1232 /*
1233 * If lp is non-NULL, then there is already a cluster
1234 * started. We need to place ourselves in the cluster
1235 * list in the right place as determined by starting
1236 * offset. Conflicts with non-blocking mandatory locked
1237 * regions will be checked when the cluster is processed.
1238 */
1239 if (lp != NULL) {
1240 rp = lp->list;
1241 trp = NULL;
1242 while (rp != NULL && rp->wa->wa_offset < wa->wa_offset) {
1243 trp = rp;
1244 rp = rp->list;
1245 }
1246 nrp->list = rp;
1247 if (trp == NULL)
1248 lp->list = nrp;
1249 else
1250 trp->list = nrp;
1251 while (nrp->ns->ns_status == RFSWRITE_INITVAL)
1252 cv_wait(&lp->cv, &rfs_async_write_lock);
1253 mutex_exit(&rfs_async_write_lock);
1254
1255 return;
1256 }
1257
1258 /*
1259 * No cluster started yet, start one and add ourselves
1260 * to the list of clusters.
1261 */
1262 nrp->list = NULL;
1263
1264 nlp = &nlpsp;
1265 nlp->fhp = &wa->wa_fhandle;
1266 cv_init(&nlp->cv, NULL, CV_DEFAULT, NULL);
1267 nlp->list = nrp;
1268 nlp->next = NULL;
1269
1270 if (rfs_async_write_head == NULL) {
1271 rfs_async_write_head = nlp;
1272 } else {
1273 lp = rfs_async_write_head;
1274 while (lp->next != NULL)
1275 lp = lp->next;
1276 lp->next = nlp;
1277 }
1278 mutex_exit(&rfs_async_write_lock);
1279
1280 /*
1281 * Convert the file handle common to all of the requests
1282 * in this cluster to a vnode.
1283 */
1284 vp = nfs_fhtovp(&wa->wa_fhandle, exi);
1285 if (vp == NULL) {
1286 mutex_enter(&rfs_async_write_lock);
1287 if (rfs_async_write_head == nlp)
1288 rfs_async_write_head = nlp->next;
1289 else {
1290 lp = rfs_async_write_head;
1291 while (lp->next != nlp)
1292 lp = lp->next;
1293 lp->next = nlp->next;
1294 }
1295 t_flag = curthread->t_flag & T_WOULDBLOCK;
1296 for (rp = nlp->list; rp != NULL; rp = rp->list) {
1297 rp->ns->ns_status = NFSERR_STALE;
1298 rp->thread->t_flag |= t_flag;
1299 }
1300 cv_broadcast(&nlp->cv);
1301 mutex_exit(&rfs_async_write_lock);
1302
1303 return;
1304 }
1305
1306 /*
1307 * Can only write regular files. Attempts to write any
1308 * other file types fail with EISDIR.
1309 */
1310 if (vp->v_type != VREG) {
1311 VN_RELE(vp);
1312 mutex_enter(&rfs_async_write_lock);
1313 if (rfs_async_write_head == nlp)
1314 rfs_async_write_head = nlp->next;
1315 else {
1316 lp = rfs_async_write_head;
1317 while (lp->next != nlp)
1318 lp = lp->next;
1319 lp->next = nlp->next;
1320 }
1321 t_flag = curthread->t_flag & T_WOULDBLOCK;
1322 for (rp = nlp->list; rp != NULL; rp = rp->list) {
1323 rp->ns->ns_status = NFSERR_ISDIR;
1324 rp->thread->t_flag |= t_flag;
1325 }
1326 cv_broadcast(&nlp->cv);
1327 mutex_exit(&rfs_async_write_lock);
1328
1329 return;
1330 }
1331
1332 /*
1333 * Enter the critical region before calling VOP_RWLOCK, to avoid a
1334 * deadlock with ufs.
1335 */
1336 if (nbl_need_check(vp)) {
1337 nbl_start_crit(vp, RW_READER);
1338 in_crit = 1;
1339 }
1340
1341 ct.cc_sysid = 0;
1342 ct.cc_pid = 0;
1343 ct.cc_caller_id = nfs2_srv_caller_id;
1344 ct.cc_flags = CC_DONTBLOCK;
1345
1346 /*
1347 * Lock the file for writing. This operation provides
1348 * the delay which allows clusters to grow.
1349 */
1350 error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
1351
1352 /* check if a monitor detected a delegation conflict */
1353 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1354 if (in_crit)
1355 nbl_end_crit(vp);
1356 VN_RELE(vp);
1357 /* mark as wouldblock so response is dropped */
1358 curthread->t_flag |= T_WOULDBLOCK;
1359 mutex_enter(&rfs_async_write_lock);
1360 if (rfs_async_write_head == nlp)
1361 rfs_async_write_head = nlp->next;
1362 else {
1363 lp = rfs_async_write_head;
1364 while (lp->next != nlp)
1365 lp = lp->next;
1366 lp->next = nlp->next;
1367 }
1368 for (rp = nlp->list; rp != NULL; rp = rp->list) {
1369 if (rp->ns->ns_status == RFSWRITE_INITVAL) {
1370 rp->ns->ns_status = puterrno(error);
1371 rp->thread->t_flag |= T_WOULDBLOCK;
1372 }
1373 }
1374 cv_broadcast(&nlp->cv);
1375 mutex_exit(&rfs_async_write_lock);
1376
1377 return;
1378 }
1379
1380 /*
1381 * Disconnect this cluster from the list of clusters.
1382 * The cluster that is being dealt with must be fixed
1383 * in size after this point, so there is no reason
1384 * to leave it on the list so that new requests can
1385 * find it.
1386 *
1387 * The algorithm is that the first write request will
1388 * create a cluster, convert the file handle to a
1389 * vnode pointer, and then lock the file for writing.
1390 * This request is not likely to be clustered with
1391 * any others. However, the next request will create
1392 * a new cluster and be blocked in VOP_RWLOCK while
1393 * the first request is being processed. This delay
1394 * will allow more requests to be clustered in this
1395 * second cluster.
1396 */
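/*
 * A hypothetical interleaving of three requests R1..R3 on one file:
 * R1 creates cluster A, acquires the rwlock, and starts writing.
 * R2 then creates cluster B and blocks in VOP_RWLOCK; R3 finds B on
 * the list, queues behind R2, and waits on the cv. When R1 unlocks,
 * R2 wakes holding a cluster that also carries R3's request.
 */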
1397 mutex_enter(&rfs_async_write_lock);
1398 if (rfs_async_write_head == nlp)
1399 rfs_async_write_head = nlp->next;
1400 else {
1401 lp = rfs_async_write_head;
1402 while (lp->next != nlp)
1403 lp = lp->next;
1404 lp->next = nlp->next;
1405 }
1406 mutex_exit(&rfs_async_write_lock);
1407
1408 /*
1409 * Step through the list of requests in this cluster.
1410 * We need to check permissions to make sure that all
1411 * of the requests have sufficient permission to write
1412 * the file. A cluster can be composed of requests
1413 * from different clients and different users on each
1414 * client.
1415 *
1416 * As a side effect, we also calculate the size of the
1417 * byte range that this cluster encompasses.
1418 */
1419 rp = nlp->list;
1420 off = rp->wa->wa_offset;
1421 len = (uint_t)0;
1422 do {
1423 if (rdonly(rp->ro, vp)) {
1424 rp->ns->ns_status = NFSERR_ROFS;
1425 t_flag = curthread->t_flag & T_WOULDBLOCK;
1426 rp->thread->t_flag |= t_flag;
1631
1632 /*
1633 * If any data was written at all, then we need to flush
1634 * the data and metadata to stable storage.
1635 */
1636 if (data_written) {
1637 error = VOP_PUTPAGE(vp, (u_offset_t)off, len, 0, cr, &ct);
1638
1639 if (!error) {
1640 error = VOP_FSYNC(vp, FNODSYNC, cr, &ct);
1641 }
1642 }
1643
1644 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
1645
1646 if (in_crit)
1647 nbl_end_crit(vp);
1648 VN_RELE(vp);
1649
1650 t_flag = curthread->t_flag & T_WOULDBLOCK;
1651 mutex_enter(&rfs_async_write_lock);
1652 for (rp = nlp->list; rp != NULL; rp = rp->list) {
1653 if (rp->ns->ns_status == RFSWRITE_INITVAL) {
1654 rp->ns->ns_status = puterrno(error);
1655 rp->thread->t_flag |= t_flag;
1656 }
1657 }
1658 cv_broadcast(&nlp->cv);
1659 mutex_exit(&rfs_async_write_lock);
1660
1661 }
1662
1663 void *
1664 rfs_write_getfh(struct nfswriteargs *wa)
1665 {
1666 return (&wa->wa_fhandle);
1667 }
1668
1669 /*
1670 * Create a file.
1671 * Creates a file with given attributes and returns those attributes
1672 * and an fhandle for the new file.
1673 */
1674 void
1675 rfs_create(struct nfscreatargs *args, struct nfsdiropres *dr,
1676 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
1677 {
1678 int error;
1679 int lookuperr;
1701 if (dvp == NULL) {
1702 dr->dr_status = NFSERR_STALE;
1703 return;
1704 }
1705
1706 error = sattr_to_vattr(args->ca_sa, &va);
1707 if (error) {
1708 dr->dr_status = puterrno(error);
1709 return;
1710 }
1711
1712 /*
1713 * Must specify the mode.
1714 */
1715 if (!(va.va_mask & AT_MODE)) {
1716 VN_RELE(dvp);
1717 dr->dr_status = NFSERR_INVAL;
1718 return;
1719 }
1720
1721 /*
1722 * This is a completely gross hack to make mknod
1723 * work over the wire until we can whack the protocol
1724 */
1725 if ((va.va_mode & IFMT) == IFCHR) {
1726 if (args->ca_sa->sa_size == (uint_t)NFS_FIFO_DEV)
1727 va.va_type = VFIFO; /* xtra kludge for named pipe */
1728 else {
1729 va.va_type = VCHR;
1730 /*
1731 * uncompress the received dev_t
1732 * if the top half is zero indicating a request
1733 * from an `older style' OS.
1734 */
1735 if ((va.va_size & 0xffff0000) == 0)
1736 va.va_rdev = nfsv2_expdev(va.va_size);
1737 else
1738 va.va_rdev = (dev_t)va.va_size;
1739 }
1740 va.va_mask &= ~AT_SIZE;
2040 vnode_t *tovp;
2041 struct exportinfo *to_exi;
2042 fhandle_t *fh;
2043 vnode_t *srcvp;
2044 vnode_t *targvp;
2045 int in_crit = 0;
2046
2047 fromvp = nfs_fhtovp(args->rna_from.da_fhandle, exi);
2048 if (fromvp == NULL) {
2049 *status = NFSERR_STALE;
2050 return;
2051 }
2052
2053 fh = args->rna_to.da_fhandle;
2054 to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen);
2055 if (to_exi == NULL) {
2056 VN_RELE(fromvp);
2057 *status = NFSERR_ACCES;
2058 return;
2059 }
2060 exi_rele(to_exi);
2061
2062 if (to_exi != exi) {
2063 VN_RELE(fromvp);
2064 *status = NFSERR_XDEV;
2065 return;
2066 }
2067
2068 tovp = nfs_fhtovp(args->rna_to.da_fhandle, exi);
2069 if (tovp == NULL) {
2070 VN_RELE(fromvp);
2071 *status = NFSERR_STALE;
2072 return;
2073 }
2074
2075 if (fromvp->v_type != VDIR || tovp->v_type != VDIR) {
2076 VN_RELE(tovp);
2077 VN_RELE(fromvp);
2078 *status = NFSERR_NOTDIR;
2079 return;
2080 }
2081
2082 /*
2083 * Disallow NULL paths
2084 */
2085 if (args->rna_from.da_name == NULL || *args->rna_from.da_name == '\0' ||
2086 args->rna_to.da_name == NULL || *args->rna_to.da_name == '\0') {
2087 VN_RELE(tovp);
2088 VN_RELE(fromvp);
2089 *status = NFSERR_ACCES;
2090 return;
2091 }
2092
2093 if (rdonly(ro, tovp)) {
2094 VN_RELE(tovp);
2095 VN_RELE(fromvp);
2096 *status = NFSERR_ROFS;
2097 return;
2098 }
2099
2100 /*
2101 * Check for a conflict with a non-blocking mandatory share reservation.
2102 */
2103 error = VOP_LOOKUP(fromvp, args->rna_from.da_name, &srcvp, NULL, 0,
2104 NULL, cr, NULL, NULL, NULL);
2105 if (error != 0) {
2106 VN_RELE(tovp);
2107 VN_RELE(fromvp);
2108 *status = puterrno(error);
2109 return;
2110 }
2111
2112 /* Check for delegations on the source file */
2113
2114 if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
2115 VN_RELE(tovp);
2116 VN_RELE(fromvp);
2117 VN_RELE(srcvp);
2118 curthread->t_flag |= T_WOULDBLOCK;
2119 return;
2120 }
2121
2122 /* Check for delegation on the file being renamed over, if it exists */
2123
2124 if (rfs4_deleg_policy != SRV_NEVER_DELEGATE &&
2125 VOP_LOOKUP(tovp, args->rna_to.da_name, &targvp, NULL, 0, NULL, cr,
2126 NULL, NULL, NULL) == 0) {
2127
2128 if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2129 VN_RELE(tovp);
2130 VN_RELE(fromvp);
2131 VN_RELE(srcvp);
2132 VN_RELE(targvp);
2133 curthread->t_flag |= T_WOULDBLOCK;
2134 return;
2135 }
2136 VN_RELE(targvp);
2137 }
2138
2139
2140 if (nbl_need_check(srcvp)) {
2141 nbl_start_crit(srcvp, RW_READER);
2142 in_crit = 1;
2143 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
2144 error = EACCES;
2186 {
2187 int error;
2188 vnode_t *fromvp;
2189 vnode_t *tovp;
2190 struct exportinfo *to_exi;
2191 fhandle_t *fh;
2192
2193 fromvp = nfs_fhtovp(args->la_from, exi);
2194 if (fromvp == NULL) {
2195 *status = NFSERR_STALE;
2196 return;
2197 }
2198
2199 fh = args->la_to.da_fhandle;
2200 to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen);
2201 if (to_exi == NULL) {
2202 VN_RELE(fromvp);
2203 *status = NFSERR_ACCES;
2204 return;
2205 }
2206 exi_rele(to_exi);
2207
2208 if (to_exi != exi) {
2209 VN_RELE(fromvp);
2210 *status = NFSERR_XDEV;
2211 return;
2212 }
2213
2214 tovp = nfs_fhtovp(args->la_to.da_fhandle, exi);
2215 if (tovp == NULL) {
2216 VN_RELE(fromvp);
2217 *status = NFSERR_STALE;
2218 return;
2219 }
2220
2221 if (tovp->v_type != VDIR) {
2222 VN_RELE(tovp);
2223 VN_RELE(fromvp);
2224 *status = NFSERR_NOTDIR;
2225 return;
2226 }
2227 /*
2228 * Disallow NULL paths
2229 */
2230 if (args->la_to.da_name == NULL || *args->la_to.da_name == '\0') {
2231 VN_RELE(tovp);
2232 VN_RELE(fromvp);
2233 *status = NFSERR_ACCES;
2234 return;
2235 }
2236
2237 if (rdonly(ro, tovp)) {
2238 VN_RELE(tovp);
2239 VN_RELE(fromvp);
2240 *status = NFSERR_ROFS;
2241 return;
2242 }
2243
2244 error = VOP_LINK(tovp, fromvp, args->la_to.da_name, cr, NULL, 0);
2245
2246 /*
2247 * Force modified data and metadata out to stable storage.
2248 */
2249 (void) VOP_FSYNC(tovp, 0, cr, NULL);
2250 (void) VOP_FSYNC(fromvp, FNODSYNC, cr, NULL);
2251
2252 VN_RELE(tovp);
2253 VN_RELE(fromvp);
2254
2255 *status = puterrno(error);
2256
2257 }
2258 void *
2259 rfs_link_getfh(struct nfslinkargs *args)
2260 {
2261 return (args->la_from);
2262 }
2263
2264 /*
2265 * Symbolically link to a file.
2266 * Create a file (from) with the given attributes which is a symbolic
2267 * link to the given target path name (tnm).
2268 */
2269 void
2270 rfs_symlink(struct nfsslargs *args, enum nfsstat *status,
2271 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2272 {
2273 int error;
2274 struct vattr va;
2275 vnode_t *vp;
2276 vnode_t *svp;
2277 int lerror;
2278 struct sockaddr *ca;
2279 char *name = NULL;
2280
2281 /*
2282 * Disallow NULL paths
2283 */
2284 if (args->sla_from.da_name == NULL || *args->sla_from.da_name == '\0') {
2285 *status = NFSERR_ACCES;
2286 return;
2294
2295 if (rdonly(ro, vp)) {
2296 VN_RELE(vp);
2297 *status = NFSERR_ROFS;
2298 return;
2299 }
2300
2301 error = sattr_to_vattr(args->sla_sa, &va);
2302 if (error) {
2303 VN_RELE(vp);
2304 *status = puterrno(error);
2305 return;
2306 }
2307
2308 if (!(va.va_mask & AT_MODE)) {
2309 VN_RELE(vp);
2310 *status = NFSERR_INVAL;
2311 return;
2312 }
2313
2314 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2315 name = nfscmd_convname(ca, exi, args->sla_tnm,
2316 NFSCMD_CONV_INBOUND, MAXPATHLEN);
2317
2318 if (name == NULL) {
2319 *status = NFSERR_ACCES;
2320 return;
2321 }
2322
2323 va.va_type = VLNK;
2324 va.va_mask |= AT_TYPE;
2325
2326 error = VOP_SYMLINK(vp, args->sla_from.da_name, &va, name, cr, NULL, 0);
2327
2328 /*
2329 * Force new data and metadata out to stable storage.
2330 */
2331 lerror = VOP_LOOKUP(vp, args->sla_from.da_name, &svp, NULL, 0,
2332 NULL, cr, NULL, NULL, NULL);
2333
2386
2387 if (rdonly(ro, vp)) {
2388 VN_RELE(vp);
2389 dr->dr_status = NFSERR_ROFS;
2390 return;
2391 }
2392
2393 error = sattr_to_vattr(args->ca_sa, &va);
2394 if (error) {
2395 VN_RELE(vp);
2396 dr->dr_status = puterrno(error);
2397 return;
2398 }
2399
2400 if (!(va.va_mask & AT_MODE)) {
2401 VN_RELE(vp);
2402 dr->dr_status = NFSERR_INVAL;
2403 return;
2404 }
2405
2406 va.va_type = VDIR;
2407 va.va_mask |= AT_TYPE;
2408
2409 error = VOP_MKDIR(vp, name, &va, &dvp, cr, NULL, 0, NULL);
2410
2411 if (!error) {
2412 /*
2413 * Attributes of the newly created directory should
2414 * be returned to the client.
2415 */
2416 va.va_mask = AT_ALL; /* We want everything */
2417 error = VOP_GETATTR(dvp, &va, 0, cr, NULL);
2418
2419 /* check for overflows */
2420 if (!error) {
2421 acl_perm(vp, exi, &va, cr);
2422 error = vattr_to_nattr(&va, &dr->dr_attr);
2423 if (!error) {
2424 error = makefh(&dr->dr_fhandle, dvp, exi);
2425 }
2471 if (vp == NULL) {
2472 *status = NFSERR_STALE;
2473 return;
2474 }
2475
2476 if (rdonly(ro, vp)) {
2477 VN_RELE(vp);
2478 *status = NFSERR_ROFS;
2479 return;
2480 }
2481
2482 /*
2483 * VOP_RMDIR takes a third argument (the current
2484 * directory of the process). That's because someone
2485 * wants to return EINVAL if one tries to remove ".".
2486 * Of course, NFS servers have no idea what their
2487 * clients' current directories are. We fake it by
2488 * supplying a vnode known to exist and illegal to
2489 * remove.
2490 */
2491 error = VOP_RMDIR(vp, da->da_name, rootdir, cr, NULL, 0);
2492
2493 /*
2494 * Force modified data and metadata out to stable storage.
2495 */
2496 (void) VOP_FSYNC(vp, 0, cr, NULL);
2497
2498 VN_RELE(vp);
2499
2500 /*
2501 * System V defines rmdir to return EEXIST, not ENOTEMPTY,
2502 * if the directory is not empty. A System V NFS server
2503 * needs to map EEXIST to NFSERR_NOTEMPTY to transmit
2504 * over the wire.
2505 */
2506 if (error == EEXIST)
2507 *status = NFSERR_NOTEMPTY;
2508 else
2509 *status = puterrno(error);
2510
2511 }
2512 void *
2513 rfs_rmdir_getfh(struct nfsdiropargs *da)
2514 {
2515 return (da->da_fhandle);
2516 }
2517
2518 /* ARGSUSED */
2519 void
2520 rfs_readdir(struct nfsrddirargs *rda, struct nfsrddirres *rd,
2521 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2522 {
2523 int error;
2524 int iseof;
2525 struct iovec iov;
2526 struct uio uio;
2527 vnode_t *vp;
2528 char *ndata = NULL;
2529 struct sockaddr *ca;
2530 size_t nents;
2531 int ret;
2532
2533 vp = nfs_fhtovp(&rda->rda_fh, exi);
2534 if (vp == NULL) {
2535 rd->rd_entries = NULL;
2536 rd->rd_status = NFSERR_STALE;
2537 return;
2538 }
2539
2540 if (vp->v_type != VDIR) {
2541 VN_RELE(vp);
2542 rd->rd_entries = NULL;
2543 rd->rd_status = NFSERR_NOTDIR;
2544 return;
2545 }
2546
2547 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
2548
2549 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
2550
2551 if (error) {
2552 rd->rd_entries = NULL;
2553 goto bad;
2554 }
2555
2556 if (rda->rda_count == 0) {
2557 rd->rd_entries = NULL;
2558 rd->rd_size = 0;
2559 rd->rd_eof = FALSE;
2560 goto bad;
2561 }
2562
2563 rda->rda_count = MIN(rda->rda_count, NFS_MAXDATA);
2564
2565 /*
2566 * Allocate data for entries. This will be freed by rfs_rddirfree.
2567 */
2568 rd->rd_bufsize = (uint_t)rda->rda_count;
2569 rd->rd_entries = kmem_alloc(rd->rd_bufsize, KM_SLEEP);
2570
2571 /*
2572 * Set up io vector to read directory data
2573 */
2574 iov.iov_base = (caddr_t)rd->rd_entries;
2575 iov.iov_len = rda->rda_count;
2576 uio.uio_iov = &iov;
2577 uio.uio_iovcnt = 1;
2578 uio.uio_segflg = UIO_SYSSPACE;
2579 uio.uio_extflg = UIO_COPY_CACHED;
2580 uio.uio_loffset = (offset_t)rda->rda_offset;
2581 uio.uio_resid = rda->rda_count;
2582
2583 /*
2584 * read directory
2585 */
2586 error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
2587
2588 /*
2589 * Clean up
2590 */
2591 if (!error) {
2592 /*
2593 * set size and eof
2594 */
2595 if (uio.uio_resid == rda->rda_count) {
2596 rd->rd_size = 0;
2597 rd->rd_eof = TRUE;
2598 } else {
2599 rd->rd_size = (uint32_t)(rda->rda_count -
2600 uio.uio_resid);
2601 rd->rd_eof = iseof ? TRUE : FALSE;
2602 }
2603 }
2604
2605 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2606 nents = nfscmd_countents((char *)rd->rd_entries, rd->rd_size);
2607 ret = nfscmd_convdirplus(ca, exi, (char *)rd->rd_entries, nents,
2608 rda->rda_count, &ndata);
2609
2610 if (ret != 0) {
2611 size_t dropbytes;
2612 /*
2613 * We had to drop one or more entries in order to fit
2614 * during the character conversion. We need to patch
2615 * up the size and eof info.
2616 */
2617 if (rd->rd_eof)
2618 rd->rd_eof = FALSE;
2619 dropbytes = nfscmd_dropped_entrysize(
2620 (struct dirent64 *)rd->rd_entries, nents, ret);
2621 rd->rd_size -= dropbytes;
2622 }
2623 if (ndata == NULL) {
2624 ndata = (char *)rd->rd_entries;
2625 } else if (ndata != (char *)rd->rd_entries) {
2626 kmem_free(rd->rd_entries, rd->rd_bufsize);
2627 rd->rd_entries = (void *)ndata;
2628 rd->rd_bufsize = rda->rda_count;
2629 }
2630
2631 bad:
2632 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
2633
2634 #if 0 /* notyet */
2635 /*
2636 * Don't do this. It causes local disk writes when just
2637 * reading the file and the overhead is deemed larger
2638 * than the benefit.
2639 */
2640 /*
2641 * Force modified metadata out to stable storage.
2642 */
2643 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
2644 #endif
2645
2646 VN_RELE(vp);
2647
2648 rd->rd_status = puterrno(error);
2649
2650 }
2651 void *
2652 rfs_readdir_getfh(struct nfsrddirargs *rda)
2653 {
2654 return (&rda->rda_fh);
2655 }
2656 void
2657 rfs_rddirfree(struct nfsrddirres *rd)
2658 {
2659 if (rd->rd_entries != NULL)
2660 kmem_free(rd->rd_entries, rd->rd_bufsize);
2661 }
2662
2663 /* ARGSUSED */
2664 void
2665 rfs_statfs(fhandle_t *fh, struct nfsstatfs *fs, struct exportinfo *exi,
2666 struct svc_req *req, cred_t *cr, bool_t ro)
2667 {
2668 int error;
2669 struct statvfs64 sb;
2670 vnode_t *vp;
2671
2672 vp = nfs_fhtovp(fh, exi);
2673 if (vp == NULL) {
2674 fs->fs_status = NFSERR_STALE;
2675 return;
2676 }
2677
2678 error = VFS_STATVFS(vp->v_vfsp, &sb);
2679
2680 if (!error) {
2746 vap->va_atime.tv_nsec = (uint32_t)(sa->sa_atime.tv_usec * 1000);
2747 }
2748 if (sa->sa_mtime.tv_sec != (int32_t)-1 &&
2749 sa->sa_mtime.tv_usec != (int32_t)-1) {
2750 #ifndef _LP64
2751 /* return error if time overflow */
2752 if (!NFS2_TIME_OK(sa->sa_mtime.tv_sec))
2753 return (EOVERFLOW);
2754 #endif
2755 vap->va_mask |= AT_MTIME;
2756 /*
2757 * nfs protocol defines times as unsigned so don't extend sign,
2758 * unless sysadmin set nfs_allow_preepoch_time.
2759 */
2760 NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec, sa->sa_mtime.tv_sec);
2761 vap->va_mtime.tv_nsec = (uint32_t)(sa->sa_mtime.tv_usec * 1000);
2762 }
2763 return (0);
2764 }
2765
2766 static enum nfsftype vt_to_nf[] = {
2767 0, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, 0, 0, 0, NFSOC, 0
2768 };
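
/*
 * Indexed by vtype_t. Assuming the usual illumos ordering (VNON,
 * VREG, VDIR, VBLK, VCHR, VLNK, VFIFO, VDOOR, VPROC, VSOCK, VPORT),
 * types with no NFSv2 wire equivalent map to 0 (NFNON).
 */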
2769
2770 /*
2771 * check the following fields for overflow: nodeid, size, and time.
2772 * There could be a problem when converting 64-bit LP64 fields
2773 * into 32-bit ones. Return an error if there is an overflow.
2774 */
2775 int
2776 vattr_to_nattr(struct vattr *vap, struct nfsfattr *na)
2777 {
2778 ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
2779 na->na_type = vt_to_nf[vap->va_type];
2780
2781 if (vap->va_mode == (unsigned short) -1)
2782 na->na_mode = (uint32_t)-1;
2783 else
2784 na->na_mode = VTTOIF(vap->va_type) | vap->va_mode;
2785
2786 if (vap->va_uid == (unsigned short)(-1))
2965 aclentp->a_perm;
2966 break;
2967 default:
2968 break;
2969 }
2970 }
2971 }
2972 /* copy to va */
2973 va->va_mode &= ~077;
2974 va->va_mode |= grp_perm | other_perm;
2975 }
2976 if (vsa.vsa_aclcnt)
2977 kmem_free(vsa.vsa_aclentp,
2978 vsa.vsa_aclcnt * sizeof (aclent_t));
2979 }
2980 }
2981
2982 void
2983 rfs_srvrinit(void)
2984 {
2985 mutex_init(&rfs_async_write_lock, NULL, MUTEX_DEFAULT, NULL);
2986 nfs2_srv_caller_id = fs_new_caller_id();
2987 }
2988
2989 void
2990 rfs_srvrfini(void)
2991 {
2992 mutex_destroy(&rfs_async_write_lock);
2993 }
2994
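/*
 * Attach the client's RDMA write list to the read reply: returns TRUE
 * on success, or FALSE if rdma_setup_read_chunks() fails for rr_count
 * bytes.
 */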
2995 static int
2996 rdma_setup_read_data2(struct nfsreadargs *ra, struct nfsrdresult *rr)
2997 {
2998 struct clist *wcl;
2999 int wlist_len;
3000 uint32_t count = rr->rr_count;
3001
3002 wcl = ra->ra_wlist;
3003
3004 if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
3005 return (FALSE);
3006 }
3007
3008 wcl = ra->ra_wlist;
3009 rr->rr_ok.rrok_wlist_len = wlist_len;
3010 rr->rr_ok.rrok_wlist = wcl;
3011
3012 return (TRUE);
3013 }
1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26 /*
27 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
28 * All rights reserved.
29 */
30
31 /*
32 * Copyright 2018 Nexenta Systems, Inc.
33 * Copyright (c) 2016 by Delphix. All rights reserved.
34 */
35
36 #include <sys/param.h>
37 #include <sys/types.h>
38 #include <sys/systm.h>
39 #include <sys/cred.h>
40 #include <sys/buf.h>
41 #include <sys/vfs.h>
42 #include <sys/vnode.h>
43 #include <sys/uio.h>
44 #include <sys/stat.h>
45 #include <sys/errno.h>
46 #include <sys/sysmacros.h>
47 #include <sys/statvfs.h>
48 #include <sys/kmem.h>
49 #include <sys/kstat.h>
50 #include <sys/dirent.h>
51 #include <sys/cmn_err.h>
52 #include <sys/debug.h>
53 #include <sys/vtrace.h>
54 #include <sys/mode.h>
55 #include <sys/acl.h>
56 #include <sys/nbmlock.h>
57 #include <sys/policy.h>
58 #include <sys/sdt.h>
59
60 #include <rpc/types.h>
61 #include <rpc/auth.h>
62 #include <rpc/svc.h>
63
64 #include <nfs/nfs.h>
65 #include <nfs/export.h>
66 #include <nfs/nfs_cmd.h>
67
68 #include <vm/hat.h>
69 #include <vm/as.h>
70 #include <vm/seg.h>
71 #include <vm/seg_map.h>
72 #include <vm/seg_kmem.h>
73
74 #include <sys/strsubr.h>
75
76 struct rfs_async_write_list;
77
78 /*
79 * Per-zone globals for the NFSv2 server.
80 */
81 typedef struct nfs_srv {
82 kmutex_t async_write_lock;
83 struct rfs_async_write_list *async_write_head;
84
85 /*
86 * enables write clustering if == 1
87 */
88 int write_async;
89 } nfs_srv_t;
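
/*
 * One nfs_srv_t is allocated per zone and attached via rfs_zone_key;
 * rfs_write() below retrieves it with
 * zone_getspecific(rfs_zone_key, curzone).
 */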
90
91 /*
92 * These are the interface routines for the server side of the
93 * Network File System. See the NFS version 2 protocol specification
94 * for a description of this interface.
95 */
96
97 static int sattr_to_vattr(struct nfssattr *, struct vattr *);
98 static void acl_perm(struct vnode *, struct exportinfo *, struct vattr *,
99 cred_t *);
100 static void *rfs_zone_init(zoneid_t zoneid);
101 static void rfs_zone_fini(zoneid_t zoneid, void *data);
102
103
104 /*
105 * Some "over the wire" UNIX file types. These are encoded
106 * into the mode. This needs to be fixed in the next rev.
107 */
108 #define IFMT 0170000 /* type of file */
109 #define IFCHR 0020000 /* character special */
110 #define IFBLK 0060000 /* block special */
111 #define IFSOCK 0140000 /* socket */
112
113 u_longlong_t nfs2_srv_caller_id;
114 static zone_key_t rfs_zone_key;
115
116 /*
117 * Get file attributes.
118 * Returns the current attributes of the file with the given fhandle.
119 */
120 /* ARGSUSED */
121 void
122 rfs_getattr(fhandle_t *fhp, struct nfsattrstat *ns, struct exportinfo *exi,
123 struct svc_req *req, cred_t *cr, bool_t ro)
124 {
125 int error;
126 vnode_t *vp;
127 struct vattr va;
128
129 vp = nfs_fhtovp(fhp, exi);
130 if (vp == NULL) {
131 ns->ns_status = NFSERR_STALE;
132 return;
133 }
134
335 }
336 }
337
338 ct.cc_flags = 0;
339
340 /*
341 * Force modified metadata out to stable storage.
342 */
343 (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
344
345 VN_RELE(vp);
346
347 ns->ns_status = puterrno(error);
348 }
349 void *
350 rfs_setattr_getfh(struct nfssaargs *args)
351 {
352 return (&args->saa_fh);
353 }
354
355 /* Change and release @exip and @vpp only on success. */
356 int
357 rfs_cross_mnt(vnode_t **vpp, struct exportinfo **exip)
358 {
359 struct exportinfo *exi;
360 vnode_t *vp = *vpp;
361 fid_t fid;
362 int error;
363
364 VN_HOLD(vp);
365
366 if ((error = traverse(&vp)) != 0) {
367 VN_RELE(vp);
368 return (error);
369 }
370
371 bzero(&fid, sizeof (fid));
372 fid.fid_len = MAXFIDSZ;
373 error = VOP_FID(vp, &fid, NULL);
374 if (error) {
375 VN_RELE(vp);
376 return (error);
377 }
378
379 exi = checkexport(&vp->v_vfsp->vfs_fsid, &fid);
380 if (exi == NULL ||
381 (exi->exi_export.ex_flags & EX_NOHIDE) == 0) {
382 /*
383 * This is not an error; the subdirectory is simply not
384 * exported, or "nohide" is not set.
385 */
386 if (exi != NULL)
387 exi_rele(&exi);
388 VN_RELE(vp);
389 } else {
390 /* go to submount */
391 exi_rele(exip);
392 *exip = exi;
393
394 VN_RELE(*vpp);
395 *vpp = vp;
396 }
397
398 return (0);
399 }
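
/*
 * rfs_cross_mnt() above is used by rfs_lookup() when the looked-up
 * vnode is a mount point (vn_ismntpt()), letting a lookup land in a
 * "nohide" submount transparently.
 */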
400
401 /*
402 * Given a mounted "dvp" and its "exi", climb to the covering
403 * (upper) mount point, correcting dvp/exi to match.
404 * Returns 0 on success.
405 */
406 int
407 rfs_climb_crossmnt(vnode_t **dvpp, struct exportinfo **exip, cred_t *cr)
408 {
409 struct exportinfo *exi;
410 vnode_t *dvp = *dvpp;
411
412 ASSERT(dvp->v_flag & VROOT);
413
414 VN_HOLD(dvp);
415 dvp = untraverse(dvp);
416 exi = nfs_vptoexi(NULL, dvp, cr, NULL, NULL, FALSE);
417 if (exi == NULL) {
418 VN_RELE(dvp);
419 return (-1);
420 }
421
422 exi_rele(exip);
423 *exip = exi;
424 VN_RELE(*dvpp);
425 *dvpp = dvp;
426
427 return (0);
428 }
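
/*
 * rfs_climb_crossmnt() above is the ".." counterpart, used by
 * rfs_lookup() when ".." is looked up at the root of a "nohide"
 * exported filesystem.
 */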
429 /*
430 * Directory lookup.
431 * Returns an fhandle and file attributes for file name in a directory.
432 */
433 /* ARGSUSED */
434 void
435 rfs_lookup(struct nfsdiropargs *da, struct nfsdiropres *dr,
436 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
437 {
438 int error;
439 vnode_t *dvp;
440 vnode_t *vp;
441 struct vattr va;
442 fhandle_t *fhp = da->da_fhandle;
443 struct sec_ol sec = {0, 0};
444 bool_t publicfh_flag = FALSE, auth_weak = FALSE;
445 char *name;
446 struct sockaddr *ca;
447
448 /*
449 * Trusted Extension doesn't support NFSv2. MOUNT
451 * access via WebNFS here.
452 */
453 if (is_system_labeled() && req->rq_vers == 2) {
454 dr->dr_status = NFSERR_ACCES;
455 return;
456 }
457
458 /*
459 * Disallow NULL paths
460 */
461 if (da->da_name == NULL || *da->da_name == '\0') {
462 dr->dr_status = NFSERR_ACCES;
463 return;
464 }
465
466 /*
467 * Allow lookups from the root - the default
468 * location of the public filehandle.
469 */
470 if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
471 dvp = ZONE_ROOTVP();
472 VN_HOLD(dvp);
473 } else {
474 dvp = nfs_fhtovp(fhp, exi);
475 if (dvp == NULL) {
476 dr->dr_status = NFSERR_STALE;
477 return;
478 }
479 }
480
481 exi_hold(exi);
482
483 /*
484 * Do not allow lookups beyond the root.
485 * If the filehandle matches a filehandle of the exi,
486 * then the ".." refers beyond the root of an exported filesystem.
487 */
488 if (strcmp(da->da_name, "..") == 0 &&
489 EQFID(&exi->exi_fid, (fid_t *)&fhp->fh_len)) {
490 if ((exi->exi_export.ex_flags & EX_NOHIDE) &&
491 (dvp->v_flag & VROOT)) {
492 /*
493 * Special case: ".." at the root of a 'nohide' export.
494 */
495 if (rfs_climb_crossmnt(&dvp, &exi, cr) != 0) {
496 error = NFSERR_ACCES;
497 goto out;
498 }
499 } else {
500 error = NFSERR_NOENT;
501 goto out;
502 }
503 }
504
505 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
506 name = nfscmd_convname(ca, exi, da->da_name, NFSCMD_CONV_INBOUND,
507 MAXPATHLEN);
508
509 if (name == NULL) {
510 error = NFSERR_ACCES;
511 goto out;
512 }
513
514 /*
515 * If the public filehandle is used then allow
516 * a multi-component lookup, i.e. evaluate
517 * a pathname and follow symbolic links if
518 * necessary.
519 *
520 * This may result in a vnode in another filesystem
521 * which is OK as long as the filesystem is exported.
522 */
523 if (PUBLIC_FH2(fhp)) {
524 publicfh_flag = TRUE;
525
526 exi_rele(&exi);
527
528 error = rfs_publicfh_mclookup(name, dvp, cr, &vp, &exi,
529 &sec);
530 } else {
531 /*
532 * Do a normal single component lookup.
533 */
534 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
535 NULL, NULL, NULL);
536 }
537
538 if (name != da->da_name)
539 kmem_free(name, MAXPATHLEN);
540
541 if (error == 0 && vn_ismntpt(vp)) {
542 error = rfs_cross_mnt(&vp, &exi);
543 if (error)
544 VN_RELE(vp);
545 }
546
547 if (!error) {
548 va.va_mask = AT_ALL; /* we want everything */
549
550 error = rfs4_delegated_getattr(vp, &va, 0, cr);
551
552 /* check for overflows */
553 if (!error) {
554 acl_perm(vp, exi, &va, cr);
555 error = vattr_to_nattr(&va, &dr->dr_attr);
556 if (!error) {
557 if (sec.sec_flags & SEC_QUERY)
558 error = makefh_ol(&dr->dr_fhandle, exi,
559 sec.sec_index);
560 else {
561 error = makefh(&dr->dr_fhandle, vp,
562 exi);
563 if (!error && publicfh_flag &&
564 !chk_clnt_sec(exi, req))
565 auth_weak = TRUE;
566 }
567 }
568 }
569 VN_RELE(vp);
570 }
571
572 out:
573 VN_RELE(dvp);
574
575 if (exi != NULL)
576 exi_rele(&exi);
577
578 /*
579 * If it's public fh, no 0x81, and client's flavor is
580 * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
581 * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
582 */
583 if (auth_weak)
584 dr->dr_status = (enum nfsstat)WNFSERR_CLNT_FLAVOR;
585 else
586 dr->dr_status = puterrno(error);
587 }
588 void *
589 rfs_lookup_getfh(struct nfsdiropargs *da)
590 {
591 return (da->da_fhandle);
592 }
593
594 /*
595 * Read symbolic link.
596 * Returns the string in the symbolic link at the given fhandle.
780 * Enter the critical region before calling VOP_RWLOCK
781 * to avoid a deadlock with write requests.
782 */
783 if (nbl_need_check(vp)) {
784 nbl_start_crit(vp, RW_READER);
785 if (nbl_conflict(vp, NBL_READ, ra->ra_offset, ra->ra_count,
786 0, NULL)) {
787 nbl_end_crit(vp);
788 VN_RELE(vp);
789 rr->rr_data = NULL;
790 rr->rr_status = NFSERR_ACCES;
791 return;
792 }
793 in_crit = 1;
794 }
795
796 error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);
797
798 /* check if a monitor detected a delegation conflict */
799 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
800 if (in_crit)
801 nbl_end_crit(vp);
802 VN_RELE(vp);
803 /* mark as wouldblock so response is dropped */
804 curthread->t_flag |= T_WOULDBLOCK;
805
806 rr->rr_data = NULL;
807 return;
808 }
809
810 va.va_mask = AT_ALL;
811
812 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
813
814 if (error) {
815 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
816 if (in_crit)
817 nbl_end_crit(vp);
818
819 VN_RELE(vp);
820 rr->rr_data = NULL;
821 rr->rr_status = puterrno(error);
1107 }
1108
1109 /*
1110 * We have to enter the critical region before calling VOP_RWLOCK
1111 * to avoid a deadlock with ufs.
1112 */
1113 if (nbl_need_check(vp)) {
1114 nbl_start_crit(vp, RW_READER);
1115 in_crit = 1;
1116 if (nbl_conflict(vp, NBL_WRITE, wa->wa_offset,
1117 wa->wa_count, 0, NULL)) {
1118 error = EACCES;
1119 goto out;
1120 }
1121 }
1122
1123 error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
1124
1125 /* check if a monitor detected a delegation conflict */
1126 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1127 goto out;
1128 }
1129
1130 if (wa->wa_data || wa->wa_rlist) {
1131 /* Do the RDMA thing if necessary */
1132 if (wa->wa_rlist) {
1133 iov[0].iov_base = (char *)((wa->wa_rlist)->u.c_daddr3);
1134 iov[0].iov_len = wa->wa_count;
1135 } else {
1136 iov[0].iov_base = wa->wa_data;
1137 iov[0].iov_len = wa->wa_count;
1138 }
1139 uio.uio_iov = iov;
1140 uio.uio_iovcnt = 1;
1141 uio.uio_segflg = UIO_SYSSPACE;
1142 uio.uio_extflg = UIO_COPY_DEFAULT;
1143 uio.uio_loffset = (offset_t)wa->wa_offset;
1144 uio.uio_resid = wa->wa_count;
1145 /*
1146 * The limit is checked on the client. We
1147 * should allow any size writes here.
1148 */
1149 uio.uio_llimit = curproc->p_fsz_ctl;
1150 rlimit = uio.uio_llimit - wa->wa_offset;
1151 if (rlimit < (rlim64_t)uio.uio_resid)
1152 uio.uio_resid = (uint_t)rlimit;
1153
1154 /*
1155 * for now we assume no append mode
1156 */
1157 /*
1158 * We're changing creds because VM may fault and we need
1159 * the cred of the current thread to be used if quota
1160 * checking is enabled.
1161 */
1162 savecred = curthread->t_cred;
1163 curthread->t_cred = cr;
1164 error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct);
1165 curthread->t_cred = savecred;
1166 } else {
1167
1168 iovcnt = 0;
1169 for (m = wa->wa_mblk; m != NULL; m = m->b_cont)
1170 iovcnt++;
1171 if (iovcnt <= MAX_IOVECS) {
1172 #ifdef DEBUG
1173 rfs_write_sync_hits++;
1174 #endif
1175 iovp = iov;
1176 } else {
1177 #ifdef DEBUG
1178 rfs_write_sync_misses++;
1179 #endif
1180 iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
1181 }
1182 mblk_to_iov(wa->wa_mblk, iovcnt, iovp);
1183 uio.uio_iov = iovp;
1184 uio.uio_iovcnt = iovcnt;
1185 uio.uio_segflg = UIO_SYSSPACE;
1186 uio.uio_extflg = UIO_COPY_DEFAULT;
1187 uio.uio_loffset = (offset_t)wa->wa_offset;
1246
1247 struct rfs_async_write {
1248 struct nfswriteargs *wa;
1249 struct nfsattrstat *ns;
1250 struct svc_req *req;
1251 cred_t *cr;
1252 bool_t ro;
1253 kthread_t *thread;
1254 struct rfs_async_write *list;
1255 };
1256
1257 struct rfs_async_write_list {
1258 fhandle_t *fhp;
1259 kcondvar_t cv;
1260 struct rfs_async_write *list;
1261 struct rfs_async_write_list *next;
1262 };
1263
1264 static struct rfs_async_write_list *rfs_async_write_head = NULL;
1265 static kmutex_t rfs_async_write_lock;
1266 volatile int rfs_write_async = 1; /* enables write clustering if == 1 */
1267
1268 #define MAXCLIOVECS 42
1269 #define RFSWRITE_INITVAL (enum nfsstat) -1
1270
1271 #ifdef DEBUG
1272 static int rfs_write_hits = 0;
1273 static int rfs_write_misses = 0;
1274 #endif
1275
1276 /*
1277 * Write data to file.
1278 * Returns attributes of a file after writing some data to it.
1279 */
1280 void
1281 rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns,
1282 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
1283 {
1284 int error;
1285 vnode_t *vp;
1286 rlim64_t rlimit;
1291 struct rfs_async_write *rp;
1292 struct rfs_async_write *nrp;
1293 struct rfs_async_write *trp;
1294 struct rfs_async_write *lrp;
1295 int data_written;
1296 int iovcnt;
1297 mblk_t *m;
1298 struct iovec *iovp;
1299 struct iovec *niovp;
1300 struct iovec iov[MAXCLIOVECS];
1301 int count;
1302 int rcount;
1303 uint_t off;
1304 uint_t len;
1305 struct rfs_async_write nrpsp;
1306 struct rfs_async_write_list nlpsp;
1307 ushort_t t_flag;
1308 cred_t *savecred;
1309 int in_crit = 0;
1310 caller_context_t ct;
1311 nfs_srv_t *nsrv;
1312
1313 nsrv = zone_getspecific(rfs_zone_key, curzone);
1314 if (!nsrv->write_async) {
1315 rfs_write_sync(wa, ns, exi, req, cr, ro);
1316 return;
1317 }
1318
1319 /*
1320 * Initialize status to RFSWRITE_INITVAL instead of 0, since a value
1321 * of 0 is read as NFS_OK.
1322 */
1323 ns->ns_status = RFSWRITE_INITVAL;
1324
1325 nrp = &nrpsp;
1326 nrp->wa = wa;
1327 nrp->ns = ns;
1328 nrp->req = req;
1329 nrp->cr = cr;
1330 nrp->ro = ro;
1331 nrp->thread = curthread;
1332
1333 ASSERT(curthread->t_schedflag & TS_DONT_SWAP);
1334
1335 /*
1336 * Look to see if there is already a cluster started
1337 * for this file.
1338 */
1339 mutex_enter(&nsrv->async_write_lock);
1340 for (lp = nsrv->async_write_head; lp != NULL; lp = lp->next) {
1341 if (bcmp(&wa->wa_fhandle, lp->fhp,
1342 sizeof (fhandle_t)) == 0)
1343 break;
1344 }
1345
1346 /*
1347 * If lp is non-NULL, then there is already a cluster
1348 * started. We need to place ourselves in the cluster
1349 * list in the right place as determined by starting
1350 * offset. Conflicts with non-blocking mandatory locked
1351 * regions will be checked when the cluster is processed.
1352 */
1353 if (lp != NULL) {
1354 rp = lp->list;
1355 trp = NULL;
1356 while (rp != NULL && rp->wa->wa_offset < wa->wa_offset) {
1357 trp = rp;
1358 rp = rp->list;
1359 }
1360 nrp->list = rp;
1361 if (trp == NULL)
1362 lp->list = nrp;
1363 else
1364 trp->list = nrp;
1365 while (nrp->ns->ns_status == RFSWRITE_INITVAL)
1366 cv_wait(&lp->cv, &nsrv->async_write_lock);
1367 mutex_exit(&nsrv->async_write_lock);
1368
1369 return;
1370 }
1371
1372 /*
1373 * No cluster started yet, start one and add ourselves
1374 * to the list of clusters.
1375 */
1376 nrp->list = NULL;
1377
1378 nlp = &nlpsp;
1379 nlp->fhp = &wa->wa_fhandle;
1380 cv_init(&nlp->cv, NULL, CV_DEFAULT, NULL);
1381 nlp->list = nrp;
1382 nlp->next = NULL;
1383
1384 if (nsrv->async_write_head == NULL) {
1385 nsrv->async_write_head = nlp;
1386 } else {
1387 lp = nsrv->async_write_head;
1388 while (lp->next != NULL)
1389 lp = lp->next;
1390 lp->next = nlp;
1391 }
1392 mutex_exit(&nsrv->async_write_lock);
1393
1394 /*
1395 * Convert the file handle common to all of the requests
1396 * in this cluster to a vnode.
1397 */
1398 vp = nfs_fhtovp(&wa->wa_fhandle, exi);
1399 if (vp == NULL) {
1400 mutex_enter(&nsrv->async_write_lock);
1401 if (nsrv->async_write_head == nlp)
1402 nsrv->async_write_head = nlp->next;
1403 else {
1404 lp = nsrv->async_write_head;
1405 while (lp->next != nlp)
1406 lp = lp->next;
1407 lp->next = nlp->next;
1408 }
1409 t_flag = curthread->t_flag & T_WOULDBLOCK;
1410 for (rp = nlp->list; rp != NULL; rp = rp->list) {
1411 rp->ns->ns_status = NFSERR_STALE;
1412 rp->thread->t_flag |= t_flag;
1413 }
1414 cv_broadcast(&nlp->cv);
1415 mutex_exit(&nsrv->async_write_lock);
1416
1417 return;
1418 }
1419
1420 /*
1421 * Can only write regular files. Attempts to write any
1422 * other file types fail with EISDIR.
1423 */
1424 if (vp->v_type != VREG) {
1425 VN_RELE(vp);
1426 mutex_enter(&nsrv->async_write_lock);
1427 if (nsrv->async_write_head == nlp)
1428 nsrv->async_write_head = nlp->next;
1429 else {
1430 lp = nsrv->async_write_head;
1431 while (lp->next != nlp)
1432 lp = lp->next;
1433 lp->next = nlp->next;
1434 }
1435 t_flag = curthread->t_flag & T_WOULDBLOCK;
1436 for (rp = nlp->list; rp != NULL; rp = rp->list) {
1437 rp->ns->ns_status = NFSERR_ISDIR;
1438 rp->thread->t_flag |= t_flag;
1439 }
1440 cv_broadcast(&nlp->cv);
1441 mutex_exit(&nsrv->async_write_lock);
1442
1443 return;
1444 }
1445
1446 /*
1447 * Enter the critical region before calling VOP_RWLOCK, to avoid a
1448 * deadlock with ufs.
1449 */
1450 if (nbl_need_check(vp)) {
1451 nbl_start_crit(vp, RW_READER);
1452 in_crit = 1;
1453 }
1454
1455 ct.cc_sysid = 0;
1456 ct.cc_pid = 0;
1457 ct.cc_caller_id = nfs2_srv_caller_id;
1458 ct.cc_flags = CC_DONTBLOCK;
1459
1460 /*
1461 * Lock the file for writing. This operation provides
1462 * the delay which allows clusters to grow.
1463 */
1464 error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
1465
1466 /* check if a monitor detected a delegation conflict */
1467 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1468 if (in_crit)
1469 nbl_end_crit(vp);
1470 VN_RELE(vp);
1471 /* mark as wouldblock so response is dropped */
1472 curthread->t_flag |= T_WOULDBLOCK;
1473 mutex_enter(&nsrv->async_write_lock);
1474 if (nsrv->async_write_head == nlp)
1475 nsrv->async_write_head = nlp->next;
1476 else {
1477 lp = nsrv->async_write_head;
1478 while (lp->next != nlp)
1479 lp = lp->next;
1480 lp->next = nlp->next;
1481 }
1482 for (rp = nlp->list; rp != NULL; rp = rp->list) {
1483 if (rp->ns->ns_status == RFSWRITE_INITVAL) {
1484 rp->ns->ns_status = puterrno(error);
1485 rp->thread->t_flag |= T_WOULDBLOCK;
1486 }
1487 }
1488 cv_broadcast(&nlp->cv);
1489 mutex_exit(&nsrv->async_write_lock);
1490
1491 return;
1492 }
1493
1494 /*
1495 * Disconnect this cluster from the list of clusters.
1496 * The cluster that is being dealt with must be fixed
1497 * in size after this point, so there is no reason
1498 * to leave it on the list so that new requests can
1499 * find it.
1500 *
1501 * The algorithm is that the first write request will
1502 * create a cluster, convert the file handle to a
1503 * vnode pointer, and then lock the file for writing.
1504 * This request is not likely to be clustered with
1505 * any others. However, the next request will create
1506 * a new cluster and be blocked in VOP_RWLOCK while
1507 * the first request is being processed. This delay
1508 * will allow more requests to be clustered in this
1509 * second cluster.
1510 */
1511 mutex_enter(&nsrv->async_write_lock);
1512 if (nsrv->async_write_head == nlp)
1513 nsrv->async_write_head = nlp->next;
1514 else {
1515 lp = nsrv->async_write_head;
1516 while (lp->next != nlp)
1517 lp = lp->next;
1518 lp->next = nlp->next;
1519 }
1520 mutex_exit(&nsrv->async_write_lock);
1521
1522 /*
1523 * Step through the list of requests in this cluster.
1524 * We need to check permissions to make sure that all
1525 * of the requests have sufficient permission to write
1526 * the file. A cluster can be composed of requests
1527 * from different clients and different users on each
1528 * client.
1529 *
1530 * As a side effect, we also calculate the size of the
1531 * byte range that this cluster encompasses.
1532 */
1533 rp = nlp->list;
1534 off = rp->wa->wa_offset;
1535 len = (uint_t)0;
1536 do {
1537 if (rdonly(rp->ro, vp)) {
1538 rp->ns->ns_status = NFSERR_ROFS;
1539 t_flag = curthread->t_flag & T_WOULDBLOCK;
1540 rp->thread->t_flag |= t_flag;
1745
1746 /*
1747 * If any data was written at all, then we need to flush
1748 * the data and metadata to stable storage.
1749 */
1750 if (data_written) {
1751 error = VOP_PUTPAGE(vp, (u_offset_t)off, len, 0, cr, &ct);
1752
1753 if (!error) {
1754 error = VOP_FSYNC(vp, FNODSYNC, cr, &ct);
1755 }
1756 }
1757
1758 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
1759
1760 if (in_crit)
1761 nbl_end_crit(vp);
1762 VN_RELE(vp);
1763
1764 t_flag = curthread->t_flag & T_WOULDBLOCK;
1765 mutex_enter(&nsrv->async_write_lock);
1766 for (rp = nlp->list; rp != NULL; rp = rp->list) {
1767 if (rp->ns->ns_status == RFSWRITE_INITVAL) {
1768 rp->ns->ns_status = puterrno(error);
1769 rp->thread->t_flag |= t_flag;
1770 }
1771 }
1772 cv_broadcast(&nlp->cv);
1773 mutex_exit(&nsrv->async_write_lock);
1774
1775 }
1776
1777 void *
1778 rfs_write_getfh(struct nfswriteargs *wa)
1779 {
1780 return (&wa->wa_fhandle);
1781 }
1782
1783 /*
1784 * Create a file.
1785 * Creates a file with given attributes and returns those attributes
1786 * and an fhandle for the new file.
1787 */
1788 void
1789 rfs_create(struct nfscreatargs *args, struct nfsdiropres *dr,
1790 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
1791 {
1792 int error;
1793 int lookuperr;
1815 if (dvp == NULL) {
1816 dr->dr_status = NFSERR_STALE;
1817 return;
1818 }
1819
1820 error = sattr_to_vattr(args->ca_sa, &va);
1821 if (error) {
1822 dr->dr_status = puterrno(error);
1823 return;
1824 }
1825
1826 /*
1827 * Must specify the mode.
1828 */
1829 if (!(va.va_mask & AT_MODE)) {
1830 VN_RELE(dvp);
1831 dr->dr_status = NFSERR_INVAL;
1832 return;
1833 }
1834
1835 if (protect_zfs_mntpt(dvp) != 0) {
1836 VN_RELE(dvp);
1837 dr->dr_status = NFSERR_ACCES;
1838 return;
1839 }
1840
	/*
	 * This is a completely gross hack to make mknod work over the
	 * wire until we can whack the protocol.
	 */
1845 if ((va.va_mode & IFMT) == IFCHR) {
1846 if (args->ca_sa->sa_size == (uint_t)NFS_FIFO_DEV)
1847 va.va_type = VFIFO; /* xtra kludge for named pipe */
1848 else {
1849 va.va_type = VCHR;
			/*
			 * Uncompress the received dev_t if the top half
			 * is zero, indicating a request from an `older
			 * style' OS.
			 */
1855 if ((va.va_size & 0xffff0000) == 0)
1856 va.va_rdev = nfsv2_expdev(va.va_size);
1857 else
1858 va.va_rdev = (dev_t)va.va_size;
1859 }
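		/*
		 * The size field carried the encoded dev_t, not a real
		 * file size, so do not apply it as an attribute.
		 */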
1860 va.va_mask &= ~AT_SIZE;
2160 vnode_t *tovp;
2161 struct exportinfo *to_exi;
2162 fhandle_t *fh;
2163 vnode_t *srcvp;
2164 vnode_t *targvp;
2165 int in_crit = 0;
2166
2167 fromvp = nfs_fhtovp(args->rna_from.da_fhandle, exi);
2168 if (fromvp == NULL) {
2169 *status = NFSERR_STALE;
2170 return;
2171 }
2172
2173 fh = args->rna_to.da_fhandle;
2174 to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen);
2175 if (to_exi == NULL) {
2176 VN_RELE(fromvp);
2177 *status = NFSERR_ACCES;
2178 return;
2179 }
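
	/*
	 * The target directory must be in the same export as the
	 * source; compare the exports before dropping the hold on
	 * to_exi.
	 */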
	if (to_exi != exi) {
		exi_rele(&to_exi);
		VN_RELE(fromvp);
		*status = NFSERR_XDEV;
		return;
	}
	exi_rele(&to_exi);
2187
2188 tovp = nfs_fhtovp(args->rna_to.da_fhandle, exi);
2189 if (tovp == NULL) {
2190 VN_RELE(fromvp);
2191 *status = NFSERR_STALE;
2192 return;
2193 }
2194
2195 if (fromvp->v_type != VDIR || tovp->v_type != VDIR) {
2196 VN_RELE(tovp);
2197 VN_RELE(fromvp);
2198 *status = NFSERR_NOTDIR;
2199 return;
2200 }
2201
2202 /*
2203 * Disallow NULL paths
2204 */
2205 if (args->rna_from.da_name == NULL || *args->rna_from.da_name == '\0' ||
2206 args->rna_to.da_name == NULL || *args->rna_to.da_name == '\0') {
2207 VN_RELE(tovp);
2208 VN_RELE(fromvp);
2209 *status = NFSERR_ACCES;
2210 return;
2211 }
2212
2213 if (rdonly(ro, tovp)) {
2214 VN_RELE(tovp);
2215 VN_RELE(fromvp);
2216 *status = NFSERR_ROFS;
2217 return;
2218 }
2219
2220 if (protect_zfs_mntpt(tovp) != 0) {
2221 VN_RELE(tovp);
2222 VN_RELE(fromvp);
2223 *status = NFSERR_ACCES;
2224 return;
2225 }
2226
	/*
	 * Look up the source file so that it can be checked for
	 * delegations and for a conflict with a non-blocking mandatory
	 * share reservation.
	 */
2230 error = VOP_LOOKUP(fromvp, args->rna_from.da_name, &srcvp, NULL, 0,
2231 NULL, cr, NULL, NULL, NULL);
2232 if (error != 0) {
2233 VN_RELE(tovp);
2234 VN_RELE(fromvp);
2235 *status = puterrno(error);
2236 return;
2237 }
2238
2239 /* Check for delegations on the source file */
2240
2241 if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
2242 VN_RELE(tovp);
2243 VN_RELE(fromvp);
2244 VN_RELE(srcvp);
2245 curthread->t_flag |= T_WOULDBLOCK;
2246 return;
2247 }
2248
2249 /* Check for delegation on the file being renamed over, if it exists */
2250
2251 if (nfs4_get_deleg_policy() != SRV_NEVER_DELEGATE &&
2252 VOP_LOOKUP(tovp, args->rna_to.da_name, &targvp, NULL, 0, NULL, cr,
2253 NULL, NULL, NULL) == 0) {
2254
2255 if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2256 VN_RELE(tovp);
2257 VN_RELE(fromvp);
2258 VN_RELE(srcvp);
2259 VN_RELE(targvp);
2260 curthread->t_flag |= T_WOULDBLOCK;
2261 return;
2262 }
2263 VN_RELE(targvp);
2264 }
2265
2267 if (nbl_need_check(srcvp)) {
2268 nbl_start_crit(srcvp, RW_READER);
2269 in_crit = 1;
2270 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
2271 error = EACCES;
2313 {
2314 int error;
2315 vnode_t *fromvp;
2316 vnode_t *tovp;
2317 struct exportinfo *to_exi;
2318 fhandle_t *fh;
2319
2320 fromvp = nfs_fhtovp(args->la_from, exi);
2321 if (fromvp == NULL) {
2322 *status = NFSERR_STALE;
2323 return;
2324 }
2325
2326 fh = args->la_to.da_fhandle;
2327 to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen);
2328 if (to_exi == NULL) {
2329 VN_RELE(fromvp);
2330 *status = NFSERR_ACCES;
2331 return;
2332 }
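
	/*
	 * The target directory must be in the same export as the
	 * source file; compare the exports before dropping the hold
	 * on to_exi.
	 */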
	if (to_exi != exi) {
		exi_rele(&to_exi);
		VN_RELE(fromvp);
		*status = NFSERR_XDEV;
		return;
	}
	exi_rele(&to_exi);
2340
2341 tovp = nfs_fhtovp(args->la_to.da_fhandle, exi);
2342 if (tovp == NULL) {
2343 VN_RELE(fromvp);
2344 *status = NFSERR_STALE;
2345 return;
2346 }
2347
2348 if (tovp->v_type != VDIR) {
2349 VN_RELE(tovp);
2350 VN_RELE(fromvp);
2351 *status = NFSERR_NOTDIR;
2352 return;
2353 }
2354 /*
2355 * Disallow NULL paths
2356 */
2357 if (args->la_to.da_name == NULL || *args->la_to.da_name == '\0') {
2358 VN_RELE(tovp);
2359 VN_RELE(fromvp);
2360 *status = NFSERR_ACCES;
2361 return;
2362 }
2363
2364 if (rdonly(ro, tovp)) {
2365 VN_RELE(tovp);
2366 VN_RELE(fromvp);
2367 *status = NFSERR_ROFS;
2368 return;
2369 }
2370
2371 if (protect_zfs_mntpt(tovp) != 0) {
2372 VN_RELE(tovp);
2373 VN_RELE(fromvp);
2374 *status = NFSERR_ACCES;
2375 return;
2376 }
2377
2378 error = VOP_LINK(tovp, fromvp, args->la_to.da_name, cr, NULL, 0);
2379
2380 /*
2381 * Force modified data and metadata out to stable storage.
2382 */
2383 (void) VOP_FSYNC(tovp, 0, cr, NULL);
2384 (void) VOP_FSYNC(fromvp, FNODSYNC, cr, NULL);
2385
2386 VN_RELE(tovp);
2387 VN_RELE(fromvp);
2388
2389 *status = puterrno(error);
2390
2391 }
2392 void *
2393 rfs_link_getfh(struct nfslinkargs *args)
2394 {
2395 return (args->la_from);
2396 }
2397
2398 /*
 * Symbolically link to a file.
 * Create a file (from) with the given attributes that is a symbolic
 * link to the given path name (to).
2402 */
2403 void
2404 rfs_symlink(struct nfsslargs *args, enum nfsstat *status,
2405 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2406 {
2407 int error;
2408 struct vattr va;
2409 vnode_t *vp;
2410 vnode_t *svp;
2411 int lerror;
2412 struct sockaddr *ca;
2413 char *name = NULL;
2414
2415 /*
2416 * Disallow NULL paths
2417 */
2418 if (args->sla_from.da_name == NULL || *args->sla_from.da_name == '\0') {
2419 *status = NFSERR_ACCES;
2420 return;
2428
2429 if (rdonly(ro, vp)) {
2430 VN_RELE(vp);
2431 *status = NFSERR_ROFS;
2432 return;
2433 }
2434
2435 error = sattr_to_vattr(args->sla_sa, &va);
2436 if (error) {
2437 VN_RELE(vp);
2438 *status = puterrno(error);
2439 return;
2440 }
2441
2442 if (!(va.va_mask & AT_MODE)) {
2443 VN_RELE(vp);
2444 *status = NFSERR_INVAL;
2445 return;
2446 }
2447
2448 if (protect_zfs_mntpt(vp) != 0) {
2449 VN_RELE(vp);
2450 *status = NFSERR_ACCES;
2451 return;
2452 }
2453
2454 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2455 name = nfscmd_convname(ca, exi, args->sla_tnm,
2456 NFSCMD_CONV_INBOUND, MAXPATHLEN);
2457
	if (name == NULL) {
		VN_RELE(vp);
		*status = NFSERR_ACCES;
		return;
	}
2462
2463 va.va_type = VLNK;
2464 va.va_mask |= AT_TYPE;
2465
2466 error = VOP_SYMLINK(vp, args->sla_from.da_name, &va, name, cr, NULL, 0);
2467
2468 /*
2469 * Force new data and metadata out to stable storage.
2470 */
2471 lerror = VOP_LOOKUP(vp, args->sla_from.da_name, &svp, NULL, 0,
2472 NULL, cr, NULL, NULL, NULL);
2473
2526
2527 if (rdonly(ro, vp)) {
2528 VN_RELE(vp);
2529 dr->dr_status = NFSERR_ROFS;
2530 return;
2531 }
2532
2533 error = sattr_to_vattr(args->ca_sa, &va);
2534 if (error) {
2535 VN_RELE(vp);
2536 dr->dr_status = puterrno(error);
2537 return;
2538 }
2539
2540 if (!(va.va_mask & AT_MODE)) {
2541 VN_RELE(vp);
2542 dr->dr_status = NFSERR_INVAL;
2543 return;
2544 }
2545
2546 if (protect_zfs_mntpt(vp) != 0) {
2547 VN_RELE(vp);
2548 dr->dr_status = NFSERR_ACCES;
2549 return;
2550 }
2551
2552 va.va_type = VDIR;
2553 va.va_mask |= AT_TYPE;
2554
2555 error = VOP_MKDIR(vp, name, &va, &dvp, cr, NULL, 0, NULL);
2556
2557 if (!error) {
2558 /*
		 * Attributes of the newly created directory should
2560 * be returned to the client.
2561 */
2562 va.va_mask = AT_ALL; /* We want everything */
2563 error = VOP_GETATTR(dvp, &va, 0, cr, NULL);
2564
		/* vattr_to_nattr() checks the attributes for overflow */
2566 if (!error) {
2567 acl_perm(vp, exi, &va, cr);
2568 error = vattr_to_nattr(&va, &dr->dr_attr);
2569 if (!error) {
2570 error = makefh(&dr->dr_fhandle, dvp, exi);
2571 }
2617 if (vp == NULL) {
2618 *status = NFSERR_STALE;
2619 return;
2620 }
2621
2622 if (rdonly(ro, vp)) {
2623 VN_RELE(vp);
2624 *status = NFSERR_ROFS;
2625 return;
2626 }
2627
2628 /*
2629 * VOP_RMDIR takes a third argument (the current
2630 * directory of the process). That's because someone
2631 * wants to return EINVAL if one tries to remove ".".
2632 * Of course, NFS servers have no idea what their
2633 * clients' current directories are. We fake it by
2634 * supplying a vnode known to exist and illegal to
2635 * remove.
2636 */
2637 error = VOP_RMDIR(vp, da->da_name, ZONE_ROOTVP(), cr, NULL, 0);
2638
2639 /*
2640 * Force modified data and metadata out to stable storage.
2641 */
2642 (void) VOP_FSYNC(vp, 0, cr, NULL);
2643
2644 VN_RELE(vp);
2645
2646 /*
2647 * System V defines rmdir to return EEXIST, not ENOTEMPTY,
2648 * if the directory is not empty. A System V NFS server
2649 * needs to map NFSERR_EXIST to NFSERR_NOTEMPTY to transmit
2650 * over the wire.
2651 */
2652 if (error == EEXIST)
2653 *status = NFSERR_NOTEMPTY;
2654 else
2655 *status = puterrno(error);
2656
2657 }
2658 void *
2659 rfs_rmdir_getfh(struct nfsdiropargs *da)
2660 {
2661 return (da->da_fhandle);
2662 }
2663
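/* Step to the next dirent64 in a buffer filled by VOP_READDIR(). */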
2664 #ifdef nextdp
2665 #undef nextdp
2666 #endif
2667 #define nextdp(dp) ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
2668
2669 /* ARGSUSED */
2670 void
2671 rfs_readdir(struct nfsrddirargs *rda, struct nfsrddirres *rd,
2672 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2673 {
2674 int error;
2675 vnode_t *vp;
2676 struct iovec iov;
2677 struct uio uio;
2678 int iseof;
2679
2680 uint32_t count = rda->rda_count;
2681 uint32_t size; /* size of the readdirres structure */
2682 int overflow = 0;
2683
2684 size_t datasz;
2685 char *data = NULL;
2686 dirent64_t *dp;
2687
2688 struct sockaddr *ca;
2689 struct nfsentry **eptr;
2690 struct nfsentry *entry;
2691
2692 vp = nfs_fhtovp(&rda->rda_fh, exi);
2693 if (vp == NULL) {
2694 rd->rd_status = NFSERR_STALE;
2695 return;
2696 }
2697
2698 if (vp->v_type != VDIR) {
2699 VN_RELE(vp);
2700 rd->rd_status = NFSERR_NOTDIR;
2701 return;
2702 }
2703
2704 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
2705
2706 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
2707 if (error)
2708 goto bad;
2709
2710 /*
2711 * Don't allow arbitrary counts for allocation
2712 */
2713 if (count > NFS_MAXDATA)
2714 count = NFS_MAXDATA;
2715
2716 /*
2717 * struct readdirres:
2718 * status: 1
2719 * entries (bool): 1
2720 * eof: 1
2721 */
2722 size = (1 + 1 + 1) * BYTES_PER_XDR_UNIT;
2723
2724 if (size > count) {
2725 eptr = &rd->rd_entries;
2726 iseof = 0;
2727 size = 0;
2728
2729 goto done;
2730 }
2731
	/*
	 * This is a simplification.  The dirent64_t size is not the
	 * same as the size of the XDR representation of an entry, but
	 * the sizes are close enough that we assume they are the same.
	 * This assumption should not cause any harm; in the worst case
	 * we will need to issue VOP_READDIR() once more.
	 */
2738 datasz = count;
2739
2740 /*
2741 * Make sure that there is room to read at least one entry
2742 * if any are available.
2743 */
2744 if (datasz < DIRENT64_RECLEN(MAXNAMELEN))
2745 datasz = DIRENT64_RECLEN(MAXNAMELEN);
2746
2747 data = kmem_alloc(datasz, KM_NOSLEEP);
2748 if (data == NULL) {
2749 /* The allocation failed; downsize and wait for it this time */
2750 if (datasz > MAXBSIZE)
2751 datasz = MAXBSIZE;
2752 data = kmem_alloc(datasz, KM_SLEEP);
2753 }
2754
2755 uio.uio_iov = &iov;
2756 uio.uio_iovcnt = 1;
2757 uio.uio_segflg = UIO_SYSSPACE;
2758 uio.uio_extflg = UIO_COPY_CACHED;
2759 uio.uio_loffset = (offset_t)rda->rda_offset;
2760 uio.uio_resid = datasz;
2761
2762 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2763 eptr = &rd->rd_entries;
2764 entry = NULL;
2765
2766 getmoredents:
2767 iov.iov_base = data;
2768 iov.iov_len = datasz;
2769
2770 error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
2771 if (error) {
2772 iseof = 0;
2773 goto done;
2774 }
2775
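	/* If VOP_READDIR() returned no new entries, we are done. */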
2776 if (iov.iov_len == datasz)
2777 goto done;
2778
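	/*
	 * Walk the dirents just read and convert each into an NFS
	 * readdir entry, stopping if the reply would exceed the
	 * client's count.
	 */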
2779 for (dp = (dirent64_t *)data;
2780 (char *)dp - data < datasz - iov.iov_len && !overflow;
2781 dp = nextdp(dp)) {
2782 char *name;
2783 uint32_t esize;
2784 uint32_t cookie;
2785
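		/*
		 * V2 directory cookies are only 32 bits wide.  If this
		 * entry's offset does not fit, use a cookie of 0 and
		 * report EOF rather than handing out a truncated cookie.
		 */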
2786 overflow = (uint64_t)dp->d_off > UINT32_MAX;
2787 if (overflow) {
2788 cookie = 0;
2789 iseof = 1;
2790 } else
2791 cookie = (uint32_t)dp->d_off;
2792
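		/*
		 * Skip empty slots and fileids that do not fit in 32
		 * bits; bump the previous entry's cookie past them so
		 * they are not returned again.
		 */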
2793 if (dp->d_ino == 0 || (uint64_t)dp->d_ino > UINT32_MAX) {
2794 if (entry != NULL)
2795 entry->cookie = cookie;
2796 continue;
2797 }
2798
2799 name = nfscmd_convname(ca, exi, dp->d_name,
2800 NFSCMD_CONV_OUTBOUND, NFS_MAXPATHLEN + 1);
2801 if (name == NULL) {
2802 if (entry != NULL)
2803 entry->cookie = cookie;
2804 continue;
2805 }
2806
2807 /*
2808 * struct entry:
2809 * fileid: 1
2810 * name (length): 1
2811 * name (data): length (rounded up)
2812 * cookie: 1
2813 * nextentry (bool): 1
2814 */
2815 esize = (1 + 1 + 1 + 1) * BYTES_PER_XDR_UNIT +
2816 RNDUP(strlen(name));
2817
2818 /* If the new entry does not fit, discard it */
2819 if (esize > count - size) {
2820 if (name != dp->d_name)
2821 kmem_free(name, NFS_MAXPATHLEN + 1);
2822 iseof = 0;
2823 goto done;
2824 }
2825
2826 entry = kmem_alloc(sizeof (struct nfsentry), KM_SLEEP);
2827
2828 entry->fileid = (uint32_t)dp->d_ino;
2829 entry->name = strdup(name);
2830 if (name != dp->d_name)
2831 kmem_free(name, NFS_MAXPATHLEN + 1);
2832 entry->cookie = cookie;
2833
2834 size += esize;
2835
2836 /* Add the entry to the linked list */
2837 *eptr = entry;
2838 eptr = &entry->nextentry;
2839 }
2840
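	/* If there is room left in the reply, try to read more entries. */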
2841 if (!iseof && size < count) {
2842 uio.uio_resid = MIN(datasz, MAXBSIZE);
2843 goto getmoredents;
2844 }
2845
2846 done:
2847 *eptr = NULL;
2848
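	/*
	 * Return success if we reached EOF, gathered at least one
	 * entry, or saw no error; a partial reply is better than none.
	 */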
2849 if (iseof || rd->rd_entries != NULL || !error) {
2850 error = 0;
2851 rd->rd_eof = iseof ? TRUE : FALSE;
2852
2853 /* This is for nfslog only */
2854 rd->rd_offset = rda->rda_offset;
2855 rd->rd_size = size;
2856 }
2857
2858 bad:
2859 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
2860
2861 #if 0 /* notyet */
2862 /*
2863 * Don't do this. It causes local disk writes when just
2864 * reading the file and the overhead is deemed larger
2865 * than the benefit.
2866 */
2867 /*
2868 * Force modified metadata out to stable storage.
2869 */
2870 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
2871 #endif
2872
2873 VN_RELE(vp);
2874
2875 rd->rd_status = puterrno(error);
2876
2877 if (data != NULL)
2878 kmem_free(data, datasz);
2879 }
2880 void *
2881 rfs_readdir_getfh(struct nfsrddirargs *rda)
2882 {
2883 return (&rda->rda_fh);
2884 }
2885 void
2886 rfs_rddirfree(struct nfsrddirres *rd)
2887 {
2888 if (rd->rd_status == NFS_OK) {
2889 struct nfsentry *entry, *nentry;
2890
2891 for (entry = rd->rd_entries; entry != NULL; entry = nentry) {
2892 nentry = entry->nextentry;
2893 strfree(entry->name);
2894 kmem_free(entry, sizeof (struct nfsentry));
2895 }
2896 }
2897 }
2898
2899 /* ARGSUSED */
2900 void
2901 rfs_statfs(fhandle_t *fh, struct nfsstatfs *fs, struct exportinfo *exi,
2902 struct svc_req *req, cred_t *cr, bool_t ro)
2903 {
2904 int error;
2905 struct statvfs64 sb;
2906 vnode_t *vp;
2907
2908 vp = nfs_fhtovp(fh, exi);
2909 if (vp == NULL) {
2910 fs->fs_status = NFSERR_STALE;
2911 return;
2912 }
2913
2914 error = VFS_STATVFS(vp->v_vfsp, &sb);
2915
2916 if (!error) {
2982 vap->va_atime.tv_nsec = (uint32_t)(sa->sa_atime.tv_usec * 1000);
2983 }
2984 if (sa->sa_mtime.tv_sec != (int32_t)-1 &&
2985 sa->sa_mtime.tv_usec != (int32_t)-1) {
2986 #ifndef _LP64
2987 /* return error if time overflow */
2988 if (!NFS2_TIME_OK(sa->sa_mtime.tv_sec))
2989 return (EOVERFLOW);
2990 #endif
2991 vap->va_mask |= AT_MTIME;
2992 /*
		 * The NFS protocol defines times as unsigned, so don't
		 * sign-extend, unless the sysadmin set
		 * nfs_allow_preepoch_time.
2995 */
2996 NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec, sa->sa_mtime.tv_sec);
2997 vap->va_mtime.tv_nsec = (uint32_t)(sa->sa_mtime.tv_usec * 1000);
2998 }
2999 return (0);
3000 }
3001
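/*
 * Map vnode types (vtype_t) to NFS version 2 file types.  Types with
 * no V2 representation map to 0.
 */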
3002 static const enum nfsftype vt_to_nf[] = {
3003 0, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, 0, 0, 0, NFSOC, 0
3004 };
3005
3006 /*
3007 * check the following fields for overflow: nodeid, size, and time.
3008 * There could be a problem when converting 64-bit LP64 fields
3009 * into 32-bit ones. Return an error if there is an overflow.
3010 */
3011 int
3012 vattr_to_nattr(struct vattr *vap, struct nfsfattr *na)
3013 {
3014 ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
3015 na->na_type = vt_to_nf[vap->va_type];
3016
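	/*
	 * A mode of (unsigned short)-1 means "not set"; pass it through
	 * unchanged rather than OR-ing in the file type bits.
	 */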
3017 if (vap->va_mode == (unsigned short) -1)
3018 na->na_mode = (uint32_t)-1;
3019 else
3020 na->na_mode = VTTOIF(vap->va_type) | vap->va_mode;
3021
3022 if (vap->va_uid == (unsigned short)(-1))
3201 aclentp->a_perm;
3202 break;
3203 default:
3204 break;
3205 }
3206 }
3207 }
3208 /* copy to va */
3209 va->va_mode &= ~077;
3210 va->va_mode |= grp_perm | other_perm;
3211 }
3212 if (vsa.vsa_aclcnt)
3213 kmem_free(vsa.vsa_aclentp,
3214 vsa.vsa_aclcnt * sizeof (aclent_t));
3215 }
3216 }
3217
3218 void
3219 rfs_srvrinit(void)
3220 {
3221 nfs2_srv_caller_id = fs_new_caller_id();
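	/* Per-zone server state is managed by rfs_zone_init()/rfs_zone_fini(). */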
3222 zone_key_create(&rfs_zone_key, rfs_zone_init, NULL, rfs_zone_fini);
3223 }
3224
3225 void
3226 rfs_srvrfini(void)
3227 {
3228 }
3229
3230 /* ARGSUSED */
3231 static void *
3232 rfs_zone_init(zoneid_t zoneid)
3233 {
3234 nfs_srv_t *ns;
3235
3236 ns = kmem_zalloc(sizeof (*ns), KM_SLEEP);
3237
3238 mutex_init(&ns->async_write_lock, NULL, MUTEX_DEFAULT, NULL);
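	/* Clustering of async V2 writes is enabled by default. */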
3239 ns->write_async = 1;
3240
3241 return (ns);
3242 }
3243
3244 /* ARGSUSED */
3245 static void
3246 rfs_zone_fini(zoneid_t zoneid, void *data)
3247 {
3248 nfs_srv_t *ns;
3249
3250 ns = (nfs_srv_t *)data;
3251 mutex_destroy(&ns->async_write_lock);
3252 kmem_free(ns, sizeof (*ns));
3253 }
3254
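/*
 * Set up the RDMA write list for a READ reply so that rr_count bytes
 * of data can be returned to the client via the chunks it provided.
 */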
3255 static int
3256 rdma_setup_read_data2(struct nfsreadargs *ra, struct nfsrdresult *rr)
3257 {
3258 struct clist *wcl;
3259 int wlist_len;
3260 uint32_t count = rr->rr_count;
3261
3262 wcl = ra->ra_wlist;
3263
3264 if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
3265 return (FALSE);
3266 }
3267
3268 wcl = ra->ra_wlist;
3269 rr->rr_ok.rrok_wlist_len = wlist_len;
3270 rr->rr_ok.rrok_wlist = wcl;
3271
3272 return (TRUE);
3273 }