Print this page
Revert exi_zone to exi_zoneid, and install exi_ne backpointer
Caution with use after exi_rele()
Be far more judicious in the use of curzone-using macros.
(Merge and extra asserts by danmcd.)
curzone reality check and teardown changes to use the RIGHT zone
Try to remove assumption that zone's root vnode is marked VROOT
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/fs/nfs/nfs3_srv.c
+++ new/usr/src/uts/common/fs/nfs/nfs3_srv.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright 2018 Nexenta Systems, Inc.
24 24 * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
25 25 * Copyright (c) 2013 by Delphix. All rights reserved.
26 26 */
27 27
28 28 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
29 29 /* All Rights Reserved */
30 30
31 31
32 32 #include <sys/param.h>
33 33 #include <sys/types.h>
34 34 #include <sys/systm.h>
35 35 #include <sys/cred.h>
36 36 #include <sys/buf.h>
37 37 #include <sys/vfs.h>
38 38 #include <sys/vnode.h>
39 39 #include <sys/uio.h>
40 40 #include <sys/errno.h>
41 41 #include <sys/sysmacros.h>
42 42 #include <sys/statvfs.h>
43 43 #include <sys/kmem.h>
44 44 #include <sys/dirent.h>
45 45 #include <sys/cmn_err.h>
46 46 #include <sys/debug.h>
47 47 #include <sys/systeminfo.h>
48 48 #include <sys/flock.h>
49 49 #include <sys/nbmlock.h>
50 50 #include <sys/policy.h>
51 51 #include <sys/sdt.h>
52 52
53 53 #include <rpc/types.h>
54 54 #include <rpc/auth.h>
55 55 #include <rpc/svc.h>
56 56 #include <rpc/rpc_rdma.h>
57 57
58 58 #include <nfs/nfs.h>
59 59 #include <nfs/export.h>
60 60 #include <nfs/nfs_cmd.h>
61 61
62 62 #include <sys/strsubr.h>
63 63 #include <sys/tsol/label.h>
64 64 #include <sys/tsol/tndb.h>
65 65
66 66 #include <sys/zone.h>
67 67
68 68 #include <inet/ip.h>
69 69 #include <inet/ip6.h>
70 70
71 71 /*
72 72 * Zone global variables of NFSv3 server
73 73 */
74 74 typedef struct nfs3_srv {
75 75 writeverf3 write3verf;
76 76 } nfs3_srv_t;
77 77
78 78 /*
79 79 * These are the interface routines for the server side of the
80 80 * Network File System. See the NFS version 3 protocol specification
81 81 * for a description of this interface.
82 82 */
83 83
84 84 static int sattr3_to_vattr(sattr3 *, struct vattr *);
85 85 static int vattr_to_fattr3(struct vattr *, fattr3 *);
86 86 static int vattr_to_wcc_attr(struct vattr *, wcc_attr *);
87 87 static void vattr_to_pre_op_attr(struct vattr *, pre_op_attr *);
88 88 static void vattr_to_wcc_data(struct vattr *, struct vattr *, wcc_data *);
89 89 static int rdma_setup_read_data3(READ3args *, READ3resok *);
90 90
91 91 extern int nfs_loaned_buffers;
92 92
93 93 u_longlong_t nfs3_srv_caller_id;
94 94
95 95 static nfs3_srv_t *
96 96 nfs3_get_srv(void)
97 97 {
98 98 nfs_globals_t *ng = zone_getspecific(nfssrv_zone_key, curzone);
99 99 nfs3_srv_t *srv = ng->nfs3_srv;
100 100 ASSERT(srv != NULL);
101 101 return (srv);
102 102 }
103 103
104 104 /* ARGSUSED */
105 105 void
106 106 rfs3_getattr(GETATTR3args *args, GETATTR3res *resp, struct exportinfo *exi,
107 107 struct svc_req *req, cred_t *cr, bool_t ro)
108 108 {
109 109 int error;
110 110 vnode_t *vp;
111 111 struct vattr va;
112 112
113 113 vp = nfs3_fhtovp(&args->object, exi);
114 114
115 115 DTRACE_NFSV3_5(op__getattr__start, struct svc_req *, req,
116 116 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
117 117 GETATTR3args *, args);
118 118
119 119 if (vp == NULL) {
120 120 error = ESTALE;
121 121 goto out;
122 122 }
123 123
124 124 va.va_mask = AT_ALL;
125 125 error = rfs4_delegated_getattr(vp, &va, 0, cr);
126 126
127 127 if (!error) {
128 128 /* Lie about the object type for a referral */
129 129 if (vn_is_nfs_reparse(vp, cr))
130 130 va.va_type = VLNK;
131 131
132 132 /* overflow error if time or size is out of range */
133 133 error = vattr_to_fattr3(&va, &resp->resok.obj_attributes);
134 134 if (error)
135 135 goto out;
136 136 resp->status = NFS3_OK;
137 137
138 138 DTRACE_NFSV3_5(op__getattr__done, struct svc_req *, req,
139 139 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
140 140 GETATTR3res *, resp);
141 141
142 142 VN_RELE(vp);
143 143
144 144 return;
145 145 }
146 146
147 147 out:
148 148 if (curthread->t_flag & T_WOULDBLOCK) {
149 149 curthread->t_flag &= ~T_WOULDBLOCK;
150 150 resp->status = NFS3ERR_JUKEBOX;
151 151 } else
152 152 resp->status = puterrno3(error);
153 153
154 154 DTRACE_NFSV3_5(op__getattr__done, struct svc_req *, req,
155 155 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
156 156 GETATTR3res *, resp);
157 157
158 158 if (vp != NULL)
159 159 VN_RELE(vp);
160 160 }
161 161
162 162 void *
163 163 rfs3_getattr_getfh(GETATTR3args *args)
164 164 {
165 165
166 166 return (&args->object);
167 167 }
168 168
169 169 void
170 170 rfs3_setattr(SETATTR3args *args, SETATTR3res *resp, struct exportinfo *exi,
171 171 struct svc_req *req, cred_t *cr, bool_t ro)
172 172 {
173 173 int error;
174 174 vnode_t *vp;
175 175 struct vattr *bvap;
176 176 struct vattr bva;
177 177 struct vattr *avap;
178 178 struct vattr ava;
179 179 int flag;
180 180 int in_crit = 0;
181 181 struct flock64 bf;
182 182 caller_context_t ct;
183 183
184 184 bvap = NULL;
185 185 avap = NULL;
186 186
187 187 vp = nfs3_fhtovp(&args->object, exi);
188 188
189 189 DTRACE_NFSV3_5(op__setattr__start, struct svc_req *, req,
190 190 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
191 191 SETATTR3args *, args);
192 192
193 193 if (vp == NULL) {
194 194 error = ESTALE;
195 195 goto out;
196 196 }
197 197
198 198 error = sattr3_to_vattr(&args->new_attributes, &ava);
199 199 if (error)
200 200 goto out;
201 201
202 202 if (is_system_labeled()) {
203 203 bslabel_t *clabel = req->rq_label;
204 204
205 205 ASSERT(clabel != NULL);
206 206 DTRACE_PROBE2(tx__rfs3__log__info__opsetattr__clabel, char *,
207 207 "got client label from request(1)", struct svc_req *, req);
208 208
209 209 if (!blequal(&l_admin_low->tsl_label, clabel)) {
210 210 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
211 211 exi)) {
212 212 resp->status = NFS3ERR_ACCES;
213 213 goto out1;
214 214 }
215 215 }
216 216 }
217 217
218 218 /*
219 219 * We need to specially handle size changes because of
220 220 * possible conflicting NBMAND locks. Get into critical
221 221 * region before VOP_GETATTR, so the size attribute is
222 222 * valid when checking conflicts.
223 223 *
224 224 * Also, check to see if the v4 side of the server has
225 225 * delegated this file. If so, then we return JUKEBOX to
226 226 * allow the client to retrasmit its request.
227 227 */
228 228 if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
229 229 if (nbl_need_check(vp)) {
230 230 nbl_start_crit(vp, RW_READER);
231 231 in_crit = 1;
232 232 }
233 233 }
234 234
235 235 bva.va_mask = AT_ALL;
236 236 error = rfs4_delegated_getattr(vp, &bva, 0, cr);
237 237
238 238 /*
239 239 * If we can't get the attributes, then we can't do the
240 240 * right access checking. So, we'll fail the request.
241 241 */
242 242 if (error)
243 243 goto out;
244 244
245 245 bvap = &bva;
246 246
247 247 if (rdonly(ro, vp)) {
248 248 resp->status = NFS3ERR_ROFS;
249 249 goto out1;
250 250 }
251 251
252 252 if (args->guard.check &&
253 253 (args->guard.obj_ctime.seconds != bva.va_ctime.tv_sec ||
254 254 args->guard.obj_ctime.nseconds != bva.va_ctime.tv_nsec)) {
255 255 resp->status = NFS3ERR_NOT_SYNC;
256 256 goto out1;
257 257 }
258 258
259 259 if (args->new_attributes.mtime.set_it == SET_TO_CLIENT_TIME)
260 260 flag = ATTR_UTIME;
261 261 else
262 262 flag = 0;
263 263
264 264 /*
265 265 * If the filesystem is exported with nosuid, then mask off
266 266 * the setuid and setgid bits.
267 267 */
268 268 if ((ava.va_mask & AT_MODE) && vp->v_type == VREG &&
269 269 (exi->exi_export.ex_flags & EX_NOSUID))
270 270 ava.va_mode &= ~(VSUID | VSGID);
271 271
272 272 ct.cc_sysid = 0;
273 273 ct.cc_pid = 0;
274 274 ct.cc_caller_id = nfs3_srv_caller_id;
275 275 ct.cc_flags = CC_DONTBLOCK;
276 276
277 277 /*
278 278 * We need to specially handle size changes because it is
279 279 * possible for the client to create a file with modes
280 280 * which indicate read-only, but with the file opened for
281 281 * writing. If the client then tries to set the size of
282 282 * the file, then the normal access checking done in
283 283 * VOP_SETATTR would prevent the client from doing so,
284 284 * although it should be legal for it to do so. To get
285 285 * around this, we do the access checking for ourselves
286 286 * and then use VOP_SPACE which doesn't do the access
287 287 * checking which VOP_SETATTR does. VOP_SPACE can only
288 288 * operate on VREG files, let VOP_SETATTR handle the other
289 289 * extremely rare cases.
290 290 * Also the client should not be allowed to change the
291 291 * size of the file if there is a conflicting non-blocking
292 292 * mandatory lock in the region the change.
293 293 */
294 294 if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
295 295 if (in_crit) {
296 296 u_offset_t offset;
297 297 ssize_t length;
298 298
299 299 if (ava.va_size < bva.va_size) {
300 300 offset = ava.va_size;
301 301 length = bva.va_size - ava.va_size;
302 302 } else {
303 303 offset = bva.va_size;
304 304 length = ava.va_size - bva.va_size;
305 305 }
306 306 if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
307 307 NULL)) {
308 308 error = EACCES;
309 309 goto out;
310 310 }
311 311 }
312 312
313 313 if (crgetuid(cr) == bva.va_uid && ava.va_size != bva.va_size) {
314 314 ava.va_mask &= ~AT_SIZE;
315 315 bf.l_type = F_WRLCK;
316 316 bf.l_whence = 0;
317 317 bf.l_start = (off64_t)ava.va_size;
318 318 bf.l_len = 0;
319 319 bf.l_sysid = 0;
320 320 bf.l_pid = 0;
321 321 error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
322 322 (offset_t)ava.va_size, cr, &ct);
323 323 }
324 324 }
325 325
326 326 if (!error && ava.va_mask)
327 327 error = VOP_SETATTR(vp, &ava, flag, cr, &ct);
328 328
329 329 /* check if a monitor detected a delegation conflict */
330 330 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
331 331 resp->status = NFS3ERR_JUKEBOX;
332 332 goto out1;
333 333 }
334 334
335 335 ava.va_mask = AT_ALL;
336 336 avap = rfs4_delegated_getattr(vp, &ava, 0, cr) ? NULL : &ava;
337 337
338 338 /*
339 339 * Force modified metadata out to stable storage.
340 340 */
341 341 (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
342 342
343 343 if (error)
344 344 goto out;
345 345
346 346 if (in_crit)
347 347 nbl_end_crit(vp);
348 348
349 349 resp->status = NFS3_OK;
350 350 vattr_to_wcc_data(bvap, avap, &resp->resok.obj_wcc);
351 351
352 352 DTRACE_NFSV3_5(op__setattr__done, struct svc_req *, req,
353 353 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
354 354 SETATTR3res *, resp);
355 355
356 356 VN_RELE(vp);
357 357
358 358 return;
359 359
360 360 out:
361 361 if (curthread->t_flag & T_WOULDBLOCK) {
362 362 curthread->t_flag &= ~T_WOULDBLOCK;
363 363 resp->status = NFS3ERR_JUKEBOX;
364 364 } else
365 365 resp->status = puterrno3(error);
366 366 out1:
367 367 DTRACE_NFSV3_5(op__setattr__done, struct svc_req *, req,
368 368 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
369 369 SETATTR3res *, resp);
370 370
371 371 if (vp != NULL) {
372 372 if (in_crit)
373 373 nbl_end_crit(vp);
374 374 VN_RELE(vp);
375 375 }
376 376 vattr_to_wcc_data(bvap, avap, &resp->resfail.obj_wcc);
377 377 }
378 378
379 379 void *
380 380 rfs3_setattr_getfh(SETATTR3args *args)
381 381 {
382 382
383 383 return (&args->object);
384 384 }
385 385
386 386 /* ARGSUSED */
387 387 void
388 388 rfs3_lookup(LOOKUP3args *args, LOOKUP3res *resp, struct exportinfo *exi,
389 389 struct svc_req *req, cred_t *cr, bool_t ro)
390 390 {
391 391 int error;
392 392 vnode_t *vp;
393 393 vnode_t *dvp;
394 394 struct vattr *vap;
395 395 struct vattr va;
396 396 struct vattr *dvap;
397 397 struct vattr dva;
398 398 nfs_fh3 *fhp;
399 399 struct sec_ol sec = {0, 0};
400 400 bool_t publicfh_flag = FALSE, auth_weak = FALSE;
401 401 struct sockaddr *ca;
402 402 char *name = NULL;
403 403
|
↓ open down ↓ |
403 lines elided |
↑ open up ↑ |
404 404 dvap = NULL;
405 405
406 406 if (exi != NULL)
407 407 exi_hold(exi);
408 408
409 409 /*
410 410 * Allow lookups from the root - the default
411 411 * location of the public filehandle.
412 412 */
413 413 if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
414 + ASSERT3U(exi->exi_zoneid, ==, curzone->zone_id);
414 415 dvp = ZONE_ROOTVP();
415 416 VN_HOLD(dvp);
416 417
417 418 DTRACE_NFSV3_5(op__lookup__start, struct svc_req *, req,
418 419 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
419 420 LOOKUP3args *, args);
420 421 } else {
421 422 dvp = nfs3_fhtovp(&args->what.dir, exi);
422 423
423 424 DTRACE_NFSV3_5(op__lookup__start, struct svc_req *, req,
424 425 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
425 426 LOOKUP3args *, args);
426 427
427 428 if (dvp == NULL) {
428 429 error = ESTALE;
429 430 goto out;
430 431 }
431 432 }
432 433
433 434 dva.va_mask = AT_ALL;
434 435 dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
435 436
436 437 if (args->what.name == nfs3nametoolong) {
|
↓ open down ↓ |
13 lines elided |
↑ open up ↑ |
437 438 resp->status = NFS3ERR_NAMETOOLONG;
438 439 goto out1;
439 440 }
440 441
441 442 if (args->what.name == NULL || *(args->what.name) == '\0') {
442 443 resp->status = NFS3ERR_ACCES;
443 444 goto out1;
444 445 }
445 446
446 447 fhp = &args->what.dir;
448 + ASSERT3U(curzone->zone_id, ==, exi->exi_zoneid); /* exi is non-NULL */
447 449 if (strcmp(args->what.name, "..") == 0 &&
448 450 EQFID(&exi->exi_fid, FH3TOFIDP(fhp))) {
449 451 if ((exi->exi_export.ex_flags & EX_NOHIDE) &&
450 - (dvp->v_flag & VROOT)) {
452 + ((dvp->v_flag & VROOT) || VN_IS_CURZONEROOT(dvp))) {
451 453 /*
452 454 * special case for ".." and 'nohide' exported root
453 455 */
454 456 if (rfs_climb_crossmnt(&dvp, &exi, cr) != 0) {
455 457 resp->status = NFS3ERR_ACCES;
456 458 goto out1;
457 459 }
458 460 } else {
459 461 resp->status = NFS3ERR_NOENT;
460 462 goto out1;
461 463 }
462 464 }
463 465
464 466 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
465 467 name = nfscmd_convname(ca, exi, args->what.name,
466 468 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
467 469
468 470 if (name == NULL) {
469 471 resp->status = NFS3ERR_ACCES;
470 472 goto out1;
|
↓ open down ↓ |
10 lines elided |
↑ open up ↑ |
471 473 }
472 474
473 475 /*
474 476 * If the public filehandle is used then allow
475 477 * a multi-component lookup
476 478 */
477 479 if (PUBLIC_FH3(&args->what.dir)) {
478 480 publicfh_flag = TRUE;
479 481
480 482 exi_rele(exi);
483 + exi = NULL;
481 484
482 485 error = rfs_publicfh_mclookup(name, dvp, cr, &vp,
483 486 &exi, &sec);
484 487
485 488 /*
486 489 * Since WebNFS may bypass MOUNT, we need to ensure this
487 490 * request didn't come from an unlabeled admin_low client.
488 491 */
489 492 if (is_system_labeled() && error == 0) {
490 493 int addr_type;
491 494 void *ipaddr;
492 495 tsol_tpc_t *tp;
493 496
494 497 if (ca->sa_family == AF_INET) {
495 498 addr_type = IPV4_VERSION;
496 499 ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
497 500 } else if (ca->sa_family == AF_INET6) {
498 501 addr_type = IPV6_VERSION;
499 502 ipaddr = &((struct sockaddr_in6 *)
500 503 ca)->sin6_addr;
501 504 }
502 505 tp = find_tpc(ipaddr, addr_type, B_FALSE);
503 506 if (tp == NULL || tp->tpc_tp.tp_doi !=
504 507 l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
505 508 SUN_CIPSO) {
506 509 VN_RELE(vp);
507 510 error = EACCES;
508 511 }
509 512 if (tp != NULL)
510 513 TPC_RELE(tp);
511 514 }
512 515 } else {
513 516 error = VOP_LOOKUP(dvp, name, &vp,
514 517 NULL, 0, NULL, cr, NULL, NULL, NULL);
515 518 }
516 519
517 520 if (name != args->what.name)
518 521 kmem_free(name, MAXPATHLEN + 1);
519 522
520 523 if (error == 0 && vn_ismntpt(vp)) {
521 524 error = rfs_cross_mnt(&vp, &exi);
522 525 if (error)
523 526 VN_RELE(vp);
524 527 }
525 528
526 529 if (is_system_labeled() && error == 0) {
527 530 bslabel_t *clabel = req->rq_label;
528 531
529 532 ASSERT(clabel != NULL);
530 533 DTRACE_PROBE2(tx__rfs3__log__info__oplookup__clabel, char *,
531 534 "got client label from request(1)", struct svc_req *, req);
532 535
533 536 if (!blequal(&l_admin_low->tsl_label, clabel)) {
534 537 if (!do_rfs_label_check(clabel, dvp,
535 538 DOMINANCE_CHECK, exi)) {
536 539 VN_RELE(vp);
537 540 error = EACCES;
538 541 }
539 542 }
540 543 }
541 544
542 545 dva.va_mask = AT_ALL;
543 546 dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
544 547
545 548 if (error)
546 549 goto out;
547 550
548 551 if (sec.sec_flags & SEC_QUERY) {
549 552 error = makefh3_ol(&resp->resok.object, exi, sec.sec_index);
550 553 } else {
551 554 error = makefh3(&resp->resok.object, vp, exi);
552 555 if (!error && publicfh_flag && !chk_clnt_sec(exi, req))
553 556 auth_weak = TRUE;
|
↓ open down ↓ |
63 lines elided |
↑ open up ↑ |
554 557 }
555 558
556 559 if (error) {
557 560 VN_RELE(vp);
558 561 goto out;
559 562 }
560 563
561 564 va.va_mask = AT_ALL;
562 565 vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
563 566
564 - exi_rele(exi);
565 567 VN_RELE(vp);
566 568
567 569 resp->status = NFS3_OK;
568 570 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
569 571 vattr_to_post_op_attr(dvap, &resp->resok.dir_attributes);
570 572
571 573 /*
572 574 * If it's public fh, no 0x81, and client's flavor is
573 575 * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
574 576 * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
575 577 */
576 578 if (auth_weak)
577 579 resp->status = (enum nfsstat3)WNFSERR_CLNT_FLAVOR;
578 580
579 581 DTRACE_NFSV3_5(op__lookup__done, struct svc_req *, req,
580 582 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
581 583 LOOKUP3res *, resp);
582 584 VN_RELE(dvp);
585 + exi_rele(exi);
583 586
584 587 return;
585 588
586 589 out:
587 590 if (curthread->t_flag & T_WOULDBLOCK) {
588 591 curthread->t_flag &= ~T_WOULDBLOCK;
589 592 resp->status = NFS3ERR_JUKEBOX;
590 593 } else
591 594 resp->status = puterrno3(error);
592 595 out1:
593 596 DTRACE_NFSV3_5(op__lookup__done, struct svc_req *, req,
594 597 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
595 598 LOOKUP3res *, resp);
596 599
597 600 if (exi != NULL)
598 601 exi_rele(exi);
599 602
600 603 if (dvp != NULL)
601 604 VN_RELE(dvp);
602 605 vattr_to_post_op_attr(dvap, &resp->resfail.dir_attributes);
603 606
604 607 }
605 608
606 609 void *
607 610 rfs3_lookup_getfh(LOOKUP3args *args)
608 611 {
609 612
610 613 return (&args->what.dir);
611 614 }
612 615
613 616 /* ARGSUSED */
614 617 void
615 618 rfs3_access(ACCESS3args *args, ACCESS3res *resp, struct exportinfo *exi,
616 619 struct svc_req *req, cred_t *cr, bool_t ro)
617 620 {
618 621 int error;
619 622 vnode_t *vp;
620 623 struct vattr *vap;
621 624 struct vattr va;
622 625 int checkwriteperm;
623 626 boolean_t dominant_label = B_FALSE;
624 627 boolean_t equal_label = B_FALSE;
625 628 boolean_t admin_low_client;
626 629
627 630 vap = NULL;
628 631
629 632 vp = nfs3_fhtovp(&args->object, exi);
630 633
631 634 DTRACE_NFSV3_5(op__access__start, struct svc_req *, req,
632 635 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
633 636 ACCESS3args *, args);
634 637
635 638 if (vp == NULL) {
636 639 error = ESTALE;
637 640 goto out;
638 641 }
639 642
640 643 /*
641 644 * If the file system is exported read only, it is not appropriate
642 645 * to check write permissions for regular files and directories.
643 646 * Special files are interpreted by the client, so the underlying
644 647 * permissions are sent back to the client for interpretation.
645 648 */
646 649 if (rdonly(ro, vp) && (vp->v_type == VREG || vp->v_type == VDIR))
647 650 checkwriteperm = 0;
648 651 else
649 652 checkwriteperm = 1;
650 653
651 654 /*
652 655 * We need the mode so that we can correctly determine access
653 656 * permissions relative to a mandatory lock file. Access to
654 657 * mandatory lock files is denied on the server, so it might
655 658 * as well be reflected to the server during the open.
656 659 */
657 660 va.va_mask = AT_MODE;
658 661 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
659 662 if (error)
660 663 goto out;
661 664
662 665 vap = &va;
663 666
664 667 resp->resok.access = 0;
665 668
666 669 if (is_system_labeled()) {
667 670 bslabel_t *clabel = req->rq_label;
668 671
669 672 ASSERT(clabel != NULL);
670 673 DTRACE_PROBE2(tx__rfs3__log__info__opaccess__clabel, char *,
671 674 "got client label from request(1)", struct svc_req *, req);
672 675
673 676 if (!blequal(&l_admin_low->tsl_label, clabel)) {
674 677 if ((equal_label = do_rfs_label_check(clabel, vp,
675 678 EQUALITY_CHECK, exi)) == B_FALSE) {
676 679 dominant_label = do_rfs_label_check(clabel,
677 680 vp, DOMINANCE_CHECK, exi);
678 681 } else
679 682 dominant_label = B_TRUE;
680 683 admin_low_client = B_FALSE;
681 684 } else
682 685 admin_low_client = B_TRUE;
683 686 }
684 687
685 688 if (args->access & ACCESS3_READ) {
686 689 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
687 690 if (error) {
688 691 if (curthread->t_flag & T_WOULDBLOCK)
689 692 goto out;
690 693 } else if (!MANDLOCK(vp, va.va_mode) &&
691 694 (!is_system_labeled() || admin_low_client ||
692 695 dominant_label))
693 696 resp->resok.access |= ACCESS3_READ;
694 697 }
695 698 if ((args->access & ACCESS3_LOOKUP) && vp->v_type == VDIR) {
696 699 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
697 700 if (error) {
698 701 if (curthread->t_flag & T_WOULDBLOCK)
699 702 goto out;
700 703 } else if (!is_system_labeled() || admin_low_client ||
701 704 dominant_label)
702 705 resp->resok.access |= ACCESS3_LOOKUP;
703 706 }
704 707 if (checkwriteperm &&
705 708 (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND))) {
706 709 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
707 710 if (error) {
708 711 if (curthread->t_flag & T_WOULDBLOCK)
709 712 goto out;
710 713 } else if (!MANDLOCK(vp, va.va_mode) &&
711 714 (!is_system_labeled() || admin_low_client || equal_label)) {
712 715 resp->resok.access |=
713 716 (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND));
714 717 }
715 718 }
716 719 if (checkwriteperm &&
717 720 (args->access & ACCESS3_DELETE) && vp->v_type == VDIR) {
718 721 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
719 722 if (error) {
720 723 if (curthread->t_flag & T_WOULDBLOCK)
721 724 goto out;
722 725 } else if (!is_system_labeled() || admin_low_client ||
723 726 equal_label)
724 727 resp->resok.access |= ACCESS3_DELETE;
725 728 }
726 729 if (args->access & ACCESS3_EXECUTE) {
727 730 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
728 731 if (error) {
729 732 if (curthread->t_flag & T_WOULDBLOCK)
730 733 goto out;
731 734 } else if (!MANDLOCK(vp, va.va_mode) &&
732 735 (!is_system_labeled() || admin_low_client ||
733 736 dominant_label))
734 737 resp->resok.access |= ACCESS3_EXECUTE;
735 738 }
736 739
737 740 va.va_mask = AT_ALL;
738 741 vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
739 742
740 743 resp->status = NFS3_OK;
741 744 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
742 745
743 746 DTRACE_NFSV3_5(op__access__done, struct svc_req *, req,
744 747 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
745 748 ACCESS3res *, resp);
746 749
747 750 VN_RELE(vp);
748 751
749 752 return;
750 753
751 754 out:
752 755 if (curthread->t_flag & T_WOULDBLOCK) {
753 756 curthread->t_flag &= ~T_WOULDBLOCK;
754 757 resp->status = NFS3ERR_JUKEBOX;
755 758 } else
756 759 resp->status = puterrno3(error);
757 760 DTRACE_NFSV3_5(op__access__done, struct svc_req *, req,
758 761 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
759 762 ACCESS3res *, resp);
760 763 if (vp != NULL)
761 764 VN_RELE(vp);
762 765 vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
763 766 }
764 767
765 768 void *
766 769 rfs3_access_getfh(ACCESS3args *args)
767 770 {
768 771
769 772 return (&args->object);
770 773 }
771 774
772 775 /* ARGSUSED */
773 776 void
774 777 rfs3_readlink(READLINK3args *args, READLINK3res *resp, struct exportinfo *exi,
775 778 struct svc_req *req, cred_t *cr, bool_t ro)
776 779 {
777 780 int error;
778 781 vnode_t *vp;
779 782 struct vattr *vap;
780 783 struct vattr va;
781 784 struct iovec iov;
782 785 struct uio uio;
783 786 char *data;
784 787 struct sockaddr *ca;
785 788 char *name = NULL;
786 789 int is_referral = 0;
787 790
788 791 vap = NULL;
789 792
790 793 vp = nfs3_fhtovp(&args->symlink, exi);
791 794
792 795 DTRACE_NFSV3_5(op__readlink__start, struct svc_req *, req,
793 796 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
794 797 READLINK3args *, args);
795 798
796 799 if (vp == NULL) {
797 800 error = ESTALE;
798 801 goto out;
799 802 }
800 803
801 804 va.va_mask = AT_ALL;
802 805 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
803 806 if (error)
804 807 goto out;
805 808
806 809 vap = &va;
807 810
808 811 /* We lied about the object type for a referral */
809 812 if (vn_is_nfs_reparse(vp, cr))
810 813 is_referral = 1;
811 814
812 815 if (vp->v_type != VLNK && !is_referral) {
813 816 resp->status = NFS3ERR_INVAL;
814 817 goto out1;
815 818 }
816 819
817 820 if (MANDLOCK(vp, va.va_mode)) {
818 821 resp->status = NFS3ERR_ACCES;
819 822 goto out1;
820 823 }
821 824
822 825 if (is_system_labeled()) {
823 826 bslabel_t *clabel = req->rq_label;
824 827
825 828 ASSERT(clabel != NULL);
826 829 DTRACE_PROBE2(tx__rfs3__log__info__opreadlink__clabel, char *,
827 830 "got client label from request(1)", struct svc_req *, req);
828 831
829 832 if (!blequal(&l_admin_low->tsl_label, clabel)) {
830 833 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
831 834 exi)) {
832 835 resp->status = NFS3ERR_ACCES;
833 836 goto out1;
834 837 }
835 838 }
836 839 }
837 840
838 841 data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
839 842
840 843 if (is_referral) {
841 844 char *s;
842 845 size_t strsz;
843 846
844 847 /* Get an artificial symlink based on a referral */
845 848 s = build_symlink(vp, cr, &strsz);
846 849 global_svstat_ptr[3][NFS_REFERLINKS].value.ui64++;
847 850 DTRACE_PROBE2(nfs3serv__func__referral__reflink,
848 851 vnode_t *, vp, char *, s);
849 852 if (s == NULL)
850 853 error = EINVAL;
851 854 else {
852 855 error = 0;
853 856 (void) strlcpy(data, s, MAXPATHLEN + 1);
854 857 kmem_free(s, strsz);
855 858 }
856 859
857 860 } else {
858 861
859 862 iov.iov_base = data;
860 863 iov.iov_len = MAXPATHLEN;
861 864 uio.uio_iov = &iov;
862 865 uio.uio_iovcnt = 1;
863 866 uio.uio_segflg = UIO_SYSSPACE;
864 867 uio.uio_extflg = UIO_COPY_CACHED;
865 868 uio.uio_loffset = 0;
866 869 uio.uio_resid = MAXPATHLEN;
867 870
868 871 error = VOP_READLINK(vp, &uio, cr, NULL);
869 872
870 873 if (!error)
871 874 *(data + MAXPATHLEN - uio.uio_resid) = '\0';
872 875 }
873 876
874 877 va.va_mask = AT_ALL;
875 878 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
876 879
877 880 /* Lie about object type again just to be consistent */
878 881 if (is_referral && vap != NULL)
879 882 vap->va_type = VLNK;
880 883
881 884 #if 0 /* notyet */
882 885 /*
883 886 * Don't do this. It causes local disk writes when just
884 887 * reading the file and the overhead is deemed larger
885 888 * than the benefit.
886 889 */
887 890 /*
888 891 * Force modified metadata out to stable storage.
889 892 */
890 893 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
891 894 #endif
892 895
893 896 if (error) {
894 897 kmem_free(data, MAXPATHLEN + 1);
895 898 goto out;
896 899 }
897 900
898 901 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
899 902 name = nfscmd_convname(ca, exi, data, NFSCMD_CONV_OUTBOUND,
900 903 MAXPATHLEN + 1);
901 904
902 905 if (name == NULL) {
903 906 /*
904 907 * Even though the conversion failed, we return
905 908 * something. We just don't translate it.
906 909 */
907 910 name = data;
908 911 }
909 912
910 913 resp->status = NFS3_OK;
911 914 vattr_to_post_op_attr(vap, &resp->resok.symlink_attributes);
912 915 resp->resok.data = name;
913 916
914 917 DTRACE_NFSV3_5(op__readlink__done, struct svc_req *, req,
915 918 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
916 919 READLINK3res *, resp);
917 920 VN_RELE(vp);
918 921
919 922 if (name != data)
920 923 kmem_free(data, MAXPATHLEN + 1);
921 924
922 925 return;
923 926
924 927 out:
925 928 if (curthread->t_flag & T_WOULDBLOCK) {
926 929 curthread->t_flag &= ~T_WOULDBLOCK;
927 930 resp->status = NFS3ERR_JUKEBOX;
928 931 } else
929 932 resp->status = puterrno3(error);
930 933 out1:
931 934 DTRACE_NFSV3_5(op__readlink__done, struct svc_req *, req,
932 935 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
933 936 READLINK3res *, resp);
934 937 if (vp != NULL)
935 938 VN_RELE(vp);
936 939 vattr_to_post_op_attr(vap, &resp->resfail.symlink_attributes);
937 940 }
938 941
939 942 void *
940 943 rfs3_readlink_getfh(READLINK3args *args)
941 944 {
942 945
943 946 return (&args->symlink);
944 947 }
945 948
946 949 void
947 950 rfs3_readlink_free(READLINK3res *resp)
948 951 {
949 952
950 953 if (resp->status == NFS3_OK)
951 954 kmem_free(resp->resok.data, MAXPATHLEN + 1);
952 955 }
953 956
954 957 /*
955 958 * Server routine to handle read
956 959 * May handle RDMA data as well as mblks
957 960 */
958 961 /* ARGSUSED */
959 962 void
960 963 rfs3_read(READ3args *args, READ3res *resp, struct exportinfo *exi,
961 964 struct svc_req *req, cred_t *cr, bool_t ro)
962 965 {
963 966 int error;
964 967 vnode_t *vp;
965 968 struct vattr *vap;
966 969 struct vattr va;
967 970 struct iovec iov, *iovp = NULL;
968 971 int iovcnt;
969 972 struct uio uio;
970 973 u_offset_t offset;
971 974 mblk_t *mp = NULL;
972 975 int in_crit = 0;
973 976 int need_rwunlock = 0;
974 977 caller_context_t ct;
975 978 int rdma_used = 0;
976 979 int loaned_buffers;
977 980 struct uio *uiop;
978 981
979 982 vap = NULL;
980 983
981 984 vp = nfs3_fhtovp(&args->file, exi);
982 985
983 986 DTRACE_NFSV3_5(op__read__start, struct svc_req *, req,
984 987 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
985 988 READ3args *, args);
986 989
987 990
988 991 if (vp == NULL) {
989 992 error = ESTALE;
990 993 goto out;
991 994 }
992 995
993 996 if (args->wlist) {
994 997 if (args->count > clist_len(args->wlist)) {
995 998 error = EINVAL;
996 999 goto out;
997 1000 }
998 1001 rdma_used = 1;
999 1002 }
1000 1003
1001 1004 /* use loaned buffers for TCP */
1002 1005 loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;
1003 1006
1004 1007 if (is_system_labeled()) {
1005 1008 bslabel_t *clabel = req->rq_label;
1006 1009
1007 1010 ASSERT(clabel != NULL);
1008 1011 DTRACE_PROBE2(tx__rfs3__log__info__opread__clabel, char *,
1009 1012 "got client label from request(1)", struct svc_req *, req);
1010 1013
1011 1014 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1012 1015 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
1013 1016 exi)) {
1014 1017 resp->status = NFS3ERR_ACCES;
1015 1018 goto out1;
1016 1019 }
1017 1020 }
1018 1021 }
1019 1022
1020 1023 ct.cc_sysid = 0;
1021 1024 ct.cc_pid = 0;
1022 1025 ct.cc_caller_id = nfs3_srv_caller_id;
1023 1026 ct.cc_flags = CC_DONTBLOCK;
1024 1027
1025 1028 /*
1026 1029 * Enter the critical region before calling VOP_RWLOCK
1027 1030 * to avoid a deadlock with write requests.
1028 1031 */
1029 1032 if (nbl_need_check(vp)) {
1030 1033 nbl_start_crit(vp, RW_READER);
1031 1034 in_crit = 1;
1032 1035 if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
1033 1036 NULL)) {
1034 1037 error = EACCES;
1035 1038 goto out;
1036 1039 }
1037 1040 }
1038 1041
1039 1042 error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);
1040 1043
1041 1044 /* check if a monitor detected a delegation conflict */
1042 1045 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1043 1046 resp->status = NFS3ERR_JUKEBOX;
1044 1047 goto out1;
1045 1048 }
1046 1049
1047 1050 need_rwunlock = 1;
1048 1051
1049 1052 va.va_mask = AT_ALL;
1050 1053 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1051 1054
1052 1055 /*
1053 1056 * If we can't get the attributes, then we can't do the
1054 1057 * right access checking. So, we'll fail the request.
1055 1058 */
1056 1059 if (error)
1057 1060 goto out;
1058 1061
1059 1062 vap = &va;
1060 1063
1061 1064 if (vp->v_type != VREG) {
1062 1065 resp->status = NFS3ERR_INVAL;
1063 1066 goto out1;
1064 1067 }
1065 1068
1066 1069 if (crgetuid(cr) != va.va_uid) {
1067 1070 error = VOP_ACCESS(vp, VREAD, 0, cr, &ct);
1068 1071 if (error) {
1069 1072 if (curthread->t_flag & T_WOULDBLOCK)
1070 1073 goto out;
1071 1074 error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct);
1072 1075 if (error)
1073 1076 goto out;
1074 1077 }
1075 1078 }
1076 1079
1077 1080 if (MANDLOCK(vp, va.va_mode)) {
1078 1081 resp->status = NFS3ERR_ACCES;
1079 1082 goto out1;
1080 1083 }
1081 1084
1082 1085 offset = args->offset;
1083 1086 if (offset >= va.va_size) {
1084 1087 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1085 1088 if (in_crit)
1086 1089 nbl_end_crit(vp);
1087 1090 resp->status = NFS3_OK;
1088 1091 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1089 1092 resp->resok.count = 0;
1090 1093 resp->resok.eof = TRUE;
1091 1094 resp->resok.data.data_len = 0;
1092 1095 resp->resok.data.data_val = NULL;
1093 1096 resp->resok.data.mp = NULL;
1094 1097 /* RDMA */
1095 1098 resp->resok.wlist = args->wlist;
1096 1099 resp->resok.wlist_len = resp->resok.count;
1097 1100 if (resp->resok.wlist)
1098 1101 clist_zero_len(resp->resok.wlist);
1099 1102 goto done;
1100 1103 }
1101 1104
1102 1105 if (args->count == 0) {
1103 1106 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1104 1107 if (in_crit)
1105 1108 nbl_end_crit(vp);
1106 1109 resp->status = NFS3_OK;
1107 1110 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1108 1111 resp->resok.count = 0;
1109 1112 resp->resok.eof = FALSE;
1110 1113 resp->resok.data.data_len = 0;
1111 1114 resp->resok.data.data_val = NULL;
1112 1115 resp->resok.data.mp = NULL;
1113 1116 /* RDMA */
1114 1117 resp->resok.wlist = args->wlist;
1115 1118 resp->resok.wlist_len = resp->resok.count;
1116 1119 if (resp->resok.wlist)
1117 1120 clist_zero_len(resp->resok.wlist);
1118 1121 goto done;
1119 1122 }
1120 1123
1121 1124 /*
1122 1125 * do not allocate memory more the max. allowed
1123 1126 * transfer size
1124 1127 */
1125 1128 if (args->count > rfs3_tsize(req))
1126 1129 args->count = rfs3_tsize(req);
1127 1130
1128 1131 if (loaned_buffers) {
1129 1132 uiop = (uio_t *)rfs_setup_xuio(vp);
1130 1133 ASSERT(uiop != NULL);
1131 1134 uiop->uio_segflg = UIO_SYSSPACE;
1132 1135 uiop->uio_loffset = args->offset;
1133 1136 uiop->uio_resid = args->count;
1134 1137
1135 1138 /* Jump to do the read if successful */
1136 1139 if (VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cr, &ct) == 0) {
1137 1140 /*
1138 1141 * Need to hold the vnode until after VOP_RETZCBUF()
1139 1142 * is called.
1140 1143 */
1141 1144 VN_HOLD(vp);
1142 1145 goto doio_read;
1143 1146 }
1144 1147
1145 1148 DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
1146 1149 uiop->uio_loffset, int, uiop->uio_resid);
1147 1150
1148 1151 uiop->uio_extflg = 0;
1149 1152 /* failure to setup for zero copy */
1150 1153 rfs_free_xuio((void *)uiop);
1151 1154 loaned_buffers = 0;
1152 1155 }
1153 1156
1154 1157 /*
1155 1158 * If returning data via RDMA Write, then grab the chunk list.
1156 1159 * If we aren't returning READ data w/RDMA_WRITE, then grab
1157 1160 * a mblk.
1158 1161 */
1159 1162 if (rdma_used) {
1160 1163 (void) rdma_get_wchunk(req, &iov, args->wlist);
1161 1164 uio.uio_iov = &iov;
1162 1165 uio.uio_iovcnt = 1;
1163 1166 } else {
1164 1167 /*
1165 1168 * mp will contain the data to be sent out in the read reply.
1166 1169 * For UDP, this will be freed after the reply has been sent
1167 1170 * out by the driver. For TCP, it will be freed after the last
1168 1171 * segment associated with the reply has been ACKed by the
1169 1172 * client.
1170 1173 */
1171 1174 mp = rfs_read_alloc(args->count, &iovp, &iovcnt);
1172 1175 uio.uio_iov = iovp;
1173 1176 uio.uio_iovcnt = iovcnt;
1174 1177 }
1175 1178
1176 1179 uio.uio_segflg = UIO_SYSSPACE;
1177 1180 uio.uio_extflg = UIO_COPY_CACHED;
1178 1181 uio.uio_loffset = args->offset;
1179 1182 uio.uio_resid = args->count;
1180 1183 uiop = &uio;
1181 1184
1182 1185 doio_read:
1183 1186 error = VOP_READ(vp, uiop, 0, cr, &ct);
1184 1187
1185 1188 if (error) {
1186 1189 if (mp)
1187 1190 freemsg(mp);
1188 1191 /* check if a monitor detected a delegation conflict */
1189 1192 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1190 1193 resp->status = NFS3ERR_JUKEBOX;
1191 1194 goto out1;
1192 1195 }
1193 1196 goto out;
1194 1197 }
1195 1198
1196 1199 /* make mblk using zc buffers */
1197 1200 if (loaned_buffers) {
1198 1201 mp = uio_to_mblk(uiop);
1199 1202 ASSERT(mp != NULL);
1200 1203 }
1201 1204
1202 1205 va.va_mask = AT_ALL;
1203 1206 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1204 1207
1205 1208 if (error)
1206 1209 vap = NULL;
1207 1210 else
1208 1211 vap = &va;
1209 1212
1210 1213 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1211 1214
1212 1215 if (in_crit)
1213 1216 nbl_end_crit(vp);
1214 1217
1215 1218 resp->status = NFS3_OK;
1216 1219 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1217 1220 resp->resok.count = args->count - uiop->uio_resid;
1218 1221 if (!error && offset + resp->resok.count == va.va_size)
1219 1222 resp->resok.eof = TRUE;
1220 1223 else
1221 1224 resp->resok.eof = FALSE;
1222 1225 resp->resok.data.data_len = resp->resok.count;
1223 1226
1224 1227 if (mp)
1225 1228 rfs_rndup_mblks(mp, resp->resok.count, loaned_buffers);
1226 1229
1227 1230 resp->resok.data.mp = mp;
1228 1231 resp->resok.size = (uint_t)args->count;
1229 1232
1230 1233 if (rdma_used) {
1231 1234 resp->resok.data.data_val = (caddr_t)iov.iov_base;
1232 1235 if (!rdma_setup_read_data3(args, &(resp->resok))) {
1233 1236 resp->status = NFS3ERR_INVAL;
1234 1237 }
1235 1238 } else {
1236 1239 resp->resok.data.data_val = (caddr_t)mp->b_datap->db_base;
1237 1240 (resp->resok).wlist = NULL;
1238 1241 }
1239 1242
1240 1243 done:
1241 1244 DTRACE_NFSV3_5(op__read__done, struct svc_req *, req,
1242 1245 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
1243 1246 READ3res *, resp);
1244 1247
1245 1248 VN_RELE(vp);
1246 1249
1247 1250 if (iovp != NULL)
1248 1251 kmem_free(iovp, iovcnt * sizeof (struct iovec));
1249 1252
1250 1253 return;
1251 1254
1252 1255 out:
1253 1256 if (curthread->t_flag & T_WOULDBLOCK) {
1254 1257 curthread->t_flag &= ~T_WOULDBLOCK;
1255 1258 resp->status = NFS3ERR_JUKEBOX;
1256 1259 } else
1257 1260 resp->status = puterrno3(error);
1258 1261 out1:
1259 1262 DTRACE_NFSV3_5(op__read__done, struct svc_req *, req,
1260 1263 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
1261 1264 READ3res *, resp);
1262 1265
1263 1266 if (vp != NULL) {
1264 1267 if (need_rwunlock)
1265 1268 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1266 1269 if (in_crit)
1267 1270 nbl_end_crit(vp);
1268 1271 VN_RELE(vp);
1269 1272 }
1270 1273 vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
1271 1274
1272 1275 if (iovp != NULL)
1273 1276 kmem_free(iovp, iovcnt * sizeof (struct iovec));
1274 1277 }
1275 1278
1276 1279 void
1277 1280 rfs3_read_free(READ3res *resp)
1278 1281 {
1279 1282 mblk_t *mp;
1280 1283
1281 1284 if (resp->status == NFS3_OK) {
1282 1285 mp = resp->resok.data.mp;
1283 1286 if (mp != NULL)
1284 1287 freemsg(mp);
1285 1288 }
1286 1289 }
1287 1290
1288 1291 void *
1289 1292 rfs3_read_getfh(READ3args *args)
1290 1293 {
1291 1294
1292 1295 return (&args->file);
1293 1296 }
1294 1297
/*
 * Number of entries in the on-stack iovec array used by rfs3_write().
 * An mblk chain with more segments than this is described by an array
 * obtained from kmem_alloc() instead.
 */
1295 1298 #define MAX_IOVECS 12
1296 1299
/*
 * DEBUG-only counters maintained by rfs3_write(): "hits" counts requests
 * whose mblk chain fit in the on-stack iovec array, "misses" counts those
 * that needed a kmem_alloc()'d array.
 */
1297 1300 #ifdef DEBUG
1298 1301 static int rfs3_write_hits = 0;
1299 1302 static int rfs3_write_misses = 0;
1300 1303 #endif
1301 1304
/*
 * rfs3_write - service an NFSv3 WRITE request.
 *
 * args  - WRITE3 arguments: file handle, offset, count, stability level
 *         and the data (as an mblk chain, args->rlist, or args->data).
 * resp  - reply; resp->status is always set.  On success resok holds the
 *         wcc data, the number of bytes written, args->stable as the
 *         commit level and ns->write3verf as the write verifier.  On
 *         failure resfail.file_wcc is filled in.
 * exi   - export the file handle belongs to.
 * req   - RPC request (used for the client label and DTrace probes).
 * cr    - credentials the write is performed with.
 * ro    - passed to rdonly() to decide whether to fail with NFS3ERR_ROFS.
 *
 * The request is refused unless the target is a regular file (VREG),
 * args->count matches args->data.data_len, the caller owns the file or
 * passes VOP_ACCESS(VWRITE), and MANDLOCK() is false for the file.
 *
 * Every exit funnels through "out", which drops the rwlock (when
 * rwlock_ret != -1), leaves the nbmand critical region (when in_crit)
 * and releases the vnode.
 */
1302 1305 void
1303 1306 rfs3_write(WRITE3args *args, WRITE3res *resp, struct exportinfo *exi,
1304 1307 struct svc_req *req, cred_t *cr, bool_t ro)
1305 1308 {
1306 1309 nfs3_srv_t *ns;
1307 1310 int error;
1308 1311 vnode_t *vp;
1309 1312 struct vattr *bvap = NULL;
1310 1313 struct vattr bva;
1311 1314 struct vattr *avap = NULL;
1312 1315 struct vattr ava;
1313 1316 u_offset_t rlimit;
1314 1317 struct uio uio;
1315 1318 struct iovec iov[MAX_IOVECS];
1316 1319 mblk_t *m;
1317 1320 struct iovec *iovp;
1318 1321 int iovcnt;
1319 1322 int ioflag;
1320 1323 cred_t *savecred;
1321 1324 int in_crit = 0;
1322 1325 int rwlock_ret = -1;
1323 1326 caller_context_t ct;
1324 1327
1325 1328 vp = nfs3_fhtovp(&args->file, exi);
|
↓ open down ↓ |
733 lines elided |
↑ open up ↑ |
1326 1329
1327 1330 DTRACE_NFSV3_5(op__write__start, struct svc_req *, req,
1328 1331 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
1329 1332 WRITE3args *, args);
1330 1333
1331 1334 if (vp == NULL) {
1332 1335 error = ESTALE;
1333 1336 goto err;
1334 1337 }
1335 1338
1339 + ASSERT3U(curzone->zone_id, ==, exi->exi_zoneid); /* exi is non-NULL. */
1336 1340 ns = nfs3_get_srv();
1341 +
1337 1342 if (is_system_labeled()) {
1338 1343 bslabel_t *clabel = req->rq_label;
1339 1344
1340 1345 ASSERT(clabel != NULL);
1341 1346 DTRACE_PROBE2(tx__rfs3__log__info__opwrite__clabel, char *,
1342 1347 "got client label from request(1)", struct svc_req *, req);
1343 1348
1344 1349 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1345 1350 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
1346 1351 exi)) {
1347 1352 resp->status = NFS3ERR_ACCES;
1348 1353 goto err1;
1349 1354 }
1350 1355 }
1351 1356 }
1352 1357
1353 1358 ct.cc_sysid = 0;
1354 1359 ct.cc_pid = 0;
1355 1360 ct.cc_caller_id = nfs3_srv_caller_id;
1356 1361 ct.cc_flags = CC_DONTBLOCK;
1357 1362
1358 1363 /*
1359 1364 * We have to enter the critical region before calling VOP_RWLOCK
1360 1365 * to avoid a deadlock with ufs.
1361 1366 */
1362 1367 if (nbl_need_check(vp)) {
1363 1368 nbl_start_crit(vp, RW_READER);
1364 1369 in_crit = 1;
1365 1370 if (nbl_conflict(vp, NBL_WRITE, args->offset, args->count, 0,
1366 1371 NULL)) {
1367 1372 error = EACCES;
1368 1373 goto err;
1369 1374 }
1370 1375 }
1371 1376
1372 1377 rwlock_ret = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
1373 1378
1374 1379 /* check if a monitor detected a delegation conflict */
1375 1380 if (rwlock_ret == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1376 1381 resp->status = NFS3ERR_JUKEBOX;
/* Reset so the exit path at "out" skips VOP_RWUNLOCK(). */
1377 1382 rwlock_ret = -1;
1378 1383 goto err1;
1379 1384 }
1380 1385
1381 1386
1382 1387 bva.va_mask = AT_ALL;
1383 1388 error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
1384 1389
1385 1390 /*
1386 1391 * If we can't get the attributes, then we can't do the
1387 1392 * right access checking. So, we'll fail the request.
1388 1393 */
1389 1394 if (error)
1390 1395 goto err;
1391 1396
/*
 * Until the write has happened the "after" attributes are the same as
 * the "before" attributes; early failures report them as such.
 */
1392 1397 bvap = &bva;
1393 1398 avap = bvap;
1394 1399
1395 1400 if (args->count != args->data.data_len) {
1396 1401 resp->status = NFS3ERR_INVAL;
1397 1402 goto err1;
1398 1403 }
1399 1404
1400 1405 if (rdonly(ro, vp)) {
1401 1406 resp->status = NFS3ERR_ROFS;
1402 1407 goto err1;
1403 1408 }
1404 1409
1405 1410 if (vp->v_type != VREG) {
1406 1411 resp->status = NFS3ERR_INVAL;
1407 1412 goto err1;
1408 1413 }
1409 1414
/* The file's owner is not subjected to the VOP_ACCESS() check. */
1410 1415 if (crgetuid(cr) != bva.va_uid &&
1411 1416 (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct)))
1412 1417 goto err;
1413 1418
1414 1419 if (MANDLOCK(vp, bva.va_mode)) {
1415 1420 resp->status = NFS3ERR_ACCES;
1416 1421 goto err1;
1417 1422 }
1418 1423
/* A zero-length write succeeds immediately without calling VOP_WRITE(). */
1419 1424 if (args->count == 0) {
1420 1425 resp->status = NFS3_OK;
1421 1426 vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
1422 1427 resp->resok.count = 0;
1423 1428 resp->resok.committed = args->stable;
1424 1429 resp->resok.verf = ns->write3verf;
1425 1430 goto out;
1426 1431 }
1427 1432
/*
 * Describe the data with an iovec array built from args->mblk,
 * args->rlist or args->data, checked in that order.  Only the mblk
 * case can need more than one iovec.
 */
1428 1433 if (args->mblk != NULL) {
1429 1434 iovcnt = 0;
1430 1435 for (m = args->mblk; m != NULL; m = m->b_cont)
1431 1436 iovcnt++;
1432 1437 if (iovcnt <= MAX_IOVECS) {
1433 1438 #ifdef DEBUG
1434 1439 rfs3_write_hits++;
1435 1440 #endif
1436 1441 iovp = iov;
1437 1442 } else {
1438 1443 #ifdef DEBUG
1439 1444 rfs3_write_misses++;
1440 1445 #endif
1441 1446 iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
1442 1447 }
1443 1448 mblk_to_iov(args->mblk, iovcnt, iovp);
1444 1449
1445 1450 } else if (args->rlist != NULL) {
1446 1451 iovcnt = 1;
1447 1452 iovp = iov;
1448 1453 iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
1449 1454 iovp->iov_len = args->count;
1450 1455 } else {
1451 1456 iovcnt = 1;
1452 1457 iovp = iov;
1453 1458 iovp->iov_base = args->data.data_val;
1454 1459 iovp->iov_len = args->count;
1455 1460 }
1456 1461
1457 1462 uio.uio_iov = iovp;
1458 1463 uio.uio_iovcnt = iovcnt;
1459 1464
1460 1465 uio.uio_segflg = UIO_SYSSPACE;
1461 1466 uio.uio_extflg = UIO_COPY_DEFAULT;
1462 1467 uio.uio_loffset = args->offset;
1463 1468 uio.uio_resid = args->count;
/* Shrink uio_resid when fewer than args->count bytes fit below uio_llimit. */
1464 1469 uio.uio_llimit = curproc->p_fsz_ctl;
1465 1470 rlimit = uio.uio_llimit - args->offset;
1466 1471 if (rlimit < (u_offset_t)uio.uio_resid)
1467 1472 uio.uio_resid = (int)rlimit;
1468 1473
/* Map the requested stability level onto VOP_WRITE() ioflags. */
1469 1474 if (args->stable == UNSTABLE)
1470 1475 ioflag = 0;
1471 1476 else if (args->stable == FILE_SYNC)
1472 1477 ioflag = FSYNC;
1473 1478 else if (args->stable == DATA_SYNC)
1474 1479 ioflag = FDSYNC;
1475 1480 else {
1476 1481 if (iovp != iov)
1477 1482 kmem_free(iovp, sizeof (*iovp) * iovcnt);
1478 1483 resp->status = NFS3ERR_INVAL;
1479 1484 goto err1;
1480 1485 }
1481 1486
1482 1487 /*
1483 1488 * We're changing creds because VM may fault and we need
1484 1489 * the cred of the current thread to be used if quota
1485 1490 * checking is enabled.
1486 1491 */
1487 1492 savecred = curthread->t_cred;
1488 1493 curthread->t_cred = cr;
1489 1494 error = VOP_WRITE(vp, &uio, ioflag, cr, &ct);
1490 1495 curthread->t_cred = savecred;
1491 1496
/* Only a heap-allocated iovec array needs to be freed. */
1492 1497 if (iovp != iov)
1493 1498 kmem_free(iovp, sizeof (*iovp) * iovcnt);
1494 1499
1495 1500 /* check if a monitor detected a delegation conflict */
1496 1501 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1497 1502 resp->status = NFS3ERR_JUKEBOX;
1498 1503 goto err1;
1499 1504 }
1500 1505
/*
 * Fetch the post-op attributes before testing the write error so that
 * a failure reply can carry them as well.
 */
1501 1506 ava.va_mask = AT_ALL;
1502 1507 avap = VOP_GETATTR(vp, &ava, 0, cr, &ct) ? NULL : &ava;
1503 1508
1504 1509 if (error)
1505 1510 goto err;
1506 1511
1507 1512 /*
1508 1513 * If we were unable to get the V_WRITELOCK_TRUE, then we
1509 1514 * may not have accurate after attrs, so check if
1510 1515 * we have both attributes, they have a non-zero va_seq, and
1511 1516 * va_seq has changed by exactly one,
1512 1517 * if not, turn off the before attr.
1513 1518 */
1514 1519 if (rwlock_ret != V_WRITELOCK_TRUE) {
1515 1520 if (bvap == NULL || avap == NULL ||
1516 1521 bvap->va_seq == 0 || avap->va_seq == 0 ||
1517 1522 avap->va_seq != (bvap->va_seq + 1)) {
1518 1523 bvap = NULL;
1519 1524 }
1520 1525 }
1521 1526
1522 1527 resp->status = NFS3_OK;
1523 1528 vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
1524 1529 resp->resok.count = args->count - uio.uio_resid;
1525 1530 resp->resok.committed = args->stable;
1526 1531 resp->resok.verf = ns->write3verf;
1527 1532 goto out;
1528 1533
/*
 * err:  translate "error" (or a pending T_WOULDBLOCK, which is cleared)
 *       into resp->status.
 * err1: resp->status is already set; just fill in the failure wcc data.
 */
1529 1534 err:
1530 1535 if (curthread->t_flag & T_WOULDBLOCK) {
1531 1536 curthread->t_flag &= ~T_WOULDBLOCK;
1532 1537 resp->status = NFS3ERR_JUKEBOX;
1533 1538 } else
1534 1539 resp->status = puterrno3(error);
1535 1540 err1:
1536 1541 vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
1537 1542 out:
1538 1543 DTRACE_NFSV3_5(op__write__done, struct svc_req *, req,
1539 1544 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
1540 1545 WRITE3res *, resp);
1541 1546
1542 1547 if (vp != NULL) {
1543 1548 if (rwlock_ret != -1)
1544 1549 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
1545 1550 if (in_crit)
1546 1551 nbl_end_crit(vp);
1547 1552 VN_RELE(vp);
1548 1553 }
1549 1554 }
1550 1555
1551 1556 void *
1552 1557 rfs3_write_getfh(WRITE3args *args)
1553 1558 {
1554 1559
1555 1560 return (&args->file);
1556 1561 }
1557 1562
/*
 * rfs3_create - service an NFSv3 CREATE request.
 *
 * args  - CREATE3 arguments: directory handle and name (args->where) and
 *         the creation mode plus attributes or verifier (args->how).
 * resp  - reply; resp->status is always set.  On success resok holds the
 *         new object's handle (when makefh3() succeeds), its attributes
 *         and the directory wcc data.  On failure resfail.dir_wcc is
 *         filled in.
 * exi   - export the directory handle belongs to.
 * req   - RPC request (client label, caller address, DTrace probes).
 * cr    - credentials used for all vnode operations.
 * ro    - passed to rdonly() to decide whether to fail with NFS3ERR_ROFS.
 *
 * args->how.mode selects the behaviour:
 *   EXCLUSIVE - create with EXCL and a zero mode, storing the client's
 *               verifier in the mtime so that a repeated request can be
 *               matched against the existing file.
 *   GUARDED   - create with EXCL from the supplied attributes; EEXIST is
 *               returned to the client.
 *   otherwise - create with NONEXCL; when a size is supplied, an
 *               existing file is first checked for v4 delegations and
 *               conflicting nbmand locks.
 *
 * The success path returns directly; failures go through "out" (errno
 * to status translation) and "out1" (common cleanup).
 */
1558 1563 void
1559 1564 rfs3_create(CREATE3args *args, CREATE3res *resp, struct exportinfo *exi,
1560 1565 struct svc_req *req, cred_t *cr, bool_t ro)
1561 1566 {
1562 1567 int error;
1563 1568 int in_crit = 0;
1564 1569 vnode_t *vp;
1565 1570 vnode_t *tvp = NULL;
1566 1571 vnode_t *dvp;
1567 1572 struct vattr *vap;
1568 1573 struct vattr va;
1569 1574 struct vattr *dbvap;
1570 1575 struct vattr dbva;
1571 1576 struct vattr *davap;
1572 1577 struct vattr dava;
1573 1578 enum vcexcl excl;
1574 1579 nfstime3 *mtime;
1575 1580 len_t reqsize;
1576 1581 bool_t trunc;
1577 1582 struct sockaddr *ca;
1578 1583 char *name = NULL;
1579 1584
1580 1585 dbvap = NULL;
1581 1586 davap = NULL;
1582 1587
1583 1588 dvp = nfs3_fhtovp(&args->where.dir, exi);
1584 1589
1585 1590 DTRACE_NFSV3_5(op__create__start, struct svc_req *, req,
1586 1591 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
1587 1592 CREATE3args *, args);
1588 1593
1589 1594 if (dvp == NULL) {
1590 1595 error = ESTALE;
1591 1596 goto out;
1592 1597 }
1593 1598
/*
 * Capture the directory's pre-op attributes; until the create has run
 * they also serve as the post-op attributes for failure replies.
 */
1594 1599 dbva.va_mask = AT_ALL;
1595 1600 dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1596 1601 davap = dbvap;
1597 1602
1598 1603 if (args->where.name == nfs3nametoolong) {
1599 1604 resp->status = NFS3ERR_NAMETOOLONG;
1600 1605 goto out1;
1601 1606 }
1602 1607
1603 1608 if (args->where.name == NULL || *(args->where.name) == '\0') {
1604 1609 resp->status = NFS3ERR_ACCES;
1605 1610 goto out1;
1606 1611 }
1607 1612
1608 1613 if (rdonly(ro, dvp)) {
1609 1614 resp->status = NFS3ERR_ROFS;
1610 1615 goto out1;
1611 1616 }
1612 1617
1613 1618 if (is_system_labeled()) {
1614 1619 bslabel_t *clabel = req->rq_label;
1615 1620
1616 1621 ASSERT(clabel != NULL);
1617 1622 DTRACE_PROBE2(tx__rfs3__log__info__opcreate__clabel, char *,
1618 1623 "got client label from request(1)", struct svc_req *, req);
1619 1624
1620 1625 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1621 1626 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
1622 1627 exi)) {
1623 1628 resp->status = NFS3ERR_ACCES;
1624 1629 goto out1;
1625 1630 }
1626 1631 }
1627 1632 }
1628 1633
1629 1634 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1630 1635 name = nfscmd_convname(ca, exi, args->where.name,
1631 1636 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
1632 1637
1633 1638 if (name == NULL) {
1634 1639 /* This is really a Solaris EILSEQ */
1635 1640 resp->status = NFS3ERR_INVAL;
1636 1641 goto out1;
1637 1642 }
1638 1643
1639 1644 if (args->how.mode == EXCLUSIVE) {
1640 1645 va.va_mask = AT_TYPE | AT_MODE | AT_MTIME;
1641 1646 va.va_type = VREG;
1642 1647 va.va_mode = (mode_t)0;
1643 1648 /*
1644 1649 * Ensure no time overflows and that types match
1645 1650 */
1646 1651 mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1647 1652 va.va_mtime.tv_sec = mtime->seconds % INT32_MAX;
1648 1653 va.va_mtime.tv_nsec = mtime->nseconds;
1649 1654 excl = EXCL;
1650 1655 } else {
1651 1656 error = sattr3_to_vattr(&args->how.createhow3_u.obj_attributes,
1652 1657 &va);
1653 1658 if (error)
1654 1659 goto out;
1655 1660 va.va_mask |= AT_TYPE;
1656 1661 va.va_type = VREG;
1657 1662 if (args->how.mode == GUARDED)
1658 1663 excl = EXCL;
1659 1664 else {
1660 1665 excl = NONEXCL;
1661 1666
1662 1667 /*
1663 1668 * During creation of file in non-exclusive mode
1664 1669 * if size of file is being set then make sure
1665 1670 * that if the file already exists that no conflicting
1666 1671 * non-blocking mandatory locks exist in the region
1667 1672 * being modified. If there are conflicting locks fail
1668 1673 * the operation with EACCES.
1669 1674 */
1670 1675 if (va.va_mask & AT_SIZE) {
1671 1676 struct vattr tva;
1672 1677
1673 1678 /*
1674 1679 * Does file already exist?
1675 1680 */
1676 1681 error = VOP_LOOKUP(dvp, name, &tvp,
1677 1682 NULL, 0, NULL, cr, NULL, NULL, NULL);
1678 1683
1679 1684 /*
1680 1685 * Check to see if the file has been delegated
1681 1686 * to a v4 client. If so, then begin recall of
1682 1687 * the delegation and return JUKEBOX to allow
1683 1688 * the client to retransmit its request.
1684 1689 */
1685 1690
1686 1691 trunc = va.va_size == 0;
1687 1692 if (!error &&
1688 1693 rfs4_check_delegated(FWRITE, tvp, trunc)) {
1689 1694 resp->status = NFS3ERR_JUKEBOX;
1690 1695 goto out1;
1691 1696 }
1692 1697
1693 1698 /*
1694 1699 * Check for NBMAND lock conflicts
1695 1700 */
1696 1701 if (!error && nbl_need_check(tvp)) {
1697 1702 u_offset_t offset;
1698 1703 ssize_t len;
1699 1704
1700 1705 nbl_start_crit(tvp, RW_READER);
1701 1706 in_crit = 1;
1702 1707
1703 1708 tva.va_mask = AT_SIZE;
1704 1709 error = VOP_GETATTR(tvp, &tva, 0, cr,
1705 1710 NULL);
1706 1711 /*
1707 1712 * Can't check for conflicts, so return
1708 1713 * error.
1709 1714 */
1710 1715 if (error)
1711 1716 goto out;
1712 1717
/*
 * The region to check runs from the smaller of the current and
 * requested sizes, for the distance between the two.
 */
1713 1718 offset = tva.va_size < va.va_size ?
1714 1719 tva.va_size : va.va_size;
1715 1720 len = tva.va_size < va.va_size ?
1716 1721 va.va_size - tva.va_size :
1717 1722 tva.va_size - va.va_size;
1718 1723 if (nbl_conflict(tvp, NBL_WRITE,
1719 1724 offset, len, 0, NULL)) {
1720 1725 error = EACCES;
1721 1726 goto out;
1722 1727 }
1723 1728 } else if (tvp) {
1724 1729 VN_RELE(tvp);
1725 1730 tvp = NULL;
1726 1731 }
1727 1732 }
1728 1733 }
/* Remember the requested size; the create may change va below. */
1729 1734 if (va.va_mask & AT_SIZE)
1730 1735 reqsize = va.va_size;
1731 1736 }
1732 1737
1733 1738 /*
1734 1739 * Must specify the mode.
1735 1740 */
1736 1741 if (!(va.va_mask & AT_MODE)) {
1737 1742 resp->status = NFS3ERR_INVAL;
1738 1743 goto out1;
1739 1744 }
1740 1745
1741 1746 /*
1742 1747 * If the filesystem is exported with nosuid, then mask off
1743 1748 * the setuid and setgid bits.
1744 1749 */
1745 1750 if (va.va_type == VREG && (exi->exi_export.ex_flags & EX_NOSUID))
1746 1751 va.va_mode &= ~(VSUID | VSGID);
1747 1752
1748 1753 tryagain:
1749 1754 /*
1750 1755 * The file open mode used is VWRITE. If the client needs
1751 1756 * some other semantic, then it should do the access checking
1752 1757 * itself. It would have been nice to have the file open mode
1753 1758 * passed as part of the arguments.
1754 1759 */
1755 1760 error = VOP_CREATE(dvp, name, &va, excl, VWRITE,
1756 1761 &vp, cr, 0, NULL, NULL);
1757 1762
/* Refresh the directory's post-op attributes whether or not it worked. */
1758 1763 dava.va_mask = AT_ALL;
1759 1764 davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
1760 1765
1761 1766 if (error) {
1762 1767 /*
1763 1768 * If we got something other than file already exists
1764 1769 * then just return this error. Otherwise, we got
1765 1770 * EEXIST. If we were doing a GUARDED create, then
1766 1771 * just return this error. Otherwise, we need to
1767 1772 * make sure that this wasn't a duplicate of an
1768 1773 * exclusive create request.
1769 1774 *
1770 1775 * The assumption is made that a non-exclusive create
1771 1776 * request will never return EEXIST.
1772 1777 */
1773 1778 if (error != EEXIST || args->how.mode == GUARDED)
1774 1779 goto out;
1775 1780 /*
1776 1781 * Lookup the file so that we can get a vnode for it.
1777 1782 */
1778 1783 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0,
1779 1784 NULL, cr, NULL, NULL, NULL);
1780 1785 if (error) {
1781 1786 /*
1782 1787 * We couldn't find the file that we thought that
1783 1788 * we just created. So, we'll just try creating
1784 1789 * it again.
1785 1790 */
1786 1791 if (error == ENOENT)
1787 1792 goto tryagain;
1788 1793 goto out;
1789 1794 }
1790 1795
1791 1796 /*
1792 1797 * If the file is delegated to a v4 client, go ahead
1793 1798 * and initiate recall, this create is a hint that a
1794 1799 * conflicting v3 open has occurred.
1795 1800 */
1796 1801
1797 1802 if (rfs4_check_delegated(FWRITE, vp, FALSE)) {
1798 1803 VN_RELE(vp);
1799 1804 resp->status = NFS3ERR_JUKEBOX;
1800 1805 goto out1;
1801 1806 }
1802 1807
1803 1808 va.va_mask = AT_ALL;
1804 1809 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1805 1810
/*
 * For an EXCLUSIVE create the existing file is accepted only if its
 * mtime still holds this request's verifier; otherwise report EEXIST.
 */
1806 1811 mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1807 1812 /* % with INT32_MAX to prevent overflows */
1808 1813 if (args->how.mode == EXCLUSIVE && (vap == NULL ||
1809 1814 vap->va_mtime.tv_sec !=
1810 1815 (mtime->seconds % INT32_MAX) ||
1811 1816 vap->va_mtime.tv_nsec != mtime->nseconds)) {
1812 1817 VN_RELE(vp);
1813 1818 error = EEXIST;
1814 1819 goto out;
1815 1820 }
1816 1821 } else {
1817 1822
1818 1823 if ((args->how.mode == UNCHECKED ||
1819 1824 args->how.mode == GUARDED) &&
1820 1825 args->how.createhow3_u.obj_attributes.size.set_it &&
1821 1826 va.va_size == 0)
1822 1827 trunc = TRUE;
1823 1828 else
1824 1829 trunc = FALSE;
1825 1830
1826 1831 if (rfs4_check_delegated(FWRITE, vp, trunc)) {
1827 1832 VN_RELE(vp);
1828 1833 resp->status = NFS3ERR_JUKEBOX;
1829 1834 goto out1;
1830 1835 }
1831 1836
1832 1837 va.va_mask = AT_ALL;
1833 1838 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1834 1839
1835 1840 /*
1836 1841 * We need to check to make sure that the file got
1837 1842 * created to the indicated size. If not, we do a
1838 1843 * setattr to try to change the size, but we don't
1839 1844 * try too hard. This shouldn't be a problem as most
1840 1845 * clients will only specify a size of zero which
1841 1846 * local file systems handle. However, even if
1842 1847 * the client does specify a non-zero size, it can
1843 1848 * still recover by checking the size of the file
1844 1849 * after it has created it and then issue a setattr
1845 1850 * request of its own to set the size of the file.
1846 1851 */
1847 1852 if (vap != NULL &&
1848 1853 (args->how.mode == UNCHECKED ||
1849 1854 args->how.mode == GUARDED) &&
1850 1855 args->how.createhow3_u.obj_attributes.size.set_it &&
1851 1856 vap->va_size != reqsize) {
1852 1857 va.va_mask = AT_SIZE;
1853 1858 va.va_size = reqsize;
1854 1859 (void) VOP_SETATTR(vp, &va, 0, cr, NULL);
1855 1860 va.va_mask = AT_ALL;
1856 1861 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1857 1862 }
1858 1863 }
1859 1864
/*
 * NOTE(review): presumably nfscmd_convname() hands back args->where.name
 * itself when no conversion was needed, hence the pointer comparison
 * before freeing - confirm against its definition.
 */
1860 1865 if (name != args->where.name)
1861 1866 kmem_free(name, MAXPATHLEN + 1);
1862 1867
/* A failed makefh3() is not fatal: the reply simply omits the handle. */
1863 1868 error = makefh3(&resp->resok.obj.handle, vp, exi);
1864 1869 if (error)
1865 1870 resp->resok.obj.handle_follows = FALSE;
1866 1871 else
1867 1872 resp->resok.obj.handle_follows = TRUE;
1868 1873
1869 1874 /*
1870 1875 * Force modified data and metadata out to stable storage.
1871 1876 */
1872 1877 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
1873 1878 (void) VOP_FSYNC(dvp, 0, cr, NULL);
1874 1879
1875 1880 VN_RELE(vp);
1876 1881 if (tvp != NULL) {
1877 1882 if (in_crit)
1878 1883 nbl_end_crit(tvp);
1879 1884 VN_RELE(tvp);
1880 1885 }
1881 1886
1882 1887 resp->status = NFS3_OK;
1883 1888 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
1884 1889 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1885 1890
1886 1891 DTRACE_NFSV3_5(op__create__done, struct svc_req *, req,
1887 1892 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
1888 1893 CREATE3res *, resp);
1889 1894
1890 1895 VN_RELE(dvp);
1891 1896 return;
1892 1897
/*
 * out:  translate "error" (or a pending T_WOULDBLOCK, which is cleared)
 *       into resp->status.
 * out1: resp->status is already set; release whatever is still held and
 *       fill in the failure wcc data.
 */
1893 1898 out:
1894 1899 if (curthread->t_flag & T_WOULDBLOCK) {
1895 1900 curthread->t_flag &= ~T_WOULDBLOCK;
1896 1901 resp->status = NFS3ERR_JUKEBOX;
1897 1902 } else
1898 1903 resp->status = puterrno3(error);
1899 1904 out1:
1900 1905 DTRACE_NFSV3_5(op__create__done, struct svc_req *, req,
1901 1906 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
1902 1907 CREATE3res *, resp);
1903 1908
1904 1909 if (name != NULL && name != args->where.name)
1905 1910 kmem_free(name, MAXPATHLEN + 1);
1906 1911
1907 1912 if (tvp != NULL) {
1908 1913 if (in_crit)
1909 1914 nbl_end_crit(tvp);
1910 1915 VN_RELE(tvp);
1911 1916 }
1912 1917 if (dvp != NULL)
1913 1918 VN_RELE(dvp);
1914 1919 vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
1915 1920 }
1916 1921
1917 1922 void *
1918 1923 rfs3_create_getfh(CREATE3args *args)
1919 1924 {
1920 1925
1921 1926 return (&args->where.dir);
1922 1927 }
1923 1928
/*
 * rfs3_mkdir - service an NFSv3 MKDIR request.
 *
 * args  - MKDIR3 arguments: parent directory handle and name
 *         (args->where) and the attributes for the new directory.
 * resp  - reply; resp->status is always set.  On success resok holds the
 *         new directory's handle (when makefh3() succeeds), its
 *         attributes and the parent's wcc data.  On failure
 *         resfail.dir_wcc is filled in.
 * exi   - export the directory handle belongs to.
 * req   - RPC request (client label, caller address, DTrace probes).
 * cr    - credentials used for all vnode operations.
 * ro    - passed to rdonly() to decide whether to fail with NFS3ERR_ROFS.
 *
 * The supplied attributes must include a mode (AT_MODE) or the request
 * fails with NFS3ERR_INVAL.  The success path returns directly; failures
 * go through "out" (errno to status translation) and "out1" (cleanup).
 */
1924 1929 void
1925 1930 rfs3_mkdir(MKDIR3args *args, MKDIR3res *resp, struct exportinfo *exi,
1926 1931 struct svc_req *req, cred_t *cr, bool_t ro)
1927 1932 {
1928 1933 int error;
1929 1934 vnode_t *vp = NULL;
1930 1935 vnode_t *dvp;
1931 1936 struct vattr *vap;
1932 1937 struct vattr va;
1933 1938 struct vattr *dbvap;
1934 1939 struct vattr dbva;
1935 1940 struct vattr *davap;
1936 1941 struct vattr dava;
1937 1942 struct sockaddr *ca;
1938 1943 char *name = NULL;
1939 1944
1940 1945 dbvap = NULL;
1941 1946 davap = NULL;
1942 1947
1943 1948 dvp = nfs3_fhtovp(&args->where.dir, exi);
1944 1949
1945 1950 DTRACE_NFSV3_5(op__mkdir__start, struct svc_req *, req,
1946 1951 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
1947 1952 MKDIR3args *, args);
1948 1953
1949 1954 if (dvp == NULL) {
1950 1955 error = ESTALE;
1951 1956 goto out;
1952 1957 }
1953 1958
/*
 * Capture the parent's pre-op attributes; until the mkdir has run they
 * also serve as the post-op attributes for failure replies.
 */
1954 1959 dbva.va_mask = AT_ALL;
1955 1960 dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1956 1961 davap = dbvap;
1957 1962
1958 1963 if (args->where.name == nfs3nametoolong) {
1959 1964 resp->status = NFS3ERR_NAMETOOLONG;
1960 1965 goto out1;
1961 1966 }
1962 1967
1963 1968 if (args->where.name == NULL || *(args->where.name) == '\0') {
1964 1969 resp->status = NFS3ERR_ACCES;
1965 1970 goto out1;
1966 1971 }
1967 1972
1968 1973 if (rdonly(ro, dvp)) {
1969 1974 resp->status = NFS3ERR_ROFS;
1970 1975 goto out1;
1971 1976 }
1972 1977
1973 1978 if (is_system_labeled()) {
1974 1979 bslabel_t *clabel = req->rq_label;
1975 1980
1976 1981 ASSERT(clabel != NULL);
1977 1982 DTRACE_PROBE2(tx__rfs3__log__info__opmkdir__clabel, char *,
1978 1983 "got client label from request(1)", struct svc_req *, req);
1979 1984
1980 1985 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1981 1986 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
1982 1987 exi)) {
1983 1988 resp->status = NFS3ERR_ACCES;
1984 1989 goto out1;
1985 1990 }
1986 1991 }
1987 1992 }
1988 1993
1989 1994 error = sattr3_to_vattr(&args->attributes, &va);
1990 1995 if (error)
1991 1996 goto out;
1992 1997
/* A mode must have been supplied by the client. */
1993 1998 if (!(va.va_mask & AT_MODE)) {
1994 1999 resp->status = NFS3ERR_INVAL;
1995 2000 goto out1;
1996 2001 }
1997 2002
1998 2003 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1999 2004 name = nfscmd_convname(ca, exi, args->where.name,
2000 2005 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2001 2006
2002 2007 if (name == NULL) {
2003 2008 resp->status = NFS3ERR_INVAL;
2004 2009 goto out1;
2005 2010 }
2006 2011
2007 2012 va.va_mask |= AT_TYPE;
2008 2013 va.va_type = VDIR;
2009 2014
2010 2015 error = VOP_MKDIR(dvp, name, &va, &vp, cr, NULL, 0, NULL);
2011 2016
/*
 * The converted name is released here, before any error check, so the
 * failure paths below have nothing left to free.
 */
2012 2017 if (name != args->where.name)
2013 2018 kmem_free(name, MAXPATHLEN + 1);
2014 2019
/* Refresh the parent's post-op attributes whether or not mkdir worked. */
2015 2020 dava.va_mask = AT_ALL;
2016 2021 davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2017 2022
2018 2023 /*
2019 2024 * Force modified data and metadata out to stable storage.
2020 2025 */
2021 2026 (void) VOP_FSYNC(dvp, 0, cr, NULL);
2022 2027
2023 2028 if (error)
2024 2029 goto out;
2025 2030
/* A failed makefh3() is not fatal: the reply simply omits the handle. */
2026 2031 error = makefh3(&resp->resok.obj.handle, vp, exi);
2027 2032 if (error)
2028 2033 resp->resok.obj.handle_follows = FALSE;
2029 2034 else
2030 2035 resp->resok.obj.handle_follows = TRUE;
2031 2036
2032 2037 va.va_mask = AT_ALL;
2033 2038 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2034 2039
2035 2040 /*
2036 2041 * Force modified data and metadata out to stable storage.
2037 2042 */
2038 2043 (void) VOP_FSYNC(vp, 0, cr, NULL);
2039 2044
2040 2045 VN_RELE(vp);
2041 2046
2042 2047 resp->status = NFS3_OK;
2043 2048 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2044 2049 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2045 2050
2046 2051 DTRACE_NFSV3_5(op__mkdir__done, struct svc_req *, req,
2047 2052 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2048 2053 MKDIR3res *, resp);
2049 2054 VN_RELE(dvp);
2050 2055
2051 2056 return;
2052 2057
/*
 * out:  translate "error" (or a pending T_WOULDBLOCK, which is cleared)
 *       into resp->status.
 * out1: resp->status is already set; release the parent vnode and fill
 *       in the failure wcc data.
 */
2053 2058 out:
2054 2059 if (curthread->t_flag & T_WOULDBLOCK) {
2055 2060 curthread->t_flag &= ~T_WOULDBLOCK;
2056 2061 resp->status = NFS3ERR_JUKEBOX;
2057 2062 } else
2058 2063 resp->status = puterrno3(error);
2059 2064 out1:
2060 2065 DTRACE_NFSV3_5(op__mkdir__done, struct svc_req *, req,
2061 2066 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2062 2067 MKDIR3res *, resp);
2063 2068 if (dvp != NULL)
2064 2069 VN_RELE(dvp);
2065 2070 vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2066 2071 }
2067 2072
2068 2073 void *
2069 2074 rfs3_mkdir_getfh(MKDIR3args *args)
2070 2075 {
2071 2076
2072 2077 return (&args->where.dir);
2073 2078 }
2074 2079
2075 2080 void
2076 2081 rfs3_symlink(SYMLINK3args *args, SYMLINK3res *resp, struct exportinfo *exi,
2077 2082 struct svc_req *req, cred_t *cr, bool_t ro)
2078 2083 {
2079 2084 int error;
2080 2085 vnode_t *vp;
2081 2086 vnode_t *dvp;
2082 2087 struct vattr *vap;
2083 2088 struct vattr va;
2084 2089 struct vattr *dbvap;
2085 2090 struct vattr dbva;
2086 2091 struct vattr *davap;
2087 2092 struct vattr dava;
2088 2093 struct sockaddr *ca;
2089 2094 char *name = NULL;
2090 2095 char *symdata = NULL;
2091 2096
2092 2097 dbvap = NULL;
2093 2098 davap = NULL;
2094 2099
2095 2100 dvp = nfs3_fhtovp(&args->where.dir, exi);
2096 2101
2097 2102 DTRACE_NFSV3_5(op__symlink__start, struct svc_req *, req,
2098 2103 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2099 2104 SYMLINK3args *, args);
2100 2105
2101 2106 if (dvp == NULL) {
2102 2107 error = ESTALE;
2103 2108 goto err;
2104 2109 }
2105 2110
2106 2111 dbva.va_mask = AT_ALL;
2107 2112 dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2108 2113 davap = dbvap;
2109 2114
2110 2115 if (args->where.name == nfs3nametoolong) {
2111 2116 resp->status = NFS3ERR_NAMETOOLONG;
2112 2117 goto err1;
2113 2118 }
2114 2119
2115 2120 if (args->where.name == NULL || *(args->where.name) == '\0') {
2116 2121 resp->status = NFS3ERR_ACCES;
2117 2122 goto err1;
2118 2123 }
2119 2124
2120 2125 if (rdonly(ro, dvp)) {
2121 2126 resp->status = NFS3ERR_ROFS;
2122 2127 goto err1;
2123 2128 }
2124 2129
2125 2130 if (is_system_labeled()) {
2126 2131 bslabel_t *clabel = req->rq_label;
2127 2132
2128 2133 ASSERT(clabel != NULL);
2129 2134 DTRACE_PROBE2(tx__rfs3__log__info__opsymlink__clabel, char *,
2130 2135 "got client label from request(1)", struct svc_req *, req);
2131 2136
2132 2137 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2133 2138 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2134 2139 exi)) {
2135 2140 resp->status = NFS3ERR_ACCES;
2136 2141 goto err1;
2137 2142 }
2138 2143 }
2139 2144 }
2140 2145
2141 2146 error = sattr3_to_vattr(&args->symlink.symlink_attributes, &va);
2142 2147 if (error)
2143 2148 goto err;
2144 2149
2145 2150 if (!(va.va_mask & AT_MODE)) {
2146 2151 resp->status = NFS3ERR_INVAL;
2147 2152 goto err1;
2148 2153 }
2149 2154
2150 2155 if (args->symlink.symlink_data == nfs3nametoolong) {
2151 2156 resp->status = NFS3ERR_NAMETOOLONG;
2152 2157 goto err1;
2153 2158 }
2154 2159
2155 2160 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2156 2161 name = nfscmd_convname(ca, exi, args->where.name,
2157 2162 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2158 2163
2159 2164 if (name == NULL) {
2160 2165 /* This is really a Solaris EILSEQ */
2161 2166 resp->status = NFS3ERR_INVAL;
2162 2167 goto err1;
2163 2168 }
2164 2169
2165 2170 symdata = nfscmd_convname(ca, exi, args->symlink.symlink_data,
2166 2171 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2167 2172 if (symdata == NULL) {
2168 2173 /* This is really a Solaris EILSEQ */
2169 2174 resp->status = NFS3ERR_INVAL;
2170 2175 goto err1;
2171 2176 }
2172 2177
2173 2178
2174 2179 va.va_mask |= AT_TYPE;
2175 2180 va.va_type = VLNK;
2176 2181
2177 2182 error = VOP_SYMLINK(dvp, name, &va, symdata, cr, NULL, 0);
2178 2183
2179 2184 dava.va_mask = AT_ALL;
2180 2185 davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2181 2186
2182 2187 if (error)
2183 2188 goto err;
2184 2189
2185 2190 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
2186 2191 NULL, NULL, NULL);
2187 2192
2188 2193 /*
2189 2194 * Force modified data and metadata out to stable storage.
2190 2195 */
2191 2196 (void) VOP_FSYNC(dvp, 0, cr, NULL);
2192 2197
2193 2198
2194 2199 resp->status = NFS3_OK;
2195 2200 if (error) {
2196 2201 resp->resok.obj.handle_follows = FALSE;
2197 2202 vattr_to_post_op_attr(NULL, &resp->resok.obj_attributes);
2198 2203 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2199 2204 goto out;
2200 2205 }
2201 2206
2202 2207 error = makefh3(&resp->resok.obj.handle, vp, exi);
2203 2208 if (error)
2204 2209 resp->resok.obj.handle_follows = FALSE;
2205 2210 else
2206 2211 resp->resok.obj.handle_follows = TRUE;
2207 2212
2208 2213 va.va_mask = AT_ALL;
2209 2214 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2210 2215
2211 2216 /*
2212 2217 * Force modified data and metadata out to stable storage.
2213 2218 */
2214 2219 (void) VOP_FSYNC(vp, 0, cr, NULL);
2215 2220
2216 2221 VN_RELE(vp);
2217 2222
2218 2223 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2219 2224 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2220 2225 goto out;
2221 2226
2222 2227 err:
2223 2228 if (curthread->t_flag & T_WOULDBLOCK) {
2224 2229 curthread->t_flag &= ~T_WOULDBLOCK;
2225 2230 resp->status = NFS3ERR_JUKEBOX;
2226 2231 } else
2227 2232 resp->status = puterrno3(error);
2228 2233 err1:
2229 2234 vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2230 2235 out:
2231 2236 if (name != NULL && name != args->where.name)
2232 2237 kmem_free(name, MAXPATHLEN + 1);
2233 2238 if (symdata != NULL && symdata != args->symlink.symlink_data)
2234 2239 kmem_free(symdata, MAXPATHLEN + 1);
2235 2240
2236 2241 DTRACE_NFSV3_5(op__symlink__done, struct svc_req *, req,
2237 2242 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2238 2243 SYMLINK3res *, resp);
2239 2244
2240 2245 if (dvp != NULL)
2241 2246 VN_RELE(dvp);
2242 2247 }
2243 2248
2244 2249 void *
2245 2250 rfs3_symlink_getfh(SYMLINK3args *args)
2246 2251 {
2247 2252
2248 2253 return (&args->where.dir);
2249 2254 }
2250 2255
2251 2256 void
2252 2257 rfs3_mknod(MKNOD3args *args, MKNOD3res *resp, struct exportinfo *exi,
2253 2258 struct svc_req *req, cred_t *cr, bool_t ro)
2254 2259 {
2255 2260 int error;
2256 2261 vnode_t *vp;
2257 2262 vnode_t *realvp;
2258 2263 vnode_t *dvp;
2259 2264 struct vattr *vap;
2260 2265 struct vattr va;
2261 2266 struct vattr *dbvap;
2262 2267 struct vattr dbva;
2263 2268 struct vattr *davap;
2264 2269 struct vattr dava;
2265 2270 int mode;
2266 2271 enum vcexcl excl;
2267 2272 struct sockaddr *ca;
2268 2273 char *name = NULL;
2269 2274
2270 2275 dbvap = NULL;
2271 2276 davap = NULL;
2272 2277
2273 2278 dvp = nfs3_fhtovp(&args->where.dir, exi);
2274 2279
2275 2280 DTRACE_NFSV3_5(op__mknod__start, struct svc_req *, req,
2276 2281 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2277 2282 MKNOD3args *, args);
2278 2283
2279 2284 if (dvp == NULL) {
2280 2285 error = ESTALE;
2281 2286 goto out;
2282 2287 }
2283 2288
2284 2289 dbva.va_mask = AT_ALL;
2285 2290 dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2286 2291 davap = dbvap;
2287 2292
2288 2293 if (args->where.name == nfs3nametoolong) {
2289 2294 resp->status = NFS3ERR_NAMETOOLONG;
2290 2295 goto out1;
2291 2296 }
2292 2297
2293 2298 if (args->where.name == NULL || *(args->where.name) == '\0') {
2294 2299 resp->status = NFS3ERR_ACCES;
2295 2300 goto out1;
2296 2301 }
2297 2302
2298 2303 if (rdonly(ro, dvp)) {
2299 2304 resp->status = NFS3ERR_ROFS;
2300 2305 goto out1;
2301 2306 }
2302 2307
2303 2308 if (is_system_labeled()) {
2304 2309 bslabel_t *clabel = req->rq_label;
2305 2310
2306 2311 ASSERT(clabel != NULL);
2307 2312 DTRACE_PROBE2(tx__rfs3__log__info__opmknod__clabel, char *,
2308 2313 "got client label from request(1)", struct svc_req *, req);
2309 2314
2310 2315 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2311 2316 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2312 2317 exi)) {
2313 2318 resp->status = NFS3ERR_ACCES;
2314 2319 goto out1;
2315 2320 }
2316 2321 }
2317 2322 }
2318 2323
2319 2324 switch (args->what.type) {
2320 2325 case NF3CHR:
2321 2326 case NF3BLK:
2322 2327 error = sattr3_to_vattr(
2323 2328 &args->what.mknoddata3_u.device.dev_attributes, &va);
2324 2329 if (error)
2325 2330 goto out;
2326 2331 if (secpolicy_sys_devices(cr) != 0) {
2327 2332 resp->status = NFS3ERR_PERM;
2328 2333 goto out1;
2329 2334 }
2330 2335 if (args->what.type == NF3CHR)
2331 2336 va.va_type = VCHR;
2332 2337 else
2333 2338 va.va_type = VBLK;
2334 2339 va.va_rdev = makedevice(
2335 2340 args->what.mknoddata3_u.device.spec.specdata1,
2336 2341 args->what.mknoddata3_u.device.spec.specdata2);
2337 2342 va.va_mask |= AT_TYPE | AT_RDEV;
2338 2343 break;
2339 2344 case NF3SOCK:
2340 2345 error = sattr3_to_vattr(
2341 2346 &args->what.mknoddata3_u.pipe_attributes, &va);
2342 2347 if (error)
2343 2348 goto out;
2344 2349 va.va_type = VSOCK;
2345 2350 va.va_mask |= AT_TYPE;
2346 2351 break;
2347 2352 case NF3FIFO:
2348 2353 error = sattr3_to_vattr(
2349 2354 &args->what.mknoddata3_u.pipe_attributes, &va);
2350 2355 if (error)
2351 2356 goto out;
2352 2357 va.va_type = VFIFO;
2353 2358 va.va_mask |= AT_TYPE;
2354 2359 break;
2355 2360 default:
2356 2361 resp->status = NFS3ERR_BADTYPE;
2357 2362 goto out1;
2358 2363 }
2359 2364
2360 2365 /*
2361 2366 * Must specify the mode.
2362 2367 */
2363 2368 if (!(va.va_mask & AT_MODE)) {
2364 2369 resp->status = NFS3ERR_INVAL;
2365 2370 goto out1;
2366 2371 }
2367 2372
2368 2373 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2369 2374 name = nfscmd_convname(ca, exi, args->where.name,
2370 2375 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2371 2376
2372 2377 if (name == NULL) {
2373 2378 resp->status = NFS3ERR_INVAL;
2374 2379 goto out1;
2375 2380 }
2376 2381
2377 2382 excl = EXCL;
2378 2383
2379 2384 mode = 0;
2380 2385
2381 2386 error = VOP_CREATE(dvp, name, &va, excl, mode,
2382 2387 &vp, cr, 0, NULL, NULL);
2383 2388
2384 2389 if (name != args->where.name)
2385 2390 kmem_free(name, MAXPATHLEN + 1);
2386 2391
2387 2392 dava.va_mask = AT_ALL;
2388 2393 davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2389 2394
2390 2395 /*
2391 2396 * Force modified data and metadata out to stable storage.
2392 2397 */
2393 2398 (void) VOP_FSYNC(dvp, 0, cr, NULL);
2394 2399
2395 2400 if (error)
2396 2401 goto out;
2397 2402
2398 2403 resp->status = NFS3_OK;
2399 2404
2400 2405 error = makefh3(&resp->resok.obj.handle, vp, exi);
2401 2406 if (error)
2402 2407 resp->resok.obj.handle_follows = FALSE;
2403 2408 else
2404 2409 resp->resok.obj.handle_follows = TRUE;
2405 2410
2406 2411 va.va_mask = AT_ALL;
2407 2412 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2408 2413
2409 2414 /*
2410 2415 * Force modified metadata out to stable storage.
2411 2416 *
2412 2417 * if a underlying vp exists, pass it to VOP_FSYNC
2413 2418 */
2414 2419 if (VOP_REALVP(vp, &realvp, NULL) == 0)
2415 2420 (void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL);
2416 2421 else
2417 2422 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
2418 2423
2419 2424 VN_RELE(vp);
2420 2425
2421 2426 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2422 2427 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2423 2428 DTRACE_NFSV3_5(op__mknod__done, struct svc_req *, req,
2424 2429 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2425 2430 MKNOD3res *, resp);
2426 2431 VN_RELE(dvp);
2427 2432 return;
2428 2433
2429 2434 out:
2430 2435 if (curthread->t_flag & T_WOULDBLOCK) {
2431 2436 curthread->t_flag &= ~T_WOULDBLOCK;
2432 2437 resp->status = NFS3ERR_JUKEBOX;
2433 2438 } else
2434 2439 resp->status = puterrno3(error);
2435 2440 out1:
2436 2441 DTRACE_NFSV3_5(op__mknod__done, struct svc_req *, req,
2437 2442 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2438 2443 MKNOD3res *, resp);
2439 2444 if (dvp != NULL)
2440 2445 VN_RELE(dvp);
2441 2446 vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2442 2447 }
2443 2448
2444 2449 void *
2445 2450 rfs3_mknod_getfh(MKNOD3args *args)
2446 2451 {
2447 2452
2448 2453 return (&args->where.dir);
2449 2454 }
2450 2455
2451 2456 void
2452 2457 rfs3_remove(REMOVE3args *args, REMOVE3res *resp, struct exportinfo *exi,
2453 2458 struct svc_req *req, cred_t *cr, bool_t ro)
2454 2459 {
2455 2460 int error = 0;
2456 2461 vnode_t *vp;
2457 2462 struct vattr *bvap;
2458 2463 struct vattr bva;
2459 2464 struct vattr *avap;
2460 2465 struct vattr ava;
2461 2466 vnode_t *targvp = NULL;
2462 2467 struct sockaddr *ca;
2463 2468 char *name = NULL;
2464 2469
2465 2470 bvap = NULL;
2466 2471 avap = NULL;
2467 2472
2468 2473 vp = nfs3_fhtovp(&args->object.dir, exi);
2469 2474
2470 2475 DTRACE_NFSV3_5(op__remove__start, struct svc_req *, req,
2471 2476 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2472 2477 REMOVE3args *, args);
2473 2478
2474 2479 if (vp == NULL) {
2475 2480 error = ESTALE;
2476 2481 goto err;
2477 2482 }
2478 2483
2479 2484 bva.va_mask = AT_ALL;
2480 2485 bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2481 2486 avap = bvap;
2482 2487
2483 2488 if (vp->v_type != VDIR) {
2484 2489 resp->status = NFS3ERR_NOTDIR;
2485 2490 goto err1;
2486 2491 }
2487 2492
2488 2493 if (args->object.name == nfs3nametoolong) {
2489 2494 resp->status = NFS3ERR_NAMETOOLONG;
2490 2495 goto err1;
2491 2496 }
2492 2497
2493 2498 if (args->object.name == NULL || *(args->object.name) == '\0') {
2494 2499 resp->status = NFS3ERR_ACCES;
2495 2500 goto err1;
2496 2501 }
2497 2502
2498 2503 if (rdonly(ro, vp)) {
2499 2504 resp->status = NFS3ERR_ROFS;
2500 2505 goto err1;
2501 2506 }
2502 2507
2503 2508 if (is_system_labeled()) {
2504 2509 bslabel_t *clabel = req->rq_label;
2505 2510
2506 2511 ASSERT(clabel != NULL);
2507 2512 DTRACE_PROBE2(tx__rfs3__log__info__opremove__clabel, char *,
2508 2513 "got client label from request(1)", struct svc_req *, req);
2509 2514
2510 2515 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2511 2516 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
2512 2517 exi)) {
2513 2518 resp->status = NFS3ERR_ACCES;
2514 2519 goto err1;
2515 2520 }
2516 2521 }
2517 2522 }
2518 2523
2519 2524 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2520 2525 name = nfscmd_convname(ca, exi, args->object.name,
2521 2526 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2522 2527
2523 2528 if (name == NULL) {
2524 2529 resp->status = NFS3ERR_INVAL;
2525 2530 goto err1;
2526 2531 }
2527 2532
2528 2533 /*
2529 2534 * Check for a conflict with a non-blocking mandatory share
2530 2535 * reservation and V4 delegations
2531 2536 */
2532 2537 error = VOP_LOOKUP(vp, name, &targvp, NULL, 0,
2533 2538 NULL, cr, NULL, NULL, NULL);
2534 2539 if (error != 0)
2535 2540 goto err;
2536 2541
2537 2542 if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2538 2543 resp->status = NFS3ERR_JUKEBOX;
2539 2544 goto err1;
2540 2545 }
2541 2546
2542 2547 if (!nbl_need_check(targvp)) {
2543 2548 error = VOP_REMOVE(vp, name, cr, NULL, 0);
2544 2549 } else {
2545 2550 nbl_start_crit(targvp, RW_READER);
2546 2551 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
2547 2552 error = EACCES;
2548 2553 } else {
2549 2554 error = VOP_REMOVE(vp, name, cr, NULL, 0);
2550 2555 }
2551 2556 nbl_end_crit(targvp);
2552 2557 }
2553 2558 VN_RELE(targvp);
2554 2559 targvp = NULL;
2555 2560
2556 2561 ava.va_mask = AT_ALL;
2557 2562 avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2558 2563
2559 2564 /*
2560 2565 * Force modified data and metadata out to stable storage.
2561 2566 */
2562 2567 (void) VOP_FSYNC(vp, 0, cr, NULL);
2563 2568
2564 2569 if (error)
2565 2570 goto err;
2566 2571
2567 2572 resp->status = NFS3_OK;
2568 2573 vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2569 2574 goto out;
2570 2575
2571 2576 err:
2572 2577 if (curthread->t_flag & T_WOULDBLOCK) {
2573 2578 curthread->t_flag &= ~T_WOULDBLOCK;
2574 2579 resp->status = NFS3ERR_JUKEBOX;
2575 2580 } else
2576 2581 resp->status = puterrno3(error);
2577 2582 err1:
2578 2583 vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2579 2584 out:
2580 2585 DTRACE_NFSV3_5(op__remove__done, struct svc_req *, req,
2581 2586 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2582 2587 REMOVE3res *, resp);
2583 2588
2584 2589 if (name != NULL && name != args->object.name)
2585 2590 kmem_free(name, MAXPATHLEN + 1);
2586 2591
2587 2592 if (vp != NULL)
2588 2593 VN_RELE(vp);
2589 2594 }
2590 2595
2591 2596 void *
2592 2597 rfs3_remove_getfh(REMOVE3args *args)
2593 2598 {
2594 2599
2595 2600 return (&args->object.dir);
2596 2601 }
2597 2602
2598 2603 void
2599 2604 rfs3_rmdir(RMDIR3args *args, RMDIR3res *resp, struct exportinfo *exi,
2600 2605 struct svc_req *req, cred_t *cr, bool_t ro)
2601 2606 {
2602 2607 int error;
2603 2608 vnode_t *vp;
2604 2609 struct vattr *bvap;
2605 2610 struct vattr bva;
2606 2611 struct vattr *avap;
2607 2612 struct vattr ava;
2608 2613 struct sockaddr *ca;
2609 2614 char *name = NULL;
2610 2615
2611 2616 bvap = NULL;
2612 2617 avap = NULL;
2613 2618
2614 2619 vp = nfs3_fhtovp(&args->object.dir, exi);
2615 2620
2616 2621 DTRACE_NFSV3_5(op__rmdir__start, struct svc_req *, req,
2617 2622 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2618 2623 RMDIR3args *, args);
2619 2624
2620 2625 if (vp == NULL) {
2621 2626 error = ESTALE;
2622 2627 goto err;
2623 2628 }
2624 2629
2625 2630 bva.va_mask = AT_ALL;
2626 2631 bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2627 2632 avap = bvap;
2628 2633
2629 2634 if (vp->v_type != VDIR) {
2630 2635 resp->status = NFS3ERR_NOTDIR;
2631 2636 goto err1;
2632 2637 }
2633 2638
2634 2639 if (args->object.name == nfs3nametoolong) {
2635 2640 resp->status = NFS3ERR_NAMETOOLONG;
2636 2641 goto err1;
2637 2642 }
2638 2643
2639 2644 if (args->object.name == NULL || *(args->object.name) == '\0') {
2640 2645 resp->status = NFS3ERR_ACCES;
2641 2646 goto err1;
2642 2647 }
2643 2648
2644 2649 if (rdonly(ro, vp)) {
2645 2650 resp->status = NFS3ERR_ROFS;
2646 2651 goto err1;
2647 2652 }
2648 2653
2649 2654 if (is_system_labeled()) {
2650 2655 bslabel_t *clabel = req->rq_label;
2651 2656
2652 2657 ASSERT(clabel != NULL);
2653 2658 DTRACE_PROBE2(tx__rfs3__log__info__opremovedir__clabel, char *,
2654 2659 "got client label from request(1)", struct svc_req *, req);
2655 2660
2656 2661 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2657 2662 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
2658 2663 exi)) {
2659 2664 resp->status = NFS3ERR_ACCES;
2660 2665 goto err1;
2661 2666 }
2662 2667 }
2663 2668 }
|
↓ open down ↓ |
1317 lines elided |
↑ open up ↑ |
2664 2669
2665 2670 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2666 2671 name = nfscmd_convname(ca, exi, args->object.name,
2667 2672 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2668 2673
2669 2674 if (name == NULL) {
2670 2675 resp->status = NFS3ERR_INVAL;
2671 2676 goto err1;
2672 2677 }
2673 2678
2679 + ASSERT3U(exi->exi_zoneid, ==, curzone->zone_id);
2674 2680 error = VOP_RMDIR(vp, name, ZONE_ROOTVP(), cr, NULL, 0);
2675 2681
2676 2682 if (name != args->object.name)
2677 2683 kmem_free(name, MAXPATHLEN + 1);
2678 2684
2679 2685 ava.va_mask = AT_ALL;
2680 2686 avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2681 2687
2682 2688 /*
2683 2689 * Force modified data and metadata out to stable storage.
2684 2690 */
2685 2691 (void) VOP_FSYNC(vp, 0, cr, NULL);
2686 2692
2687 2693 if (error) {
2688 2694 /*
2689 2695 * System V defines rmdir to return EEXIST, not ENOTEMPTY,
2690 2696 * if the directory is not empty. A System V NFS server
2691 2697 * needs to map NFS3ERR_EXIST to NFS3ERR_NOTEMPTY to transmit
2692 2698 * over the wire.
2693 2699 */
2694 2700 if (error == EEXIST)
2695 2701 error = ENOTEMPTY;
2696 2702 goto err;
2697 2703 }
2698 2704
2699 2705 resp->status = NFS3_OK;
2700 2706 vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2701 2707 goto out;
2702 2708
2703 2709 err:
2704 2710 if (curthread->t_flag & T_WOULDBLOCK) {
2705 2711 curthread->t_flag &= ~T_WOULDBLOCK;
2706 2712 resp->status = NFS3ERR_JUKEBOX;
2707 2713 } else
2708 2714 resp->status = puterrno3(error);
2709 2715 err1:
2710 2716 vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2711 2717 out:
2712 2718 DTRACE_NFSV3_5(op__rmdir__done, struct svc_req *, req,
2713 2719 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2714 2720 RMDIR3res *, resp);
2715 2721 if (vp != NULL)
2716 2722 VN_RELE(vp);
2717 2723
2718 2724 }
2719 2725
2720 2726 void *
2721 2727 rfs3_rmdir_getfh(RMDIR3args *args)
2722 2728 {
2723 2729
2724 2730 return (&args->object.dir);
2725 2731 }
2726 2732
2727 2733 void
2728 2734 rfs3_rename(RENAME3args *args, RENAME3res *resp, struct exportinfo *exi,
2729 2735 struct svc_req *req, cred_t *cr, bool_t ro)
2730 2736 {
2731 2737 int error = 0;
2732 2738 vnode_t *fvp;
2733 2739 vnode_t *tvp;
2734 2740 vnode_t *targvp;
2735 2741 struct vattr *fbvap;
2736 2742 struct vattr fbva;
2737 2743 struct vattr *favap;
2738 2744 struct vattr fava;
2739 2745 struct vattr *tbvap;
2740 2746 struct vattr tbva;
2741 2747 struct vattr *tavap;
2742 2748 struct vattr tava;
2743 2749 nfs_fh3 *fh3;
2744 2750 struct exportinfo *to_exi;
2745 2751 vnode_t *srcvp = NULL;
2746 2752 bslabel_t *clabel;
2747 2753 struct sockaddr *ca;
2748 2754 char *name = NULL;
2749 2755 char *toname = NULL;
2750 2756
2751 2757 fbvap = NULL;
2752 2758 favap = NULL;
2753 2759 tbvap = NULL;
2754 2760 tavap = NULL;
2755 2761 tvp = NULL;
2756 2762
2757 2763 fvp = nfs3_fhtovp(&args->from.dir, exi);
2758 2764
2759 2765 DTRACE_NFSV3_5(op__rename__start, struct svc_req *, req,
2760 2766 cred_t *, cr, vnode_t *, fvp, struct exportinfo *, exi,
2761 2767 RENAME3args *, args);
2762 2768
2763 2769 if (fvp == NULL) {
2764 2770 error = ESTALE;
2765 2771 goto err;
2766 2772 }
2767 2773
2768 2774 if (is_system_labeled()) {
2769 2775 clabel = req->rq_label;
2770 2776 ASSERT(clabel != NULL);
2771 2777 DTRACE_PROBE2(tx__rfs3__log__info__oprename__clabel, char *,
2772 2778 "got client label from request(1)", struct svc_req *, req);
2773 2779
2774 2780 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2775 2781 if (!do_rfs_label_check(clabel, fvp, EQUALITY_CHECK,
2776 2782 exi)) {
2777 2783 resp->status = NFS3ERR_ACCES;
2778 2784 goto err1;
2779 2785 }
2780 2786 }
2781 2787 }
2782 2788
2783 2789 fbva.va_mask = AT_ALL;
2784 2790 fbvap = VOP_GETATTR(fvp, &fbva, 0, cr, NULL) ? NULL : &fbva;
2785 2791 favap = fbvap;
2786 2792
2787 2793 fh3 = &args->to.dir;
2788 2794 to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
2789 2795 if (to_exi == NULL) {
2790 2796 resp->status = NFS3ERR_ACCES;
2791 2797 goto err1;
2792 2798 }
2793 2799 exi_rele(to_exi);
2794 2800
2795 2801 if (to_exi != exi) {
2796 2802 resp->status = NFS3ERR_XDEV;
2797 2803 goto err1;
2798 2804 }
2799 2805
2800 2806 tvp = nfs3_fhtovp(&args->to.dir, exi);
2801 2807 if (tvp == NULL) {
2802 2808 error = ESTALE;
2803 2809 goto err;
2804 2810 }
2805 2811
2806 2812 tbva.va_mask = AT_ALL;
2807 2813 tbvap = VOP_GETATTR(tvp, &tbva, 0, cr, NULL) ? NULL : &tbva;
2808 2814 tavap = tbvap;
2809 2815
2810 2816 if (fvp->v_type != VDIR || tvp->v_type != VDIR) {
2811 2817 resp->status = NFS3ERR_NOTDIR;
2812 2818 goto err1;
2813 2819 }
2814 2820
2815 2821 if (args->from.name == nfs3nametoolong ||
2816 2822 args->to.name == nfs3nametoolong) {
2817 2823 resp->status = NFS3ERR_NAMETOOLONG;
2818 2824 goto err1;
2819 2825 }
2820 2826 if (args->from.name == NULL || *(args->from.name) == '\0' ||
2821 2827 args->to.name == NULL || *(args->to.name) == '\0') {
2822 2828 resp->status = NFS3ERR_ACCES;
2823 2829 goto err1;
2824 2830 }
2825 2831
2826 2832 if (rdonly(ro, tvp)) {
2827 2833 resp->status = NFS3ERR_ROFS;
2828 2834 goto err1;
2829 2835 }
2830 2836
2831 2837 if (is_system_labeled()) {
2832 2838 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2833 2839 if (!do_rfs_label_check(clabel, tvp, EQUALITY_CHECK,
2834 2840 exi)) {
2835 2841 resp->status = NFS3ERR_ACCES;
2836 2842 goto err1;
2837 2843 }
2838 2844 }
2839 2845 }
2840 2846
2841 2847 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2842 2848 name = nfscmd_convname(ca, exi, args->from.name,
2843 2849 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2844 2850
2845 2851 if (name == NULL) {
2846 2852 resp->status = NFS3ERR_INVAL;
2847 2853 goto err1;
2848 2854 }
2849 2855
2850 2856 toname = nfscmd_convname(ca, exi, args->to.name,
2851 2857 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2852 2858
2853 2859 if (toname == NULL) {
2854 2860 resp->status = NFS3ERR_INVAL;
2855 2861 goto err1;
2856 2862 }
2857 2863
2858 2864 /*
2859 2865 * Check for a conflict with a non-blocking mandatory share
2860 2866 * reservation or V4 delegations.
2861 2867 */
2862 2868 error = VOP_LOOKUP(fvp, name, &srcvp, NULL, 0,
2863 2869 NULL, cr, NULL, NULL, NULL);
2864 2870 if (error != 0)
2865 2871 goto err;
2866 2872
2867 2873 /*
2868 2874 * If we rename a delegated file we should recall the
2869 2875 * delegation, since future opens should fail or would
2870 2876 * refer to a new file.
2871 2877 */
2872 2878 if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
2873 2879 resp->status = NFS3ERR_JUKEBOX;
2874 2880 goto err1;
2875 2881 }
2876 2882
2877 2883 /*
2878 2884 * Check for renaming over a delegated file. Check nfs4_deleg_policy
2879 2885 * first to avoid VOP_LOOKUP if possible.
2880 2886 */
2881 2887 if (nfs4_get_deleg_policy() != SRV_NEVER_DELEGATE &&
2882 2888 VOP_LOOKUP(tvp, toname, &targvp, NULL, 0, NULL, cr,
2883 2889 NULL, NULL, NULL) == 0) {
2884 2890
2885 2891 if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2886 2892 VN_RELE(targvp);
2887 2893 resp->status = NFS3ERR_JUKEBOX;
2888 2894 goto err1;
2889 2895 }
2890 2896 VN_RELE(targvp);
2891 2897 }
2892 2898
2893 2899 if (!nbl_need_check(srcvp)) {
2894 2900 error = VOP_RENAME(fvp, name, tvp, toname, cr, NULL, 0);
2895 2901 } else {
2896 2902 nbl_start_crit(srcvp, RW_READER);
2897 2903 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL))
2898 2904 error = EACCES;
2899 2905 else
2900 2906 error = VOP_RENAME(fvp, name, tvp, toname, cr, NULL, 0);
2901 2907 nbl_end_crit(srcvp);
2902 2908 }
2903 2909 if (error == 0)
2904 2910 vn_renamepath(tvp, srcvp, args->to.name,
2905 2911 strlen(args->to.name));
2906 2912 VN_RELE(srcvp);
2907 2913 srcvp = NULL;
2908 2914
2909 2915 fava.va_mask = AT_ALL;
2910 2916 favap = VOP_GETATTR(fvp, &fava, 0, cr, NULL) ? NULL : &fava;
2911 2917 tava.va_mask = AT_ALL;
2912 2918 tavap = VOP_GETATTR(tvp, &tava, 0, cr, NULL) ? NULL : &tava;
2913 2919
2914 2920 /*
2915 2921 * Force modified data and metadata out to stable storage.
2916 2922 */
2917 2923 (void) VOP_FSYNC(fvp, 0, cr, NULL);
2918 2924 (void) VOP_FSYNC(tvp, 0, cr, NULL);
2919 2925
2920 2926 if (error)
2921 2927 goto err;
2922 2928
2923 2929 resp->status = NFS3_OK;
2924 2930 vattr_to_wcc_data(fbvap, favap, &resp->resok.fromdir_wcc);
2925 2931 vattr_to_wcc_data(tbvap, tavap, &resp->resok.todir_wcc);
2926 2932 goto out;
2927 2933
2928 2934 err:
2929 2935 if (curthread->t_flag & T_WOULDBLOCK) {
2930 2936 curthread->t_flag &= ~T_WOULDBLOCK;
2931 2937 resp->status = NFS3ERR_JUKEBOX;
2932 2938 } else {
2933 2939 resp->status = puterrno3(error);
2934 2940 }
2935 2941 err1:
2936 2942 vattr_to_wcc_data(fbvap, favap, &resp->resfail.fromdir_wcc);
2937 2943 vattr_to_wcc_data(tbvap, tavap, &resp->resfail.todir_wcc);
2938 2944
2939 2945 out:
2940 2946 if (name != NULL && name != args->from.name)
2941 2947 kmem_free(name, MAXPATHLEN + 1);
2942 2948 if (toname != NULL && toname != args->to.name)
2943 2949 kmem_free(toname, MAXPATHLEN + 1);
2944 2950
2945 2951 DTRACE_NFSV3_5(op__rename__done, struct svc_req *, req,
2946 2952 cred_t *, cr, vnode_t *, fvp, struct exportinfo *, exi,
2947 2953 RENAME3res *, resp);
2948 2954 if (fvp != NULL)
2949 2955 VN_RELE(fvp);
2950 2956 if (tvp != NULL)
2951 2957 VN_RELE(tvp);
2952 2958 }
2953 2959
2954 2960 void *
2955 2961 rfs3_rename_getfh(RENAME3args *args)
2956 2962 {
2957 2963
2958 2964 return (&args->from.dir);
2959 2965 }
2960 2966
2961 2967 void
2962 2968 rfs3_link(LINK3args *args, LINK3res *resp, struct exportinfo *exi,
2963 2969 struct svc_req *req, cred_t *cr, bool_t ro)
2964 2970 {
2965 2971 int error;
2966 2972 vnode_t *vp;
2967 2973 vnode_t *dvp;
2968 2974 struct vattr *vap;
2969 2975 struct vattr va;
2970 2976 struct vattr *bvap;
2971 2977 struct vattr bva;
2972 2978 struct vattr *avap;
2973 2979 struct vattr ava;
2974 2980 nfs_fh3 *fh3;
2975 2981 struct exportinfo *to_exi;
2976 2982 bslabel_t *clabel;
2977 2983 struct sockaddr *ca;
2978 2984 char *name = NULL;
2979 2985
2980 2986 vap = NULL;
2981 2987 bvap = NULL;
2982 2988 avap = NULL;
2983 2989 dvp = NULL;
2984 2990
2985 2991 vp = nfs3_fhtovp(&args->file, exi);
2986 2992
2987 2993 DTRACE_NFSV3_5(op__link__start, struct svc_req *, req,
2988 2994 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2989 2995 LINK3args *, args);
2990 2996
2991 2997 if (vp == NULL) {
2992 2998 error = ESTALE;
2993 2999 goto out;
2994 3000 }
2995 3001
2996 3002 va.va_mask = AT_ALL;
2997 3003 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2998 3004
2999 3005 fh3 = &args->link.dir;
3000 3006 to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
3001 3007 if (to_exi == NULL) {
3002 3008 resp->status = NFS3ERR_ACCES;
3003 3009 goto out1;
3004 3010 }
3005 3011 exi_rele(to_exi);
3006 3012
3007 3013 if (to_exi != exi) {
3008 3014 resp->status = NFS3ERR_XDEV;
3009 3015 goto out1;
3010 3016 }
3011 3017
3012 3018 if (is_system_labeled()) {
3013 3019 clabel = req->rq_label;
3014 3020
3015 3021 ASSERT(clabel != NULL);
3016 3022 DTRACE_PROBE2(tx__rfs3__log__info__oplink__clabel, char *,
3017 3023 "got client label from request(1)", struct svc_req *, req);
3018 3024
3019 3025 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3020 3026 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3021 3027 exi)) {
3022 3028 resp->status = NFS3ERR_ACCES;
3023 3029 goto out1;
3024 3030 }
3025 3031 }
3026 3032 }
3027 3033
3028 3034 dvp = nfs3_fhtovp(&args->link.dir, exi);
3029 3035 if (dvp == NULL) {
3030 3036 error = ESTALE;
3031 3037 goto out;
3032 3038 }
3033 3039
3034 3040 bva.va_mask = AT_ALL;
3035 3041 bvap = VOP_GETATTR(dvp, &bva, 0, cr, NULL) ? NULL : &bva;
3036 3042
3037 3043 if (dvp->v_type != VDIR) {
3038 3044 resp->status = NFS3ERR_NOTDIR;
3039 3045 goto out1;
3040 3046 }
3041 3047
3042 3048 if (args->link.name == nfs3nametoolong) {
3043 3049 resp->status = NFS3ERR_NAMETOOLONG;
3044 3050 goto out1;
3045 3051 }
3046 3052
3047 3053 if (args->link.name == NULL || *(args->link.name) == '\0') {
3048 3054 resp->status = NFS3ERR_ACCES;
3049 3055 goto out1;
3050 3056 }
3051 3057
3052 3058 if (rdonly(ro, dvp)) {
3053 3059 resp->status = NFS3ERR_ROFS;
3054 3060 goto out1;
3055 3061 }
3056 3062
3057 3063 if (is_system_labeled()) {
3058 3064 DTRACE_PROBE2(tx__rfs3__log__info__oplinkdir__clabel, char *,
3059 3065 "got client label from request(1)", struct svc_req *, req);
3060 3066
3061 3067 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3062 3068 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
3063 3069 exi)) {
3064 3070 resp->status = NFS3ERR_ACCES;
3065 3071 goto out1;
3066 3072 }
3067 3073 }
3068 3074 }
3069 3075
3070 3076 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3071 3077 name = nfscmd_convname(ca, exi, args->link.name,
3072 3078 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
3073 3079
3074 3080 if (name == NULL) {
3075 3081 resp->status = NFS3ERR_SERVERFAULT;
3076 3082 goto out1;
3077 3083 }
3078 3084
3079 3085 error = VOP_LINK(dvp, vp, name, cr, NULL, 0);
3080 3086
3081 3087 va.va_mask = AT_ALL;
3082 3088 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3083 3089 ava.va_mask = AT_ALL;
3084 3090 avap = VOP_GETATTR(dvp, &ava, 0, cr, NULL) ? NULL : &ava;
3085 3091
3086 3092 /*
3087 3093 * Force modified data and metadata out to stable storage.
3088 3094 */
3089 3095 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3090 3096 (void) VOP_FSYNC(dvp, 0, cr, NULL);
3091 3097
3092 3098 if (error)
3093 3099 goto out;
3094 3100
3095 3101 VN_RELE(dvp);
3096 3102
3097 3103 resp->status = NFS3_OK;
3098 3104 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
3099 3105 vattr_to_wcc_data(bvap, avap, &resp->resok.linkdir_wcc);
3100 3106
3101 3107 DTRACE_NFSV3_5(op__link__done, struct svc_req *, req,
3102 3108 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3103 3109 LINK3res *, resp);
3104 3110
3105 3111 VN_RELE(vp);
3106 3112
3107 3113 return;
3108 3114
3109 3115 out:
3110 3116 if (curthread->t_flag & T_WOULDBLOCK) {
3111 3117 curthread->t_flag &= ~T_WOULDBLOCK;
3112 3118 resp->status = NFS3ERR_JUKEBOX;
3113 3119 } else
3114 3120 resp->status = puterrno3(error);
3115 3121 out1:
3116 3122 if (name != NULL && name != args->link.name)
3117 3123 kmem_free(name, MAXPATHLEN + 1);
3118 3124
3119 3125 DTRACE_NFSV3_5(op__link__done, struct svc_req *, req,
3120 3126 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3121 3127 LINK3res *, resp);
3122 3128
3123 3129 if (vp != NULL)
3124 3130 VN_RELE(vp);
3125 3131 if (dvp != NULL)
3126 3132 VN_RELE(dvp);
3127 3133 vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
3128 3134 vattr_to_wcc_data(bvap, avap, &resp->resfail.linkdir_wcc);
3129 3135 }
3130 3136
3131 3137 void *
3132 3138 rfs3_link_getfh(LINK3args *args)
3133 3139 {
3134 3140
3135 3141 return (&args->file);
3136 3142 }
3137 3143
/*
 * NFS3_READDIR_MIN_COUNT(length) is the size, in bytes, of a READDIR
 * response that carries attribute information plus exactly one directory
 * entry whose name is `length' bytes long.  If the count in the incoming
 * request is larger than this, then we are guaranteed to be able to
 * return at least one directory entry if one exists, so there is no need
 * to check for NFS3ERR_TOOSMALL.  If the requested count is not larger
 * than this, the caller has to check whether that error must be returned.
 *
 * The fixed part is made up of the following XDR units:
 *
 *	status			- 1 * BYTES_PER_XDR_UNIT
 *	attr. flag		- 1 * BYTES_PER_XDR_UNIT
 *	cookie verifier		- 2 * BYTES_PER_XDR_UNIT
 *	attributes		- NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT
 *	boolean			- 1 * BYTES_PER_XDR_UNIT
 *	file id			- 2 * BYTES_PER_XDR_UNIT
 *	directory name length	- 1 * BYTES_PER_XDR_UNIT
 *	cookie			- 2 * BYTES_PER_XDR_UNIT
 *	end of list		- 1 * BYTES_PER_XDR_UNIT
 *	end of file		- 1 * BYTES_PER_XDR_UNIT
 *
 * to which the name length, rounded up to a whole XDR unit, is added.
 */

#define	NFS3_READDIR_MIN_COUNT(length)	\
	(roundup((length), BYTES_PER_XDR_UNIT) + \
	BYTES_PER_XDR_UNIT * \
	(1 + 1 + 2 + NFS3_SIZEOF_FATTR3 + 1 + 2 + 1 + 2 + 1 + 1))
3166 3172
3167 3173 /* ARGSUSED */
3168 3174 void
3169 3175 rfs3_readdir(READDIR3args *args, READDIR3res *resp, struct exportinfo *exi,
3170 3176 struct svc_req *req, cred_t *cr, bool_t ro)
3171 3177 {
3172 3178 int error;
3173 3179 vnode_t *vp;
3174 3180 struct vattr *vap;
3175 3181 struct vattr va;
3176 3182 struct iovec iov;
3177 3183 struct uio uio;
3178 3184 char *data;
3179 3185 int iseof;
3180 3186 int bufsize;
3181 3187 int namlen;
3182 3188 uint_t count;
3183 3189 struct sockaddr *ca;
3184 3190
3185 3191 vap = NULL;
3186 3192
3187 3193 vp = nfs3_fhtovp(&args->dir, exi);
3188 3194
3189 3195 DTRACE_NFSV3_5(op__readdir__start, struct svc_req *, req,
3190 3196 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3191 3197 READDIR3args *, args);
3192 3198
3193 3199 if (vp == NULL) {
3194 3200 error = ESTALE;
3195 3201 goto out;
3196 3202 }
3197 3203
3198 3204 if (is_system_labeled()) {
3199 3205 bslabel_t *clabel = req->rq_label;
3200 3206
3201 3207 ASSERT(clabel != NULL);
3202 3208 DTRACE_PROBE2(tx__rfs3__log__info__opreaddir__clabel, char *,
3203 3209 "got client label from request(1)", struct svc_req *, req);
3204 3210
3205 3211 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3206 3212 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3207 3213 exi)) {
3208 3214 resp->status = NFS3ERR_ACCES;
3209 3215 goto out1;
3210 3216 }
3211 3217 }
3212 3218 }
3213 3219
3214 3220 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3215 3221
3216 3222 va.va_mask = AT_ALL;
3217 3223 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3218 3224
3219 3225 if (vp->v_type != VDIR) {
3220 3226 resp->status = NFS3ERR_NOTDIR;
3221 3227 goto out1;
3222 3228 }
3223 3229
3224 3230 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
3225 3231 if (error)
3226 3232 goto out;
3227 3233
3228 3234 /*
3229 3235 * Now don't allow arbitrary count to alloc;
3230 3236 * allow the maximum not to exceed rfs3_tsize()
3231 3237 */
3232 3238 if (args->count > rfs3_tsize(req))
3233 3239 args->count = rfs3_tsize(req);
3234 3240
3235 3241 /*
3236 3242 * Make sure that there is room to read at least one entry
3237 3243 * if any are available.
3238 3244 */
3239 3245 if (args->count < DIRENT64_RECLEN(MAXNAMELEN))
3240 3246 count = DIRENT64_RECLEN(MAXNAMELEN);
3241 3247 else
3242 3248 count = args->count;
3243 3249
3244 3250 data = kmem_alloc(count, KM_SLEEP);
3245 3251
3246 3252 iov.iov_base = data;
3247 3253 iov.iov_len = count;
3248 3254 uio.uio_iov = &iov;
3249 3255 uio.uio_iovcnt = 1;
3250 3256 uio.uio_segflg = UIO_SYSSPACE;
3251 3257 uio.uio_extflg = UIO_COPY_CACHED;
3252 3258 uio.uio_loffset = (offset_t)args->cookie;
3253 3259 uio.uio_resid = count;
3254 3260
3255 3261 error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
3256 3262
3257 3263 va.va_mask = AT_ALL;
3258 3264 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3259 3265
3260 3266 if (error) {
3261 3267 kmem_free(data, count);
3262 3268 goto out;
3263 3269 }
3264 3270
3265 3271 /*
3266 3272 * If the count was not large enough to be able to guarantee
3267 3273 * to be able to return at least one entry, then need to
3268 3274 * check to see if NFS3ERR_TOOSMALL should be returned.
3269 3275 */
3270 3276 if (args->count < NFS3_READDIR_MIN_COUNT(MAXNAMELEN)) {
3271 3277 /*
3272 3278 * bufsize is used to keep track of the size of the response.
3273 3279 * It is primed with:
3274 3280 * 1 for the status +
3275 3281 * 1 for the dir_attributes.attributes boolean +
3276 3282 * 2 for the cookie verifier
3277 3283 * all times BYTES_PER_XDR_UNIT to convert from XDR units
3278 3284 * to bytes. If there are directory attributes to be
3279 3285 * returned, then:
3280 3286 * NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
3281 3287 * time BYTES_PER_XDR_UNIT is added to account for them.
3282 3288 */
3283 3289 bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
3284 3290 if (vap != NULL)
3285 3291 bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3286 3292 /*
3287 3293 * An entry is composed of:
3288 3294 * 1 for the true/false list indicator +
3289 3295 * 2 for the fileid +
3290 3296 * 1 for the length of the name +
3291 3297 * 2 for the cookie +
3292 3298 * all times BYTES_PER_XDR_UNIT to convert from
3293 3299 * XDR units to bytes, plus the length of the name
3294 3300 * rounded up to the nearest BYTES_PER_XDR_UNIT.
3295 3301 */
3296 3302 if (count != uio.uio_resid) {
3297 3303 namlen = strlen(((struct dirent64 *)data)->d_name);
3298 3304 bufsize += (1 + 2 + 1 + 2) * BYTES_PER_XDR_UNIT +
3299 3305 roundup(namlen, BYTES_PER_XDR_UNIT);
3300 3306 }
3301 3307 /*
3302 3308 * We need to check to see if the number of bytes left
3303 3309 * to go into the buffer will actually fit into the
3304 3310 * buffer. This is calculated as the size of this
3305 3311 * entry plus:
3306 3312 * 1 for the true/false list indicator +
3307 3313 * 1 for the eof indicator
3308 3314 * times BYTES_PER_XDR_UNIT to convert from from
3309 3315 * XDR units to bytes.
3310 3316 */
3311 3317 bufsize += (1 + 1) * BYTES_PER_XDR_UNIT;
3312 3318 if (bufsize > args->count) {
3313 3319 kmem_free(data, count);
3314 3320 resp->status = NFS3ERR_TOOSMALL;
3315 3321 goto out1;
3316 3322 }
3317 3323 }
3318 3324
3319 3325 /*
3320 3326 * Have a valid readir buffer for the native character
3321 3327 * set. Need to check if a conversion is necessary and
3322 3328 * potentially rewrite the whole buffer. Note that if the
3323 3329 * conversion expands names enough, the structure may not
3324 3330 * fit. In this case, we need to drop entries until if fits
3325 3331 * and patch the counts in order that the next readdir will
3326 3332 * get the correct entries.
3327 3333 */
3328 3334 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3329 3335 data = nfscmd_convdirent(ca, exi, data, count, &resp->status);
3330 3336
3331 3337
3332 3338 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3333 3339
3334 3340 #if 0 /* notyet */
3335 3341 /*
3336 3342 * Don't do this. It causes local disk writes when just
3337 3343 * reading the file and the overhead is deemed larger
3338 3344 * than the benefit.
3339 3345 */
3340 3346 /*
3341 3347 * Force modified metadata out to stable storage.
3342 3348 */
3343 3349 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3344 3350 #endif
3345 3351
3346 3352 resp->status = NFS3_OK;
3347 3353 vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3348 3354 resp->resok.cookieverf = 0;
3349 3355 resp->resok.reply.entries = (entry3 *)data;
3350 3356 resp->resok.reply.eof = iseof;
3351 3357 resp->resok.size = count - uio.uio_resid;
3352 3358 resp->resok.count = args->count;
3353 3359 resp->resok.freecount = count;
3354 3360
3355 3361 DTRACE_NFSV3_5(op__readdir__done, struct svc_req *, req,
3356 3362 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3357 3363 READDIR3res *, resp);
3358 3364
3359 3365 VN_RELE(vp);
3360 3366
3361 3367 return;
3362 3368
3363 3369 out:
3364 3370 if (curthread->t_flag & T_WOULDBLOCK) {
3365 3371 curthread->t_flag &= ~T_WOULDBLOCK;
3366 3372 resp->status = NFS3ERR_JUKEBOX;
3367 3373 } else
3368 3374 resp->status = puterrno3(error);
3369 3375 out1:
3370 3376 vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3371 3377
3372 3378 DTRACE_NFSV3_5(op__readdir__done, struct svc_req *, req,
3373 3379 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3374 3380 READDIR3res *, resp);
3375 3381
3376 3382 if (vp != NULL) {
3377 3383 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3378 3384 VN_RELE(vp);
3379 3385 }
3380 3386 vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3381 3387 }
3382 3388
3383 3389 void *
3384 3390 rfs3_readdir_getfh(READDIR3args *args)
3385 3391 {
3386 3392
3387 3393 return (&args->dir);
3388 3394 }
3389 3395
3390 3396 void
3391 3397 rfs3_readdir_free(READDIR3res *resp)
3392 3398 {
3393 3399
3394 3400 if (resp->status == NFS3_OK)
3395 3401 kmem_free(resp->resok.reply.entries, resp->resok.freecount);
3396 3402 }
3397 3403
/*
 * Step from one dirent64 record to the one that follows it in the same
 * buffer by advancing d_reclen bytes.  Any earlier definition of nextdp
 * is discarded first.
 */
#ifdef nextdp
#undef nextdp
#endif
#define	nextdp(ent)	\
	((struct dirent64 *)((char *)(ent) + (ent)->d_reclen))
3402 3408
/*
 * This macro computes the size of a response which contains
 * one directory entry including the attributes as well as file handle.
 * If the incoming request is larger than this, then we are guaranteed to be
 * able to return at least one more directory entry if one exists.
 *
 * NFS3_READDIRPLUS_ENTRY is made up of the following:
 *
 *	boolean			- 1 * BYTES_PER_XDR_UNIT
 *	file id			- 2 * BYTES_PER_XDR_UNIT
 *	directory name length	- 1 * BYTES_PER_XDR_UNIT
 *	cookie			- 2 * BYTES_PER_XDR_UNIT
 *	attribute flag		- 1 * BYTES_PER_XDR_UNIT
 *	attributes		- NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT
 *	status byte for file handle - 1 * BYTES_PER_XDR_UNIT
 *	length of a file handle	- 1 * BYTES_PER_XDR_UNIT
 *	Maximum length of a file handle (NFS3_MAXFHSIZE)
 *	name length of the entry rounded up to a whole XDR unit
 *
 * The argument is parenthesized so that an expression may be passed
 * safely, matching NFS3_READDIR_MIN_COUNT().
 */
#define	NFS3_READDIRPLUS_ENTRY(namelen)	\
	((1 + 2 + 1 + 2 + 1 + NFS3_SIZEOF_FATTR3 + 1 + 1) * \
	BYTES_PER_XDR_UNIT + \
	NFS3_MAXFHSIZE + roundup((namelen), BYTES_PER_XDR_UNIT))
3426 3432
/*
 * Initial value of rd_unit in rfs3_readdirplus(): the number of bytes
 * asked of each VOP_READDIR() call there, before it is clamped to the
 * space left in the dirent buffer.
 */
static int rfs3_readdir_unit = MAXBSIZE;
3428 3434
3429 3435 /* ARGSUSED */
3430 3436 void
3431 3437 rfs3_readdirplus(READDIRPLUS3args *args, READDIRPLUS3res *resp,
3432 3438 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
3433 3439 {
3434 3440 int error;
3435 3441 vnode_t *vp;
3436 3442 struct vattr *vap;
3437 3443 struct vattr va;
3438 3444 struct iovec iov;
3439 3445 struct uio uio;
3440 3446 char *data;
3441 3447 int iseof;
3442 3448 struct dirent64 *dp;
3443 3449 vnode_t *nvp;
3444 3450 struct vattr *nvap;
3445 3451 struct vattr nva;
3446 3452 entryplus3_info *infop = NULL;
3447 3453 int size = 0;
3448 3454 int nents = 0;
3449 3455 int bufsize = 0;
3450 3456 int entrysize = 0;
3451 3457 int tofit = 0;
3452 3458 int rd_unit = rfs3_readdir_unit;
3453 3459 int prev_len;
3454 3460 int space_left;
3455 3461 int i;
3456 3462 uint_t *namlen = NULL;
3457 3463 char *ndata = NULL;
3458 3464 struct sockaddr *ca;
3459 3465 size_t ret;
3460 3466
3461 3467 vap = NULL;
3462 3468
3463 3469 vp = nfs3_fhtovp(&args->dir, exi);
3464 3470
3465 3471 DTRACE_NFSV3_5(op__readdirplus__start, struct svc_req *, req,
3466 3472 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3467 3473 READDIRPLUS3args *, args);
3468 3474
3469 3475 if (vp == NULL) {
3470 3476 error = ESTALE;
3471 3477 goto out;
3472 3478 }
3473 3479
3474 3480 if (is_system_labeled()) {
3475 3481 bslabel_t *clabel = req->rq_label;
3476 3482
3477 3483 ASSERT(clabel != NULL);
3478 3484 DTRACE_PROBE2(tx__rfs3__log__info__opreaddirplus__clabel,
3479 3485 char *, "got client label from request(1)",
3480 3486 struct svc_req *, req);
3481 3487
3482 3488 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3483 3489 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3484 3490 exi)) {
3485 3491 resp->status = NFS3ERR_ACCES;
3486 3492 goto out1;
3487 3493 }
3488 3494 }
3489 3495 }
3490 3496
3491 3497 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3492 3498
3493 3499 va.va_mask = AT_ALL;
3494 3500 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3495 3501
3496 3502 if (vp->v_type != VDIR) {
3497 3503 error = ENOTDIR;
3498 3504 goto out;
3499 3505 }
3500 3506
3501 3507 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
3502 3508 if (error)
3503 3509 goto out;
3504 3510
3505 3511 /*
3506 3512 * Don't allow arbitrary counts for allocation
3507 3513 */
3508 3514 if (args->maxcount > rfs3_tsize(req))
3509 3515 args->maxcount = rfs3_tsize(req);
3510 3516
3511 3517 /*
3512 3518 * Make sure that there is room to read at least one entry
3513 3519 * if any are available
3514 3520 */
3515 3521 args->dircount = MIN(args->dircount, args->maxcount);
3516 3522
3517 3523 if (args->dircount < DIRENT64_RECLEN(MAXNAMELEN))
3518 3524 args->dircount = DIRENT64_RECLEN(MAXNAMELEN);
3519 3525
3520 3526 /*
3521 3527 * This allocation relies on a minimum directory entry
3522 3528 * being roughly 24 bytes. Therefore, the namlen array
3523 3529 * will have enough space based on the maximum number of
3524 3530 * entries to read.
3525 3531 */
3526 3532 namlen = kmem_alloc(args->dircount, KM_SLEEP);
3527 3533
3528 3534 space_left = args->dircount;
3529 3535 data = kmem_alloc(args->dircount, KM_SLEEP);
3530 3536 dp = (struct dirent64 *)data;
3531 3537 uio.uio_iov = &iov;
3532 3538 uio.uio_iovcnt = 1;
3533 3539 uio.uio_segflg = UIO_SYSSPACE;
3534 3540 uio.uio_extflg = UIO_COPY_CACHED;
3535 3541 uio.uio_loffset = (offset_t)args->cookie;
3536 3542
3537 3543 /*
3538 3544 * bufsize is used to keep track of the size of the response as we
3539 3545 * get post op attributes and filehandles for each entry. This is
3540 3546 * an optimization as the server may have read more entries than will
3541 3547 * fit in the buffer specified by maxcount. We stop calculating
3542 3548 * post op attributes and filehandles once we have exceeded maxcount.
3543 3549 * This will minimize the effect of truncation.
3544 3550 *
3545 3551 * It is primed with:
3546 3552 * 1 for the status +
3547 3553 * 1 for the dir_attributes.attributes boolean +
3548 3554 * 2 for the cookie verifier
3549 3555 * all times BYTES_PER_XDR_UNIT to convert from XDR units
3550 3556 * to bytes. If there are directory attributes to be
3551 3557 * returned, then:
3552 3558 * NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
3553 3559 * time BYTES_PER_XDR_UNIT is added to account for them.
3554 3560 */
3555 3561 bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
3556 3562 if (vap != NULL)
3557 3563 bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3558 3564
3559 3565 getmoredents:
3560 3566 /*
3561 3567 * Here we make a check so that our read unit is not larger than
3562 3568 * the space left in the buffer.
3563 3569 */
3564 3570 rd_unit = MIN(rd_unit, space_left);
3565 3571 iov.iov_base = (char *)dp;
3566 3572 iov.iov_len = rd_unit;
3567 3573 uio.uio_resid = rd_unit;
3568 3574 prev_len = rd_unit;
3569 3575
3570 3576 error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
3571 3577
3572 3578 if (error) {
3573 3579 kmem_free(data, args->dircount);
3574 3580 goto out;
3575 3581 }
3576 3582
3577 3583 if (uio.uio_resid == prev_len && !iseof) {
3578 3584 if (nents == 0) {
3579 3585 kmem_free(data, args->dircount);
3580 3586 resp->status = NFS3ERR_TOOSMALL;
3581 3587 goto out1;
3582 3588 }
3583 3589
3584 3590 /*
3585 3591 * We could not get any more entries, so get the attributes
3586 3592 * and filehandle for the entries already obtained.
3587 3593 */
3588 3594 goto good;
3589 3595 }
3590 3596
3591 3597 /*
3592 3598 * We estimate the size of the response by assuming the
3593 3599 * entry exists and attributes and filehandle are also valid
3594 3600 */
3595 3601 for (size = prev_len - uio.uio_resid;
3596 3602 size > 0;
3597 3603 size -= dp->d_reclen, dp = nextdp(dp)) {
3598 3604
3599 3605 if (dp->d_ino == 0) {
3600 3606 nents++;
3601 3607 continue;
3602 3608 }
3603 3609
3604 3610 namlen[nents] = strlen(dp->d_name);
3605 3611 entrysize = NFS3_READDIRPLUS_ENTRY(namlen[nents]);
3606 3612
3607 3613 /*
3608 3614 * We need to check to see if the number of bytes left
3609 3615 * to go into the buffer will actually fit into the
3610 3616 * buffer. This is calculated as the size of this
3611 3617 * entry plus:
3612 3618 * 1 for the true/false list indicator +
3613 3619 * 1 for the eof indicator
3614 3620 * times BYTES_PER_XDR_UNIT to convert from XDR units
3615 3621 * to bytes.
3616 3622 *
3617 3623 * Also check the dircount limit against the first entry read
3618 3624 *
3619 3625 */
3620 3626 tofit = entrysize + (1 + 1) * BYTES_PER_XDR_UNIT;
3621 3627 if (bufsize + tofit > args->maxcount) {
3622 3628 /*
3623 3629 * We make a check here to see if this was the
3624 3630 * first entry being measured. If so, then maxcount
3625 3631 * was too small to begin with and so we need to
3626 3632 * return with NFS3ERR_TOOSMALL.
3627 3633 */
3628 3634 if (nents == 0) {
3629 3635 kmem_free(data, args->dircount);
3630 3636 resp->status = NFS3ERR_TOOSMALL;
3631 3637 goto out1;
3632 3638 }
3633 3639 iseof = FALSE;
3634 3640 goto good;
3635 3641 }
3636 3642 bufsize += entrysize;
3637 3643 nents++;
3638 3644 }
3639 3645
3640 3646 /*
3641 3647 * If there is enough room to fit at least 1 more entry including
3642 3648 * post op attributes and filehandle in the buffer AND that we haven't
3643 3649 * exceeded dircount then go back and get some more.
3644 3650 */
3645 3651 if (!iseof &&
3646 3652 (args->maxcount - bufsize) >= NFS3_READDIRPLUS_ENTRY(MAXNAMELEN)) {
3647 3653 space_left -= (prev_len - uio.uio_resid);
3648 3654 if (space_left >= DIRENT64_RECLEN(MAXNAMELEN))
3649 3655 goto getmoredents;
3650 3656
3651 3657 /* else, fall through */
3652 3658 }
3653 3659 good:
3654 3660 va.va_mask = AT_ALL;
3655 3661 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3656 3662
3657 3663 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3658 3664
3659 3665 infop = kmem_alloc(nents * sizeof (struct entryplus3_info), KM_SLEEP);
3660 3666 resp->resok.infop = infop;
3661 3667
3662 3668 dp = (struct dirent64 *)data;
3663 3669 for (i = 0; i < nents; i++) {
3664 3670
3665 3671 if (dp->d_ino == 0) {
3666 3672 infop[i].attr.attributes = FALSE;
3667 3673 infop[i].fh.handle_follows = FALSE;
3668 3674 dp = nextdp(dp);
3669 3675 continue;
3670 3676 }
3671 3677
3672 3678 infop[i].namelen = namlen[i];
3673 3679
3674 3680 error = VOP_LOOKUP(vp, dp->d_name, &nvp, NULL, 0, NULL, cr,
3675 3681 NULL, NULL, NULL);
3676 3682 if (error) {
3677 3683 infop[i].attr.attributes = FALSE;
3678 3684 infop[i].fh.handle_follows = FALSE;
3679 3685 dp = nextdp(dp);
3680 3686 continue;
3681 3687 }
3682 3688
3683 3689 nva.va_mask = AT_ALL;
3684 3690 nvap = rfs4_delegated_getattr(nvp, &nva, 0, cr) ? NULL : &nva;
3685 3691
3686 3692 /* Lie about the object type for a referral */
3687 3693 if (vn_is_nfs_reparse(nvp, cr))
3688 3694 nvap->va_type = VLNK;
3689 3695
3690 3696 if (vn_ismntpt(nvp)) {
3691 3697 infop[i].attr.attributes = FALSE;
3692 3698 infop[i].fh.handle_follows = FALSE;
3693 3699 } else {
3694 3700 vattr_to_post_op_attr(nvap, &infop[i].attr);
3695 3701
3696 3702 error = makefh3(&infop[i].fh.handle, nvp, exi);
3697 3703 if (!error)
3698 3704 infop[i].fh.handle_follows = TRUE;
3699 3705 else
3700 3706 infop[i].fh.handle_follows = FALSE;
3701 3707 }
3702 3708
3703 3709 VN_RELE(nvp);
3704 3710 dp = nextdp(dp);
3705 3711 }
3706 3712
3707 3713 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3708 3714 ret = nfscmd_convdirplus(ca, exi, data, nents, args->dircount, &ndata);
3709 3715 if (ndata == NULL)
3710 3716 ndata = data;
3711 3717
3712 3718 if (ret > 0) {
3713 3719 /*
3714 3720 * We had to drop one or more entries in order to fit
3715 3721 * during the character conversion. We need to patch
3716 3722 * up the size and eof info.
3717 3723 */
3718 3724 if (iseof)
3719 3725 iseof = FALSE;
3720 3726
3721 3727 ret = nfscmd_dropped_entrysize((struct dirent64 *)data,
3722 3728 nents, ret);
3723 3729 }
3724 3730
3725 3731
3726 3732 #if 0 /* notyet */
3727 3733 /*
3728 3734 * Don't do this. It causes local disk writes when just
3729 3735 * reading the file and the overhead is deemed larger
3730 3736 * than the benefit.
3731 3737 */
3732 3738 /*
3733 3739 * Force modified metadata out to stable storage.
3734 3740 */
3735 3741 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3736 3742 #endif
3737 3743
3738 3744 kmem_free(namlen, args->dircount);
3739 3745
3740 3746 resp->status = NFS3_OK;
3741 3747 vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3742 3748 resp->resok.cookieverf = 0;
3743 3749 resp->resok.reply.entries = (entryplus3 *)ndata;
3744 3750 resp->resok.reply.eof = iseof;
3745 3751 resp->resok.size = nents;
3746 3752 resp->resok.count = args->dircount - ret;
3747 3753 resp->resok.maxcount = args->maxcount;
3748 3754
3749 3755 DTRACE_NFSV3_5(op__readdirplus__done, struct svc_req *, req,
3750 3756 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3751 3757 READDIRPLUS3res *, resp);
3752 3758
3753 3759 VN_RELE(vp);
3754 3760
3755 3761 return;
3756 3762
3757 3763 out:
3758 3764 if (curthread->t_flag & T_WOULDBLOCK) {
3759 3765 curthread->t_flag &= ~T_WOULDBLOCK;
3760 3766 resp->status = NFS3ERR_JUKEBOX;
3761 3767 } else {
3762 3768 resp->status = puterrno3(error);
3763 3769 }
3764 3770 out1:
3765 3771 vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3766 3772
3767 3773 DTRACE_NFSV3_5(op__readdirplus__done, struct svc_req *, req,
3768 3774 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3769 3775 READDIRPLUS3res *, resp);
3770 3776
3771 3777 if (vp != NULL) {
3772 3778 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3773 3779 VN_RELE(vp);
3774 3780 }
3775 3781
3776 3782 if (namlen != NULL)
3777 3783 kmem_free(namlen, args->dircount);
3778 3784
3779 3785 vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3780 3786 }
3781 3787
3782 3788 void *
3783 3789 rfs3_readdirplus_getfh(READDIRPLUS3args *args)
3784 3790 {
3785 3791
3786 3792 return (&args->dir);
3787 3793 }
3788 3794
3789 3795 void
3790 3796 rfs3_readdirplus_free(READDIRPLUS3res *resp)
3791 3797 {
3792 3798
3793 3799 if (resp->status == NFS3_OK) {
3794 3800 kmem_free(resp->resok.reply.entries, resp->resok.count);
3795 3801 kmem_free(resp->resok.infop,
3796 3802 resp->resok.size * sizeof (struct entryplus3_info));
3797 3803 }
3798 3804 }
3799 3805
3800 3806 /* ARGSUSED */
3801 3807 void
3802 3808 rfs3_fsstat(FSSTAT3args *args, FSSTAT3res *resp, struct exportinfo *exi,
3803 3809 struct svc_req *req, cred_t *cr, bool_t ro)
3804 3810 {
3805 3811 int error;
3806 3812 vnode_t *vp;
3807 3813 struct vattr *vap;
3808 3814 struct vattr va;
3809 3815 struct statvfs64 sb;
3810 3816
3811 3817 vap = NULL;
3812 3818
3813 3819 vp = nfs3_fhtovp(&args->fsroot, exi);
3814 3820
3815 3821 DTRACE_NFSV3_5(op__fsstat__start, struct svc_req *, req,
3816 3822 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3817 3823 FSSTAT3args *, args);
3818 3824
3819 3825 if (vp == NULL) {
3820 3826 error = ESTALE;
3821 3827 goto out;
3822 3828 }
3823 3829
3824 3830 if (is_system_labeled()) {
3825 3831 bslabel_t *clabel = req->rq_label;
3826 3832
3827 3833 ASSERT(clabel != NULL);
3828 3834 DTRACE_PROBE2(tx__rfs3__log__info__opfsstat__clabel, char *,
3829 3835 "got client label from request(1)", struct svc_req *, req);
3830 3836
3831 3837 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3832 3838 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3833 3839 exi)) {
3834 3840 resp->status = NFS3ERR_ACCES;
3835 3841 goto out1;
3836 3842 }
3837 3843 }
3838 3844 }
3839 3845
3840 3846 error = VFS_STATVFS(vp->v_vfsp, &sb);
3841 3847
3842 3848 va.va_mask = AT_ALL;
3843 3849 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3844 3850
3845 3851 if (error)
3846 3852 goto out;
3847 3853
3848 3854 resp->status = NFS3_OK;
3849 3855 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3850 3856 if (sb.f_blocks != (fsblkcnt64_t)-1)
3851 3857 resp->resok.tbytes = (size3)sb.f_frsize * (size3)sb.f_blocks;
3852 3858 else
3853 3859 resp->resok.tbytes = (size3)sb.f_blocks;
3854 3860 if (sb.f_bfree != (fsblkcnt64_t)-1)
3855 3861 resp->resok.fbytes = (size3)sb.f_frsize * (size3)sb.f_bfree;
3856 3862 else
3857 3863 resp->resok.fbytes = (size3)sb.f_bfree;
3858 3864 if (sb.f_bavail != (fsblkcnt64_t)-1)
3859 3865 resp->resok.abytes = (size3)sb.f_frsize * (size3)sb.f_bavail;
3860 3866 else
3861 3867 resp->resok.abytes = (size3)sb.f_bavail;
3862 3868 resp->resok.tfiles = (size3)sb.f_files;
3863 3869 resp->resok.ffiles = (size3)sb.f_ffree;
3864 3870 resp->resok.afiles = (size3)sb.f_favail;
3865 3871 resp->resok.invarsec = 0;
3866 3872
3867 3873 DTRACE_NFSV3_5(op__fsstat__done, struct svc_req *, req,
3868 3874 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3869 3875 FSSTAT3res *, resp);
3870 3876 VN_RELE(vp);
3871 3877
3872 3878 return;
3873 3879
3874 3880 out:
3875 3881 if (curthread->t_flag & T_WOULDBLOCK) {
3876 3882 curthread->t_flag &= ~T_WOULDBLOCK;
3877 3883 resp->status = NFS3ERR_JUKEBOX;
3878 3884 } else
3879 3885 resp->status = puterrno3(error);
3880 3886 out1:
3881 3887 DTRACE_NFSV3_5(op__fsstat__done, struct svc_req *, req,
3882 3888 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3883 3889 FSSTAT3res *, resp);
3884 3890
3885 3891 if (vp != NULL)
3886 3892 VN_RELE(vp);
3887 3893 vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
3888 3894 }
3889 3895
3890 3896 void *
3891 3897 rfs3_fsstat_getfh(FSSTAT3args *args)
3892 3898 {
3893 3899
3894 3900 return (&args->fsroot);
3895 3901 }
3896 3902
3897 3903 /* ARGSUSED */
3898 3904 void
3899 3905 rfs3_fsinfo(FSINFO3args *args, FSINFO3res *resp, struct exportinfo *exi,
3900 3906 struct svc_req *req, cred_t *cr, bool_t ro)
3901 3907 {
3902 3908 vnode_t *vp;
3903 3909 struct vattr *vap;
3904 3910 struct vattr va;
3905 3911 uint32_t xfer_size;
3906 3912 ulong_t l = 0;
3907 3913 int error;
3908 3914
3909 3915 vp = nfs3_fhtovp(&args->fsroot, exi);
3910 3916
3911 3917 DTRACE_NFSV3_5(op__fsinfo__start, struct svc_req *, req,
3912 3918 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3913 3919 FSINFO3args *, args);
3914 3920
3915 3921 if (vp == NULL) {
3916 3922 if (curthread->t_flag & T_WOULDBLOCK) {
3917 3923 curthread->t_flag &= ~T_WOULDBLOCK;
3918 3924 resp->status = NFS3ERR_JUKEBOX;
3919 3925 } else
3920 3926 resp->status = NFS3ERR_STALE;
3921 3927 vattr_to_post_op_attr(NULL, &resp->resfail.obj_attributes);
3922 3928 goto out;
3923 3929 }
3924 3930
3925 3931 if (is_system_labeled()) {
3926 3932 bslabel_t *clabel = req->rq_label;
3927 3933
3928 3934 ASSERT(clabel != NULL);
3929 3935 DTRACE_PROBE2(tx__rfs3__log__info__opfsinfo__clabel, char *,
3930 3936 "got client label from request(1)", struct svc_req *, req);
3931 3937
3932 3938 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3933 3939 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3934 3940 exi)) {
3935 3941 resp->status = NFS3ERR_STALE;
3936 3942 vattr_to_post_op_attr(NULL,
3937 3943 &resp->resfail.obj_attributes);
3938 3944 goto out;
3939 3945 }
3940 3946 }
3941 3947 }
3942 3948
3943 3949 va.va_mask = AT_ALL;
3944 3950 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3945 3951
3946 3952 resp->status = NFS3_OK;
3947 3953 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3948 3954 xfer_size = rfs3_tsize(req);
3949 3955 resp->resok.rtmax = xfer_size;
3950 3956 resp->resok.rtpref = xfer_size;
3951 3957 resp->resok.rtmult = DEV_BSIZE;
3952 3958 resp->resok.wtmax = xfer_size;
3953 3959 resp->resok.wtpref = xfer_size;
3954 3960 resp->resok.wtmult = DEV_BSIZE;
3955 3961 resp->resok.dtpref = MAXBSIZE;
3956 3962
3957 3963 /*
3958 3964 * Large file spec: want maxfilesize based on limit of
3959 3965 * underlying filesystem. We can guess 2^31-1 if need be.
3960 3966 */
3961 3967 error = VOP_PATHCONF(vp, _PC_FILESIZEBITS, &l, cr, NULL);
3962 3968 if (error) {
3963 3969 resp->status = puterrno3(error);
3964 3970 goto out;
3965 3971 }
3966 3972
3967 3973 /*
3968 3974 * If the underlying file system does not support _PC_FILESIZEBITS,
3969 3975 * return a reasonable default. Note that error code on VOP_PATHCONF
3970 3976 * will be 0, even if the underlying file system does not support
3971 3977 * _PC_FILESIZEBITS.
3972 3978 */
3973 3979 if (l == (ulong_t)-1) {
3974 3980 resp->resok.maxfilesize = MAXOFF32_T;
3975 3981 } else {
3976 3982 if (l >= (sizeof (uint64_t) * 8))
3977 3983 resp->resok.maxfilesize = INT64_MAX;
3978 3984 else
3979 3985 resp->resok.maxfilesize = (1LL << (l-1)) - 1;
3980 3986 }
3981 3987
3982 3988 resp->resok.time_delta.seconds = 0;
3983 3989 resp->resok.time_delta.nseconds = 1000;
3984 3990 resp->resok.properties = FSF3_LINK | FSF3_SYMLINK |
3985 3991 FSF3_HOMOGENEOUS | FSF3_CANSETTIME;
3986 3992
3987 3993 DTRACE_NFSV3_5(op__fsinfo__done, struct svc_req *, req,
3988 3994 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3989 3995 FSINFO3res *, resp);
3990 3996
3991 3997 VN_RELE(vp);
3992 3998
3993 3999 return;
3994 4000
3995 4001 out:
3996 4002 DTRACE_NFSV3_5(op__fsinfo__done, struct svc_req *, req,
3997 4003 cred_t *, cr, vnode_t *, NULL, struct exportinfo *, exi,
3998 4004 FSINFO3res *, resp);
3999 4005 if (vp != NULL)
4000 4006 VN_RELE(vp);
4001 4007 }
4002 4008
4003 4009 void *
4004 4010 rfs3_fsinfo_getfh(FSINFO3args *args)
4005 4011 {
4006 4012 return (&args->fsroot);
4007 4013 }
4008 4014
4009 4015 /* ARGSUSED */
4010 4016 void
4011 4017 rfs3_pathconf(PATHCONF3args *args, PATHCONF3res *resp, struct exportinfo *exi,
4012 4018 struct svc_req *req, cred_t *cr, bool_t ro)
4013 4019 {
4014 4020 int error;
4015 4021 vnode_t *vp;
4016 4022 struct vattr *vap;
4017 4023 struct vattr va;
4018 4024 ulong_t val;
4019 4025
4020 4026 vap = NULL;
4021 4027
4022 4028 vp = nfs3_fhtovp(&args->object, exi);
4023 4029
4024 4030 DTRACE_NFSV3_5(op__pathconf__start, struct svc_req *, req,
4025 4031 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4026 4032 PATHCONF3args *, args);
4027 4033
4028 4034 if (vp == NULL) {
4029 4035 error = ESTALE;
4030 4036 goto out;
4031 4037 }
4032 4038
4033 4039 if (is_system_labeled()) {
4034 4040 bslabel_t *clabel = req->rq_label;
4035 4041
4036 4042 ASSERT(clabel != NULL);
4037 4043 DTRACE_PROBE2(tx__rfs3__log__info__oppathconf__clabel, char *,
4038 4044 "got client label from request(1)", struct svc_req *, req);
4039 4045
4040 4046 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4041 4047 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
4042 4048 exi)) {
4043 4049 resp->status = NFS3ERR_ACCES;
4044 4050 goto out1;
4045 4051 }
4046 4052 }
4047 4053 }
4048 4054
4049 4055 va.va_mask = AT_ALL;
4050 4056 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
4051 4057
4052 4058 error = VOP_PATHCONF(vp, _PC_LINK_MAX, &val, cr, NULL);
4053 4059 if (error)
4054 4060 goto out;
4055 4061 resp->resok.info.link_max = (uint32)val;
4056 4062
4057 4063 error = VOP_PATHCONF(vp, _PC_NAME_MAX, &val, cr, NULL);
4058 4064 if (error)
4059 4065 goto out;
4060 4066 resp->resok.info.name_max = (uint32)val;
4061 4067
4062 4068 error = VOP_PATHCONF(vp, _PC_NO_TRUNC, &val, cr, NULL);
4063 4069 if (error)
4064 4070 goto out;
4065 4071 if (val == 1)
4066 4072 resp->resok.info.no_trunc = TRUE;
4067 4073 else
4068 4074 resp->resok.info.no_trunc = FALSE;
4069 4075
4070 4076 error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &val, cr, NULL);
4071 4077 if (error)
4072 4078 goto out;
4073 4079 if (val == 1)
4074 4080 resp->resok.info.chown_restricted = TRUE;
4075 4081 else
4076 4082 resp->resok.info.chown_restricted = FALSE;
4077 4083
4078 4084 resp->status = NFS3_OK;
4079 4085 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
4080 4086 resp->resok.info.case_insensitive = FALSE;
4081 4087 resp->resok.info.case_preserving = TRUE;
4082 4088 DTRACE_NFSV3_5(op__pathconf__done, struct svc_req *, req,
4083 4089 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4084 4090 PATHCONF3res *, resp);
4085 4091 VN_RELE(vp);
4086 4092 return;
4087 4093
4088 4094 out:
4089 4095 if (curthread->t_flag & T_WOULDBLOCK) {
4090 4096 curthread->t_flag &= ~T_WOULDBLOCK;
4091 4097 resp->status = NFS3ERR_JUKEBOX;
4092 4098 } else
4093 4099 resp->status = puterrno3(error);
4094 4100 out1:
4095 4101 DTRACE_NFSV3_5(op__pathconf__done, struct svc_req *, req,
4096 4102 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4097 4103 PATHCONF3res *, resp);
4098 4104 if (vp != NULL)
4099 4105 VN_RELE(vp);
4100 4106 vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
4101 4107 }
4102 4108
4103 4109 void *
4104 4110 rfs3_pathconf_getfh(PATHCONF3args *args)
4105 4111 {
4106 4112
4107 4113 return (&args->object);
4108 4114 }
4109 4115
4110 4116 void
4111 4117 rfs3_commit(COMMIT3args *args, COMMIT3res *resp, struct exportinfo *exi,
4112 4118 struct svc_req *req, cred_t *cr, bool_t ro)
4113 4119 {
4114 4120 nfs3_srv_t *ns;
4115 4121 int error;
4116 4122 vnode_t *vp;
4117 4123 struct vattr *bvap;
4118 4124 struct vattr bva;
4119 4125 struct vattr *avap;
4120 4126 struct vattr ava;
4121 4127
4122 4128 bvap = NULL;
4123 4129 avap = NULL;
4124 4130
4125 4131 vp = nfs3_fhtovp(&args->file, exi);
|
↓ open down ↓ |
1442 lines elided |
↑ open up ↑ |
4126 4132
4127 4133 DTRACE_NFSV3_5(op__commit__start, struct svc_req *, req,
4128 4134 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4129 4135 COMMIT3args *, args);
4130 4136
4131 4137 if (vp == NULL) {
4132 4138 error = ESTALE;
4133 4139 goto out;
4134 4140 }
4135 4141
4142 + ASSERT3U(curzone->zone_id, ==, exi->exi_zoneid); /* exi is non-NULL. */
4136 4143 ns = nfs3_get_srv();
4137 4144 bva.va_mask = AT_ALL;
4138 4145 error = VOP_GETATTR(vp, &bva, 0, cr, NULL);
4139 4146
4140 4147 /*
4141 4148 * If we can't get the attributes, then we can't do the
4142 4149 * right access checking. So, we'll fail the request.
4143 4150 */
4144 4151 if (error)
4145 4152 goto out;
4146 4153
4147 4154 bvap = &bva;
4148 4155
4149 4156 if (rdonly(ro, vp)) {
4150 4157 resp->status = NFS3ERR_ROFS;
4151 4158 goto out1;
4152 4159 }
4153 4160
4154 4161 if (vp->v_type != VREG) {
4155 4162 resp->status = NFS3ERR_INVAL;
4156 4163 goto out1;
4157 4164 }
4158 4165
4159 4166 if (is_system_labeled()) {
4160 4167 bslabel_t *clabel = req->rq_label;
4161 4168
4162 4169 ASSERT(clabel != NULL);
4163 4170 DTRACE_PROBE2(tx__rfs3__log__info__opcommit__clabel, char *,
4164 4171 "got client label from request(1)", struct svc_req *, req);
4165 4172
4166 4173 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4167 4174 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4168 4175 exi)) {
4169 4176 resp->status = NFS3ERR_ACCES;
4170 4177 goto out1;
4171 4178 }
4172 4179 }
4173 4180 }
4174 4181
4175 4182 if (crgetuid(cr) != bva.va_uid &&
4176 4183 (error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL)))
4177 4184 goto out;
4178 4185
4179 4186 error = VOP_FSYNC(vp, FSYNC, cr, NULL);
4180 4187
4181 4188 ava.va_mask = AT_ALL;
4182 4189 avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
4183 4190
4184 4191 if (error)
4185 4192 goto out;
4186 4193
4187 4194 resp->status = NFS3_OK;
4188 4195 vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
4189 4196 resp->resok.verf = ns->write3verf;
4190 4197
4191 4198 DTRACE_NFSV3_5(op__commit__done, struct svc_req *, req,
4192 4199 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4193 4200 COMMIT3res *, resp);
4194 4201
4195 4202 VN_RELE(vp);
4196 4203
4197 4204 return;
4198 4205
4199 4206 out:
4200 4207 if (curthread->t_flag & T_WOULDBLOCK) {
4201 4208 curthread->t_flag &= ~T_WOULDBLOCK;
4202 4209 resp->status = NFS3ERR_JUKEBOX;
4203 4210 } else
4204 4211 resp->status = puterrno3(error);
4205 4212 out1:
4206 4213 DTRACE_NFSV3_5(op__commit__done, struct svc_req *, req,
4207 4214 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4208 4215 COMMIT3res *, resp);
4209 4216
4210 4217 if (vp != NULL)
4211 4218 VN_RELE(vp);
4212 4219 vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
4213 4220 }
4214 4221
4215 4222 void *
4216 4223 rfs3_commit_getfh(COMMIT3args *args)
4217 4224 {
4218 4225
4219 4226 return (&args->file);
4220 4227 }
4221 4228
4222 4229 static int
4223 4230 sattr3_to_vattr(sattr3 *sap, struct vattr *vap)
4224 4231 {
4225 4232
4226 4233 vap->va_mask = 0;
4227 4234
4228 4235 if (sap->mode.set_it) {
4229 4236 vap->va_mode = (mode_t)sap->mode.mode;
4230 4237 vap->va_mask |= AT_MODE;
4231 4238 }
4232 4239 if (sap->uid.set_it) {
4233 4240 vap->va_uid = (uid_t)sap->uid.uid;
4234 4241 vap->va_mask |= AT_UID;
4235 4242 }
4236 4243 if (sap->gid.set_it) {
4237 4244 vap->va_gid = (gid_t)sap->gid.gid;
4238 4245 vap->va_mask |= AT_GID;
4239 4246 }
4240 4247 if (sap->size.set_it) {
4241 4248 if (sap->size.size > (size3)((u_longlong_t)-1))
4242 4249 return (EINVAL);
4243 4250 vap->va_size = sap->size.size;
4244 4251 vap->va_mask |= AT_SIZE;
4245 4252 }
4246 4253 if (sap->atime.set_it == SET_TO_CLIENT_TIME) {
4247 4254 #ifndef _LP64
4248 4255 /* check time validity */
4249 4256 if (!NFS3_TIME_OK(sap->atime.atime.seconds))
4250 4257 return (EOVERFLOW);
4251 4258 #endif
4252 4259 /*
4253 4260 * nfs protocol defines times as unsigned so don't extend sign,
4254 4261 * unless sysadmin set nfs_allow_preepoch_time.
4255 4262 */
4256 4263 NFS_TIME_T_CONVERT(vap->va_atime.tv_sec,
4257 4264 sap->atime.atime.seconds);
4258 4265 vap->va_atime.tv_nsec = (uint32_t)sap->atime.atime.nseconds;
4259 4266 vap->va_mask |= AT_ATIME;
4260 4267 } else if (sap->atime.set_it == SET_TO_SERVER_TIME) {
4261 4268 gethrestime(&vap->va_atime);
4262 4269 vap->va_mask |= AT_ATIME;
4263 4270 }
4264 4271 if (sap->mtime.set_it == SET_TO_CLIENT_TIME) {
4265 4272 #ifndef _LP64
4266 4273 /* check time validity */
4267 4274 if (!NFS3_TIME_OK(sap->mtime.mtime.seconds))
4268 4275 return (EOVERFLOW);
4269 4276 #endif
4270 4277 /*
4271 4278 * nfs protocol defines times as unsigned so don't extend sign,
4272 4279 * unless sysadmin set nfs_allow_preepoch_time.
4273 4280 */
4274 4281 NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec,
4275 4282 sap->mtime.mtime.seconds);
4276 4283 vap->va_mtime.tv_nsec = (uint32_t)sap->mtime.mtime.nseconds;
4277 4284 vap->va_mask |= AT_MTIME;
4278 4285 } else if (sap->mtime.set_it == SET_TO_SERVER_TIME) {
4279 4286 gethrestime(&vap->va_mtime);
4280 4287 vap->va_mask |= AT_MTIME;
4281 4288 }
4282 4289
4283 4290 return (0);
4284 4291 }
4285 4292
4286 4293 static const ftype3 vt_to_nf3[] = {
4287 4294 0, NF3REG, NF3DIR, NF3BLK, NF3CHR, NF3LNK, NF3FIFO, 0, 0, NF3SOCK, 0
4288 4295 };
4289 4296
4290 4297 static int
4291 4298 vattr_to_fattr3(struct vattr *vap, fattr3 *fap)
4292 4299 {
4293 4300
4294 4301 ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
4295 4302 /* Return error if time or size overflow */
4296 4303 if (! (NFS_VAP_TIME_OK(vap) && NFS3_SIZE_OK(vap->va_size))) {
4297 4304 return (EOVERFLOW);
4298 4305 }
4299 4306 fap->type = vt_to_nf3[vap->va_type];
4300 4307 fap->mode = (mode3)(vap->va_mode & MODEMASK);
4301 4308 fap->nlink = (uint32)vap->va_nlink;
4302 4309 if (vap->va_uid == UID_NOBODY)
4303 4310 fap->uid = (uid3)NFS_UID_NOBODY;
4304 4311 else
4305 4312 fap->uid = (uid3)vap->va_uid;
4306 4313 if (vap->va_gid == GID_NOBODY)
4307 4314 fap->gid = (gid3)NFS_GID_NOBODY;
4308 4315 else
4309 4316 fap->gid = (gid3)vap->va_gid;
4310 4317 fap->size = (size3)vap->va_size;
4311 4318 fap->used = (size3)DEV_BSIZE * (size3)vap->va_nblocks;
4312 4319 fap->rdev.specdata1 = (uint32)getmajor(vap->va_rdev);
4313 4320 fap->rdev.specdata2 = (uint32)getminor(vap->va_rdev);
4314 4321 fap->fsid = (uint64)vap->va_fsid;
4315 4322 fap->fileid = (fileid3)vap->va_nodeid;
4316 4323 fap->atime.seconds = vap->va_atime.tv_sec;
4317 4324 fap->atime.nseconds = vap->va_atime.tv_nsec;
4318 4325 fap->mtime.seconds = vap->va_mtime.tv_sec;
4319 4326 fap->mtime.nseconds = vap->va_mtime.tv_nsec;
4320 4327 fap->ctime.seconds = vap->va_ctime.tv_sec;
4321 4328 fap->ctime.nseconds = vap->va_ctime.tv_nsec;
4322 4329 return (0);
4323 4330 }
4324 4331
4325 4332 static int
4326 4333 vattr_to_wcc_attr(struct vattr *vap, wcc_attr *wccap)
4327 4334 {
4328 4335
4329 4336 /* Return error if time or size overflow */
4330 4337 if (!(NFS_TIME_T_OK(vap->va_mtime.tv_sec) &&
4331 4338 NFS_TIME_T_OK(vap->va_ctime.tv_sec) &&
4332 4339 NFS3_SIZE_OK(vap->va_size))) {
4333 4340 return (EOVERFLOW);
4334 4341 }
4335 4342 wccap->size = (size3)vap->va_size;
4336 4343 wccap->mtime.seconds = vap->va_mtime.tv_sec;
4337 4344 wccap->mtime.nseconds = vap->va_mtime.tv_nsec;
4338 4345 wccap->ctime.seconds = vap->va_ctime.tv_sec;
4339 4346 wccap->ctime.nseconds = vap->va_ctime.tv_nsec;
4340 4347 return (0);
4341 4348 }
4342 4349
4343 4350 static void
4344 4351 vattr_to_pre_op_attr(struct vattr *vap, pre_op_attr *poap)
4345 4352 {
4346 4353
4347 4354 /* don't return attrs if time overflow */
4348 4355 if ((vap != NULL) && !vattr_to_wcc_attr(vap, &poap->attr)) {
4349 4356 poap->attributes = TRUE;
4350 4357 } else
4351 4358 poap->attributes = FALSE;
4352 4359 }
4353 4360
4354 4361 void
4355 4362 vattr_to_post_op_attr(struct vattr *vap, post_op_attr *poap)
4356 4363 {
4357 4364
4358 4365 /* don't return attrs if time overflow */
4359 4366 if ((vap != NULL) && !vattr_to_fattr3(vap, &poap->attr)) {
4360 4367 poap->attributes = TRUE;
4361 4368 } else
4362 4369 poap->attributes = FALSE;
4363 4370 }
4364 4371
4365 4372 static void
4366 4373 vattr_to_wcc_data(struct vattr *bvap, struct vattr *avap, wcc_data *wccp)
4367 4374 {
4368 4375 vattr_to_pre_op_attr(bvap, &wccp->before);
4369 4376 vattr_to_post_op_attr(avap, &wccp->after);
4370 4377 }
4371 4378
4372 4379 static int
4373 4380 rdma_setup_read_data3(READ3args *args, READ3resok *rok)
4374 4381 {
4375 4382 struct clist *wcl;
4376 4383 int wlist_len;
4377 4384 count3 count = rok->count;
4378 4385
4379 4386 wcl = args->wlist;
4380 4387 if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE)
4381 4388 return (FALSE);
4382 4389
4383 4390 wcl = args->wlist;
4384 4391 rok->wlist_len = wlist_len;
4385 4392 rok->wlist = wcl;
4386 4393 return (TRUE);
4387 4394 }
4388 4395
4389 4396 void
4390 4397 rfs3_srv_zone_init(nfs_globals_t *ng)
4391 4398 {
4392 4399 nfs3_srv_t *ns;
4393 4400 struct rfs3_verf_overlay {
4394 4401 uint_t id; /* a "unique" identifier */
4395 4402 int ts; /* a unique timestamp */
4396 4403 } *verfp;
4397 4404 timestruc_t now;
4398 4405
4399 4406 ns = kmem_zalloc(sizeof (*ns), KM_SLEEP);
4400 4407
4401 4408 /*
4402 4409 * The following algorithm attempts to find a unique verifier
4403 4410 * to be used as the write verifier returned from the server
4404 4411 * to the client. It is important that this verifier change
4405 4412 * whenever the server reboots. Of secondary importance, it
4406 4413 * is important for the verifier to be unique between two
4407 4414 * different servers.
4408 4415 *
4409 4416 * Thus, an attempt is made to use the system hostid and the
4410 4417 * current time in seconds when the nfssrv kernel module is
4411 4418 * loaded. It is assumed that an NFS server will not be able
4412 4419 * to boot and then to reboot in less than a second. If the
4413 4420 * hostid has not been set, then the current high resolution
4414 4421 * time is used. This will ensure different verifiers each
4415 4422 * time the server reboots and minimize the chances that two
4416 4423 * different servers will have the same verifier.
4417 4424 */
4418 4425
4419 4426 #ifndef lint
4420 4427 /*
4421 4428 * We ASSERT that this constant logic expression is
4422 4429 * always true because in the past, it wasn't.
4423 4430 */
4424 4431 ASSERT(sizeof (*verfp) <= sizeof (ns->write3verf));
4425 4432 #endif
4426 4433
4427 4434 gethrestime(&now);
4428 4435 verfp = (struct rfs3_verf_overlay *)&ns->write3verf;
4429 4436 verfp->ts = (int)now.tv_sec;
4430 4437 verfp->id = zone_get_hostid(NULL);
4431 4438
4432 4439 if (verfp->id == 0)
4433 4440 verfp->id = (uint_t)now.tv_nsec;
4434 4441
4435 4442 ng->nfs3_srv = ns;
4436 4443 }
4437 4444
4438 4445 void
4439 4446 rfs3_srv_zone_fini(nfs_globals_t *ng)
4440 4447 {
4441 4448 nfs3_srv_t *ns = ng->nfs3_srv;
4442 4449
4443 4450 ng->nfs3_srv = NULL;
4444 4451
4445 4452 kmem_free(ns, sizeof (*ns));
4446 4453 }
4447 4454
4448 4455 void
4449 4456 rfs3_srvrinit(void)
4450 4457 {
4451 4458 nfs3_srv_caller_id = fs_new_caller_id();
4452 4459 }
4453 4460
/*
 * NFSv3 server teardown hook, the counterpart of rfs3_srvrinit().
 */
void
rfs3_srvrfini(void)
{
	/* Intentionally empty: there is nothing to release here. */
}
|
↓ open down ↓ |
313 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX