Print this page
11083 support NFS server in zone
Portions contributed by: Dan Kruchinin <dan.kruchinin@nexenta.com>
Portions contributed by: Stepan Zastupov <stepan.zastupov@gmail.com>
Portions contributed by: Joyce McIntosh <joyce.mcintosh@nexenta.com>
Portions contributed by: Mike Zeller <mike@mikezeller.net>
Portions contributed by: Dan McDonald <danmcd@joyent.com>
Portions contributed by: Gordon Ross <gordon.w.ross@gmail.com>
Portions contributed by: Vitaliy Gusev <gusev.vitaliy@gmail.com>
Reviewed by: Rick McNeal <rick.mcneal@nexenta.com>
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Jason King <jbk@joyent.com>
Reviewed by: C Fraire <cfraire@me.com>
Change-Id: I22f289d357503f9b48a0bc2482cc4328a6d43d16
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/fs/nfs/nfs_srv.c
+++ new/usr/src/uts/common/fs/nfs/nfs_srv.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
|
↓ open down ↓ |
22 lines elided |
↑ open up ↑ |
23 23 * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
24 24 * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
25 25 * Copyright (c) 2016 by Delphix. All rights reserved.
26 26 */
27 27
28 28 /*
29 29 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
30 30 * All rights reserved.
31 31 */
32 32
33 +/*
34 + * Copyright 2018 Nexenta Systems, Inc.
35 + * Copyright (c) 2016 by Delphix. All rights reserved.
36 + */
37 +
33 38 #include <sys/param.h>
34 39 #include <sys/types.h>
35 40 #include <sys/systm.h>
36 41 #include <sys/cred.h>
37 42 #include <sys/buf.h>
38 43 #include <sys/vfs.h>
39 44 #include <sys/vnode.h>
40 45 #include <sys/uio.h>
41 46 #include <sys/stat.h>
42 47 #include <sys/errno.h>
43 48 #include <sys/sysmacros.h>
44 49 #include <sys/statvfs.h>
45 50 #include <sys/kmem.h>
46 51 #include <sys/kstat.h>
47 52 #include <sys/dirent.h>
48 53 #include <sys/cmn_err.h>
49 54 #include <sys/debug.h>
50 55 #include <sys/vtrace.h>
51 56 #include <sys/mode.h>
52 57 #include <sys/acl.h>
53 58 #include <sys/nbmlock.h>
54 59 #include <sys/policy.h>
55 60 #include <sys/sdt.h>
56 61
57 62 #include <rpc/types.h>
58 63 #include <rpc/auth.h>
59 64 #include <rpc/svc.h>
60 65
61 66 #include <nfs/nfs.h>
62 67 #include <nfs/export.h>
|
↓ open down ↓ |
20 lines elided |
↑ open up ↑ |
63 68 #include <nfs/nfs_cmd.h>
64 69
65 70 #include <vm/hat.h>
66 71 #include <vm/as.h>
67 72 #include <vm/seg.h>
68 73 #include <vm/seg_map.h>
69 74 #include <vm/seg_kmem.h>
70 75
71 76 #include <sys/strsubr.h>
72 77
78 +struct rfs_async_write_list;
79 +
73 80 /*
81 + * Zone globals of NFSv2 server
82 + */
83 +typedef struct nfs_srv {
84 + kmutex_t async_write_lock;
85 + struct rfs_async_write_list *async_write_head;
86 +
87 + /*
88 + * enables write clustering if == 1
89 + */
90 + int write_async;
91 +} nfs_srv_t;
92 +
93 +/*
74 94 * These are the interface routines for the server side of the
75 95 * Network File System. See the NFS version 2 protocol specification
76 96 * for a description of this interface.
77 97 */
78 98
79 99 static int sattr_to_vattr(struct nfssattr *, struct vattr *);
80 100 static void acl_perm(struct vnode *, struct exportinfo *, struct vattr *,
81 101 cred_t *);
82 102
103 +
83 104 /*
84 105 * Some "over the wire" UNIX file types. These are encoded
85 106 * into the mode. This needs to be fixed in the next rev.
86 107 */
87 108 #define IFMT 0170000 /* type of file */
88 109 #define IFCHR 0020000 /* character special */
89 110 #define IFBLK 0060000 /* block special */
90 111 #define IFSOCK 0140000 /* socket */
91 112
92 113 u_longlong_t nfs2_srv_caller_id;
93 114
115 +static nfs_srv_t *
116 +nfs_get_srv(void)
117 +{
118 + nfs_globals_t *ng = nfs_srv_getzg();
119 + nfs_srv_t *srv = ng->nfs_srv;
120 + ASSERT(srv != NULL);
121 + return (srv);
122 +}
123 +
94 124 /*
95 125 * Get file attributes.
96 126 * Returns the current attributes of the file with the given fhandle.
97 127 */
98 128 /* ARGSUSED */
99 129 void
100 130 rfs_getattr(fhandle_t *fhp, struct nfsattrstat *ns, struct exportinfo *exi,
101 131 struct svc_req *req, cred_t *cr, bool_t ro)
102 132 {
103 133 int error;
104 134 vnode_t *vp;
105 135 struct vattr va;
106 136
107 137 vp = nfs_fhtovp(fhp, exi);
108 138 if (vp == NULL) {
109 139 ns->ns_status = NFSERR_STALE;
110 140 return;
111 141 }
112 142
113 143 /*
114 144 * Do the getattr.
115 145 */
116 146 va.va_mask = AT_ALL; /* we want all the attributes */
117 147
118 148 error = rfs4_delegated_getattr(vp, &va, 0, cr);
119 149
120 150 /* check for overflows */
121 151 if (!error) {
122 152 /* Lie about the object type for a referral */
123 153 if (vn_is_nfs_reparse(vp, cr))
124 154 va.va_type = VLNK;
125 155
126 156 acl_perm(vp, exi, &va, cr);
127 157 error = vattr_to_nattr(&va, &ns->ns_attr);
128 158 }
129 159
130 160 VN_RELE(vp);
131 161
132 162 ns->ns_status = puterrno(error);
133 163 }
134 164 void *
135 165 rfs_getattr_getfh(fhandle_t *fhp)
136 166 {
137 167 return (fhp);
138 168 }
139 169
140 170 /*
141 171 * Set file attributes.
142 172 * Sets the attributes of the file with the given fhandle. Returns
143 173 * the new attributes.
144 174 */
145 175 /* ARGSUSED */
146 176 void
147 177 rfs_setattr(struct nfssaargs *args, struct nfsattrstat *ns,
148 178 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
149 179 {
150 180 int error;
151 181 int flag;
152 182 int in_crit = 0;
153 183 vnode_t *vp;
154 184 struct vattr va;
155 185 struct vattr bva;
156 186 struct flock64 bf;
157 187 caller_context_t ct;
158 188
159 189
160 190 vp = nfs_fhtovp(&args->saa_fh, exi);
161 191 if (vp == NULL) {
162 192 ns->ns_status = NFSERR_STALE;
163 193 return;
164 194 }
165 195
166 196 if (rdonly(ro, vp)) {
167 197 VN_RELE(vp);
168 198 ns->ns_status = NFSERR_ROFS;
169 199 return;
170 200 }
171 201
172 202 error = sattr_to_vattr(&args->saa_sa, &va);
173 203 if (error) {
174 204 VN_RELE(vp);
175 205 ns->ns_status = puterrno(error);
176 206 return;
177 207 }
178 208
179 209 /*
180 210 * If the client is requesting a change to the mtime,
181 211 * but the nanosecond field is set to 1 billion, then
182 212 * this is a flag to the server that it should set the
183 213 * atime and mtime fields to the server's current time.
184 214 * The 1 billion number actually came from the client
185 215 * as 1 million, but the units in the over the wire
186 216 * request are microseconds instead of nanoseconds.
187 217 *
188 218 * This is an overload of the protocol and should be
189 219 * documented in the NFS Version 2 protocol specification.
190 220 */
191 221 if (va.va_mask & AT_MTIME) {
192 222 if (va.va_mtime.tv_nsec == 1000000000) {
193 223 gethrestime(&va.va_mtime);
194 224 va.va_atime = va.va_mtime;
195 225 va.va_mask |= AT_ATIME;
196 226 flag = 0;
197 227 } else
198 228 flag = ATTR_UTIME;
199 229 } else
200 230 flag = 0;
201 231
202 232 /*
203 233 * If the filesystem is exported with nosuid, then mask off
204 234 * the setuid and setgid bits.
205 235 */
206 236 if ((va.va_mask & AT_MODE) && vp->v_type == VREG &&
207 237 (exi->exi_export.ex_flags & EX_NOSUID))
208 238 va.va_mode &= ~(VSUID | VSGID);
209 239
210 240 ct.cc_sysid = 0;
211 241 ct.cc_pid = 0;
212 242 ct.cc_caller_id = nfs2_srv_caller_id;
213 243 ct.cc_flags = CC_DONTBLOCK;
214 244
215 245 /*
216 246 * We need to specially handle size changes because it is
217 247 * possible for the client to create a file with modes
218 248 * which indicate read-only, but with the file opened for
219 249 * writing. If the client then tries to set the size of
220 250 * the file, then the normal access checking done in
221 251 * VOP_SETATTR would prevent the client from doing so,
222 252 * although it should be legal for it to do so. To get
223 253 * around this, we do the access checking for ourselves
224 254 * and then use VOP_SPACE which doesn't do the access
225 255 * checking which VOP_SETATTR does. VOP_SPACE can only
226 256 * operate on VREG files, let VOP_SETATTR handle the other
227 257 * extremely rare cases.
228 258 * Also the client should not be allowed to change the
229 259 * size of the file if there is a conflicting non-blocking
230 260 * mandatory lock in the region of change.
231 261 */
232 262 if (vp->v_type == VREG && va.va_mask & AT_SIZE) {
233 263 if (nbl_need_check(vp)) {
234 264 nbl_start_crit(vp, RW_READER);
235 265 in_crit = 1;
236 266 }
237 267
238 268 bva.va_mask = AT_UID | AT_SIZE;
239 269
240 270 error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
241 271
242 272 if (error) {
243 273 if (in_crit)
244 274 nbl_end_crit(vp);
245 275 VN_RELE(vp);
246 276 ns->ns_status = puterrno(error);
247 277 return;
248 278 }
249 279
250 280 if (in_crit) {
251 281 u_offset_t offset;
252 282 ssize_t length;
253 283
254 284 if (va.va_size < bva.va_size) {
255 285 offset = va.va_size;
256 286 length = bva.va_size - va.va_size;
257 287 } else {
258 288 offset = bva.va_size;
259 289 length = va.va_size - bva.va_size;
260 290 }
261 291 if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
262 292 NULL)) {
263 293 error = EACCES;
264 294 }
265 295 }
266 296
267 297 if (crgetuid(cr) == bva.va_uid && !error &&
268 298 va.va_size != bva.va_size) {
269 299 va.va_mask &= ~AT_SIZE;
270 300 bf.l_type = F_WRLCK;
271 301 bf.l_whence = 0;
272 302 bf.l_start = (off64_t)va.va_size;
273 303 bf.l_len = 0;
274 304 bf.l_sysid = 0;
275 305 bf.l_pid = 0;
276 306
277 307 error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
278 308 (offset_t)va.va_size, cr, &ct);
279 309 }
280 310 if (in_crit)
281 311 nbl_end_crit(vp);
282 312 } else
283 313 error = 0;
284 314
285 315 /*
286 316 * Do the setattr.
287 317 */
288 318 if (!error && va.va_mask) {
289 319 error = VOP_SETATTR(vp, &va, flag, cr, &ct);
290 320 }
291 321
292 322 /*
293 323 * check if the monitor on either vop_space or vop_setattr detected
294 324 * a delegation conflict and if so, mark the thread flag as
295 325 * wouldblock so that the response is dropped and the client will
296 326 * try again.
297 327 */
298 328 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
299 329 VN_RELE(vp);
300 330 curthread->t_flag |= T_WOULDBLOCK;
301 331 return;
302 332 }
303 333
304 334 if (!error) {
305 335 va.va_mask = AT_ALL; /* get everything */
306 336
307 337 error = rfs4_delegated_getattr(vp, &va, 0, cr);
308 338
309 339 /* check for overflows */
310 340 if (!error) {
311 341 acl_perm(vp, exi, &va, cr);
312 342 error = vattr_to_nattr(&va, &ns->ns_attr);
313 343 }
314 344 }
315 345
316 346 ct.cc_flags = 0;
317 347
318 348 /*
319 349 * Force modified metadata out to stable storage.
320 350 */
321 351 (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
322 352
323 353 VN_RELE(vp);
324 354
325 355 ns->ns_status = puterrno(error);
326 356 }
327 357 void *
328 358 rfs_setattr_getfh(struct nfssaargs *args)
329 359 {
330 360 return (&args->saa_fh);
331 361 }
332 362
333 363 /* Change and release @exip and @vpp only on success */
334 364 int
335 365 rfs_cross_mnt(vnode_t **vpp, struct exportinfo **exip)
336 366 {
337 367 struct exportinfo *exi;
338 368 vnode_t *vp = *vpp;
339 369 fid_t fid;
340 370 int error;
341 371
342 372 VN_HOLD(vp);
343 373
344 374 if ((error = traverse(&vp)) != 0) {
345 375 VN_RELE(vp);
346 376 return (error);
347 377 }
348 378
349 379 bzero(&fid, sizeof (fid));
350 380 fid.fid_len = MAXFIDSZ;
351 381 error = VOP_FID(vp, &fid, NULL);
352 382 if (error) {
353 383 VN_RELE(vp);
354 384 return (error);
355 385 }
356 386
357 387 exi = checkexport(&vp->v_vfsp->vfs_fsid, &fid);
358 388 if (exi == NULL ||
359 389 (exi->exi_export.ex_flags & EX_NOHIDE) == 0) {
360 390 /*
360 390 * This is not an error: the subdir is simply not exported
361 391 * or "nohide" is not set.
363 393 */
364 394 if (exi != NULL)
365 395 exi_rele(exi);
366 396 VN_RELE(vp);
367 397 } else {
368 398 /* go to submount */
369 399 exi_rele(*exip);
370 400 *exip = exi;
371 401
372 402 VN_RELE(*vpp);
373 403 *vpp = vp;
374 404 }
375 405
376 406 return (0);
377 407 }
378 408
|
↓ open down ↓ |
275 lines elided |
↑ open up ↑ |
379 409 /*
380 410 * Given a mounted "dvp" and "exi", climb to the upper mountpoint,
381 411 * correcting dvp/exi accordingly.
382 412 * Returns 0 on success, -1 if no export is found for the upper vnode.
383 413 */
384 414 int
385 415 rfs_climb_crossmnt(vnode_t **dvpp, struct exportinfo **exip, cred_t *cr)
386 416 {
387 417 struct exportinfo *exi;
388 418 vnode_t *dvp = *dvpp;
419 + vnode_t *zone_rootvp;
389 420
390 - ASSERT(dvp->v_flag & VROOT);
421 + zone_rootvp = (*exip)->exi_ne->exi_root->exi_vp;
422 + ASSERT((dvp->v_flag & VROOT) || VN_CMP(zone_rootvp, dvp));
391 423
392 424 VN_HOLD(dvp);
393 - dvp = untraverse(dvp);
425 + dvp = untraverse(dvp, zone_rootvp);
394 426 exi = nfs_vptoexi(NULL, dvp, cr, NULL, NULL, FALSE);
395 427 if (exi == NULL) {
396 428 VN_RELE(dvp);
397 429 return (-1);
398 430 }
399 431
432 + ASSERT3U(exi->exi_zoneid, ==, (*exip)->exi_zoneid);
400 433 exi_rele(*exip);
401 434 *exip = exi;
402 435 VN_RELE(*dvpp);
403 436 *dvpp = dvp;
404 437
405 438 return (0);
406 439 }
407 440 /*
408 441 * Directory lookup.
409 442 * Returns an fhandle and file attributes for file name in a directory.
410 443 */
411 444 /* ARGSUSED */
412 445 void
413 446 rfs_lookup(struct nfsdiropargs *da, struct nfsdiropres *dr,
414 447 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
415 448 {
416 449 int error;
417 450 vnode_t *dvp;
418 451 vnode_t *vp;
419 452 struct vattr va;
420 453 fhandle_t *fhp = da->da_fhandle;
421 454 struct sec_ol sec = {0, 0};
422 455 bool_t publicfh_flag = FALSE, auth_weak = FALSE;
423 456 char *name;
424 457 struct sockaddr *ca;
425 458
426 459 /*
427 460 * Trusted Extension doesn't support NFSv2. MOUNT
428 461 * will reject v2 clients. Need to prevent v2 client
429 462 * access via WebNFS here.
430 463 */
431 464 if (is_system_labeled() && req->rq_vers == 2) {
432 465 dr->dr_status = NFSERR_ACCES;
433 466 return;
434 467 }
435 468
436 469 /*
437 470 * Disallow NULL paths
438 471 */
|
↓ open down ↓ |
29 lines elided |
↑ open up ↑ |
439 472 if (da->da_name == NULL || *da->da_name == '\0') {
440 473 dr->dr_status = NFSERR_ACCES;
441 474 return;
442 475 }
443 476
444 477 /*
445 478 * Allow lookups from the root - the default
446 479 * location of the public filehandle.
447 480 */
448 481 if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
449 - dvp = rootdir;
482 + dvp = ZONE_ROOTVP();
450 483 VN_HOLD(dvp);
451 484 } else {
452 485 dvp = nfs_fhtovp(fhp, exi);
453 486 if (dvp == NULL) {
454 487 dr->dr_status = NFSERR_STALE;
455 488 return;
456 489 }
457 490 }
458 491
459 492 exi_hold(exi);
493 + ASSERT3U(exi->exi_zoneid, ==, curzone->zone_id);
460 494
461 495 /*
462 496 * Do not allow lookup beyond root.
463 497 * If the filehandle matches a filehandle of the exi,
464 498 * then the ".." refers beyond the root of an exported filesystem.
465 499 */
466 500 if (strcmp(da->da_name, "..") == 0 &&
467 501 EQFID(&exi->exi_fid, (fid_t *)&fhp->fh_len)) {
468 502 if ((exi->exi_export.ex_flags & EX_NOHIDE) &&
469 - (dvp->v_flag & VROOT)) {
503 + ((dvp->v_flag & VROOT) || VN_IS_CURZONEROOT(dvp))) {
470 504 /*
471 505 * Special case for ".." and a 'nohide' exported root.
472 506 */
473 507 if (rfs_climb_crossmnt(&dvp, &exi, cr) != 0) {
474 508 error = NFSERR_ACCES;
475 509 goto out;
476 510 }
477 511 } else {
478 512 error = NFSERR_NOENT;
479 513 goto out;
480 514 }
481 515 }
482 516
483 517 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
484 518 name = nfscmd_convname(ca, exi, da->da_name, NFSCMD_CONV_INBOUND,
485 519 MAXPATHLEN);
486 520
487 521 if (name == NULL) {
488 522 error = NFSERR_ACCES;
489 523 goto out;
490 524 }
491 525
492 526 /*
493 527 * If the public filehandle is used then allow
494 528 * a multi-component lookup, i.e. evaluate
|
↓ open down ↓ |
15 lines elided |
↑ open up ↑ |
495 529 * a pathname and follow symbolic links if
496 530 * necessary.
497 531 *
498 532 * This may result in a vnode in another filesystem
499 533 * which is OK as long as the filesystem is exported.
500 534 */
501 535 if (PUBLIC_FH2(fhp)) {
502 536 publicfh_flag = TRUE;
503 537
504 538 exi_rele(exi);
539 + exi = NULL;
505 540
506 541 error = rfs_publicfh_mclookup(name, dvp, cr, &vp, &exi,
507 542 &sec);
508 543 } else {
509 544 /*
510 545 * Do a normal single component lookup.
511 546 */
512 547 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
513 548 NULL, NULL, NULL);
514 549 }
515 550
516 551 if (name != da->da_name)
517 552 kmem_free(name, MAXPATHLEN);
518 553
519 554 if (error == 0 && vn_ismntpt(vp)) {
520 555 error = rfs_cross_mnt(&vp, &exi);
521 556 if (error)
522 557 VN_RELE(vp);
523 558 }
524 559
525 560 if (!error) {
526 561 va.va_mask = AT_ALL; /* we want everything */
527 562
528 563 error = rfs4_delegated_getattr(vp, &va, 0, cr);
529 564
530 565 /* check for overflows */
531 566 if (!error) {
532 567 acl_perm(vp, exi, &va, cr);
533 568 error = vattr_to_nattr(&va, &dr->dr_attr);
534 569 if (!error) {
535 570 if (sec.sec_flags & SEC_QUERY)
536 571 error = makefh_ol(&dr->dr_fhandle, exi,
537 572 sec.sec_index);
538 573 else {
539 574 error = makefh(&dr->dr_fhandle, vp,
540 575 exi);
541 576 if (!error && publicfh_flag &&
542 577 !chk_clnt_sec(exi, req))
543 578 auth_weak = TRUE;
544 579 }
545 580 }
546 581 }
547 582 VN_RELE(vp);
548 583 }
549 584
550 585 out:
551 586 VN_RELE(dvp);
552 587
553 588 if (exi != NULL)
554 589 exi_rele(exi);
555 590
556 591 /*
557 592 * If it's public fh, no 0x81, and client's flavor is
558 593 * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
559 594 * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
560 595 */
561 596 if (auth_weak)
562 597 dr->dr_status = (enum nfsstat)WNFSERR_CLNT_FLAVOR;
563 598 else
564 599 dr->dr_status = puterrno(error);
565 600 }
566 601 void *
567 602 rfs_lookup_getfh(struct nfsdiropargs *da)
568 603 {
569 604 return (da->da_fhandle);
570 605 }
571 606
572 607 /*
573 608 * Read symbolic link.
574 609 * Returns the string in the symbolic link at the given fhandle.
575 610 */
576 611 /* ARGSUSED */
577 612 void
578 613 rfs_readlink(fhandle_t *fhp, struct nfsrdlnres *rl, struct exportinfo *exi,
579 614 struct svc_req *req, cred_t *cr, bool_t ro)
580 615 {
581 616 int error;
582 617 struct iovec iov;
583 618 struct uio uio;
584 619 vnode_t *vp;
585 620 struct vattr va;
586 621 struct sockaddr *ca;
587 622 char *name = NULL;
588 623 int is_referral = 0;
589 624
590 625 vp = nfs_fhtovp(fhp, exi);
591 626 if (vp == NULL) {
592 627 rl->rl_data = NULL;
593 628 rl->rl_status = NFSERR_STALE;
594 629 return;
595 630 }
596 631
597 632 va.va_mask = AT_MODE;
598 633
599 634 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
600 635
601 636 if (error) {
602 637 VN_RELE(vp);
603 638 rl->rl_data = NULL;
604 639 rl->rl_status = puterrno(error);
605 640 return;
606 641 }
607 642
608 643 if (MANDLOCK(vp, va.va_mode)) {
609 644 VN_RELE(vp);
610 645 rl->rl_data = NULL;
611 646 rl->rl_status = NFSERR_ACCES;
612 647 return;
613 648 }
614 649
615 650 /* We lied about the object type for a referral */
616 651 if (vn_is_nfs_reparse(vp, cr))
617 652 is_referral = 1;
618 653
619 654 /*
620 655 * XNFS and RFC1094 require us to return ENXIO if argument
621 656 * is not a link. BUGID 1138002.
622 657 */
623 658 if (vp->v_type != VLNK && !is_referral) {
624 659 VN_RELE(vp);
625 660 rl->rl_data = NULL;
626 661 rl->rl_status = NFSERR_NXIO;
627 662 return;
|
↓ open down ↓ |
113 lines elided |
↑ open up ↑ |
628 663 }
629 664
630 665 /*
631 666 * Allocate data for pathname. This will be freed by rfs_rlfree.
632 667 */
633 668 rl->rl_data = kmem_alloc(NFS_MAXPATHLEN, KM_SLEEP);
634 669
635 670 if (is_referral) {
636 671 char *s;
637 672 size_t strsz;
673 + kstat_named_t *stat =
674 + exi->exi_ne->ne_globals->svstat[NFS_VERSION];
638 675
639 676 /* Get an artificial symlink based on a referral */
640 677 s = build_symlink(vp, cr, &strsz);
641 - global_svstat_ptr[2][NFS_REFERLINKS].value.ui64++;
678 + stat[NFS_REFERLINKS].value.ui64++;
642 679 DTRACE_PROBE2(nfs2serv__func__referral__reflink,
643 680 vnode_t *, vp, char *, s);
644 681 if (s == NULL)
645 682 error = EINVAL;
646 683 else {
647 684 error = 0;
648 685 (void) strlcpy(rl->rl_data, s, NFS_MAXPATHLEN);
649 686 rl->rl_count = (uint32_t)MIN(strsz, NFS_MAXPATHLEN);
650 687 kmem_free(s, strsz);
651 688 }
652 689
653 690 } else {
654 691
655 692 /*
656 693 * Set up io vector to read sym link data
657 694 */
658 695 iov.iov_base = rl->rl_data;
659 696 iov.iov_len = NFS_MAXPATHLEN;
660 697 uio.uio_iov = &iov;
661 698 uio.uio_iovcnt = 1;
662 699 uio.uio_segflg = UIO_SYSSPACE;
663 700 uio.uio_extflg = UIO_COPY_CACHED;
664 701 uio.uio_loffset = (offset_t)0;
665 702 uio.uio_resid = NFS_MAXPATHLEN;
666 703
667 704 /*
668 705 * Do the readlink.
669 706 */
670 707 error = VOP_READLINK(vp, &uio, cr, NULL);
671 708
672 709 rl->rl_count = (uint32_t)(NFS_MAXPATHLEN - uio.uio_resid);
673 710
674 711 if (!error)
675 712 rl->rl_data[rl->rl_count] = '\0';
676 713
677 714 }
678 715
679 716
680 717 VN_RELE(vp);
681 718
682 719 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
683 720 name = nfscmd_convname(ca, exi, rl->rl_data,
684 721 NFSCMD_CONV_OUTBOUND, MAXPATHLEN);
685 722
686 723 if (name != NULL && name != rl->rl_data) {
687 724 kmem_free(rl->rl_data, NFS_MAXPATHLEN);
688 725 rl->rl_data = name;
689 726 }
690 727
691 728 /*
692 729 * XNFS and RFC1094 require us to return ENXIO if argument
693 730 * is not a link. UFS returns EINVAL if this is the case,
694 731 * so we do the mapping here. BUGID 1138002.
695 732 */
696 733 if (error == EINVAL)
697 734 rl->rl_status = NFSERR_NXIO;
698 735 else
699 736 rl->rl_status = puterrno(error);
700 737
701 738 }
702 739 void *
703 740 rfs_readlink_getfh(fhandle_t *fhp)
704 741 {
705 742 return (fhp);
706 743 }
707 744 /*
708 745 * Free data allocated by rfs_readlink
709 746 */
710 747 void
711 748 rfs_rlfree(struct nfsrdlnres *rl)
712 749 {
713 750 if (rl->rl_data != NULL)
714 751 kmem_free(rl->rl_data, NFS_MAXPATHLEN);
715 752 }
716 753
717 754 static int rdma_setup_read_data2(struct nfsreadargs *, struct nfsrdresult *);
718 755
719 756 /*
720 757 * Read data.
721 758 * Returns some data read from the file at the given fhandle.
722 759 */
723 760 /* ARGSUSED */
724 761 void
725 762 rfs_read(struct nfsreadargs *ra, struct nfsrdresult *rr,
726 763 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
727 764 {
728 765 vnode_t *vp;
729 766 int error;
730 767 struct vattr va;
731 768 struct iovec iov;
732 769 struct uio uio;
733 770 mblk_t *mp;
734 771 int alloc_err = 0;
735 772 int in_crit = 0;
736 773 caller_context_t ct;
737 774
738 775 vp = nfs_fhtovp(&ra->ra_fhandle, exi);
739 776 if (vp == NULL) {
740 777 rr->rr_data = NULL;
741 778 rr->rr_status = NFSERR_STALE;
742 779 return;
743 780 }
744 781
745 782 if (vp->v_type != VREG) {
746 783 VN_RELE(vp);
747 784 rr->rr_data = NULL;
748 785 rr->rr_status = NFSERR_ISDIR;
749 786 return;
750 787 }
751 788
752 789 ct.cc_sysid = 0;
753 790 ct.cc_pid = 0;
754 791 ct.cc_caller_id = nfs2_srv_caller_id;
755 792 ct.cc_flags = CC_DONTBLOCK;
756 793
757 794 /*
758 795 * Enter the critical region before calling VOP_RWLOCK
759 796 * to avoid a deadlock with write requests.
760 797 */
761 798 if (nbl_need_check(vp)) {
762 799 nbl_start_crit(vp, RW_READER);
763 800 if (nbl_conflict(vp, NBL_READ, ra->ra_offset, ra->ra_count,
764 801 0, NULL)) {
765 802 nbl_end_crit(vp);
766 803 VN_RELE(vp);
767 804 rr->rr_data = NULL;
|
↓ open down ↓ |
116 lines elided |
↑ open up ↑ |
768 805 rr->rr_status = NFSERR_ACCES;
769 806 return;
770 807 }
771 808 in_crit = 1;
772 809 }
773 810
774 811 error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);
775 812
776 813 /* check if a monitor detected a delegation conflict */
777 814 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
815 + if (in_crit)
816 + nbl_end_crit(vp);
778 817 VN_RELE(vp);
779 818 /* mark as wouldblock so response is dropped */
780 819 curthread->t_flag |= T_WOULDBLOCK;
781 820
782 821 rr->rr_data = NULL;
783 822 return;
784 823 }
785 824
786 825 va.va_mask = AT_ALL;
787 826
788 827 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
789 828
790 829 if (error) {
791 830 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
792 831 if (in_crit)
793 832 nbl_end_crit(vp);
794 833
795 834 VN_RELE(vp);
796 835 rr->rr_data = NULL;
797 836 rr->rr_status = puterrno(error);
798 837
799 838 return;
800 839 }
801 840
802 841 /*
803 842 * This is a kludge to allow reading of files created
804 843 * with no read permission. The owner of the file
805 844 * is always allowed to read it.
806 845 */
807 846 if (crgetuid(cr) != va.va_uid) {
808 847 error = VOP_ACCESS(vp, VREAD, 0, cr, &ct);
809 848
810 849 if (error) {
811 850 /*
812 851 * Exec is the same as read over the net because
813 852 * of demand loading.
814 853 */
815 854 error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct);
816 855 }
817 856 if (error) {
818 857 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
819 858 if (in_crit)
820 859 nbl_end_crit(vp);
821 860 VN_RELE(vp);
822 861 rr->rr_data = NULL;
823 862 rr->rr_status = puterrno(error);
824 863
825 864 return;
826 865 }
827 866 }
828 867
829 868 if (MANDLOCK(vp, va.va_mode)) {
830 869 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
831 870 if (in_crit)
832 871 nbl_end_crit(vp);
833 872
834 873 VN_RELE(vp);
835 874 rr->rr_data = NULL;
836 875 rr->rr_status = NFSERR_ACCES;
837 876
838 877 return;
839 878 }
840 879
841 880 rr->rr_ok.rrok_wlist_len = 0;
842 881 rr->rr_ok.rrok_wlist = NULL;
843 882
844 883 if ((u_offset_t)ra->ra_offset >= va.va_size) {
845 884 rr->rr_count = 0;
846 885 rr->rr_data = NULL;
847 886 /*
848 887 * In this case, status is NFS_OK, but there is no data
849 888 * to encode. So set rr_mp to NULL.
850 889 */
851 890 rr->rr_mp = NULL;
852 891 rr->rr_ok.rrok_wlist = ra->ra_wlist;
853 892 if (rr->rr_ok.rrok_wlist)
854 893 clist_zero_len(rr->rr_ok.rrok_wlist);
855 894 goto done;
856 895 }
857 896
858 897 if (ra->ra_wlist) {
859 898 mp = NULL;
860 899 rr->rr_mp = NULL;
861 900 (void) rdma_get_wchunk(req, &iov, ra->ra_wlist);
862 901 if (ra->ra_count > iov.iov_len) {
863 902 rr->rr_data = NULL;
864 903 rr->rr_status = NFSERR_INVAL;
865 904 goto done;
866 905 }
867 906 } else {
868 907 /*
869 908 * mp will contain the data to be sent out in the read reply.
870 909 * This will be freed after the reply has been sent out (by the
871 910 * driver).
872 911 * Let's roundup the data to a BYTES_PER_XDR_UNIT multiple, so
873 912 * that the call to xdrmblk_putmblk() never fails.
874 913 */
875 914 mp = allocb_wait(RNDUP(ra->ra_count), BPRI_MED, STR_NOSIG,
876 915 &alloc_err);
877 916 ASSERT(mp != NULL);
878 917 ASSERT(alloc_err == 0);
879 918
880 919 rr->rr_mp = mp;
881 920
882 921 /*
883 922 * Set up io vector
884 923 */
885 924 iov.iov_base = (caddr_t)mp->b_datap->db_base;
886 925 iov.iov_len = ra->ra_count;
887 926 }
888 927
889 928 uio.uio_iov = &iov;
890 929 uio.uio_iovcnt = 1;
891 930 uio.uio_segflg = UIO_SYSSPACE;
892 931 uio.uio_extflg = UIO_COPY_CACHED;
893 932 uio.uio_loffset = (offset_t)ra->ra_offset;
894 933 uio.uio_resid = ra->ra_count;
895 934
896 935 error = VOP_READ(vp, &uio, 0, cr, &ct);
897 936
898 937 if (error) {
899 938 if (mp)
900 939 freeb(mp);
901 940
902 941 /*
903 942 * check if a monitor detected a delegation conflict and
904 943 * mark as wouldblock so response is dropped
905 944 */
906 945 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
907 946 curthread->t_flag |= T_WOULDBLOCK;
908 947 else
909 948 rr->rr_status = puterrno(error);
910 949
911 950 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
912 951 if (in_crit)
913 952 nbl_end_crit(vp);
914 953
915 954 VN_RELE(vp);
916 955 rr->rr_data = NULL;
917 956
918 957 return;
919 958 }
920 959
921 960 /*
922 961 * Get attributes again so we can send the latest access
923 962 * time to the client side for its cache.
924 963 */
925 964 va.va_mask = AT_ALL;
926 965
927 966 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
928 967
929 968 if (error) {
930 969 if (mp)
931 970 freeb(mp);
932 971
933 972 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
934 973 if (in_crit)
935 974 nbl_end_crit(vp);
936 975
937 976 VN_RELE(vp);
938 977 rr->rr_data = NULL;
939 978 rr->rr_status = puterrno(error);
940 979
941 980 return;
942 981 }
943 982
944 983 rr->rr_count = (uint32_t)(ra->ra_count - uio.uio_resid);
945 984
946 985 if (mp) {
947 986 rr->rr_data = (char *)mp->b_datap->db_base;
948 987 } else {
949 988 if (ra->ra_wlist) {
950 989 rr->rr_data = (caddr_t)iov.iov_base;
951 990 if (!rdma_setup_read_data2(ra, rr)) {
952 991 rr->rr_data = NULL;
953 992 rr->rr_status = puterrno(NFSERR_INVAL);
954 993 }
955 994 }
956 995 }
957 996 done:
958 997 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
959 998 if (in_crit)
960 999 nbl_end_crit(vp);
961 1000
962 1001 acl_perm(vp, exi, &va, cr);
963 1002
964 1003 /* check for overflows */
965 1004 error = vattr_to_nattr(&va, &rr->rr_attr);
966 1005
967 1006 VN_RELE(vp);
968 1007
969 1008 rr->rr_status = puterrno(error);
970 1009 }
971 1010
972 1011 /*
973 1012 * Free data allocated by rfs_read
974 1013 */
975 1014 void
976 1015 rfs_rdfree(struct nfsrdresult *rr)
977 1016 {
978 1017 mblk_t *mp;
979 1018
980 1019 if (rr->rr_status == NFS_OK) {
981 1020 mp = rr->rr_mp;
982 1021 if (mp != NULL)
983 1022 freeb(mp);
984 1023 }
985 1024 }
986 1025
987 1026 void *
988 1027 rfs_read_getfh(struct nfsreadargs *ra)
989 1028 {
990 1029 return (&ra->ra_fhandle);
991 1030 }
992 1031
993 1032 #define MAX_IOVECS 12
994 1033
995 1034 #ifdef DEBUG
996 1035 static int rfs_write_sync_hits = 0;
997 1036 static int rfs_write_sync_misses = 0;
998 1037 #endif
999 1038
/*
 * rfs_write_sync - handle one WRITE request on its own (no clustering).
 *
 *   wa  - write arguments: file handle, offset, count and the data, which
 *         is supplied through wa_data, wa_rlist (RDMA) or wa_mblk.
 *   ns  - reply; ns_status is always filled in except when the request is
 *         to be dropped, ns_attr only when the write and the follow-up
 *         attribute fetch succeed.
 *   exi - export the file handle is resolved against.
 *   req - not used by this function (hence ARGSUSED).
 *   cr  - credentials used for the attribute, access and write calls.
 *   ro  - read-only indication handed to rdonly().
 *
 * Failure mapping visible below: unresolvable handle -> NFSERR_STALE,
 * rdonly() -> NFSERR_ROFS, non-regular file -> NFSERR_ISDIR, mandatory
 * locking -> NFSERR_ACCES, nbmand conflict -> EACCES via puterrno().
 * When an operation fails with EAGAIN and the caller context has
 * CC_WOULDBLOCK set, T_WOULDBLOCK is set on the current thread and
 * ns_status is left untouched.
 */
1000 1039 /*
1001 1040 * Write data to file.
1002 1041 * Returns attributes of a file after writing some data to it.
1003 1042 *
1004 1043 * Any changes made here, especially in error handling might have
1005 1044 * to also be done in rfs_write (which clusters write requests).
1006 1045 */
1007 1046 /* ARGSUSED */
1008 1047 void
1009 1048 rfs_write_sync(struct nfswriteargs *wa, struct nfsattrstat *ns,
1010 1049 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
1011 1050 {
1012 1051 int error;
1013 1052 vnode_t *vp;
1014 1053 rlim64_t rlimit;
1015 1054 struct vattr va;
1016 1055 struct uio uio;
1017 1056 struct iovec iov[MAX_IOVECS];
1018 1057 mblk_t *m;
1019 1058 struct iovec *iovp;
1020 1059 int iovcnt;
1021 1060 cred_t *savecred;
1022 1061 int in_crit = 0;
1023 1062 caller_context_t ct;
1024 1063
1025 1064 vp = nfs_fhtovp(&wa->wa_fhandle, exi);
1026 1065 if (vp == NULL) {
1027 1066 ns->ns_status = NFSERR_STALE;
1028 1067 return;
1029 1068 }
1030 1069
1031 1070 if (rdonly(ro, vp)) {
1032 1071 VN_RELE(vp);
1033 1072 ns->ns_status = NFSERR_ROFS;
1034 1073 return;
1035 1074 }
1036 1075
1037 1076 if (vp->v_type != VREG) {
1038 1077 VN_RELE(vp);
1039 1078 ns->ns_status = NFSERR_ISDIR;
1040 1079 return;
1041 1080 }
1042 1081
/*
 * ct is fully initialised here, before the first "goto out", because the
 * code at the out label reads ct.cc_flags.
 */
1043 1082 ct.cc_sysid = 0;
1044 1083 ct.cc_pid = 0;
1045 1084 ct.cc_caller_id = nfs2_srv_caller_id;
1046 1085 ct.cc_flags = CC_DONTBLOCK;
1047 1086
1048 1087 va.va_mask = AT_UID|AT_MODE;
1049 1088
1050 1089 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1051 1090
1052 1091 if (error) {
1053 1092 VN_RELE(vp);
1054 1093 ns->ns_status = puterrno(error);
1055 1094
1056 1095 return;
1057 1096 }
1058 1097
1059 1098 if (crgetuid(cr) != va.va_uid) {
1060 1099 /*
1061 1100 * This is a kludge to allow writes of files created
1062 1101 * with read only permission. The owner of the file
1063 1102 * is always allowed to write it.
1064 1103 */
1065 1104 error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct);
1066 1105
1067 1106 if (error) {
1068 1107 VN_RELE(vp);
1069 1108 ns->ns_status = puterrno(error);
1070 1109 return;
1071 1110 }
1072 1111 }
1073 1112
1074 1113 /*
1075 1114 * Can't access a mandatory lock file. This might cause
1076 1115 * the NFS service thread to block forever waiting for a
1077 1116 * lock to be released that will never be released.
1078 1117 */
1079 1118 if (MANDLOCK(vp, va.va_mode)) {
1080 1119 VN_RELE(vp);
1081 1120 ns->ns_status = NFSERR_ACCES;
1082 1121 return;
1083 1122 }
1084 1123
1085 1124 /*
1086 1125 * We have to enter the critical region before calling VOP_RWLOCK
1087 1126 * to avoid a deadlock with ufs.
1088 1127 */
1089 1128 if (nbl_need_check(vp)) {
1090 1129 nbl_start_crit(vp, RW_READER);
1091 1130 in_crit = 1;
1092 1131 if (nbl_conflict(vp, NBL_WRITE, wa->wa_offset,
|
↓ open down ↓ |
305 lines elided |
↑ open up ↑ |
1093 1132 wa->wa_count, 0, NULL)) {
1094 1133 error = EACCES;
1095 1134 goto out;
1096 1135 }
1097 1136 }
1098 1137
1099 1138 error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
1100 1139
/*
 * Only the EAGAIN + CC_WOULDBLOCK result of VOP_RWLOCK is acted on; any
 * other return value falls through to the write below.  The replacement
 * "goto out" routes this case through the common exit so that the nbmand
 * critical region is left as well as the vnode being released.
 */
1101 1140 /* check if a monitor detected a delegation conflict */
1102 1141 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1103 - VN_RELE(vp);
1104 - /* mark as wouldblock so response is dropped */
1105 - curthread->t_flag |= T_WOULDBLOCK;
1106 - return;
1142 + goto out;
1107 1143 }
1108 1144
/*
 * Two data layouts: a single contiguous buffer (wa_data or the RDMA
 * wa_rlist), described by iov[0], or an mblk chain that needs one iovec
 * per link.
 */
1109 1145 if (wa->wa_data || wa->wa_rlist) {
1110 1146 /* Do the RDMA thing if necessary */
1111 1147 if (wa->wa_rlist) {
1112 1148 iov[0].iov_base = (char *)((wa->wa_rlist)->u.c_daddr3);
1113 1149 iov[0].iov_len = wa->wa_count;
1114 1150 } else {
1115 1151 iov[0].iov_base = wa->wa_data;
1116 1152 iov[0].iov_len = wa->wa_count;
1117 1153 }
1118 1154 uio.uio_iov = iov;
1119 1155 uio.uio_iovcnt = 1;
1120 1156 uio.uio_segflg = UIO_SYSSPACE;
1121 1157 uio.uio_extflg = UIO_COPY_DEFAULT;
1122 1158 uio.uio_loffset = (offset_t)wa->wa_offset;
1123 1159 uio.uio_resid = wa->wa_count;
1124 1160 /*
1125 1161 * The limit is checked on the client. We
1126 1162 * should allow any size writes here.
1127 1163 */
1128 1164 uio.uio_llimit = curproc->p_fsz_ctl;
1129 1165 rlimit = uio.uio_llimit - wa->wa_offset;
1130 1166 if (rlimit < (rlim64_t)uio.uio_resid)
1131 1167 uio.uio_resid = (uint_t)rlimit;
1132 1168
1133 1169 /*
1134 1170 * for now we assume no append mode
1135 1171 */
|
↓ open down ↓ |
19 lines elided |
↑ open up ↑ |
1136 1172 /*
1137 1173 * We're changing creds because VM may fault and we need
1138 1174 * the cred of the current thread to be used if quota
1139 1175 * checking is enabled.
1140 1176 */
1141 1177 savecred = curthread->t_cred;
1142 1178 curthread->t_cred = cr;
1143 1179 error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct);
1144 1180 curthread->t_cred = savecred;
1145 1181 } else {
1182 +
1146 1183 iovcnt = 0;
1147 1184 for (m = wa->wa_mblk; m != NULL; m = m->b_cont)
1148 1185 iovcnt++;
1149 1186 if (iovcnt <= MAX_IOVECS) {
1150 1187 #ifdef DEBUG
1151 1188 rfs_write_sync_hits++;
1152 1189 #endif
1153 1190 iovp = iov;
1154 1191 } else {
1155 1192 #ifdef DEBUG
1156 1193 rfs_write_sync_misses++;
1157 1194 #endif
1158 1195 iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
1159 1196 }
1160 1197 mblk_to_iov(wa->wa_mblk, iovcnt, iovp);
1161 1198 uio.uio_iov = iovp;
1162 1199 uio.uio_iovcnt = iovcnt;
1163 1200 uio.uio_segflg = UIO_SYSSPACE;
1164 1201 uio.uio_extflg = UIO_COPY_DEFAULT;
1165 1202 uio.uio_loffset = (offset_t)wa->wa_offset;
1166 1203 uio.uio_resid = wa->wa_count;
1167 1204 /*
1168 1205 * The limit is checked on the client. We
1169 1206 * should allow any size writes here.
1170 1207 */
1171 1208 uio.uio_llimit = curproc->p_fsz_ctl;
1172 1209 rlimit = uio.uio_llimit - wa->wa_offset;
1173 1210 if (rlimit < (rlim64_t)uio.uio_resid)
1174 1211 uio.uio_resid = (uint_t)rlimit;
1175 1212
1176 1213 /*
1177 1214 * For now we assume no append mode.
1178 1215 */
1179 1216 /*
1180 1217 * We're changing creds because VM may fault and we need
1181 1218 * the cred of the current thread to be used if quota
1182 1219 * checking is enabled.
1183 1220 */
1184 1221 savecred = curthread->t_cred;
1185 1222 curthread->t_cred = cr;
1186 1223 error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct);
1187 1224 curthread->t_cred = savecred;
1188 1225
/* Free the iovec array only if it was allocated rather than on-stack. */
1189 1226 if (iovp != iov)
1190 1227 kmem_free(iovp, sizeof (*iovp) * iovcnt);
1191 1228 }
1192 1229
1193 1230 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
1194 1231
1195 1232 if (!error) {
1196 1233 /*
1197 1234 * Get attributes again so we send the latest mod
1198 1235 * time to the client side for its cache.
1199 1236 */
1200 1237 va.va_mask = AT_ALL; /* now we want everything */
1201 1238
1202 1239 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1203 1240
1204 1241 /* check for overflows */
1205 1242 if (!error) {
1206 1243 acl_perm(vp, exi, &va, cr);
1207 1244 error = vattr_to_nattr(&va, &ns->ns_attr);
1208 1245 }
1209 1246 }
1210 1247
/*
 * Common exit: reached with the rwlock not held (either never taken or
 * already dropped above).
 */
1211 1248 out:
1212 1249 if (in_crit)
1213 1250 nbl_end_crit(vp);
1214 1251 VN_RELE(vp);
1215 1252
1216 1253 /* check if a monitor detected a delegation conflict */
1217 1254 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
1218 1255 /* mark as wouldblock so response is dropped */
1219 1256 curthread->t_flag |= T_WOULDBLOCK;
1220 1257 else
1221 1258 ns->ns_status = puterrno(error);
1222 1259
1223 1260 }
1224 1261
/*
 * One queued WRITE request inside a cluster.  rfs_write() fills one of
 * these on its own stack for each request it handles.
 */
1225 1262 struct rfs_async_write {
1226 1263 struct nfswriteargs *wa;	/* the request's write arguments */
1227 1264 struct nfsattrstat *ns;	/* the request's reply; ns_status doubles as the "done" flag */
1228 1265 struct svc_req *req;	/* the RPC request */
1229 1266 cred_t *cr;	/* credentials of this request */
1230 1267 bool_t ro;	/* read-only indication passed to rdonly() */
1231 1268 kthread_t *thread;	/* thread that queued the request; receives T_WOULDBLOCK */
1232 1269 struct rfs_async_write *list;	/* next request in the cluster, ordered by wa_offset */
1233 1270 };
1234 1271
/*
 * One cluster: all queued requests that share a file handle.  Also
 * stack-allocated, by the rfs_write() call that starts the cluster.
 */
1235 1272 struct rfs_async_write_list {
1236 1273 fhandle_t *fhp;	/* file handle the cluster is keyed on (compared with bcmp) */
1237 1274 kcondvar_t cv;	/* waiters sleep here until their ns_status is set */
1238 1275 struct rfs_async_write *list;	/* requests in this cluster */
1239 1276 struct rfs_async_write_list *next;	/* next cluster on the list of clusters */
1240 1277 };
1241 1278
/*
 * NOTE(review): in the rfs_write() shown in this change the cluster list
 * head, its lock and the clustering switch are read from the nfs_srv_t
 * returned by nfs_get_srv() (async_write_head, async_write_lock,
 * write_async); confirm whether these three file-scope variables still
 * have any user.
 */
1242 1279 static struct rfs_async_write_list *rfs_async_write_head = NULL;
1243 1280 static kmutex_t rfs_async_write_lock;
1244 1281 static int rfs_write_async = 1; /* enables write clustering if == 1 */
1245 1282
/*
 * MAXCLIOVECS: size of the on-stack iovec array in rfs_write().
 * RFSWRITE_INITVAL: sentinel stored in ns_status while a request is still
 * pending; 0 cannot be used because it means success.
 */
1246 1283 #define MAXCLIOVECS 42
1247 1284 #define RFSWRITE_INITVAL (enum nfsstat) -1
1248 1285
/*
 * DEBUG-only counters: rfs_write() increments "hits" when a gathered run
 * fits in the on-stack iovec array and "misses" when it must allocate.
 */
1249 1286 #ifdef DEBUG
1250 1287 static int rfs_write_hits = 0;
1251 1288 static int rfs_write_misses = 0;
1252 1289 #endif
1253 1290
/*
 * rfs_write - handle a WRITE request, clustering it with other pending
 * writes to the same file handle when clustering is enabled.
 *
 *   wa/ns/exi/req/cr/ro - as for rfs_write_sync().
 *
 * Outline of the code below:
 *   - If nsrv->write_async is zero, hand the request to rfs_write_sync().
 *   - Otherwise describe the request in a stack-allocated
 *     rfs_async_write and, under nsrv->async_write_lock, look for an
 *     existing cluster with the same file handle.
 *   - If one exists, insert the request in offset order, cv_wait() until
 *     its ns_status is no longer RFSWRITE_INITVAL, and return; the thread
 *     that owns the cluster performs the write.
 *   - If none exists, start a new (stack-allocated) cluster, append it to
 *     the cluster list, and become its owner: resolve the vnode, take the
 *     write lock, unlink the cluster, check permissions for every queued
 *     request, issue one VOP_WRITE per run of contiguous requests, flush
 *     with VOP_PUTPAGE/VOP_FSYNC if anything was written, then fill in any
 *     still-pending statuses and cv_broadcast() the waiters.
 *   - Every early-exit path of the owner unlinks the cluster, sets a
 *     status on each queued request and broadcasts before returning.
 *   - T_WOULDBLOCK set on the owner thread is copied to the thread of
 *     each request it completes.
 */
1254 1291 /*
1255 1292 * Write data to file.
1256 1293 * Returns attributes of a file after writing some data to it.
1257 1294 */
1258 1295 void
1259 1296 rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns,
1260 1297 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
1261 1298 {
1262 1299 int error;
1263 1300 vnode_t *vp;
1264 1301 rlim64_t rlimit;
1265 1302 struct vattr va;
1266 1303 struct uio uio;
1267 1304 struct rfs_async_write_list *lp;
1268 1305 struct rfs_async_write_list *nlp;
1269 1306 struct rfs_async_write *rp;
1270 1307 struct rfs_async_write *nrp;
1271 1308 struct rfs_async_write *trp;
1272 1309 struct rfs_async_write *lrp;
1273 1310 int data_written;
1274 1311 int iovcnt;
1275 1312 mblk_t *m;
1276 1313 struct iovec *iovp;
1277 1314 struct iovec *niovp;
1278 1315 struct iovec iov[MAXCLIOVECS];
|
↓ open down ↓ |
123 lines elided |
↑ open up ↑ |
1279 1316 int count;
1280 1317 int rcount;
1281 1318 uint_t off;
1282 1319 uint_t len;
1283 1320 struct rfs_async_write nrpsp;
1284 1321 struct rfs_async_write_list nlpsp;
1285 1322 ushort_t t_flag;
1286 1323 cred_t *savecred;
1287 1324 int in_crit = 0;
1288 1325 caller_context_t ct;
1326 + nfs_srv_t *nsrv;
1289 1327
1290 - if (!rfs_write_async) {
1328 + ASSERT(exi == NULL || exi->exi_zoneid == curzone->zone_id);
1329 + nsrv = nfs_get_srv();
1330 + if (!nsrv->write_async) {
1291 1331 rfs_write_sync(wa, ns, exi, req, cr, ro);
1292 1332 return;
1293 1333 }
1294 1334
1295 1335 /*
1296 1336 * Initialize status to RFSWRITE_INITVAL instead of 0, since value of 0
1297 1337 * is considered an OK.
1298 1338 */
1299 1339 ns->ns_status = RFSWRITE_INITVAL;
1300 1340
1301 1341 nrp = &nrpsp;
1302 1342 nrp->wa = wa;
1303 1343 nrp->ns = ns;
1304 1344 nrp->req = req;
|
↓ open down ↓ |
4 lines elided |
↑ open up ↑ |
1305 1345 nrp->cr = cr;
1306 1346 nrp->ro = ro;
1307 1347 nrp->thread = curthread;
1308 1348
1309 1349 ASSERT(curthread->t_schedflag & TS_DONT_SWAP);
1310 1350
1311 1351 /*
1312 1352 * Look to see if there is already a cluster started
1313 1353 * for this file.
1314 1354 */
1315 - mutex_enter(&rfs_async_write_lock);
1316 - for (lp = rfs_async_write_head; lp != NULL; lp = lp->next) {
1355 + mutex_enter(&nsrv->async_write_lock);
1356 + for (lp = nsrv->async_write_head; lp != NULL; lp = lp->next) {
1317 1357 if (bcmp(&wa->wa_fhandle, lp->fhp,
1318 1358 sizeof (fhandle_t)) == 0)
1319 1359 break;
1320 1360 }
1321 1361
1322 1362 /*
1323 1363 * If lp is non-NULL, then there is already a cluster
1324 1364 * started. We need to place ourselves in the cluster
1325 1365 * list in the right place as determined by starting
1326 1366 * offset. Conflicts with non-blocking mandatory locked
1327 1367 * regions will be checked when the cluster is processed.
1328 1368 */
1329 1369 if (lp != NULL) {
1330 1370 rp = lp->list;
1331 1371 trp = NULL;
|
↓ open down ↓ |
5 lines elided |
↑ open up ↑ |
1332 1372 while (rp != NULL && rp->wa->wa_offset < wa->wa_offset) {
1333 1373 trp = rp;
1334 1374 rp = rp->list;
1335 1375 }
1336 1376 nrp->list = rp;
1337 1377 if (trp == NULL)
1338 1378 lp->list = nrp;
1339 1379 else
1340 1380 trp->list = nrp;
/* Sleep until the cluster owner replaces RFSWRITE_INITVAL with a real status. */
1341 1381 while (nrp->ns->ns_status == RFSWRITE_INITVAL)
1342 - cv_wait(&lp->cv, &rfs_async_write_lock);
1343 - mutex_exit(&rfs_async_write_lock);
1382 + cv_wait(&lp->cv, &nsrv->async_write_lock);
1383 + mutex_exit(&nsrv->async_write_lock);
1344 1384
1345 1385 return;
1346 1386 }
1347 1387
1348 1388 /*
1349 1389 * No cluster started yet, start one and add ourselves
1350 1390 * to the list of clusters.
1351 1391 */
1352 1392 nrp->list = NULL;
1353 1393
1354 1394 nlp = &nlpsp;
1355 1395 nlp->fhp = &wa->wa_fhandle;
1356 1396 cv_init(&nlp->cv, NULL, CV_DEFAULT, NULL);
1357 1397 nlp->list = nrp;
1358 1398 nlp->next = NULL;
1359 1399
1360 - if (rfs_async_write_head == NULL) {
1361 - rfs_async_write_head = nlp;
1400 + if (nsrv->async_write_head == NULL) {
1401 + nsrv->async_write_head = nlp;
1362 1402 } else {
1363 - lp = rfs_async_write_head;
1403 + lp = nsrv->async_write_head;
1364 1404 while (lp->next != NULL)
1365 1405 lp = lp->next;
1366 1406 lp->next = nlp;
1367 1407 }
1368 - mutex_exit(&rfs_async_write_lock);
1408 + mutex_exit(&nsrv->async_write_lock);
1369 1409
1370 1410 /*
1371 1411 * Convert the file handle common to all of the requests
1372 1412 * in this cluster to a vnode.
1373 1413 */
1374 1414 vp = nfs_fhtovp(&wa->wa_fhandle, exi);
1375 1415 if (vp == NULL) {
1376 - mutex_enter(&rfs_async_write_lock);
1377 - if (rfs_async_write_head == nlp)
1378 - rfs_async_write_head = nlp->next;
1416 + mutex_enter(&nsrv->async_write_lock);
1417 + if (nsrv->async_write_head == nlp)
1418 + nsrv->async_write_head = nlp->next;
1379 1419 else {
1380 - lp = rfs_async_write_head;
1420 + lp = nsrv->async_write_head;
1381 1421 while (lp->next != nlp)
1382 1422 lp = lp->next;
1383 1423 lp->next = nlp->next;
1384 1424 }
1385 1425 t_flag = curthread->t_flag & T_WOULDBLOCK;
1386 1426 for (rp = nlp->list; rp != NULL; rp = rp->list) {
1387 1427 rp->ns->ns_status = NFSERR_STALE;
1388 1428 rp->thread->t_flag |= t_flag;
1389 1429 }
1390 1430 cv_broadcast(&nlp->cv);
1391 - mutex_exit(&rfs_async_write_lock);
1431 + mutex_exit(&nsrv->async_write_lock);
1392 1432
1393 1433 return;
1394 1434 }
1395 1435
1396 1436 /*
1397 1437 * Can only write regular files. Attempts to write any
1398 1438 * other file types fail with EISDIR.
1399 1439 */
1400 1440 if (vp->v_type != VREG) {
1401 1441 VN_RELE(vp);
1402 - mutex_enter(&rfs_async_write_lock);
1403 - if (rfs_async_write_head == nlp)
1404 - rfs_async_write_head = nlp->next;
1442 + mutex_enter(&nsrv->async_write_lock);
1443 + if (nsrv->async_write_head == nlp)
1444 + nsrv->async_write_head = nlp->next;
1405 1445 else {
1406 - lp = rfs_async_write_head;
1446 + lp = nsrv->async_write_head;
1407 1447 while (lp->next != nlp)
1408 1448 lp = lp->next;
1409 1449 lp->next = nlp->next;
1410 1450 }
1411 1451 t_flag = curthread->t_flag & T_WOULDBLOCK;
1412 1452 for (rp = nlp->list; rp != NULL; rp = rp->list) {
1413 1453 rp->ns->ns_status = NFSERR_ISDIR;
1414 1454 rp->thread->t_flag |= t_flag;
1415 1455 }
1416 1456 cv_broadcast(&nlp->cv);
1417 - mutex_exit(&rfs_async_write_lock);
1457 + mutex_exit(&nsrv->async_write_lock);
1418 1458
1419 1459 return;
1420 1460 }
1421 1461
1422 1462 /*
1423 1463 * Enter the critical region before calling VOP_RWLOCK, to avoid a
1424 1464 * deadlock with ufs.
1425 1465 */
1426 1466 if (nbl_need_check(vp)) {
1427 1467 nbl_start_crit(vp, RW_READER);
1428 1468 in_crit = 1;
1429 1469 }
1430 1470
1431 1471 ct.cc_sysid = 0;
1432 1472 ct.cc_pid = 0;
1433 1473 ct.cc_caller_id = nfs2_srv_caller_id;
1434 1474 ct.cc_flags = CC_DONTBLOCK;
1435 1475
1436 1476 /*
1437 1477 * Lock the file for writing. This operation provides
1438 1478 * the delay which allows clusters to grow.
|
↓ open down ↓ |
11 lines elided |
↑ open up ↑ |
1439 1479 */
1440 1480 error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
1441 1481
1442 1482 /* check if a monitor detected a delegation conflict */
1443 1483 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1444 1484 if (in_crit)
1445 1485 nbl_end_crit(vp);
1446 1486 VN_RELE(vp);
1447 1487 /* mark as wouldblock so response is dropped */
1448 1488 curthread->t_flag |= T_WOULDBLOCK;
1449 - mutex_enter(&rfs_async_write_lock);
1450 - if (rfs_async_write_head == nlp)
1451 - rfs_async_write_head = nlp->next;
1489 + mutex_enter(&nsrv->async_write_lock);
1490 + if (nsrv->async_write_head == nlp)
1491 + nsrv->async_write_head = nlp->next;
1452 1492 else {
1453 - lp = rfs_async_write_head;
1493 + lp = nsrv->async_write_head;
1454 1494 while (lp->next != nlp)
1455 1495 lp = lp->next;
1456 1496 lp->next = nlp->next;
1457 1497 }
1458 1498 for (rp = nlp->list; rp != NULL; rp = rp->list) {
1459 1499 if (rp->ns->ns_status == RFSWRITE_INITVAL) {
1460 1500 rp->ns->ns_status = puterrno(error);
1461 1501 rp->thread->t_flag |= T_WOULDBLOCK;
1462 1502 }
1463 1503 }
1464 1504 cv_broadcast(&nlp->cv);
1465 - mutex_exit(&rfs_async_write_lock);
1505 + mutex_exit(&nsrv->async_write_lock);
1466 1506
1467 1507 return;
1468 1508 }
1469 1509
1470 1510 /*
1471 1511 * Disconnect this cluster from the list of clusters.
1472 1512 * The cluster that is being dealt with must be fixed
1473 1513 * in size after this point, so there is no reason
1474 1514 * to leave it on the list so that new requests can
1475 1515 * find it.
1476 1516 *
|
↓ open down ↓ |
1 lines elided |
↑ open up ↑ |
1477 1517 * The algorithm is that the first write request will
1478 1518 * create a cluster, convert the file handle to a
1479 1519 * vnode pointer, and then lock the file for writing.
1480 1520 * This request is not likely to be clustered with
1481 1521 * any others. However, the next request will create
1482 1522 * a new cluster and be blocked in VOP_RWLOCK while
1483 1523 * the first request is being processed. This delay
1484 1524 * will allow more requests to be clustered in this
1485 1525 * second cluster.
1486 1526 */
1487 - mutex_enter(&rfs_async_write_lock);
1488 - if (rfs_async_write_head == nlp)
1489 - rfs_async_write_head = nlp->next;
1527 + mutex_enter(&nsrv->async_write_lock);
1528 + if (nsrv->async_write_head == nlp)
1529 + nsrv->async_write_head = nlp->next;
1490 1530 else {
1491 - lp = rfs_async_write_head;
1531 + lp = nsrv->async_write_head;
1492 1532 while (lp->next != nlp)
1493 1533 lp = lp->next;
1494 1534 lp->next = nlp->next;
1495 1535 }
1496 - mutex_exit(&rfs_async_write_lock);
1536 + mutex_exit(&nsrv->async_write_lock);
1497 1537
1498 1538 /*
1499 1539 * Step through the list of requests in this cluster.
1500 1540 * We need to check permissions to make sure that all
1501 1541 * of the requests have sufficient permission to write
1502 1542 * the file. A cluster can be composed of requests
1503 1543 * from different clients and different users on each
1504 1544 * client.
1505 1545 *
1506 1546 * As a side effect, we also calculate the size of the
1507 1547 * byte range that this cluster encompasses.
1508 1548 */
1509 1549 rp = nlp->list;
1510 1550 off = rp->wa->wa_offset;
1511 1551 len = (uint_t)0;
/*
 * "continue" inside this do/while jumps to the loop condition, so a
 * rejected request still advances rp to the next list entry.
 */
1512 1552 do {
1513 1553 if (rdonly(rp->ro, vp)) {
1514 1554 rp->ns->ns_status = NFSERR_ROFS;
1515 1555 t_flag = curthread->t_flag & T_WOULDBLOCK;
1516 1556 rp->thread->t_flag |= t_flag;
1517 1557 continue;
1518 1558 }
1519 1559
1520 1560 va.va_mask = AT_UID|AT_MODE;
1521 1561
1522 1562 error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct);
1523 1563
1524 1564 if (!error) {
1525 1565 if (crgetuid(rp->cr) != va.va_uid) {
1526 1566 /*
1527 1567 * This is a kludge to allow writes of files
1528 1568 * created with read only permission. The
1529 1569 * owner of the file is always allowed to
1530 1570 * write it.
1531 1571 */
1532 1572 error = VOP_ACCESS(vp, VWRITE, 0, rp->cr, &ct);
1533 1573 }
1534 1574 if (!error && MANDLOCK(vp, va.va_mode))
1535 1575 error = EACCES;
1536 1576 }
1537 1577
1538 1578 /*
1539 1579 * Check for a conflict with a nbmand-locked region.
1540 1580 */
1541 1581 if (in_crit && nbl_conflict(vp, NBL_WRITE, rp->wa->wa_offset,
1542 1582 rp->wa->wa_count, 0, NULL)) {
1543 1583 error = EACCES;
1544 1584 }
1545 1585
1546 1586 if (error) {
1547 1587 rp->ns->ns_status = puterrno(error);
1548 1588 t_flag = curthread->t_flag & T_WOULDBLOCK;
1549 1589 rp->thread->t_flag |= t_flag;
1550 1590 continue;
1551 1591 }
1552 1592 if (len < rp->wa->wa_offset + rp->wa->wa_count - off)
1553 1593 len = rp->wa->wa_offset + rp->wa->wa_count - off;
1554 1594 } while ((rp = rp->list) != NULL);
1555 1595
1556 1596 /*
1557 1597 * Step through the cluster attempting to gather as many
1558 1598 * requests which are contiguous as possible. These
1559 1599 * contiguous requests are handled via one call to VOP_WRITE
1560 1600 * instead of different calls to VOP_WRITE. We also keep
1561 1601 * track of the fact that any data was written.
1562 1602 */
1563 1603 rp = nlp->list;
1564 1604 data_written = 0;
1565 1605 do {
1566 1606 /*
1567 1607 * Skip any requests which are already marked as having an
1568 1608 * error.
1569 1609 */
1570 1610 if (rp->ns->ns_status != RFSWRITE_INITVAL) {
1571 1611 rp = rp->list;
1572 1612 continue;
1573 1613 }
1574 1614
1575 1615 /*
1576 1616 * Count the number of iovec's which are required
1577 1617 * to handle this set of requests. One iovec is
1578 1618 * needed for each data buffer, whether addressed
1579 1619 * by wa_data or by the b_rptr pointers in the
1580 1620 * mblk chains.
1581 1621 */
1582 1622 iovcnt = 0;
1583 1623 lrp = rp;
/*
 * On exit from this loop lrp points one past the last request of the
 * contiguous run that starts at rp (NULL at the end of the cluster).
 */
1584 1624 for (;;) {
1585 1625 if (lrp->wa->wa_data || lrp->wa->wa_rlist)
1586 1626 iovcnt++;
1587 1627 else {
1588 1628 m = lrp->wa->wa_mblk;
1589 1629 while (m != NULL) {
1590 1630 iovcnt++;
1591 1631 m = m->b_cont;
1592 1632 }
1593 1633 }
1594 1634 if (lrp->list == NULL ||
1595 1635 lrp->list->ns->ns_status != RFSWRITE_INITVAL ||
1596 1636 lrp->wa->wa_offset + lrp->wa->wa_count !=
1597 1637 lrp->list->wa->wa_offset) {
1598 1638 lrp = lrp->list;
1599 1639 break;
1600 1640 }
1601 1641 lrp = lrp->list;
1602 1642 }
1603 1643
1604 1644 if (iovcnt <= MAXCLIOVECS) {
1605 1645 #ifdef DEBUG
1606 1646 rfs_write_hits++;
1607 1647 #endif
1608 1648 niovp = iov;
1609 1649 } else {
1610 1650 #ifdef DEBUG
1611 1651 rfs_write_misses++;
1612 1652 #endif
1613 1653 niovp = kmem_alloc(sizeof (*niovp) * iovcnt, KM_SLEEP);
1614 1654 }
1615 1655 /*
1616 1656 * Put together the scatter/gather iovecs.
1617 1657 */
1618 1658 iovp = niovp;
1619 1659 trp = rp;
1620 1660 count = 0;
1621 1661 do {
1622 1662 if (trp->wa->wa_data || trp->wa->wa_rlist) {
1623 1663 if (trp->wa->wa_rlist) {
1624 1664 iovp->iov_base =
1625 1665 (char *)((trp->wa->wa_rlist)->
1626 1666 u.c_daddr3);
1627 1667 iovp->iov_len = trp->wa->wa_count;
1628 1668 } else {
1629 1669 iovp->iov_base = trp->wa->wa_data;
1630 1670 iovp->iov_len = trp->wa->wa_count;
1631 1671 }
1632 1672 iovp++;
1633 1673 } else {
1634 1674 m = trp->wa->wa_mblk;
1635 1675 rcount = trp->wa->wa_count;
1636 1676 while (m != NULL) {
1637 1677 iovp->iov_base = (caddr_t)m->b_rptr;
1638 1678 iovp->iov_len = (m->b_wptr - m->b_rptr);
1639 1679 rcount -= iovp->iov_len;
1640 1680 if (rcount < 0)
1641 1681 iovp->iov_len += rcount;
1642 1682 iovp++;
1643 1683 if (rcount <= 0)
1644 1684 break;
1645 1685 m = m->b_cont;
1646 1686 }
1647 1687 }
1648 1688 count += trp->wa->wa_count;
1649 1689 trp = trp->list;
1650 1690 } while (trp != lrp);
1651 1691
1652 1692 uio.uio_iov = niovp;
1653 1693 uio.uio_iovcnt = iovcnt;
1654 1694 uio.uio_segflg = UIO_SYSSPACE;
1655 1695 uio.uio_extflg = UIO_COPY_DEFAULT;
1656 1696 uio.uio_loffset = (offset_t)rp->wa->wa_offset;
1657 1697 uio.uio_resid = count;
1658 1698 /*
1659 1699 * The limit is checked on the client. We
1660 1700 * should allow any size writes here.
1661 1701 */
1662 1702 uio.uio_llimit = curproc->p_fsz_ctl;
1663 1703 rlimit = uio.uio_llimit - rp->wa->wa_offset;
1664 1704 if (rlimit < (rlim64_t)uio.uio_resid)
1665 1705 uio.uio_resid = (uint_t)rlimit;
1666 1706
1667 1707 /*
1668 1708 * For now we assume no append mode.
1669 1709 */
1670 1710
1671 1711 /*
1672 1712 * We're changing creds because VM may fault
1673 1713 * and we need the cred of the current
1674 1714 * thread to be used if quota * checking is
1675 1715 * enabled.
1676 1716 */
/*
 * NOTE(review): t_cred is switched to the cluster owner's cr while
 * VOP_WRITE is passed rp->cr; confirm this is intended for clusters that
 * mix credentials.  The write uses ioflag 0 here; stability is provided
 * by the VOP_PUTPAGE/VOP_FSYNC calls after the loop.
 */
1677 1717 savecred = curthread->t_cred;
1678 1718 curthread->t_cred = cr;
1679 1719 error = VOP_WRITE(vp, &uio, 0, rp->cr, &ct);
1680 1720 curthread->t_cred = savecred;
1681 1721
1682 1722 /* check if a monitor detected a delegation conflict */
1683 1723 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
1684 1724 /* mark as wouldblock so response is dropped */
1685 1725 curthread->t_flag |= T_WOULDBLOCK;
1686 1726
1687 1727 if (niovp != iov)
1688 1728 kmem_free(niovp, sizeof (*niovp) * iovcnt);
1689 1729
1690 1730 if (!error) {
1691 1731 data_written = 1;
1692 1732 /*
1693 1733 * Get attributes again so we send the latest mod
1694 1734 * time to the client side for its cache.
1695 1735 */
1696 1736 va.va_mask = AT_ALL; /* now we want everything */
1697 1737
1698 1738 error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct);
1699 1739
1700 1740 if (!error)
1701 1741 acl_perm(vp, exi, &va, rp->cr);
1702 1742 }
1703 1743
1704 1744 /*
1705 1745 * Fill in the status responses for each request
1706 1746 * which was just handled. Also, copy the latest
1707 1747 * attributes in to the attribute responses if
1708 1748 * appropriate.
1709 1749 */
1710 1750 t_flag = curthread->t_flag & T_WOULDBLOCK;
1711 1751 do {
1712 1752 rp->thread->t_flag |= t_flag;
1713 1753 /* check for overflows */
1714 1754 if (!error) {
1715 1755 error = vattr_to_nattr(&va, &rp->ns->ns_attr);
1716 1756 }
1717 1757 rp->ns->ns_status = puterrno(error);
1718 1758 rp = rp->list;
1719 1759 } while (rp != lrp);
1720 1760 } while (rp != NULL);
1721 1761
1722 1762 /*
1723 1763 * If any data was written at all, then we need to flush
1724 1764 * the data and metadata to stable storage.
1725 1765 */
1726 1766 if (data_written) {
1727 1767 error = VOP_PUTPAGE(vp, (u_offset_t)off, len, 0, cr, &ct);
1728 1768
1729 1769 if (!error) {
1730 1770 error = VOP_FSYNC(vp, FNODSYNC, cr, &ct);
|
↓ open down ↓ |
224 lines elided |
↑ open up ↑ |
1731 1771 }
1732 1772 }
1733 1773
1734 1774 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
1735 1775
1736 1776 if (in_crit)
1737 1777 nbl_end_crit(vp);
1738 1778 VN_RELE(vp);
1739 1779
1740 1780 t_flag = curthread->t_flag & T_WOULDBLOCK;
1741 - mutex_enter(&rfs_async_write_lock);
1781 + mutex_enter(&nsrv->async_write_lock);
1742 1782 for (rp = nlp->list; rp != NULL; rp = rp->list) {
1743 1783 if (rp->ns->ns_status == RFSWRITE_INITVAL) {
1744 1784 rp->ns->ns_status = puterrno(error);
1745 1785 rp->thread->t_flag |= t_flag;
1746 1786 }
1747 1787 }
1748 1788 cv_broadcast(&nlp->cv);
1749 - mutex_exit(&rfs_async_write_lock);
1789 + mutex_exit(&nsrv->async_write_lock);
1750 1790
1751 1791 }
1752 1792
1753 1793 void *
1754 1794 rfs_write_getfh(struct nfswriteargs *wa)
1755 1795 {
1756 1796 return (&wa->wa_fhandle);
1757 1797 }
1758 1798
1759 1799 /*
1760 1800 * Create a file.
1761 1801 * Creates a file with given attributes and returns those attributes
1762 1802 * and an fhandle for the new file.
1763 1803 */
1764 1804 void
1765 1805 rfs_create(struct nfscreatargs *args, struct nfsdiropres *dr,
1766 1806 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
1767 1807 {
1768 1808 int error;
1769 1809 int lookuperr;
1770 1810 int in_crit = 0;
1771 1811 struct vattr va;
1772 1812 vnode_t *vp;
1773 1813 vnode_t *realvp;
1774 1814 vnode_t *dvp;
1775 1815 char *name = args->ca_da.da_name;
1776 1816 vnode_t *tvp = NULL;
1777 1817 int mode;
1778 1818 int lookup_ok;
1779 1819 bool_t trunc;
1780 1820 struct sockaddr *ca;
1781 1821
1782 1822 /*
1783 1823 * Disallow NULL paths
1784 1824 */
1785 1825 if (name == NULL || *name == '\0') {
1786 1826 dr->dr_status = NFSERR_ACCES;
1787 1827 return;
1788 1828 }
1789 1829
1790 1830 dvp = nfs_fhtovp(args->ca_da.da_fhandle, exi);
1791 1831 if (dvp == NULL) {
1792 1832 dr->dr_status = NFSERR_STALE;
1793 1833 return;
1794 1834 }
1795 1835
1796 1836 error = sattr_to_vattr(args->ca_sa, &va);
1797 1837 if (error) {
1798 1838 dr->dr_status = puterrno(error);
1799 1839 return;
1800 1840 }
1801 1841
1802 1842 /*
1803 1843 * Must specify the mode.
1804 1844 */
1805 1845 if (!(va.va_mask & AT_MODE)) {
1806 1846 VN_RELE(dvp);
1807 1847 dr->dr_status = NFSERR_INVAL;
1808 1848 return;
1809 1849 }
1810 1850
1811 1851 /*
1812 1852 * This is a completely gross hack to make mknod
1813 1853 * work over the wire until we can wack the protocol
1814 1854 */
1815 1855 if ((va.va_mode & IFMT) == IFCHR) {
1816 1856 if (args->ca_sa->sa_size == (uint_t)NFS_FIFO_DEV)
1817 1857 va.va_type = VFIFO; /* xtra kludge for named pipe */
1818 1858 else {
1819 1859 va.va_type = VCHR;
1820 1860 /*
1821 1861 * uncompress the received dev_t
1822 1862 * if the top half is zero indicating a request
1823 1863 * from an `older style' OS.
1824 1864 */
1825 1865 if ((va.va_size & 0xffff0000) == 0)
1826 1866 va.va_rdev = nfsv2_expdev(va.va_size);
1827 1867 else
1828 1868 va.va_rdev = (dev_t)va.va_size;
1829 1869 }
1830 1870 va.va_mask &= ~AT_SIZE;
1831 1871 } else if ((va.va_mode & IFMT) == IFBLK) {
1832 1872 va.va_type = VBLK;
1833 1873 /*
1834 1874 * uncompress the received dev_t
1835 1875 * if the top half is zero indicating a request
1836 1876 * from an `older style' OS.
1837 1877 */
1838 1878 if ((va.va_size & 0xffff0000) == 0)
1839 1879 va.va_rdev = nfsv2_expdev(va.va_size);
1840 1880 else
1841 1881 va.va_rdev = (dev_t)va.va_size;
1842 1882 va.va_mask &= ~AT_SIZE;
1843 1883 } else if ((va.va_mode & IFMT) == IFSOCK) {
1844 1884 va.va_type = VSOCK;
1845 1885 } else {
1846 1886 va.va_type = VREG;
1847 1887 }
1848 1888 va.va_mode &= ~IFMT;
1849 1889 va.va_mask |= AT_TYPE;
1850 1890
1851 1891 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1852 1892 name = nfscmd_convname(ca, exi, name, NFSCMD_CONV_INBOUND,
1853 1893 MAXPATHLEN);
1854 1894 if (name == NULL) {
1855 1895 dr->dr_status = puterrno(EINVAL);
1856 1896 return;
1857 1897 }
1858 1898
1859 1899 /*
1860 1900 * Why was the choice made to use VWRITE as the mode to the
1861 1901 * call to VOP_CREATE ? This results in a bug. When a client
1862 1902 * opens a file that already exists and is RDONLY, the second
1863 1903 * open fails with an EACESS because of the mode.
1864 1904 * bug ID 1054648.
1865 1905 */
1866 1906 lookup_ok = 0;
1867 1907 mode = VWRITE;
1868 1908 if (!(va.va_mask & AT_SIZE) || va.va_type != VREG) {
1869 1909 error = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr,
1870 1910 NULL, NULL, NULL);
1871 1911 if (!error) {
1872 1912 struct vattr at;
1873 1913
1874 1914 lookup_ok = 1;
1875 1915 at.va_mask = AT_MODE;
1876 1916 error = VOP_GETATTR(tvp, &at, 0, cr, NULL);
1877 1917 if (!error)
1878 1918 mode = (at.va_mode & S_IWUSR) ? VWRITE : VREAD;
1879 1919 VN_RELE(tvp);
1880 1920 tvp = NULL;
1881 1921 }
1882 1922 }
1883 1923
1884 1924 if (!lookup_ok) {
1885 1925 if (rdonly(ro, dvp)) {
1886 1926 error = EROFS;
1887 1927 } else if (va.va_type != VREG && va.va_type != VFIFO &&
1888 1928 va.va_type != VSOCK && secpolicy_sys_devices(cr) != 0) {
1889 1929 error = EPERM;
1890 1930 } else {
1891 1931 error = 0;
1892 1932 }
1893 1933 }
1894 1934
1895 1935 /*
1896 1936 * If file size is being modified on an already existing file
1897 1937 * make sure that there are no conflicting non-blocking mandatory
1898 1938 * locks in the region being manipulated. Return EACCES if there
1899 1939 * are conflicting locks.
1900 1940 */
1901 1941 if (!error && (va.va_type == VREG) && (va.va_mask & AT_SIZE)) {
1902 1942 lookuperr = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr,
1903 1943 NULL, NULL, NULL);
1904 1944
1905 1945 if (!lookuperr &&
1906 1946 rfs4_check_delegated(FWRITE, tvp, va.va_size == 0)) {
1907 1947 VN_RELE(tvp);
1908 1948 curthread->t_flag |= T_WOULDBLOCK;
1909 1949 goto out;
1910 1950 }
1911 1951
1912 1952 if (!lookuperr && nbl_need_check(tvp)) {
1913 1953 /*
1914 1954 * The file exists. Now check if it has any
1915 1955 * conflicting non-blocking mandatory locks
1916 1956 * in the region being changed.
1917 1957 */
1918 1958 struct vattr bva;
1919 1959 u_offset_t offset;
1920 1960 ssize_t length;
1921 1961
1922 1962 nbl_start_crit(tvp, RW_READER);
1923 1963 in_crit = 1;
1924 1964
1925 1965 bva.va_mask = AT_SIZE;
1926 1966 error = VOP_GETATTR(tvp, &bva, 0, cr, NULL);
1927 1967 if (!error) {
1928 1968 if (va.va_size < bva.va_size) {
1929 1969 offset = va.va_size;
1930 1970 length = bva.va_size - va.va_size;
1931 1971 } else {
1932 1972 offset = bva.va_size;
1933 1973 length = va.va_size - bva.va_size;
1934 1974 }
1935 1975 if (length) {
1936 1976 if (nbl_conflict(tvp, NBL_WRITE,
1937 1977 offset, length, 0, NULL)) {
1938 1978 error = EACCES;
1939 1979 }
1940 1980 }
1941 1981 }
1942 1982 if (error) {
1943 1983 nbl_end_crit(tvp);
1944 1984 VN_RELE(tvp);
1945 1985 in_crit = 0;
1946 1986 }
1947 1987 } else if (tvp != NULL) {
1948 1988 VN_RELE(tvp);
1949 1989 }
1950 1990 }
1951 1991
1952 1992 if (!error) {
1953 1993 /*
1954 1994 * If filesystem is shared with nosuid the remove any
1955 1995 * setuid/setgid bits on create.
1956 1996 */
1957 1997 if (va.va_type == VREG &&
1958 1998 exi->exi_export.ex_flags & EX_NOSUID)
1959 1999 va.va_mode &= ~(VSUID | VSGID);
1960 2000
1961 2001 error = VOP_CREATE(dvp, name, &va, NONEXCL, mode, &vp, cr, 0,
1962 2002 NULL, NULL);
1963 2003
1964 2004 if (!error) {
1965 2005
1966 2006 if ((va.va_mask & AT_SIZE) && (va.va_size == 0))
1967 2007 trunc = TRUE;
1968 2008 else
1969 2009 trunc = FALSE;
1970 2010
1971 2011 if (rfs4_check_delegated(FWRITE, vp, trunc)) {
1972 2012 VN_RELE(vp);
1973 2013 curthread->t_flag |= T_WOULDBLOCK;
1974 2014 goto out;
1975 2015 }
1976 2016 va.va_mask = AT_ALL;
1977 2017
1978 2018 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1979 2019
1980 2020 /* check for overflows */
1981 2021 if (!error) {
1982 2022 acl_perm(vp, exi, &va, cr);
1983 2023 error = vattr_to_nattr(&va, &dr->dr_attr);
1984 2024 if (!error) {
1985 2025 error = makefh(&dr->dr_fhandle, vp,
1986 2026 exi);
1987 2027 }
1988 2028 }
1989 2029 /*
1990 2030 * Force modified metadata out to stable storage.
1991 2031 *
1992 2032 * if a underlying vp exists, pass it to VOP_FSYNC
1993 2033 */
1994 2034 if (VOP_REALVP(vp, &realvp, NULL) == 0)
1995 2035 (void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL);
1996 2036 else
1997 2037 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
1998 2038 VN_RELE(vp);
1999 2039 }
2000 2040
2001 2041 if (in_crit) {
2002 2042 nbl_end_crit(tvp);
2003 2043 VN_RELE(tvp);
2004 2044 }
2005 2045 }
2006 2046
2007 2047 /*
2008 2048 * Force modified data and metadata out to stable storage.
2009 2049 */
2010 2050 (void) VOP_FSYNC(dvp, 0, cr, NULL);
2011 2051
2012 2052 out:
2013 2053
2014 2054 VN_RELE(dvp);
2015 2055
2016 2056 dr->dr_status = puterrno(error);
2017 2057
2018 2058 if (name != args->ca_da.da_name)
2019 2059 kmem_free(name, MAXPATHLEN);
2020 2060 }
2021 2061 void *
2022 2062 rfs_create_getfh(struct nfscreatargs *args)
2023 2063 {
2024 2064 return (args->ca_da.da_fhandle);
2025 2065 }
2026 2066
2027 2067 /*
2028 2068 * Remove a file.
2029 2069 * Remove named file from parent directory.
2030 2070 */
2031 2071 /* ARGSUSED */
2032 2072 void
2033 2073 rfs_remove(struct nfsdiropargs *da, enum nfsstat *status,
2034 2074 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2035 2075 {
2036 2076 int error = 0;
2037 2077 vnode_t *vp;
2038 2078 vnode_t *targvp;
2039 2079 int in_crit = 0;
2040 2080
2041 2081 /*
2042 2082 * Disallow NULL paths
2043 2083 */
2044 2084 if (da->da_name == NULL || *da->da_name == '\0') {
2045 2085 *status = NFSERR_ACCES;
2046 2086 return;
2047 2087 }
2048 2088
2049 2089 vp = nfs_fhtovp(da->da_fhandle, exi);
2050 2090 if (vp == NULL) {
2051 2091 *status = NFSERR_STALE;
2052 2092 return;
2053 2093 }
2054 2094
2055 2095 if (rdonly(ro, vp)) {
2056 2096 VN_RELE(vp);
2057 2097 *status = NFSERR_ROFS;
2058 2098 return;
2059 2099 }
2060 2100
2061 2101 /*
2062 2102 * Check for a conflict with a non-blocking mandatory share reservation.
2063 2103 */
2064 2104 error = VOP_LOOKUP(vp, da->da_name, &targvp, NULL, 0,
2065 2105 NULL, cr, NULL, NULL, NULL);
2066 2106 if (error != 0) {
2067 2107 VN_RELE(vp);
2068 2108 *status = puterrno(error);
2069 2109 return;
2070 2110 }
2071 2111
2072 2112 /*
2073 2113 * If the file is delegated to a v4 client, then initiate
2074 2114 * recall and drop this request (by setting T_WOULDBLOCK).
2075 2115 * The client will eventually re-transmit the request and
2076 2116 * (hopefully), by then, the v4 client will have returned
2077 2117 * the delegation.
2078 2118 */
2079 2119
2080 2120 if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2081 2121 VN_RELE(vp);
2082 2122 VN_RELE(targvp);
2083 2123 curthread->t_flag |= T_WOULDBLOCK;
2084 2124 return;
2085 2125 }
2086 2126
2087 2127 if (nbl_need_check(targvp)) {
2088 2128 nbl_start_crit(targvp, RW_READER);
2089 2129 in_crit = 1;
2090 2130 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
2091 2131 error = EACCES;
2092 2132 goto out;
2093 2133 }
2094 2134 }
2095 2135
2096 2136 error = VOP_REMOVE(vp, da->da_name, cr, NULL, 0);
2097 2137
2098 2138 /*
2099 2139 * Force modified data and metadata out to stable storage.
2100 2140 */
2101 2141 (void) VOP_FSYNC(vp, 0, cr, NULL);
2102 2142
2103 2143 out:
2104 2144 if (in_crit)
2105 2145 nbl_end_crit(targvp);
2106 2146 VN_RELE(targvp);
2107 2147 VN_RELE(vp);
2108 2148
2109 2149 *status = puterrno(error);
2110 2150
2111 2151 }
2112 2152
2113 2153 void *
2114 2154 rfs_remove_getfh(struct nfsdiropargs *da)
2115 2155 {
2116 2156 return (da->da_fhandle);
2117 2157 }
2118 2158
2119 2159 /*
2120 2160 * rename a file
2121 2161 * Give a file (from) a new name (to).
2122 2162 */
2123 2163 /* ARGSUSED */
2124 2164 void
2125 2165 rfs_rename(struct nfsrnmargs *args, enum nfsstat *status,
2126 2166 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2127 2167 {
2128 2168 int error = 0;
2129 2169 vnode_t *fromvp;
2130 2170 vnode_t *tovp;
2131 2171 struct exportinfo *to_exi;
2132 2172 fhandle_t *fh;
2133 2173 vnode_t *srcvp;
2134 2174 vnode_t *targvp;
2135 2175 int in_crit = 0;
2136 2176
2137 2177 fromvp = nfs_fhtovp(args->rna_from.da_fhandle, exi);
2138 2178 if (fromvp == NULL) {
2139 2179 *status = NFSERR_STALE;
2140 2180 return;
2141 2181 }
2142 2182
2143 2183 fh = args->rna_to.da_fhandle;
2144 2184 to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen);
2145 2185 if (to_exi == NULL) {
2146 2186 VN_RELE(fromvp);
2147 2187 *status = NFSERR_ACCES;
2148 2188 return;
2149 2189 }
2150 2190 exi_rele(to_exi);
2151 2191
2152 2192 if (to_exi != exi) {
2153 2193 VN_RELE(fromvp);
2154 2194 *status = NFSERR_XDEV;
2155 2195 return;
2156 2196 }
2157 2197
2158 2198 tovp = nfs_fhtovp(args->rna_to.da_fhandle, exi);
2159 2199 if (tovp == NULL) {
2160 2200 VN_RELE(fromvp);
2161 2201 *status = NFSERR_STALE;
2162 2202 return;
2163 2203 }
2164 2204
2165 2205 if (fromvp->v_type != VDIR || tovp->v_type != VDIR) {
2166 2206 VN_RELE(tovp);
2167 2207 VN_RELE(fromvp);
2168 2208 *status = NFSERR_NOTDIR;
2169 2209 return;
2170 2210 }
2171 2211
2172 2212 /*
2173 2213 * Disallow NULL paths
2174 2214 */
2175 2215 if (args->rna_from.da_name == NULL || *args->rna_from.da_name == '\0' ||
2176 2216 args->rna_to.da_name == NULL || *args->rna_to.da_name == '\0') {
2177 2217 VN_RELE(tovp);
2178 2218 VN_RELE(fromvp);
2179 2219 *status = NFSERR_ACCES;
2180 2220 return;
2181 2221 }
2182 2222
2183 2223 if (rdonly(ro, tovp)) {
2184 2224 VN_RELE(tovp);
2185 2225 VN_RELE(fromvp);
2186 2226 *status = NFSERR_ROFS;
2187 2227 return;
2188 2228 }
2189 2229
2190 2230 /*
2191 2231 * Check for a conflict with a non-blocking mandatory share reservation.
2192 2232 */
2193 2233 error = VOP_LOOKUP(fromvp, args->rna_from.da_name, &srcvp, NULL, 0,
2194 2234 NULL, cr, NULL, NULL, NULL);
2195 2235 if (error != 0) {
2196 2236 VN_RELE(tovp);
2197 2237 VN_RELE(fromvp);
2198 2238 *status = puterrno(error);
2199 2239 return;
2200 2240 }
2201 2241
2202 2242 /* Check for delegations on the source file */
2203 2243
|
↓ open down ↓ |
444 lines elided |
↑ open up ↑ |
2204 2244 if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
2205 2245 VN_RELE(tovp);
2206 2246 VN_RELE(fromvp);
2207 2247 VN_RELE(srcvp);
2208 2248 curthread->t_flag |= T_WOULDBLOCK;
2209 2249 return;
2210 2250 }
2211 2251
2212 2252 /* Check for delegation on the file being renamed over, if it exists */
2213 2253
2214 - if (rfs4_deleg_policy != SRV_NEVER_DELEGATE &&
2254 + if (nfs4_get_deleg_policy() != SRV_NEVER_DELEGATE &&
2215 2255 VOP_LOOKUP(tovp, args->rna_to.da_name, &targvp, NULL, 0, NULL, cr,
2216 2256 NULL, NULL, NULL) == 0) {
2217 2257
2218 2258 if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2219 2259 VN_RELE(tovp);
2220 2260 VN_RELE(fromvp);
2221 2261 VN_RELE(srcvp);
2222 2262 VN_RELE(targvp);
2223 2263 curthread->t_flag |= T_WOULDBLOCK;
2224 2264 return;
2225 2265 }
2226 2266 VN_RELE(targvp);
2227 2267 }
2228 2268
2229 2269
2230 2270 if (nbl_need_check(srcvp)) {
2231 2271 nbl_start_crit(srcvp, RW_READER);
2232 2272 in_crit = 1;
2233 2273 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
2234 2274 error = EACCES;
2235 2275 goto out;
2236 2276 }
2237 2277 }
2238 2278
2239 2279 error = VOP_RENAME(fromvp, args->rna_from.da_name,
2240 2280 tovp, args->rna_to.da_name, cr, NULL, 0);
2241 2281
2242 2282 if (error == 0)
2243 2283 vn_renamepath(tovp, srcvp, args->rna_to.da_name,
2244 2284 strlen(args->rna_to.da_name));
2245 2285
2246 2286 /*
2247 2287 * Force modified data and metadata out to stable storage.
2248 2288 */
2249 2289 (void) VOP_FSYNC(tovp, 0, cr, NULL);
2250 2290 (void) VOP_FSYNC(fromvp, 0, cr, NULL);
2251 2291
2252 2292 out:
2253 2293 if (in_crit)
2254 2294 nbl_end_crit(srcvp);
2255 2295 VN_RELE(srcvp);
2256 2296 VN_RELE(tovp);
2257 2297 VN_RELE(fromvp);
2258 2298
2259 2299 *status = puterrno(error);
2260 2300
2261 2301 }
2262 2302 void *
2263 2303 rfs_rename_getfh(struct nfsrnmargs *args)
2264 2304 {
2265 2305 return (args->rna_from.da_fhandle);
2266 2306 }
2267 2307
2268 2308 /*
2269 2309 * Link to a file.
2270 2310 * Create a file (to) which is a hard link to the given file (from).
2271 2311 */
2272 2312 /* ARGSUSED */
2273 2313 void
2274 2314 rfs_link(struct nfslinkargs *args, enum nfsstat *status,
2275 2315 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2276 2316 {
2277 2317 int error;
2278 2318 vnode_t *fromvp;
2279 2319 vnode_t *tovp;
2280 2320 struct exportinfo *to_exi;
2281 2321 fhandle_t *fh;
2282 2322
2283 2323 fromvp = nfs_fhtovp(args->la_from, exi);
2284 2324 if (fromvp == NULL) {
2285 2325 *status = NFSERR_STALE;
2286 2326 return;
2287 2327 }
2288 2328
2289 2329 fh = args->la_to.da_fhandle;
2290 2330 to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen);
2291 2331 if (to_exi == NULL) {
2292 2332 VN_RELE(fromvp);
2293 2333 *status = NFSERR_ACCES;
2294 2334 return;
2295 2335 }
2296 2336 exi_rele(to_exi);
2297 2337
2298 2338 if (to_exi != exi) {
2299 2339 VN_RELE(fromvp);
2300 2340 *status = NFSERR_XDEV;
2301 2341 return;
2302 2342 }
2303 2343
2304 2344 tovp = nfs_fhtovp(args->la_to.da_fhandle, exi);
2305 2345 if (tovp == NULL) {
2306 2346 VN_RELE(fromvp);
2307 2347 *status = NFSERR_STALE;
2308 2348 return;
2309 2349 }
2310 2350
2311 2351 if (tovp->v_type != VDIR) {
2312 2352 VN_RELE(tovp);
2313 2353 VN_RELE(fromvp);
2314 2354 *status = NFSERR_NOTDIR;
2315 2355 return;
2316 2356 }
2317 2357 /*
2318 2358 * Disallow NULL paths
2319 2359 */
2320 2360 if (args->la_to.da_name == NULL || *args->la_to.da_name == '\0') {
2321 2361 VN_RELE(tovp);
2322 2362 VN_RELE(fromvp);
2323 2363 *status = NFSERR_ACCES;
2324 2364 return;
2325 2365 }
2326 2366
2327 2367 if (rdonly(ro, tovp)) {
2328 2368 VN_RELE(tovp);
2329 2369 VN_RELE(fromvp);
2330 2370 *status = NFSERR_ROFS;
2331 2371 return;
2332 2372 }
2333 2373
2334 2374 error = VOP_LINK(tovp, fromvp, args->la_to.da_name, cr, NULL, 0);
2335 2375
2336 2376 /*
2337 2377 * Force modified data and metadata out to stable storage.
2338 2378 */
2339 2379 (void) VOP_FSYNC(tovp, 0, cr, NULL);
2340 2380 (void) VOP_FSYNC(fromvp, FNODSYNC, cr, NULL);
2341 2381
2342 2382 VN_RELE(tovp);
2343 2383 VN_RELE(fromvp);
2344 2384
2345 2385 *status = puterrno(error);
2346 2386
2347 2387 }
2348 2388 void *
2349 2389 rfs_link_getfh(struct nfslinkargs *args)
2350 2390 {
2351 2391 return (args->la_from);
2352 2392 }
2353 2393
2354 2394 /*
2355 2395 * Symbolically link to a file.
2356 2396 * Create a file (from) with the given attributes which is a symbolic link
2357 2397 * to the given path name (to).
2358 2398 */
2359 2399 void
2360 2400 rfs_symlink(struct nfsslargs *args, enum nfsstat *status,
2361 2401 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2362 2402 {
2363 2403 int error;
2364 2404 struct vattr va;
2365 2405 vnode_t *vp;
2366 2406 vnode_t *svp;
2367 2407 int lerror;
2368 2408 struct sockaddr *ca;
2369 2409 char *name = NULL;
2370 2410
2371 2411 /*
2372 2412 * Disallow NULL paths
2373 2413 */
2374 2414 if (args->sla_from.da_name == NULL || *args->sla_from.da_name == '\0') {
2375 2415 *status = NFSERR_ACCES;
2376 2416 return;
2377 2417 }
2378 2418
2379 2419 vp = nfs_fhtovp(args->sla_from.da_fhandle, exi);
2380 2420 if (vp == NULL) {
2381 2421 *status = NFSERR_STALE;
2382 2422 return;
2383 2423 }
2384 2424
2385 2425 if (rdonly(ro, vp)) {
2386 2426 VN_RELE(vp);
2387 2427 *status = NFSERR_ROFS;
2388 2428 return;
2389 2429 }
2390 2430
2391 2431 error = sattr_to_vattr(args->sla_sa, &va);
2392 2432 if (error) {
2393 2433 VN_RELE(vp);
2394 2434 *status = puterrno(error);
2395 2435 return;
2396 2436 }
2397 2437
2398 2438 if (!(va.va_mask & AT_MODE)) {
2399 2439 VN_RELE(vp);
2400 2440 *status = NFSERR_INVAL;
2401 2441 return;
2402 2442 }
2403 2443
2404 2444 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2405 2445 name = nfscmd_convname(ca, exi, args->sla_tnm,
2406 2446 NFSCMD_CONV_INBOUND, MAXPATHLEN);
2407 2447
2408 2448 if (name == NULL) {
2409 2449 *status = NFSERR_ACCES;
2410 2450 return;
2411 2451 }
2412 2452
2413 2453 va.va_type = VLNK;
2414 2454 va.va_mask |= AT_TYPE;
2415 2455
2416 2456 error = VOP_SYMLINK(vp, args->sla_from.da_name, &va, name, cr, NULL, 0);
2417 2457
2418 2458 /*
2419 2459 * Force new data and metadata out to stable storage.
2420 2460 */
2421 2461 lerror = VOP_LOOKUP(vp, args->sla_from.da_name, &svp, NULL, 0,
2422 2462 NULL, cr, NULL, NULL, NULL);
2423 2463
2424 2464 if (!lerror) {
2425 2465 (void) VOP_FSYNC(svp, 0, cr, NULL);
2426 2466 VN_RELE(svp);
2427 2467 }
2428 2468
2429 2469 /*
2430 2470 * Force modified data and metadata out to stable storage.
2431 2471 */
2432 2472 (void) VOP_FSYNC(vp, 0, cr, NULL);
2433 2473
2434 2474 VN_RELE(vp);
2435 2475
2436 2476 *status = puterrno(error);
2437 2477 if (name != args->sla_tnm)
2438 2478 kmem_free(name, MAXPATHLEN);
2439 2479
2440 2480 }
2441 2481 void *
2442 2482 rfs_symlink_getfh(struct nfsslargs *args)
2443 2483 {
2444 2484 return (args->sla_from.da_fhandle);
2445 2485 }
2446 2486
2447 2487 /*
2448 2488 * Make a directory.
2449 2489 * Create a directory with the given name, parent directory, and attributes.
2450 2490 * Returns a file handle and attributes for the new directory.
2451 2491 */
2452 2492 /* ARGSUSED */
2453 2493 void
2454 2494 rfs_mkdir(struct nfscreatargs *args, struct nfsdiropres *dr,
2455 2495 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2456 2496 {
2457 2497 int error;
2458 2498 struct vattr va;
2459 2499 vnode_t *dvp = NULL;
2460 2500 vnode_t *vp;
2461 2501 char *name = args->ca_da.da_name;
2462 2502
2463 2503 /*
2464 2504 * Disallow NULL paths
2465 2505 */
2466 2506 if (name == NULL || *name == '\0') {
2467 2507 dr->dr_status = NFSERR_ACCES;
2468 2508 return;
2469 2509 }
2470 2510
2471 2511 vp = nfs_fhtovp(args->ca_da.da_fhandle, exi);
2472 2512 if (vp == NULL) {
2473 2513 dr->dr_status = NFSERR_STALE;
2474 2514 return;
2475 2515 }
2476 2516
2477 2517 if (rdonly(ro, vp)) {
2478 2518 VN_RELE(vp);
2479 2519 dr->dr_status = NFSERR_ROFS;
2480 2520 return;
2481 2521 }
2482 2522
2483 2523 error = sattr_to_vattr(args->ca_sa, &va);
2484 2524 if (error) {
2485 2525 VN_RELE(vp);
2486 2526 dr->dr_status = puterrno(error);
2487 2527 return;
2488 2528 }
2489 2529
2490 2530 if (!(va.va_mask & AT_MODE)) {
2491 2531 VN_RELE(vp);
2492 2532 dr->dr_status = NFSERR_INVAL;
2493 2533 return;
2494 2534 }
2495 2535
2496 2536 va.va_type = VDIR;
2497 2537 va.va_mask |= AT_TYPE;
2498 2538
2499 2539 error = VOP_MKDIR(vp, name, &va, &dvp, cr, NULL, 0, NULL);
2500 2540
2501 2541 if (!error) {
2502 2542 /*
2503 2543 * Attributes of the newly created directory should
2504 2544 * be returned to the client.
2505 2545 */
2506 2546 va.va_mask = AT_ALL; /* We want everything */
2507 2547 error = VOP_GETATTR(dvp, &va, 0, cr, NULL);
2508 2548
2509 2549 /* check for overflows */
2510 2550 if (!error) {
2511 2551 acl_perm(vp, exi, &va, cr);
2512 2552 error = vattr_to_nattr(&va, &dr->dr_attr);
2513 2553 if (!error) {
2514 2554 error = makefh(&dr->dr_fhandle, dvp, exi);
2515 2555 }
2516 2556 }
2517 2557 /*
2518 2558 * Force new data and metadata out to stable storage.
2519 2559 */
2520 2560 (void) VOP_FSYNC(dvp, 0, cr, NULL);
2521 2561 VN_RELE(dvp);
2522 2562 }
2523 2563
2524 2564 /*
2525 2565 * Force modified data and metadata out to stable storage.
2526 2566 */
2527 2567 (void) VOP_FSYNC(vp, 0, cr, NULL);
2528 2568
2529 2569 VN_RELE(vp);
2530 2570
2531 2571 dr->dr_status = puterrno(error);
2532 2572
2533 2573 }
2534 2574 void *
2535 2575 rfs_mkdir_getfh(struct nfscreatargs *args)
2536 2576 {
2537 2577 return (args->ca_da.da_fhandle);
2538 2578 }
2539 2579
2540 2580 /*
2541 2581 * Remove a directory.
2542 2582 * Remove the given directory name from the given parent directory.
2543 2583 */
2544 2584 /* ARGSUSED */
2545 2585 void
2546 2586 rfs_rmdir(struct nfsdiropargs *da, enum nfsstat *status,
2547 2587 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2548 2588 {
2549 2589 int error;
2550 2590 vnode_t *vp;
2551 2591
2552 2592 /*
2553 2593 * Disallow NULL paths
2554 2594 */
2555 2595 if (da->da_name == NULL || *da->da_name == '\0') {
2556 2596 *status = NFSERR_ACCES;
2557 2597 return;
2558 2598 }
2559 2599
2560 2600 vp = nfs_fhtovp(da->da_fhandle, exi);
2561 2601 if (vp == NULL) {
2562 2602 *status = NFSERR_STALE;
2563 2603 return;
2564 2604 }
2565 2605
2566 2606 if (rdonly(ro, vp)) {
2567 2607 VN_RELE(vp);
2568 2608 *status = NFSERR_ROFS;
2569 2609 return;
2570 2610 }
|
↓ open down ↓ |
346 lines elided |
↑ open up ↑ |
2571 2611
2572 2612 /*
2573 2613 * VOP_RMDIR takes a third argument (the current
2574 2614 * directory of the process). That's because someone
2575 2615 * wants to return EINVAL if one tries to remove ".".
2576 2616 * Of course, NFS servers have no idea what their
2577 2617 * clients' current directories are. We fake it by
2578 2618 * supplying a vnode known to exist and illegal to
2579 2619 * remove.
2580 2620 */
2581 - error = VOP_RMDIR(vp, da->da_name, rootdir, cr, NULL, 0);
2621 + error = VOP_RMDIR(vp, da->da_name, ZONE_ROOTVP(), cr, NULL, 0);
2582 2622
2583 2623 /*
2584 2624 * Force modified data and metadata out to stable storage.
2585 2625 */
2586 2626 (void) VOP_FSYNC(vp, 0, cr, NULL);
2587 2627
2588 2628 VN_RELE(vp);
2589 2629
2590 2630 /*
2591 2631 * System V defines rmdir to return EEXIST, not ENOTEMPTY,
2592 2632 * if the directory is not empty. A System V NFS server
2593 2633 * needs to map NFSERR_EXIST to NFSERR_NOTEMPTY to transmit
2594 2634 * over the wire.
2595 2635 */
2596 2636 if (error == EEXIST)
2597 2637 *status = NFSERR_NOTEMPTY;
2598 2638 else
2599 2639 *status = puterrno(error);
2600 2640
2601 2641 }
2602 2642 void *
2603 2643 rfs_rmdir_getfh(struct nfsdiropargs *da)
2604 2644 {
2605 2645 return (da->da_fhandle);
2606 2646 }
2607 2647
2608 2648 /* ARGSUSED */
2609 2649 void
2610 2650 rfs_readdir(struct nfsrddirargs *rda, struct nfsrddirres *rd,
2611 2651 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2612 2652 {
2613 2653 int error;
2614 2654 int iseof;
2615 2655 struct iovec iov;
2616 2656 struct uio uio;
2617 2657 vnode_t *vp;
2618 2658 char *ndata = NULL;
2619 2659 struct sockaddr *ca;
2620 2660 size_t nents;
2621 2661 int ret;
2622 2662
2623 2663 vp = nfs_fhtovp(&rda->rda_fh, exi);
2624 2664 if (vp == NULL) {
2625 2665 rd->rd_entries = NULL;
2626 2666 rd->rd_status = NFSERR_STALE;
2627 2667 return;
2628 2668 }
2629 2669
2630 2670 if (vp->v_type != VDIR) {
2631 2671 VN_RELE(vp);
2632 2672 rd->rd_entries = NULL;
2633 2673 rd->rd_status = NFSERR_NOTDIR;
2634 2674 return;
2635 2675 }
2636 2676
2637 2677 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
2638 2678
2639 2679 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
2640 2680
2641 2681 if (error) {
2642 2682 rd->rd_entries = NULL;
2643 2683 goto bad;
2644 2684 }
2645 2685
2646 2686 if (rda->rda_count == 0) {
2647 2687 rd->rd_entries = NULL;
2648 2688 rd->rd_size = 0;
2649 2689 rd->rd_eof = FALSE;
2650 2690 goto bad;
2651 2691 }
2652 2692
2653 2693 rda->rda_count = MIN(rda->rda_count, NFS_MAXDATA);
2654 2694
2655 2695 /*
2656 2696 * Allocate data for entries. This will be freed by rfs_rddirfree.
2657 2697 */
2658 2698 rd->rd_bufsize = (uint_t)rda->rda_count;
2659 2699 rd->rd_entries = kmem_alloc(rd->rd_bufsize, KM_SLEEP);
2660 2700
2661 2701 /*
2662 2702 * Set up io vector to read directory data
2663 2703 */
2664 2704 iov.iov_base = (caddr_t)rd->rd_entries;
2665 2705 iov.iov_len = rda->rda_count;
2666 2706 uio.uio_iov = &iov;
2667 2707 uio.uio_iovcnt = 1;
2668 2708 uio.uio_segflg = UIO_SYSSPACE;
2669 2709 uio.uio_extflg = UIO_COPY_CACHED;
2670 2710 uio.uio_loffset = (offset_t)rda->rda_offset;
2671 2711 uio.uio_resid = rda->rda_count;
2672 2712
2673 2713 /*
2674 2714 * read directory
2675 2715 */
2676 2716 error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
2677 2717
2678 2718 /*
2679 2719 * Clean up
2680 2720 */
2681 2721 if (!error) {
2682 2722 /*
2683 2723 * set size and eof
2684 2724 */
2685 2725 if (uio.uio_resid == rda->rda_count) {
2686 2726 rd->rd_size = 0;
2687 2727 rd->rd_eof = TRUE;
2688 2728 } else {
2689 2729 rd->rd_size = (uint32_t)(rda->rda_count -
2690 2730 uio.uio_resid);
2691 2731 rd->rd_eof = iseof ? TRUE : FALSE;
2692 2732 }
2693 2733 }
2694 2734
2695 2735 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2696 2736 nents = nfscmd_countents((char *)rd->rd_entries, rd->rd_size);
2697 2737 ret = nfscmd_convdirplus(ca, exi, (char *)rd->rd_entries, nents,
2698 2738 rda->rda_count, &ndata);
2699 2739
2700 2740 if (ret != 0) {
2701 2741 size_t dropbytes;
2702 2742 /*
2703 2743 * We had to drop one or more entries in order to fit
2704 2744 * during the character conversion. We need to patch
2705 2745 * up the size and eof info.
2706 2746 */
2707 2747 if (rd->rd_eof)
2708 2748 rd->rd_eof = FALSE;
2709 2749 dropbytes = nfscmd_dropped_entrysize(
2710 2750 (struct dirent64 *)rd->rd_entries, nents, ret);
2711 2751 rd->rd_size -= dropbytes;
2712 2752 }
2713 2753 if (ndata == NULL) {
2714 2754 ndata = (char *)rd->rd_entries;
2715 2755 } else if (ndata != (char *)rd->rd_entries) {
2716 2756 kmem_free(rd->rd_entries, rd->rd_bufsize);
2717 2757 rd->rd_entries = (void *)ndata;
2718 2758 rd->rd_bufsize = rda->rda_count;
2719 2759 }
2720 2760
2721 2761 bad:
2722 2762 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
2723 2763
2724 2764 #if 0 /* notyet */
2725 2765 /*
2726 2766 * Don't do this. It causes local disk writes when just
2727 2767 * reading the file and the overhead is deemed larger
2728 2768 * than the benefit.
2729 2769 */
2730 2770 /*
2731 2771 * Force modified metadata out to stable storage.
2732 2772 */
2733 2773 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
2734 2774 #endif
2735 2775
2736 2776 VN_RELE(vp);
2737 2777
2738 2778 rd->rd_status = puterrno(error);
2739 2779
2740 2780 }
2741 2781 void *
2742 2782 rfs_readdir_getfh(struct nfsrddirargs *rda)
2743 2783 {
2744 2784 return (&rda->rda_fh);
2745 2785 }
2746 2786 void
2747 2787 rfs_rddirfree(struct nfsrddirres *rd)
2748 2788 {
2749 2789 if (rd->rd_entries != NULL)
2750 2790 kmem_free(rd->rd_entries, rd->rd_bufsize);
2751 2791 }
2752 2792
2753 2793 /* ARGSUSED */
2754 2794 void
2755 2795 rfs_statfs(fhandle_t *fh, struct nfsstatfs *fs, struct exportinfo *exi,
2756 2796 struct svc_req *req, cred_t *cr, bool_t ro)
2757 2797 {
2758 2798 int error;
2759 2799 struct statvfs64 sb;
2760 2800 vnode_t *vp;
2761 2801
2762 2802 vp = nfs_fhtovp(fh, exi);
2763 2803 if (vp == NULL) {
2764 2804 fs->fs_status = NFSERR_STALE;
2765 2805 return;
2766 2806 }
2767 2807
2768 2808 error = VFS_STATVFS(vp->v_vfsp, &sb);
2769 2809
2770 2810 if (!error) {
2771 2811 fs->fs_tsize = nfstsize();
2772 2812 fs->fs_bsize = sb.f_frsize;
2773 2813 fs->fs_blocks = sb.f_blocks;
2774 2814 fs->fs_bfree = sb.f_bfree;
2775 2815 fs->fs_bavail = sb.f_bavail;
2776 2816 }
2777 2817
2778 2818 VN_RELE(vp);
2779 2819
2780 2820 fs->fs_status = puterrno(error);
2781 2821
2782 2822 }
2783 2823 void *
2784 2824 rfs_statfs_getfh(fhandle_t *fh)
2785 2825 {
2786 2826 return (fh);
2787 2827 }
2788 2828
2789 2829 static int
2790 2830 sattr_to_vattr(struct nfssattr *sa, struct vattr *vap)
2791 2831 {
2792 2832 vap->va_mask = 0;
2793 2833
2794 2834 /*
2795 2835 * There was a sign extension bug in some VFS based systems
2796 2836 * which stored the mode as a short. When it would get
2797 2837 * assigned to a u_long, no sign extension would occur.
2798 2838 * It needed to, but this wasn't noticed because sa_mode
2799 2839 * would then get assigned back to the short, thus ignoring
2800 2840 * the upper 16 bits of sa_mode.
2801 2841 *
2802 2842 * To make this implementation work for both broken
2803 2843 * clients and good clients, we check for both versions
2804 2844 * of the mode.
2805 2845 */
2806 2846 if (sa->sa_mode != (uint32_t)((ushort_t)-1) &&
2807 2847 sa->sa_mode != (uint32_t)-1) {
2808 2848 vap->va_mask |= AT_MODE;
2809 2849 vap->va_mode = sa->sa_mode;
2810 2850 }
2811 2851 if (sa->sa_uid != (uint32_t)-1) {
2812 2852 vap->va_mask |= AT_UID;
2813 2853 vap->va_uid = sa->sa_uid;
2814 2854 }
2815 2855 if (sa->sa_gid != (uint32_t)-1) {
2816 2856 vap->va_mask |= AT_GID;
2817 2857 vap->va_gid = sa->sa_gid;
2818 2858 }
2819 2859 if (sa->sa_size != (uint32_t)-1) {
2820 2860 vap->va_mask |= AT_SIZE;
2821 2861 vap->va_size = sa->sa_size;
2822 2862 }
2823 2863 if (sa->sa_atime.tv_sec != (int32_t)-1 &&
2824 2864 sa->sa_atime.tv_usec != (int32_t)-1) {
2825 2865 #ifndef _LP64
2826 2866 /* return error if time overflow */
2827 2867 if (!NFS2_TIME_OK(sa->sa_atime.tv_sec))
2828 2868 return (EOVERFLOW);
2829 2869 #endif
2830 2870 vap->va_mask |= AT_ATIME;
2831 2871 /*
2832 2872 * nfs protocol defines times as unsigned so don't extend sign,
2833 2873 * unless sysadmin set nfs_allow_preepoch_time.
2834 2874 */
2835 2875 NFS_TIME_T_CONVERT(vap->va_atime.tv_sec, sa->sa_atime.tv_sec);
2836 2876 vap->va_atime.tv_nsec = (uint32_t)(sa->sa_atime.tv_usec * 1000);
2837 2877 }
2838 2878 if (sa->sa_mtime.tv_sec != (int32_t)-1 &&
2839 2879 sa->sa_mtime.tv_usec != (int32_t)-1) {
2840 2880 #ifndef _LP64
2841 2881 /* return error if time overflow */
2842 2882 if (!NFS2_TIME_OK(sa->sa_mtime.tv_sec))
2843 2883 return (EOVERFLOW);
2844 2884 #endif
2845 2885 vap->va_mask |= AT_MTIME;
|
↓ open down ↓ |
254 lines elided |
↑ open up ↑ |
2846 2886 /*
2847 2887 * nfs protocol defines times as unsigned so don't extend sign,
2848 2888 * unless sysadmin set nfs_allow_preepoch_time.
2849 2889 */
2850 2890 NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec, sa->sa_mtime.tv_sec);
2851 2891 vap->va_mtime.tv_nsec = (uint32_t)(sa->sa_mtime.tv_usec * 1000);
2852 2892 }
2853 2893 return (0);
2854 2894 }
2855 2895
2856 -static enum nfsftype vt_to_nf[] = {
2896 +static const enum nfsftype vt_to_nf[] = {
2857 2897 0, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, 0, 0, 0, NFSOC, 0
2858 2898 };
2859 2899
2860 2900 /*
2861 2901 * check the following fields for overflow: nodeid, size, and time.
2862 2902 * There could be a problem when converting 64-bit LP64 fields
2863 2903 * into 32-bit ones. Return an error if there is an overflow.
2864 2904 */
2865 2905 int
2866 2906 vattr_to_nattr(struct vattr *vap, struct nfsfattr *na)
2867 2907 {
2868 2908 ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
2869 2909 na->na_type = vt_to_nf[vap->va_type];
2870 2910
2871 2911 if (vap->va_mode == (unsigned short) -1)
2872 2912 na->na_mode = (uint32_t)-1;
2873 2913 else
2874 2914 na->na_mode = VTTOIF(vap->va_type) | vap->va_mode;
2875 2915
2876 2916 if (vap->va_uid == (unsigned short)(-1))
2877 2917 na->na_uid = (uint32_t)(-1);
2878 2918 else if (vap->va_uid == UID_NOBODY)
2879 2919 na->na_uid = (uint32_t)NFS_UID_NOBODY;
2880 2920 else
2881 2921 na->na_uid = vap->va_uid;
2882 2922
2883 2923 if (vap->va_gid == (unsigned short)(-1))
2884 2924 na->na_gid = (uint32_t)-1;
2885 2925 else if (vap->va_gid == GID_NOBODY)
2886 2926 na->na_gid = (uint32_t)NFS_GID_NOBODY;
2887 2927 else
2888 2928 na->na_gid = vap->va_gid;
2889 2929
2890 2930 /*
2891 2931 * Do we need to check fsid for overflow? It is 64-bit in the
2892 2932 * vattr, but are bigger than 32 bit values supported?
2893 2933 */
2894 2934 na->na_fsid = vap->va_fsid;
2895 2935
2896 2936 na->na_nodeid = vap->va_nodeid;
2897 2937
2898 2938 /*
2899 2939 * Check to make sure that the nodeid is representable over the
2900 2940 * wire without losing bits.
2901 2941 */
2902 2942 if (vap->va_nodeid != (u_longlong_t)na->na_nodeid)
2903 2943 return (EFBIG);
2904 2944 na->na_nlink = vap->va_nlink;
2905 2945
2906 2946 /*
2907 2947 * Check for big files here, instead of at the caller. See
2908 2948 * comments in cstat for large special file explanation.
2909 2949 */
2910 2950 if (vap->va_size > (u_longlong_t)MAXOFF32_T) {
2911 2951 if ((vap->va_type == VREG) || (vap->va_type == VDIR))
2912 2952 return (EFBIG);
2913 2953 if ((vap->va_type == VBLK) || (vap->va_type == VCHR)) {
2914 2954 /* UNKNOWN_SIZE | OVERFLOW */
2915 2955 na->na_size = MAXOFF32_T;
2916 2956 } else
2917 2957 na->na_size = vap->va_size;
2918 2958 } else
2919 2959 na->na_size = vap->va_size;
2920 2960
2921 2961 /*
2922 2962 * If the vnode times overflow the 32-bit times that NFS2
2923 2963 * uses on the wire then return an error.
2924 2964 */
2925 2965 if (!NFS_VAP_TIME_OK(vap)) {
2926 2966 return (EOVERFLOW);
2927 2967 }
2928 2968 na->na_atime.tv_sec = vap->va_atime.tv_sec;
2929 2969 na->na_atime.tv_usec = vap->va_atime.tv_nsec / 1000;
2930 2970
2931 2971 na->na_mtime.tv_sec = vap->va_mtime.tv_sec;
2932 2972 na->na_mtime.tv_usec = vap->va_mtime.tv_nsec / 1000;
2933 2973
2934 2974 na->na_ctime.tv_sec = vap->va_ctime.tv_sec;
2935 2975 na->na_ctime.tv_usec = vap->va_ctime.tv_nsec / 1000;
2936 2976
2937 2977 /*
2938 2978 * If the dev_t will fit into 16 bits then compress
2939 2979 * it, otherwise leave it alone. See comments in
2940 2980 * nfs_client.c.
2941 2981 */
2942 2982 if (getminor(vap->va_rdev) <= SO4_MAXMIN &&
2943 2983 getmajor(vap->va_rdev) <= SO4_MAXMAJ)
2944 2984 na->na_rdev = nfsv2_cmpdev(vap->va_rdev);
2945 2985 else
2946 2986 (void) cmpldev(&na->na_rdev, vap->va_rdev);
2947 2987
2948 2988 na->na_blocks = vap->va_nblocks;
2949 2989 na->na_blocksize = vap->va_blksize;
2950 2990
2951 2991 /*
2952 2992 * This bit of ugliness is a *TEMPORARY* hack to preserve the
2953 2993 * over-the-wire protocols for named-pipe vnodes. It remaps the
2954 2994 * VFIFO type to the special over-the-wire type. (see note in nfs.h)
2955 2995 *
2956 2996 * BUYER BEWARE:
2957 2997 * If you are porting the NFS to a non-Sun server, you probably
2958 2998 * don't want to include the following block of code. The
2959 2999 * over-the-wire special file types will be changing with the
2960 3000 * NFS Protocol Revision.
2961 3001 */
2962 3002 if (vap->va_type == VFIFO)
2963 3003 NA_SETFIFO(na);
2964 3004 return (0);
2965 3005 }
2966 3006
2967 3007 /*
2968 3008 * acl v2 support: returns approximate permission.
2969 3009 * default: returns minimal permission (more restrictive)
2970 3010 * aclok: returns maximal permission (less restrictive)
2971 3011 * This routine changes the permissions that are already in *va.
2972 3012 * If a file has minimal ACL, i.e. aclcnt == MIN_ACL_ENTRIES,
2973 3013 * CLASS_OBJ is always the same as GROUP_OBJ entry.
2974 3014 */
2975 3015 static void
2976 3016 acl_perm(struct vnode *vp, struct exportinfo *exi, struct vattr *va, cred_t *cr)
2977 3017 {
2978 3018 vsecattr_t vsa;
2979 3019 int aclcnt;
2980 3020 aclent_t *aclentp;
2981 3021 mode_t mask_perm;
2982 3022 mode_t grp_perm;
2983 3023 mode_t other_perm;
2984 3024 mode_t other_orig;
2985 3025 int error;
2986 3026
2987 3027 /* don't care about the default ACL */
2988 3028 vsa.vsa_mask = (VSA_ACL | VSA_ACLCNT);
2989 3029 error = VOP_GETSECATTR(vp, &vsa, 0, cr, NULL);
2990 3030
2991 3031 if (!error) {
2992 3032 aclcnt = vsa.vsa_aclcnt;
2993 3033 if (aclcnt > MIN_ACL_ENTRIES) {
2994 3034 /* non-trivial ACL */
2995 3035 aclentp = vsa.vsa_aclentp;
2996 3036 if (exi->exi_export.ex_flags & EX_ACLOK) {
2997 3037 /* maximal permissions */
2998 3038 grp_perm = 0;
2999 3039 other_perm = 0;
3000 3040 for (; aclcnt > 0; aclcnt--, aclentp++) {
3001 3041 switch (aclentp->a_type) {
3002 3042 case USER_OBJ:
3003 3043 break;
3004 3044 case USER:
3005 3045 grp_perm |=
3006 3046 aclentp->a_perm << 3;
3007 3047 other_perm |= aclentp->a_perm;
3008 3048 break;
3009 3049 case GROUP_OBJ:
3010 3050 grp_perm |=
3011 3051 aclentp->a_perm << 3;
3012 3052 break;
3013 3053 case GROUP:
3014 3054 other_perm |= aclentp->a_perm;
3015 3055 break;
3016 3056 case OTHER_OBJ:
3017 3057 other_orig = aclentp->a_perm;
3018 3058 break;
3019 3059 case CLASS_OBJ:
3020 3060 mask_perm = aclentp->a_perm;
3021 3061 break;
3022 3062 default:
3023 3063 break;
3024 3064 }
3025 3065 }
3026 3066 grp_perm &= mask_perm << 3;
3027 3067 other_perm &= mask_perm;
3028 3068 other_perm |= other_orig;
3029 3069
3030 3070 } else {
3031 3071 /* minimal permissions */
3032 3072 grp_perm = 070;
3033 3073 other_perm = 07;
3034 3074 for (; aclcnt > 0; aclcnt--, aclentp++) {
3035 3075 switch (aclentp->a_type) {
3036 3076 case USER_OBJ:
3037 3077 break;
3038 3078 case USER:
3039 3079 case CLASS_OBJ:
3040 3080 grp_perm &=
3041 3081 aclentp->a_perm << 3;
3042 3082 other_perm &=
3043 3083 aclentp->a_perm;
3044 3084 break;
3045 3085 case GROUP_OBJ:
3046 3086 grp_perm &=
3047 3087 aclentp->a_perm << 3;
3048 3088 break;
3049 3089 case GROUP:
3050 3090 other_perm &=
3051 3091 aclentp->a_perm;
3052 3092 break;
3053 3093 case OTHER_OBJ:
3054 3094 other_perm &=
3055 3095 aclentp->a_perm;
3056 3096 break;
3057 3097 default:
3058 3098 break;
3059 3099 }
3060 3100 }
3061 3101 }
3062 3102 /* copy to va */
3063 3103 va->va_mode &= ~077;
3064 3104 va->va_mode |= grp_perm | other_perm;
|
↓ open down ↓ |
198 lines elided |
↑ open up ↑ |
3065 3105 }
3066 3106 if (vsa.vsa_aclcnt)
3067 3107 kmem_free(vsa.vsa_aclentp,
3068 3108 vsa.vsa_aclcnt * sizeof (aclent_t));
3069 3109 }
3070 3110 }
3071 3111
3072 3112 void
3073 3113 rfs_srvrinit(void)
3074 3114 {
3075 - mutex_init(&rfs_async_write_lock, NULL, MUTEX_DEFAULT, NULL);
3076 3115 nfs2_srv_caller_id = fs_new_caller_id();
3077 3116 }
3078 3117
3079 3118 void
3080 3119 rfs_srvrfini(void)
3081 3120 {
3082 - mutex_destroy(&rfs_async_write_lock);
3083 3121 }
3084 3122
3123 +/* ARGSUSED */
3124 +void
3125 +rfs_srv_zone_init(nfs_globals_t *ng)
3126 +{
3127 + nfs_srv_t *ns;
3128 +
3129 + ns = kmem_zalloc(sizeof (*ns), KM_SLEEP);
3130 +
3131 + mutex_init(&ns->async_write_lock, NULL, MUTEX_DEFAULT, NULL);
3132 + ns->write_async = 1;
3133 +
3134 + ng->nfs_srv = ns;
3135 +}
3136 +
3137 +/* ARGSUSED */
3138 +void
3139 +rfs_srv_zone_fini(nfs_globals_t *ng)
3140 +{
3141 + nfs_srv_t *ns = ng->nfs_srv;
3142 +
3143 + ng->nfs_srv = NULL;
3144 +
3145 + mutex_destroy(&ns->async_write_lock);
3146 + kmem_free(ns, sizeof (*ns));
3147 +}
3148 +
3085 3149 static int
3086 3150 rdma_setup_read_data2(struct nfsreadargs *ra, struct nfsrdresult *rr)
3087 3151 {
3088 3152 struct clist *wcl;
3089 3153 int wlist_len;
3090 3154 uint32_t count = rr->rr_count;
3091 3155
3092 3156 wcl = ra->ra_wlist;
3093 3157
3094 3158 if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
3095 3159 return (FALSE);
3096 3160 }
3097 3161
3098 3162 wcl = ra->ra_wlist;
3099 3163 rr->rr_ok.rrok_wlist_len = wlist_len;
3100 3164 rr->rr_ok.rrok_wlist = wcl;
3101 3165
3102 3166 return (TRUE);
3103 3167 }
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX