3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
24 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
25 * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
26 */
27
28 /*
29 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
30 * All Rights Reserved
31 */
32
33 #include <sys/param.h>
34 #include <sys/types.h>
35 #include <sys/systm.h>
36 #include <sys/cred.h>
37 #include <sys/buf.h>
38 #include <sys/vfs.h>
39 #include <sys/vfs_opreg.h>
40 #include <sys/vnode.h>
41 #include <sys/uio.h>
42 #include <sys/errno.h>
43 #include <sys/sysmacros.h>
44 #include <sys/statvfs.h>
45 #include <sys/kmem.h>
46 #include <sys/dirent.h>
47 #include <sys/cmn_err.h>
48 #include <sys/debug.h>
49 #include <sys/systeminfo.h>
50 #include <sys/flock.h>
51 #include <sys/pathname.h>
52 #include <sys/nbmlock.h>
53 #include <sys/share.h>
54 #include <sys/atomic.h>
55 #include <sys/policy.h>
56 #include <sys/fem.h>
57 #include <sys/sdt.h>
58 #include <sys/ddi.h>
59 #include <sys/zone.h>
60
61 #include <fs/fs_reparse.h>
62
63 #include <rpc/types.h>
64 #include <rpc/auth.h>
65 #include <rpc/rpcsec_gss.h>
66 #include <rpc/svc.h>
67
68 #include <nfs/nfs.h>
69 #include <nfs/export.h>
70 #include <nfs/nfs_cmd.h>
71 #include <nfs/lm.h>
72 #include <nfs/nfs4.h>
73
74 #include <sys/strsubr.h>
75 #include <sys/strsun.h>
76
77 #include <inet/common.h>
78 #include <inet/ip.h>
79 #include <inet/ip6.h>
80
81 #include <sys/tsol/label.h>
82 #include <sys/tsol/tndb.h>
83
/*
 * Tunables.  Each compile-time default below is mirrored into a static
 * variable with the corresponding lower-case name.
 */
#define RFS4_MAXLOCK_TRIES 4 /* Try to get the lock this many times */
static int rfs4_maxlock_tries = RFS4_MAXLOCK_TRIES;
#define RFS4_LOCK_DELAY 10 /* Milliseconds */
static clock_t rfs4_lock_delay = RFS4_LOCK_DELAY;
/* Defined outside this file. */
extern struct svc_ops rdma_svc_ops;
extern int nfs_loaned_buffers;
/* End of Tunables */
91
92 static int rdma_setup_read_data4(READ4args *, READ4res *);
/*
 * Minimum-length constants for READDIR.
 *
 * NOTE(review): the 4-byte terms in RFS4_MINLEN_RDDIR4 presumably stand for
 * individual XDR words of the reply surrounding one minimal entry -- confirm
 * against the READDIR4 result layout.
 *
 * RFS4_MINLEN_RDDIR_BUF sums the dirent64 record lengths for arguments 1, 2
 * and MAXNAMELEN (presumably ".", ".." and one maximum-length name -- TODO
 * confirm).
 */
#define RFS4_MINLEN_RDDIR4 (4 + NFS4_VERIFIER_SIZE + 4 + RFS4_MINLEN_ENTRY4 + 4)
#define RFS4_MINLEN_RDDIR_BUF \
	(DIRENT64_RECLEN(1) + DIRENT64_RECLEN(2) + DIRENT64_RECLEN(MAXNAMELEN))
133
/*
 * It would be better to pad to 4 bytes since that's what XDR would do,
 * but the dirents UFS gives us are already padded to 8, so just take
 * what we're given.  Dircount is only a hint anyway.  Currently the
 * Solaris kernel is ASCII only, so there's no point in calling the
 * UTF8 functions.
 *
 * dirent64: name padded to provide 8 byte struct alignment
 *	d_ino(8) + d_off(8) + d_reclen(2) + d_name(namelen + null(1) + pad)
 *
 * cookie: uint64_t + utf8namelen: uint_t + utf8name padded to 8 bytes
 *
 * The macro therefore charges three XDR units (cookie plus name length)
 * in addition to the name bytes derived from the entry's d_reclen.
 */
#define DIRENT64_TO_DIRCOUNT(dp) \
	(3 * BYTES_PER_XDR_UNIT + DIRENT64_NAMELEN((dp)->d_reclen))
149
/*
 * NOTE(review): the rfs4_srvrinit() in this file does not assign
 * rfs4_start_time; confirm where it is actually initialized.
 */
time_t rfs4_start_time; /* Initialized in rfs4_srvrinit */

/* Allocated with lm_alloc_sysidt() in rfs4_srvrinit(). */
static sysid_t lockt_sysid; /* dummy sysid for all LOCKT calls */

/* Obtained from fs_new_caller_id() in rfs4_srvrinit(). */
u_longlong_t nfs4_srv_caller_id;
/* Key registered with vsd_create() in rfs4_srvrinit(). */
uint_t nfs4_srv_vkey = 0;

/* Write verifier: computed in rfs4_srvrinit(), returned by rfs4_op_commit(). */
verifier4 Write4verf;
verifier4 Readdir4verf;
159
160 void rfs4_init_compound_state(struct compound_state *);
161
162 static void nullfree(caddr_t);
163 static void rfs4_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
164 struct compound_state *);
165 static void rfs4_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
166 struct compound_state *);
167 static void rfs4_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
168 struct compound_state *);
169 static void rfs4_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
170 struct compound_state *);
171 static void rfs4_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
172 struct compound_state *);
173 static void rfs4_op_create_free(nfs_resop4 *resop);
174 static void rfs4_op_delegreturn(nfs_argop4 *, nfs_resop4 *,
175 struct svc_req *, struct compound_state *);
176 static void rfs4_op_delegpurge(nfs_argop4 *, nfs_resop4 *,
177 struct svc_req *, struct compound_state *);
178 static void rfs4_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
179 struct compound_state *);
228 static void rfs4_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
229 struct compound_state *);
230 static void rfs4_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
231 struct compound_state *);
232 static void rfs4_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
233 struct compound_state *);
234 static void rfs4_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
235 struct compound_state *);
236 static void rfs4_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
237 struct compound_state *);
238 static void rfs4_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
239 struct compound_state *);
240 static void rfs4_op_setclientid(nfs_argop4 *, nfs_resop4 *,
241 struct svc_req *, struct compound_state *);
242 static void rfs4_op_setclientid_confirm(nfs_argop4 *, nfs_resop4 *,
243 struct svc_req *req, struct compound_state *);
244 static void rfs4_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
245 struct compound_state *);
246 static void rfs4_op_secinfo_free(nfs_resop4 *);
247
248 static nfsstat4 check_open_access(uint32_t,
249 struct compound_state *, struct svc_req *);
250 nfsstat4 rfs4_client_sysid(rfs4_client_t *, sysid_t *);
251 void rfs4_ss_clid(rfs4_client_t *);
252
/*
 * translation table for attrs
 *
 * Instances are handed to nfs4_ntov_table_init(), nfs4_ntov_table_free()
 * and do_rfs4_set_attrs(), all declared in this file.
 */
struct nfs4_ntov_table {
	union nfs4_attr_u *na;		/* attribute values */
	/* NOTE(review): presumably maps each na[] slot to an attr index */
	uint8_t amap[NFS4_MAXNUM_ATTRS];
	int attrcnt;			/* presumably entries used -- confirm */
	bool_t vfsstat;
};
262
263 static void nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp);
264 static void nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
265 struct nfs4_svgetit_arg *sargp);
266
267 static nfsstat4 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp,
268 struct compound_state *cs, struct nfs4_svgetit_arg *sargp,
269 struct nfs4_ntov_table *ntovp, nfs4_attr_cmd_t cmd);
270
/*
 * FEM monitors built by fem_create() in rfs4_srvrinit() from
 * nfs4_rd_deleg_tmpl and nfs4_wr_deleg_tmpl respectively, and released
 * in rfs4_srvrfini().
 */
fem_t *deleg_rdops;
fem_t *deleg_wrops;

/*
 * Server instances are chained through their prev/next pointers;
 * rfs4_cur_servinst is the most recently created one (see
 * rfs4_servinst_create()).
 */
rfs4_servinst_t *rfs4_cur_servinst = NULL; /* current server instance */
kmutex_t rfs4_servinst_lock; /* protects linked list */
int rfs4_seen_first_compound; /* set first time we see one */
277
/*
 * NFS4 op dispatch table
 *
 * One rfsv4disp entry describes a single operation: the handler, the
 * routine that releases whatever the handler allocated for its result,
 * and a set of RPC_* flags.  The function pointers are deliberately left
 * unprototyped; the routines stored in rfsv4disptab do not all share one
 * signature (e.g. nullfree() takes a caddr_t).
 */

struct rfsv4disp {
	void (*dis_proc)(); /* proc to call */
	void (*dis_resfree)(); /* frees space allocated by proc */
	int dis_flags; /* RPC_IDEMPOTENT, etc... */
};
287
/*
 * Dispatch table; each initializer is { dis_proc, dis_resfree, dis_flags }.
 * The comment in front of every entry gives the operation and the array
 * index it occupies.
 */
static struct rfsv4disp rfsv4disptab[] = {
	/*
	 * NFS VERSION 4
	 */

	/* RFS_NULL = 0 */
	{rfs4_op_illegal, nullfree, 0},

	/* UNUSED = 1 */
	{rfs4_op_illegal, nullfree, 0},

	/* UNUSED = 2 */
	{rfs4_op_illegal, nullfree, 0},

	/* OP_ACCESS = 3 */
	{rfs4_op_access, nullfree, RPC_IDEMPOTENT},

	/* OP_CLOSE = 4 */
	{rfs4_op_close, nullfree, 0},

	/* OP_COMMIT = 5 */
	{rfs4_op_commit, nullfree, RPC_IDEMPOTENT},

	/* OP_CREATE = 6 */
	{rfs4_op_create, nullfree, 0},

	/* OP_DELEGPURGE = 7 */
	{rfs4_op_delegpurge, nullfree, 0},

	/* OP_DELEGRETURN = 8 */
	{rfs4_op_delegreturn, nullfree, 0},

	/* OP_GETATTR = 9 */
	{rfs4_op_getattr, rfs4_op_getattr_free, RPC_IDEMPOTENT},

	/* OP_GETFH = 10 */
	{rfs4_op_getfh, rfs4_op_getfh_free, RPC_ALL},

	/* OP_LINK = 11 */
	{rfs4_op_link, nullfree, 0},

	/* OP_LOCK = 12 */
	{rfs4_op_lock, lock_denied_free, 0},

	/* OP_LOCKT = 13 */
	{rfs4_op_lockt, lock_denied_free, 0},

	/* OP_LOCKU = 14 */
	{rfs4_op_locku, nullfree, 0},

	/* OP_LOOKUP = 15 */
	{rfs4_op_lookup, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},

	/* OP_LOOKUPP = 16 */
	{rfs4_op_lookupp, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},

	/* OP_NVERIFY = 17 */
	{rfs4_op_nverify, nullfree, RPC_IDEMPOTENT},

	/* OP_OPEN = 18 */
	{rfs4_op_open, rfs4_free_reply, 0},

	/* OP_OPENATTR = 19 */
	{rfs4_op_openattr, nullfree, 0},

	/* OP_OPEN_CONFIRM = 20 */
	{rfs4_op_open_confirm, nullfree, 0},

	/* OP_OPEN_DOWNGRADE = 21 */
	{rfs4_op_open_downgrade, nullfree, 0},

	/* OP_PUTFH = 22 */
	{rfs4_op_putfh, nullfree, RPC_ALL},

	/* OP_PUTPUBFH = 23 */
	{rfs4_op_putpubfh, nullfree, RPC_ALL},

	/* OP_PUTROOTFH = 24 */
	{rfs4_op_putrootfh, nullfree, RPC_ALL},

	/* OP_READ = 25 */
	{rfs4_op_read, rfs4_op_read_free, RPC_IDEMPOTENT},

	/* OP_READDIR = 26 */
	{rfs4_op_readdir, rfs4_op_readdir_free, RPC_IDEMPOTENT},

	/* OP_READLINK = 27 */
	{rfs4_op_readlink, rfs4_op_readlink_free, RPC_IDEMPOTENT},

	/* OP_REMOVE = 28 */
	{rfs4_op_remove, nullfree, 0},

	/* OP_RENAME = 29 */
	{rfs4_op_rename, nullfree, 0},

	/* OP_RENEW = 30 */
	{rfs4_op_renew, nullfree, 0},

	/* OP_RESTOREFH = 31 */
	{rfs4_op_restorefh, nullfree, RPC_ALL},

	/* OP_SAVEFH = 32 */
	{rfs4_op_savefh, nullfree, RPC_ALL},

	/* OP_SECINFO = 33 */
	{rfs4_op_secinfo, rfs4_op_secinfo_free, 0},

	/* OP_SETATTR = 34 */
	{rfs4_op_setattr, nullfree, 0},

	/* OP_SETCLIENTID = 35 */
	{rfs4_op_setclientid, nullfree, 0},

	/* OP_SETCLIENTID_CONFIRM = 36 */
	{rfs4_op_setclientid_confirm, nullfree, 0},

	/* OP_VERIFY = 37 */
	{rfs4_op_verify, nullfree, RPC_IDEMPOTENT},

	/* OP_WRITE = 38 */
	{rfs4_op_write, nullfree, 0},

	/* OP_RELEASE_LOCKOWNER = 39 */
	{rfs4_op_release_lockowner, nullfree, 0},
};
413
/* Number of entries in rfsv4disptab. */
static uint_t rfsv4disp_cnt = sizeof (rfsv4disptab) / sizeof (rfsv4disptab[0]);

/*
 * One past the last valid rfsv4disptab index.
 * NOTE(review): presumably the slot used for OP_ILLEGAL in per-op arrays
 * such as rfs4_op_string -- confirm against the users of this macro.
 */
#define OP_ILLEGAL_IDX (rfsv4disp_cnt)
417
418 #ifdef DEBUG
419
420 int rfs4_fillone_debug = 0;
421 int rfs4_no_stub_access = 1;
422 int rfs4_rddir_debug = 0;
423
424 static char *rfs4_op_string[] = {
425 "rfs4_op_null",
426 "rfs4_op_1 unused",
427 "rfs4_op_2 unused",
428 "rfs4_op_access",
429 "rfs4_op_close",
430 "rfs4_op_commit",
431 "rfs4_op_create",
449 "rfs4_op_putrootfh",
450 "rfs4_op_read",
451 "rfs4_op_readdir",
452 "rfs4_op_readlink",
453 "rfs4_op_remove",
454 "rfs4_op_rename",
455 "rfs4_op_renew",
456 "rfs4_op_restorefh",
457 "rfs4_op_savefh",
458 "rfs4_op_secinfo",
459 "rfs4_op_setattr",
460 "rfs4_op_setclientid",
461 "rfs4_op_setclient_confirm",
462 "rfs4_op_verify",
463 "rfs4_op_write",
464 "rfs4_op_release_lockowner",
465 "rfs4_op_illegal"
466 };
467 #endif
468
469 void rfs4_ss_chkclid(rfs4_client_t *);
470
471 extern size_t strlcpy(char *dst, const char *src, size_t dstsize);
472
473 extern void rfs4_free_fs_locations4(fs_locations4 *);
474
/*
 * nextdp(dp): pointer to the dirent64 that starts d_reclen bytes after dp.
 * Any earlier definition of the macro is discarded first.
 */
#ifdef nextdp
#undef nextdp
#endif
#define nextdp(dp) ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
479
/*
 * Operation template handed to fem_create() in rfs4_srvrinit() to build
 * deleg_rdops.  Each pair names a vnode operation and the deleg_rd_*
 * routine installed for it; the NULL pair terminates the list.
 */
static const fs_operation_def_t nfs4_rd_deleg_tmpl[] = {
	VOPNAME_OPEN,		{ .femop_open = deleg_rd_open },
	VOPNAME_WRITE,		{ .femop_write = deleg_rd_write },
	VOPNAME_SETATTR,	{ .femop_setattr = deleg_rd_setattr },
	VOPNAME_RWLOCK,		{ .femop_rwlock = deleg_rd_rwlock },
	VOPNAME_SPACE,		{ .femop_space = deleg_rd_space },
	VOPNAME_SETSECATTR,	{ .femop_setsecattr = deleg_rd_setsecattr },
	VOPNAME_VNEVENT,	{ .femop_vnevent = deleg_rd_vnevent },
	NULL,			NULL
};
/*
 * Operation template handed to fem_create() in rfs4_srvrinit() to build
 * deleg_wrops.  Unlike the read template it also covers VOPNAME_READ.
 * The NULL pair terminates the list.
 */
static const fs_operation_def_t nfs4_wr_deleg_tmpl[] = {
	VOPNAME_OPEN,		{ .femop_open = deleg_wr_open },
	VOPNAME_READ,		{ .femop_read = deleg_wr_read },
	VOPNAME_WRITE,		{ .femop_write = deleg_wr_write },
	VOPNAME_SETATTR,	{ .femop_setattr = deleg_wr_setattr },
	VOPNAME_RWLOCK,		{ .femop_rwlock = deleg_wr_rwlock },
	VOPNAME_SPACE,		{ .femop_space = deleg_wr_space },
	VOPNAME_SETSECATTR,	{ .femop_setsecattr = deleg_wr_setsecattr },
	VOPNAME_VNEVENT,	{ .femop_vnevent = deleg_wr_vnevent },
	NULL,			NULL
};
501
502 int
503 rfs4_srvrinit(void)
504 {
505 timespec32_t verf;
506 int error;
507 extern void rfs4_attr_init();
508 extern krwlock_t rfs4_deleg_policy_lock;
509
510 /*
511 * The following algorithm attempts to find a unique verifier
512 * to be used as the write verifier returned from the server
513 * to the client. It is important that this verifier change
514 * whenever the server reboots. Of secondary importance, it
515 * is important for the verifier to be unique between two
516 * different servers.
517 *
518 * Thus, an attempt is made to use the system hostid and the
519 * current time in seconds when the nfssrv kernel module is
520 * loaded. It is assumed that an NFS server will not be able
521 * to boot and then to reboot in less than a second. If the
522 * hostid has not been set, then the current high resolution
523 * time is used. This will ensure different verifiers each
524 * time the server reboots and minimize the chances that two
525 * different servers will have the same verifier.
526 * XXX - this is broken on LP64 kernels.
527 */
528 verf.tv_sec = (time_t)zone_get_hostid(NULL);
529 if (verf.tv_sec != 0) {
530 verf.tv_nsec = gethrestime_sec();
531 } else {
532 timespec_t tverf;
533
534 gethrestime(&tverf);
535 verf.tv_sec = (time_t)tverf.tv_sec;
536 verf.tv_nsec = tverf.tv_nsec;
537 }
538
539 Write4verf = *(uint64_t *)&verf;
540
541 rfs4_attr_init();
542 mutex_init(&rfs4_deleg_lock, NULL, MUTEX_DEFAULT, NULL);
543
544 /* Used to manage create/destroy of server state */
545 mutex_init(&rfs4_state_lock, NULL, MUTEX_DEFAULT, NULL);
546
547 /* Used to manage access to server instance linked list */
548 mutex_init(&rfs4_servinst_lock, NULL, MUTEX_DEFAULT, NULL);
549
550 /* Used to manage access to rfs4_deleg_policy */
551 rw_init(&rfs4_deleg_policy_lock, NULL, RW_DEFAULT, NULL);
552
553 error = fem_create("deleg_rdops", nfs4_rd_deleg_tmpl, &deleg_rdops);
554 if (error != 0) {
555 rfs4_disable_delegation();
556 } else {
557 error = fem_create("deleg_wrops", nfs4_wr_deleg_tmpl,
558 &deleg_wrops);
559 if (error != 0) {
560 rfs4_disable_delegation();
561 fem_free(deleg_rdops);
562 }
563 }
564
565 nfs4_srv_caller_id = fs_new_caller_id();
566
567 lockt_sysid = lm_alloc_sysidt();
568
569 vsd_create(&nfs4_srv_vkey, NULL);
570
571 return (0);
572 }
573
574 void
575 rfs4_srvrfini(void)
576 {
577 extern krwlock_t rfs4_deleg_policy_lock;
578
579 if (lockt_sysid != LM_NOSYSID) {
580 lm_free_sysidt(lockt_sysid);
581 lockt_sysid = LM_NOSYSID;
582 }
583
584 mutex_destroy(&rfs4_deleg_lock);
585 mutex_destroy(&rfs4_state_lock);
586 rw_destroy(&rfs4_deleg_policy_lock);
587
588 fem_free(deleg_rdops);
589 fem_free(deleg_wrops);
590 }
591
592 void
593 rfs4_init_compound_state(struct compound_state *cs)
594 {
595 bzero(cs, sizeof (*cs));
596 cs->cont = TRUE;
597 cs->access = CS_ACCESS_DENIED;
598 cs->deleg = FALSE;
599 cs->mandlock = FALSE;
600 cs->fh.nfs_fh4_val = cs->fhbuf;
601 }
602
603 void
604 rfs4_grace_start(rfs4_servinst_t *sip)
605 {
606 rw_enter(&sip->rwlock, RW_WRITER);
607 sip->start_time = (time_t)TICK_TO_SEC(ddi_get_lbolt());
608 sip->grace_period = rfs4_grace_period;
609 rw_exit(&sip->rwlock);
610 }
611
612 /*
613 * returns true if the instance's grace period has never been started
614 */
615 int
616 rfs4_servinst_grace_new(rfs4_servinst_t *sip)
617 {
618 time_t start_time;
619
620 rw_enter(&sip->rwlock, RW_READER);
635
636 rw_enter(&sip->rwlock, RW_READER);
637 grace_expiry = sip->start_time + sip->grace_period;
638 rw_exit(&sip->rwlock);
639
640 return (((time_t)TICK_TO_SEC(ddi_get_lbolt())) < grace_expiry);
641 }
642
643 int
644 rfs4_clnt_in_grace(rfs4_client_t *cp)
645 {
646 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
647
648 return (rfs4_servinst_in_grace(cp->rc_server_instance));
649 }
650
651 /*
652 * reset all currently active grace periods
653 */
654 void
655 rfs4_grace_reset_all(void)
656 {
657 rfs4_servinst_t *sip;
658
659 mutex_enter(&rfs4_servinst_lock);
660 for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev)
661 if (rfs4_servinst_in_grace(sip))
662 rfs4_grace_start(sip);
663 mutex_exit(&rfs4_servinst_lock);
664 }
665
666 /*
667 * start any new instances' grace periods
668 */
669 void
670 rfs4_grace_start_new(void)
671 {
672 rfs4_servinst_t *sip;
673
674 mutex_enter(&rfs4_servinst_lock);
675 for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev)
676 if (rfs4_servinst_grace_new(sip))
677 rfs4_grace_start(sip);
678 mutex_exit(&rfs4_servinst_lock);
679 }
680
681 static rfs4_dss_path_t *
682 rfs4_dss_newpath(rfs4_servinst_t *sip, char *path, unsigned index)
683 {
684 size_t len;
685 rfs4_dss_path_t *dss_path;
686
687 dss_path = kmem_alloc(sizeof (rfs4_dss_path_t), KM_SLEEP);
688
689 /*
690 * Take a copy of the string, since the original may be overwritten.
691 * Sadly, no strdup() in the kernel.
692 */
693 /* allow for NUL */
694 len = strlen(path) + 1;
695 dss_path->path = kmem_alloc(len, KM_SLEEP);
696 (void) strlcpy(dss_path->path, path, len);
697
698 /* associate with servinst */
699 dss_path->sip = sip;
700 dss_path->index = index;
701
702 /*
703 * Add to list of served paths.
704 * No locking required, as we're only ever called at startup.
705 */
706 if (rfs4_dss_pathlist == NULL) {
707 /* this is the first dss_path_t */
708
709 /* needed for insque/remque */
710 dss_path->next = dss_path->prev = dss_path;
711
712 rfs4_dss_pathlist = dss_path;
713 } else {
714 insque(dss_path, rfs4_dss_pathlist);
715 }
716
717 return (dss_path);
718 }
719
720 /*
721 * Create a new server instance, and make it the currently active instance.
722 * Note that starting the grace period too early will reduce the clients'
723 * recovery window.
724 */
725 void
726 rfs4_servinst_create(int start_grace, int dss_npaths, char **dss_paths)
727 {
728 unsigned i;
729 rfs4_servinst_t *sip;
730 rfs4_oldstate_t *oldstate;
731
732 sip = kmem_alloc(sizeof (rfs4_servinst_t), KM_SLEEP);
733 rw_init(&sip->rwlock, NULL, RW_DEFAULT, NULL);
734
735 sip->start_time = (time_t)0;
736 sip->grace_period = (time_t)0;
737 sip->next = NULL;
738 sip->prev = NULL;
739
740 rw_init(&sip->oldstate_lock, NULL, RW_DEFAULT, NULL);
741 /*
742 * This initial dummy entry is required to setup for insque/remque.
743 * It must be skipped over whenever the list is traversed.
744 */
745 oldstate = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP);
746 /* insque/remque require initial list entry to be self-terminated */
747 oldstate->next = oldstate;
748 oldstate->prev = oldstate;
749 sip->oldstate = oldstate;
750
751
752 sip->dss_npaths = dss_npaths;
753 sip->dss_paths = kmem_alloc(dss_npaths *
754 sizeof (rfs4_dss_path_t *), KM_SLEEP);
755
756 for (i = 0; i < dss_npaths; i++) {
757 sip->dss_paths[i] = rfs4_dss_newpath(sip, dss_paths[i], i);
758 }
759
760 mutex_enter(&rfs4_servinst_lock);
761 if (rfs4_cur_servinst != NULL) {
762 /* add to linked list */
763 sip->prev = rfs4_cur_servinst;
764 rfs4_cur_servinst->next = sip;
765 }
766 if (start_grace)
767 rfs4_grace_start(sip);
768 /* make the new instance "current" */
769 rfs4_cur_servinst = sip;
770
771 mutex_exit(&rfs4_servinst_lock);
772 }
773
774 /*
775 * In future, we might add a rfs4_servinst_destroy(sip) but, for now, destroy
776 * all instances directly.
777 */
778 void
779 rfs4_servinst_destroy_all(void)
780 {
781 rfs4_servinst_t *sip, *prev, *current;
782 #ifdef DEBUG
783 int n = 0;
784 #endif
785
786 mutex_enter(&rfs4_servinst_lock);
787 ASSERT(rfs4_cur_servinst != NULL);
788 current = rfs4_cur_servinst;
789 rfs4_cur_servinst = NULL;
790 for (sip = current; sip != NULL; sip = prev) {
791 prev = sip->prev;
792 rw_destroy(&sip->rwlock);
793 if (sip->oldstate)
794 kmem_free(sip->oldstate, sizeof (rfs4_oldstate_t));
795 if (sip->dss_paths)
796 kmem_free(sip->dss_paths,
797 sip->dss_npaths * sizeof (rfs4_dss_path_t *));
798 kmem_free(sip, sizeof (rfs4_servinst_t));
799 #ifdef DEBUG
800 n++;
801 #endif
802 }
803 mutex_exit(&rfs4_servinst_lock);
804 }
805
806 /*
807 * Assign the current server instance to a client_t.
808 * Should be called with cp->rc_dbe held.
809 */
810 void
811 rfs4_servinst_assign(rfs4_client_t *cp, rfs4_servinst_t *sip)
812 {
813 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
814
815 /*
816 * The lock ensures that if the current instance is in the process
817 * of changing, we will see the new one.
818 */
819 mutex_enter(&rfs4_servinst_lock);
820 cp->rc_server_instance = sip;
821 mutex_exit(&rfs4_servinst_lock);
822 }
823
824 rfs4_servinst_t *
825 rfs4_servinst(rfs4_client_t *cp)
826 {
827 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
828
829 return (cp->rc_server_instance);
830 }
831
/*
 * Result-free routine that does nothing; rfsv4disptab uses it as the
 * dis_resfree entry for operations that have no dedicated free routine.
 */
/* ARGSUSED */
static void
nullfree(caddr_t resop)
{
}
837
838 /*
839 * This is a fall-through for invalid or not implemented (yet) ops
840 */
841 /* ARGSUSED */
862 }
863
864 /*
865 * Used by rfs4_op_secinfo to get the security information from the
866 * export structure associated with the component.
867 */
868 /* ARGSUSED */
869 static nfsstat4
870 do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
871 {
872 int error, different_export = 0;
873 vnode_t *dvp, *vp;
874 struct exportinfo *exi = NULL;
875 fid_t fid;
876 uint_t count, i;
877 secinfo4 *resok_val;
878 struct secinfo *secp;
879 seconfig_t *si;
880 bool_t did_traverse = FALSE;
881 int dotdot, walk;
882
883 dvp = cs->vp;
884 dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
885
886 /*
887 * If dotdotting, then need to check whether it's above the
888 * root of a filesystem, or above an export point.
889 */
890 if (dotdot) {
891
892 /*
893 * If dotdotting at the root of a filesystem, then
894 * need to traverse back to the mounted-on filesystem
895 * and do the dotdot lookup there.
896 */
897 if (cs->vp->v_flag & VROOT) {
898
899 /*
900 * If at the system root, then can
901 * go up no further.
902 */
903 if (VN_CMP(dvp, rootdir))
904 return (puterrno4(ENOENT));
905
906 /*
907 * Traverse back to the mounted-on filesystem
908 */
909 dvp = untraverse(cs->vp);
910
911 /*
912 * Set the different_export flag so we remember
913 * to pick up a new exportinfo entry for
914 * this new filesystem.
915 */
916 different_export = 1;
917 } else {
918
919 /*
920 * If dotdotting above an export point then set
921 * the different_export to get new export info.
922 */
923 different_export = nfs_exported(cs->exi, cs->vp);
1000 exi = cs->exi;
1001 } else {
1002 VN_RELE(vp);
1003 return (puterrno4(EACCES));
1004 }
1005 }
1006 } else {
1007 exi = cs->exi;
1008 }
1009 ASSERT(exi != NULL);
1010
1011
1012 /*
1013 * Create the secinfo result based on the security information
1014 * from the exportinfo structure (exi).
1015 *
1016 * Return all flavors for a pseudo node.
1017 * For a real export node, return the flavor that the client
1018 * has access with.
1019 */
1020 ASSERT(RW_LOCK_HELD(&exported_lock));
1021 if (PSEUDO(exi)) {
1022 count = exi->exi_export.ex_seccnt; /* total sec count */
1023 resok_val = kmem_alloc(count * sizeof (secinfo4), KM_SLEEP);
1024 secp = exi->exi_export.ex_secinfo;
1025
1026 for (i = 0; i < count; i++) {
1027 si = &secp[i].s_secinfo;
1028 resok_val[i].flavor = si->sc_rpcnum;
1029 if (resok_val[i].flavor == RPCSEC_GSS) {
1030 rpcsec_gss_info *info;
1031
1032 info = &resok_val[i].flavor_info;
1033 info->qop = si->sc_qop;
1034 info->service = (rpc_gss_svc_t)si->sc_service;
1035
1036 /* get oid opaque data */
1037 info->oid.sec_oid4_len =
1038 si->sc_gss_mech_type->length;
1039 info->oid.sec_oid4_val = kmem_alloc(
1040 si->sc_gss_mech_type->length, KM_SLEEP);
1363 if (is_system_labeled() && !admin_low_client)
1364 label_rele(tslabel);
1365
1366 *cs->statusp = resp->status = NFS4_OK;
1367 out:
1368 DTRACE_NFSV4_2(op__access__done, struct compound_state *, cs,
1369 ACCESS4res *, resp);
1370 }
1371
1372 /* ARGSUSED */
1373 static void
1374 rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1375 struct compound_state *cs)
1376 {
1377 COMMIT4args *args = &argop->nfs_argop4_u.opcommit;
1378 COMMIT4res *resp = &resop->nfs_resop4_u.opcommit;
1379 int error;
1380 vnode_t *vp = cs->vp;
1381 cred_t *cr = cs->cr;
1382 vattr_t va;
1383
1384 DTRACE_NFSV4_2(op__commit__start, struct compound_state *, cs,
1385 COMMIT4args *, args);
1386
1387 if (vp == NULL) {
1388 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1389 goto out;
1390 }
1391 if (cs->access == CS_ACCESS_DENIED) {
1392 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1393 goto out;
1394 }
1395
1396 if (args->offset + args->count < args->offset) {
1397 *cs->statusp = resp->status = NFS4ERR_INVAL;
1398 goto out;
1399 }
1400
1401 va.va_mask = AT_UID;
1402 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1419 resp->status = NFS4ERR_ISDIR;
1420 else
1421 resp->status = NFS4ERR_INVAL;
1422 *cs->statusp = resp->status;
1423 goto out;
1424 }
1425
1426 if (crgetuid(cr) != va.va_uid &&
1427 (error = VOP_ACCESS(vp, VWRITE, 0, cs->cr, NULL))) {
1428 *cs->statusp = resp->status = puterrno4(error);
1429 goto out;
1430 }
1431
1432 error = VOP_FSYNC(vp, FSYNC, cr, NULL);
1433
1434 if (error) {
1435 *cs->statusp = resp->status = puterrno4(error);
1436 goto out;
1437 }
1438
1439 *cs->statusp = resp->status = NFS4_OK;
1440 resp->writeverf = Write4verf;
1441 out:
1442 DTRACE_NFSV4_2(op__commit__done, struct compound_state *, cs,
1443 COMMIT4res *, resp);
1444 }
1445
1446 /*
1447 * rfs4_op_mknod is called from rfs4_op_create after all initial verification
1448 * was completed. It does the nfsv4 create for special files.
1449 */
1450 /* ARGSUSED */
1451 static vnode_t *
1452 do_rfs4_op_mknod(CREATE4args *args, CREATE4res *resp, struct svc_req *req,
1453 struct compound_state *cs, vattr_t *vap, char *nm)
1454 {
1455 int error;
1456 cred_t *cr = cs->cr;
1457 vnode_t *dvp = cs->vp;
1458 vnode_t *vp = NULL;
1459 int mode;
1460 enum vcexcl excl;
2628 dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
2629
2630 /*
2631 * If dotdotting, then need to check whether it's
2632 * above the root of a filesystem, or above an
2633 * export point.
2634 */
2635 if (dotdot) {
2636
2637 /*
2638 * If dotdotting at the root of a filesystem, then
2639 * need to traverse back to the mounted-on filesystem
2640 * and do the dotdot lookup there.
2641 */
2642 if (cs->vp->v_flag & VROOT) {
2643
2644 /*
2645 * If at the system root, then can
2646 * go up no further.
2647 */
2648 if (VN_CMP(cs->vp, rootdir))
2649 return (puterrno4(ENOENT));
2650
2651 /*
2652 * Traverse back to the mounted-on filesystem
2653 */
2654 cs->vp = untraverse(cs->vp);
2655
2656 /*
2657 * Set the different_export flag so we remember
2658 * to pick up a new exportinfo entry for
2659 * this new filesystem.
2660 */
2661 different_export = 1;
2662 } else {
2663
2664 /*
2665 * If dotdotting above an export point then set
2666 * the different_export to get new export info.
2667 */
2668 different_export = nfs_exported(cs->exi, cs->vp);
3392 READDIR4res *resp = &resop->nfs_resop4_u.opreaddir;
3393
3394 if (resp->status == NFS4_OK && resp->mblk != NULL) {
3395 freeb(resp->mblk);
3396 resp->mblk = NULL;
3397 resp->data_len = 0;
3398 }
3399 }
3400
3401
3402 /* ARGSUSED */
3403 static void
3404 rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3405 struct compound_state *cs)
3406 {
3407 PUTPUBFH4res *resp = &resop->nfs_resop4_u.opputpubfh;
3408 int error;
3409 vnode_t *vp;
3410 struct exportinfo *exi, *sav_exi;
3411 nfs_fh4_fmt_t *fh_fmtp;
3412
3413 DTRACE_NFSV4_1(op__putpubfh__start, struct compound_state *, cs);
3414
3415 if (cs->vp) {
3416 VN_RELE(cs->vp);
3417 cs->vp = NULL;
3418 }
3419
3420 if (cs->cr)
3421 crfree(cs->cr);
3422
3423 cs->cr = crdup(cs->basecr);
3424
3425 vp = exi_public->exi_vp;
3426 if (vp == NULL) {
3427 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3428 goto out;
3429 }
3430
3431 error = makefh4(&cs->fh, vp, exi_public);
3432 if (error != 0) {
3433 *cs->statusp = resp->status = puterrno4(error);
3434 goto out;
3435 }
3436 sav_exi = cs->exi;
3437 if (exi_public == exi_root) {
3438 /*
3439 * No filesystem is actually shared public, so we default
3440 * to exi_root. In this case, we must check whether root
3441 * is exported.
3442 */
3443 fh_fmtp = (nfs_fh4_fmt_t *)cs->fh.nfs_fh4_val;
3444
3445 /*
3446 * if root filesystem is exported, the exportinfo struct that we
3447 * should use is what checkexport4 returns, because root_exi is
3448 * actually a mostly empty struct.
3449 */
3450 exi = checkexport4(&fh_fmtp->fh4_fsid,
3451 (fid_t *)&fh_fmtp->fh4_xlen, NULL);
3452 cs->exi = ((exi != NULL) ? exi : exi_public);
3453 } else {
3454 /*
3455 * it's a properly shared filesystem
3456 */
3457 cs->exi = exi_public;
3458 }
3459
3460 if (is_system_labeled()) {
3461 bslabel_t *clabel;
3462
3463 ASSERT(req->rq_label != NULL);
3464 clabel = req->rq_label;
3465 DTRACE_PROBE2(tx__rfs4__log__info__opputpubfh__clabel, char *,
3466 "got client label from request(1)",
3467 struct svc_req *, req);
3468 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3469 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3470 cs->exi)) {
3471 *cs->statusp = resp->status =
3472 NFS4ERR_SERVERFAULT;
3473 goto out;
3474 }
3475 }
3476 }
3477
3512 rfs4_op_putfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3513 struct compound_state *cs)
3514 {
3515 PUTFH4args *args = &argop->nfs_argop4_u.opputfh;
3516 PUTFH4res *resp = &resop->nfs_resop4_u.opputfh;
3517 nfs_fh4_fmt_t *fh_fmtp;
3518
3519 DTRACE_NFSV4_2(op__putfh__start, struct compound_state *, cs,
3520 PUTFH4args *, args);
3521
3522 if (cs->vp) {
3523 VN_RELE(cs->vp);
3524 cs->vp = NULL;
3525 }
3526
3527 if (cs->cr) {
3528 crfree(cs->cr);
3529 cs->cr = NULL;
3530 }
3531
3532
3533 if (args->object.nfs_fh4_len < NFS_FH4_LEN) {
3534 *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
3535 goto out;
3536 }
3537
3538 fh_fmtp = (nfs_fh4_fmt_t *)args->object.nfs_fh4_val;
3539 cs->exi = checkexport4(&fh_fmtp->fh4_fsid, (fid_t *)&fh_fmtp->fh4_xlen,
3540 NULL);
3541
3542 if (cs->exi == NULL) {
3543 *cs->statusp = resp->status = NFS4ERR_STALE;
3544 goto out;
3545 }
3546
3547 cs->cr = crdup(cs->basecr);
3548
3549 ASSERT(cs->cr != NULL);
3550
3551 if (! (cs->vp = nfs4_fhtovp(&args->object, cs->exi, &resp->status))) {
3552 *cs->statusp = resp->status;
3579 struct exportinfo *exi, *sav_exi;
3580
3581 DTRACE_NFSV4_1(op__putrootfh__start, struct compound_state *, cs);
3582
3583 if (cs->vp) {
3584 VN_RELE(cs->vp);
3585 cs->vp = NULL;
3586 }
3587
3588 if (cs->cr)
3589 crfree(cs->cr);
3590
3591 cs->cr = crdup(cs->basecr);
3592
3593 /*
3594 * Using rootdir, the system root vnode,
3595 * get its fid.
3596 */
3597 bzero(&fid, sizeof (fid));
3598 fid.fid_len = MAXFIDSZ;
3599 error = vop_fid_pseudo(rootdir, &fid);
3600 if (error != 0) {
3601 *cs->statusp = resp->status = puterrno4(error);
3602 goto out;
3603 }
3604
/*
 * Then use the root fsid & fid to find out if it's exported
 *
 * If the server root isn't exported directly, then
 * it should at least be a pseudo export based on
 * one or more exports further down in the server's
 * file tree.
 */
3613 exi = checkexport4(&rootdir->v_vfsp->vfs_fsid, &fid, NULL);
3614 if (exi == NULL || exi->exi_export.ex_flags & EX_PUBLIC) {
3615 NFS4_DEBUG(rfs4_debug,
3616 (CE_WARN, "rfs4_op_putrootfh: export check failure"));
3617 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3618 goto out;
3619 }
3620
3621 /*
3622 * Now make a filehandle based on the root
3623 * export and root vnode.
3624 */
3625 error = makefh4(&cs->fh, rootdir, exi);
3626 if (error != 0) {
3627 *cs->statusp = resp->status = puterrno4(error);
3628 goto out;
3629 }
3630
3631 sav_exi = cs->exi;
3632 cs->exi = exi;
3633
3634 VN_HOLD(rootdir);
3635 cs->vp = rootdir;
3636
3637 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3638 VN_RELE(rootdir);
3639 cs->vp = NULL;
3640 cs->exi = sav_exi;
3641 goto out;
3642 }
3643
3644 *cs->statusp = resp->status = NFS4_OK;
3645 cs->deleg = FALSE;
3646 out:
3647 DTRACE_NFSV4_2(op__putrootfh__done, struct compound_state *, cs,
3648 PUTROOTFH4res *, resp);
3649 }
3650
3651 /*
3652 * set_rdattr_params sets up the variables used to manage what information
3653 * to get for each directory entry.
3654 */
3655 static nfsstat4
3656 set_rdattr_params(struct nfs4_svgetit_arg *sargp,
3657 bitmap4 attrs, bool_t *need_to_lookup)
3658 {
4229 }
4230 goto out;
4231 }
4232 NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
4233
4234 /* Actually do the REMOVE operation */
4235 if (vp->v_type == VDIR) {
4236 /*
4237 * Can't remove a directory that has a mounted-on filesystem.
4238 */
4239 if (vn_ismntpt(vp)) {
4240 error = EACCES;
4241 } else {
4242 /*
4243 * System V defines rmdir to return EEXIST,
4244 * not ENOTEMPTY, if the directory is not
4245 * empty. A System V NFS server needs to map
4246 * NFS4ERR_EXIST to NFS4ERR_NOTEMPTY to
4247 * transmit over the wire.
4248 */
4249 if ((error = VOP_RMDIR(dvp, name, rootdir, cs->cr,
4250 NULL, 0)) == EEXIST)
4251 error = ENOTEMPTY;
4252 }
4253 } else {
4254 if ((error = VOP_REMOVE(dvp, name, cs->cr, NULL, 0)) == 0 &&
4255 fp != NULL) {
4256 struct vattr va;
4257 vnode_t *tvp;
4258
4259 rfs4_dbe_lock(fp->rf_dbe);
4260 tvp = fp->rf_vp;
4261 if (tvp)
4262 VN_HOLD(tvp);
4263 rfs4_dbe_unlock(fp->rf_dbe);
4264
4265 if (tvp) {
4266 /*
4267 * This is va_seq safe because we are not
4268 * manipulating dvp.
4269 */
4341 DTRACE_NFSV4_2(op__remove__done, struct compound_state *, cs,
4342 REMOVE4res *, resp);
4343 }
4344
4345 /*
4346 * rename: args: SAVED_FH: from directory, CURRENT_FH: target directory,
4347 * oldname and newname.
4348 * res: status. If success - CURRENT_FH unchanged, return change_info
4349 * for both from and target directories.
4350 */
4351 /* ARGSUSED */
4352 static void
4353 rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4354 struct compound_state *cs)
4355 {
4356 RENAME4args *args = &argop->nfs_argop4_u.oprename;
4357 RENAME4res *resp = &resop->nfs_resop4_u.oprename;
4358 int error;
4359 vnode_t *odvp;
4360 vnode_t *ndvp;
4361 vnode_t *srcvp, *targvp;
4362 struct vattr obdva, oidva, oadva;
4363 struct vattr nbdva, nidva, nadva;
4364 char *onm, *nnm;
4365 uint_t olen, nlen;
4366 rfs4_file_t *fp, *sfp;
4367 int in_crit_src, in_crit_targ;
4368 int fp_rele_grant_hold, sfp_rele_grant_hold;
4369 bslabel_t *clabel;
4370 struct sockaddr *ca;
4371 char *converted_onm = NULL;
4372 char *converted_nnm = NULL;
4373 nfsstat4 status;
4374
4375 DTRACE_NFSV4_2(op__rename__start, struct compound_state *, cs,
4376 RENAME4args *, args);
4377
4378 fp = sfp = NULL;
4379 srcvp = targvp = NULL;
4380 in_crit_src = in_crit_targ = 0;
4381 fp_rele_grant_hold = sfp_rele_grant_hold = 0;
4382
4383 /* CURRENT_FH: target directory */
4384 ndvp = cs->vp;
4385 if (ndvp == NULL) {
4386 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4387 goto out;
4388 }
4389
4390 /* SAVED_FH: from directory */
4391 odvp = cs->saved_vp;
4392 if (odvp == NULL) {
4393 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4394 goto out;
4395 }
4396
4397 if (cs->access == CS_ACCESS_DENIED) {
4398 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4399 goto out;
4400 }
4401
4534 kmem_free(onm, olen);
4535 if (nnm != converted_nnm)
4536 kmem_free(converted_nnm, MAXPATHLEN + 1);
4537 kmem_free(nnm, nlen);
4538 goto out;
4539 }
4540
4541 sfp_rele_grant_hold = 1;
4542
	/* Is the destination an existing file that has a delegation? */
4544 if (fp = rfs4_lookup_and_findfile(ndvp, converted_nnm, &targvp,
4545 NULL, cs->cr)) {
4546 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4547 NULL)) {
4548 *cs->statusp = resp->status = NFS4ERR_DELAY;
4549 goto err_out;
4550 }
4551 }
4552 fp_rele_grant_hold = 1;
4553
4554
4555 /* Check for NBMAND lock on both source and target */
4556 if (nbl_need_check(srcvp)) {
4557 nbl_start_crit(srcvp, RW_READER);
4558 in_crit_src = 1;
4559 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
4560 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4561 goto err_out;
4562 }
4563 }
4564
4565 if (targvp && nbl_need_check(targvp)) {
4566 nbl_start_crit(targvp, RW_READER);
4567 in_crit_targ = 1;
4568 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
4569 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4570 goto err_out;
4571 }
4572 }
4573
4574 /* Get source "before" change value */
4575 obdva.va_mask = AT_CTIME|AT_SEQ;
4576 error = VOP_GETATTR(odvp, &obdva, 0, cs->cr, NULL);
4577 if (!error) {
4578 nbdva.va_mask = AT_CTIME|AT_SEQ;
4579 error = VOP_GETATTR(ndvp, &nbdva, 0, cs->cr, NULL);
4580 }
4581 if (error) {
4582 *cs->statusp = resp->status = puterrno4(error);
4583 goto err_out;
4584 }
4585
4586 NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.before, obdva.va_ctime)
4587 NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.before, nbdva.va_ctime)
4588
4589 if ((error = VOP_RENAME(odvp, converted_onm, ndvp, converted_nnm,
4590 cs->cr, NULL, 0)) == 0 && fp != NULL) {
4591 struct vattr va;
4592 vnode_t *tvp;
4593
4594 rfs4_dbe_lock(fp->rf_dbe);
4595 tvp = fp->rf_vp;
4596 if (tvp)
4597 VN_HOLD(tvp);
4598 rfs4_dbe_unlock(fp->rf_dbe);
4599
4600 if (tvp) {
4601 va.va_mask = AT_NLINK;
4602 if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4603 va.va_nlink == 0) {
4604 /* The file is gone and so should the state */
4605 if (in_crit_targ) {
4606 nbl_end_crit(targvp);
4607 in_crit_targ = 0;
4608 }
4609 rfs4_close_all_state(fp);
4610 }
4611 VN_RELE(tvp);
4612 }
4613 }
4614 if (error == 0)
4615 vn_renamepath(ndvp, srcvp, nnm, nlen - 1);
4616
4617 if (in_crit_src)
4618 nbl_end_crit(srcvp);
4619 if (srcvp)
4620 VN_RELE(srcvp);
4621 if (in_crit_targ)
4622 nbl_end_crit(targvp);
4623 if (targvp)
4624 VN_RELE(targvp);
4625
4626 if (sfp) {
4627 rfs4_clear_dont_grant(sfp);
4628 rfs4_file_rele(sfp);
4629 }
4630 if (fp) {
4631 rfs4_clear_dont_grant(fp);
4632 rfs4_file_rele(fp);
4633 }
4634
4635 if (converted_onm != onm)
4636 kmem_free(converted_onm, MAXPATHLEN + 1);
4637 kmem_free(onm, olen);
4638 if (converted_nnm != nnm)
4639 kmem_free(converted_nnm, MAXPATHLEN + 1);
4640 kmem_free(nnm, nlen);
4641
4642 /*
4643 * Get the initial "after" sequence number, if it fails, set to zero
4644 */
4645 oidva.va_mask = AT_SEQ;
5542 static void
5543 rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5544 struct compound_state *cs)
5545 {
5546 WRITE4args *args = &argop->nfs_argop4_u.opwrite;
5547 WRITE4res *resp = &resop->nfs_resop4_u.opwrite;
5548 int error;
5549 vnode_t *vp;
5550 struct vattr bva;
5551 u_offset_t rlimit;
5552 struct uio uio;
5553 struct iovec iov[MAX_IOVECS];
5554 struct iovec *iovp;
5555 int iovcnt;
5556 int ioflag;
5557 cred_t *savecred, *cr;
5558 bool_t *deleg = &cs->deleg;
5559 nfsstat4 stat;
5560 int in_crit = 0;
5561 caller_context_t ct;
5562
5563 DTRACE_NFSV4_2(op__write__start, struct compound_state *, cs,
5564 WRITE4args *, args);
5565
5566 vp = cs->vp;
5567 if (vp == NULL) {
5568 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5569 goto out;
5570 }
5571 if (cs->access == CS_ACCESS_DENIED) {
5572 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5573 goto out;
5574 }
5575
5576 cr = cs->cr;
5577
5578 if ((stat = rfs4_check_stateid(FWRITE, vp, &args->stateid, FALSE,
5579 deleg, TRUE, &ct)) != NFS4_OK) {
5580 *cs->statusp = resp->status = stat;
5581 goto out;
5612 goto out;
5613 }
5614
5615 if (vp->v_type != VREG) {
5616 *cs->statusp = resp->status =
5617 ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
5618 goto out;
5619 }
5620
5621 if (crgetuid(cr) != bva.va_uid &&
5622 (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct))) {
5623 *cs->statusp = resp->status = puterrno4(error);
5624 goto out;
5625 }
5626
5627 if (MANDLOCK(vp, bva.va_mode)) {
5628 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5629 goto out;
5630 }
5631
5632 if (args->data_len == 0) {
5633 *cs->statusp = resp->status = NFS4_OK;
5634 resp->count = 0;
5635 resp->committed = args->stable;
5636 resp->writeverf = Write4verf;
5637 goto out;
5638 }
5639
5640 if (args->mblk != NULL) {
5641 mblk_t *m;
5642 uint_t bytes, round_len;
5643
5644 iovcnt = 0;
5645 bytes = 0;
5646 round_len = roundup(args->data_len, BYTES_PER_XDR_UNIT);
5647 for (m = args->mblk;
5648 m != NULL && bytes < round_len;
5649 m = m->b_cont) {
5650 iovcnt++;
5651 bytes += MBLKL(m);
5652 }
5653 #ifdef DEBUG
5654 /* should have ended on an mblk boundary */
5655 if (bytes != round_len) {
5656 printf("bytes=0x%x, round_len=0x%x, req len=0x%x\n",
5712 curthread->t_cred = cr;
5713 error = do_io(FWRITE, vp, &uio, ioflag, cr, &ct);
5714 curthread->t_cred = savecred;
5715
5716 if (iovp != iov)
5717 kmem_free(iovp, sizeof (*iovp) * iovcnt);
5718
5719 if (error) {
5720 *cs->statusp = resp->status = puterrno4(error);
5721 goto out;
5722 }
5723
5724 *cs->statusp = resp->status = NFS4_OK;
5725 resp->count = args->data_len - uio.uio_resid;
5726
5727 if (ioflag == 0)
5728 resp->committed = UNSTABLE4;
5729 else
5730 resp->committed = FILE_SYNC4;
5731
5732 resp->writeverf = Write4verf;
5733
5734 out:
5735 if (in_crit)
5736 nbl_end_crit(vp);
5737
5738 DTRACE_NFSV4_2(op__write__done, struct compound_state *, cs,
5739 WRITE4res *, resp);
5740 }
5741
5742
5743 /* XXX put in a header file */
5744 extern int sec_svc_getcred(struct svc_req *, cred_t *, caddr_t *, int *);
5745
5746 void
5747 rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi,
5748 struct svc_req *req, cred_t *cr, int *rv)
5749 {
5750 uint_t i;
5751 struct compound_state cs;
5752
5753 if (rv != NULL)
5754 *rv = 0;
5755 rfs4_init_compound_state(&cs);
5756 /*
	 * Form a reply tag by copying over the request tag.
5758 */
5759 resp->tag.utf8string_val =
5760 kmem_alloc(args->tag.utf8string_len, KM_SLEEP);
5761 resp->tag.utf8string_len = args->tag.utf8string_len;
5762 bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
5763 resp->tag.utf8string_len);
5764
5765 cs.statusp = &resp->status;
5766 cs.req = req;
5767 resp->array = NULL;
5768 resp->array_len = 0;
5769
5770 /*
5771 * XXX for now, minorversion should be zero
5789
5790 cr = crget();
5791 ASSERT(cr != NULL);
5792
5793 if (sec_svc_getcred(req, cr, &cs.principal, &cs.nfsflavor) == 0) {
5794 DTRACE_NFSV4_2(compound__start, struct compound_state *,
5795 &cs, COMPOUND4args *, args);
5796 crfree(cr);
5797 DTRACE_NFSV4_2(compound__done, struct compound_state *,
5798 &cs, COMPOUND4res *, resp);
5799 svcerr_badcred(req->rq_xprt);
5800 if (rv != NULL)
5801 *rv = 1;
5802 return;
5803 }
5804 resp->array_len = args->array_len;
5805 resp->array = kmem_zalloc(args->array_len * sizeof (nfs_resop4),
5806 KM_SLEEP);
5807
5808 cs.basecr = cr;
5809
5810 DTRACE_NFSV4_2(compound__start, struct compound_state *, &cs,
5811 COMPOUND4args *, args);
5812
5813 /*
5814 * For now, NFS4 compound processing must be protected by
5815 * exported_lock because it can access more than one exportinfo
5816 * per compound and share/unshare can now change multiple
5817 * exinfo structs. The NFS2/3 code only refs 1 exportinfo
5818 * per proc (excluding public exinfo), and exi_count design
5819 * is sufficient to protect concurrent execution of NFS2/3
5820 * ops along with unexport. This lock will be removed as
5821 * part of the NFSv4 phase 2 namespace redesign work.
5822 */
5823 rw_enter(&exported_lock, RW_READER);
5824
5825 /*
5826 * If this is the first compound we've seen, we need to start all
5827 * new instances' grace periods.
5828 */
5829 if (rfs4_seen_first_compound == 0) {
5830 rfs4_grace_start_new();
5831 /*
5832 * This must be set after rfs4_grace_start_new(), otherwise
5833 * another thread could proceed past here before the former
5834 * is finished.
5835 */
5836 rfs4_seen_first_compound = 1;
5837 }
5838
5839 for (i = 0; i < args->array_len && cs.cont; i++) {
5840 nfs_argop4 *argop;
5841 nfs_resop4 *resop;
5842 uint_t op;
5843
5844 argop = &args->array[i];
5845 resop = &resp->array[i];
5846 resop->resop = argop->argop;
5847 op = (uint_t)resop->resop;
5848
5849 if (op < rfsv4disp_cnt) {
5850 /*
5851 * Count the individual ops here; NULL and COMPOUND
5852 * are counted in common_dispatch()
5853 */
5854 rfsproccnt_v4_ptr[op].value.ui64++;
5855
5856 NFS4_DEBUG(rfs4_debug > 1,
5857 (CE_NOTE, "Executing %s", rfs4_op_string[op]));
5858 (*rfsv4disptab[op].dis_proc)(argop, resop, req, &cs);
5859 NFS4_DEBUG(rfs4_debug > 1, (CE_NOTE, "%s returned %d",
5860 rfs4_op_string[op], *cs.statusp));
5861 if (*cs.statusp != NFS4_OK)
5862 cs.cont = FALSE;
5863 } else {
5864 /*
5865 * This is effectively dead code since XDR code
5866 * will have already returned BADXDR if op doesn't
			 * decode to legal value.  This is only done for a
5868 * day when XDR code doesn't verify v4 opcodes.
5869 */
5870 op = OP_ILLEGAL;
5871 rfsproccnt_v4_ptr[OP_ILLEGAL_IDX].value.ui64++;
5872
5873 rfs4_op_illegal(argop, resop, req, &cs);
5874 cs.cont = FALSE;
5875 }
5876
5877 /*
5878 * If not at last op, and if we are to stop, then
5879 * compact the results array.
5880 */
5881 if ((i + 1) < args->array_len && !cs.cont) {
5882 nfs_resop4 *new_res = kmem_alloc(
5883 (i+1) * sizeof (nfs_resop4), KM_SLEEP);
5884 bcopy(resp->array,
5885 new_res, (i+1) * sizeof (nfs_resop4));
5886 kmem_free(resp->array,
5887 args->array_len * sizeof (nfs_resop4));
5888
5889 resp->array_len = i + 1;
5890 resp->array = new_res;
5891 }
5892 }
5893
5894 rw_exit(&exported_lock);
5895
5896 DTRACE_NFSV4_2(compound__done, struct compound_state *, &cs,
5897 COMPOUND4res *, resp);
5898
5899 if (cs.vp)
5900 VN_RELE(cs.vp);
5901 if (cs.saved_vp)
5902 VN_RELE(cs.saved_vp);
5903 if (cs.saved_fh.nfs_fh4_val)
5904 kmem_free(cs.saved_fh.nfs_fh4_val, NFS4_FHSIZE);
5905
5906 if (cs.basecr)
5907 crfree(cs.basecr);
5908 if (cs.cr)
5909 crfree(cs.cr);
5910 /*
5911 * done with this compound request, free the label
5912 */
5913
5914 if (req->rq_label != NULL) {
5915 kmem_free(req->rq_label, sizeof (bslabel_t));
5916 req->rq_label = NULL;
5917 }
5918 }
5919
5920 /*
5921 * XXX because of what appears to be duplicate calls to rfs4_compound_free
5922 * XXX zero out the tag and array values. Need to investigate why the
5952 */
5953 void
5954 rfs4_compound_flagproc(COMPOUND4args *args, int *flagp)
5955 {
5956 int i;
5957 int flag = RPC_ALL;
5958
5959 for (i = 0; flag && i < args->array_len; i++) {
5960 uint_t op;
5961
5962 op = (uint_t)args->array[i].argop;
5963
5964 if (op < rfsv4disp_cnt)
5965 flag &= rfsv4disptab[op].dis_flags;
5966 else
5967 flag = 0;
5968 }
5969 *flagp = flag;
5970 }
5971
5972 nfsstat4
5973 rfs4_client_sysid(rfs4_client_t *cp, sysid_t *sp)
5974 {
5975 nfsstat4 e;
5976
5977 rfs4_dbe_lock(cp->rc_dbe);
5978
5979 if (cp->rc_sysidt != LM_NOSYSID) {
5980 *sp = cp->rc_sysidt;
5981 e = NFS4_OK;
5982
5983 } else if ((cp->rc_sysidt = lm_alloc_sysidt()) != LM_NOSYSID) {
5984 *sp = cp->rc_sysidt;
5985 e = NFS4_OK;
5986
5987 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
5988 "rfs4_client_sysid: allocated 0x%x\n", *sp));
5989 } else
5990 e = NFS4ERR_DELAY;
5991
6586
6587 /* Check for mandatory locking and that the size gets set. */
6588 cva.va_mask = AT_MODE;
6589 if (setsize)
6590 cva.va_mask |= AT_SIZE;
6591
6592 /* Assume the worst */
6593 cs->mandlock = TRUE;
6594
6595 if (VOP_GETATTR(vp, &cva, 0, cs->cr, NULL) == 0) {
6596 cs->mandlock = MANDLOCK(cs->vp, cva.va_mode);
6597
6598 /*
6599 * Truncate the file if necessary; this would be
6600 * the case for create over an existing file.
6601 */
6602
6603 if (trunc) {
6604 int in_crit = 0;
6605 rfs4_file_t *fp;
6606 bool_t create = FALSE;
6607
6608 /*
6609 * We are writing over an existing file.
6610 * Check to see if we need to recall a delegation.
6611 */
6612 rfs4_hold_deleg_policy();
6613 if ((fp = rfs4_findfile(vp, NULL, &create)) != NULL) {
6614 if (rfs4_check_delegated_byfp(FWRITE, fp,
6615 (reqsize == 0), FALSE, FALSE, &clientid)) {
6616 rfs4_file_rele(fp);
6617 rfs4_rele_deleg_policy();
6618 VN_RELE(vp);
6619 *attrset = 0;
6620 return (NFS4ERR_DELAY);
6621 }
6622 rfs4_file_rele(fp);
6623 }
6624 rfs4_rele_deleg_policy();
6625
6626 if (nbl_need_check(vp)) {
6627 in_crit = 1;
6628
6629 ASSERT(reqsize == 0);
6630
6631 nbl_start_crit(vp, RW_READER);
6632 if (nbl_conflict(vp, NBL_WRITE, 0,
6633 cva.va_size, 0, NULL)) {
6634 in_crit = 0;
6635 nbl_end_crit(vp);
6636 VN_RELE(vp);
6637 *attrset = 0;
6638 return (NFS4ERR_ACCESS);
6639 }
6640 }
6641 ct.cc_sysid = 0;
6642 ct.cc_pid = 0;
6643 ct.cc_caller_id = nfs4_srv_caller_id;
6644 ct.cc_flags = CC_DONTBLOCK;
8162
8163 newcp->rc_cp_confirmed = cp_confirmed;
8164
8165 rfs4_client_rele(newcp);
8166
8167 out:
8168 DTRACE_NFSV4_2(op__setclientid__done, struct compound_state *, cs,
8169 SETCLIENTID4res *, res);
8170 }
8171
8172 /*ARGSUSED*/
8173 void
8174 rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
8175 struct svc_req *req, struct compound_state *cs)
8176 {
8177 SETCLIENTID_CONFIRM4args *args =
8178 &argop->nfs_argop4_u.opsetclientid_confirm;
8179 SETCLIENTID_CONFIRM4res *res =
8180 &resop->nfs_resop4_u.opsetclientid_confirm;
8181 rfs4_client_t *cp, *cptoclose = NULL;
8182
8183 DTRACE_NFSV4_2(op__setclientid__confirm__start,
8184 struct compound_state *, cs,
8185 SETCLIENTID_CONFIRM4args *, args);
8186
8187 *cs->statusp = res->status = NFS4_OK;
8188
8189 cp = rfs4_findclient_by_id(args->clientid, TRUE);
8190
8191 if (cp == NULL) {
8192 *cs->statusp = res->status =
8193 rfs4_check_clientid(&args->clientid, 1);
8194 goto out;
8195 }
8196
8197 if (!creds_ok(cp, req, cs)) {
8198 *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8199 rfs4_client_rele(cp);
8200 goto out;
8201 }
8202
8203 /* If the verifier doesn't match, the record doesn't match */
8204 if (cp->rc_confirm_verf != args->setclientid_confirm) {
8205 *cs->statusp = res->status = NFS4ERR_STALE_CLIENTID;
8206 rfs4_client_rele(cp);
8207 goto out;
8208 }
8209
8210 rfs4_dbe_lock(cp->rc_dbe);
8211 cp->rc_need_confirm = FALSE;
8212 if (cp->rc_cp_confirmed) {
8213 cptoclose = cp->rc_cp_confirmed;
8214 cptoclose->rc_ss_remove = 1;
8215 cp->rc_cp_confirmed = NULL;
8216 }
8217
8218 /*
8219 * Update the client's associated server instance, if it's changed
8220 * since the client was created.
8221 */
8222 if (rfs4_servinst(cp) != rfs4_cur_servinst)
8223 rfs4_servinst_assign(cp, rfs4_cur_servinst);
8224
8225 /*
8226 * Record clientid in stable storage.
8227 * Must be done after server instance has been assigned.
8228 */
8229 rfs4_ss_clid(cp);
8230
8231 rfs4_dbe_unlock(cp->rc_dbe);
8232
8233 if (cptoclose)
8234 /* don't need to rele, client_close does it */
8235 rfs4_client_close(cptoclose);
8236
8237 /* If needed, initiate CB_NULL call for callback path */
8238 rfs4_deleg_cb_check(cp);
8239 rfs4_update_lease(cp);
8240
8241 /*
8242 * Check to see if client can perform reclaims
8243 */
8244 rfs4_ss_chkclid(cp);
8245
8246 rfs4_client_rele(cp);
8247
8248 out:
8249 DTRACE_NFSV4_2(op__setclientid__confirm__done,
8250 struct compound_state *, cs,
8251 SETCLIENTID_CONFIRM4 *, res);
8252 }
8253
8254
8255 /*ARGSUSED*/
8256 void
8257 rfs4_op_close(nfs_argop4 *argop, nfs_resop4 *resop,
8258 struct svc_req *req, struct compound_state *cs)
8259 {
8260 CLOSE4args *args = &argop->nfs_argop4_u.opclose;
8261 CLOSE4res *resp = &resop->nfs_resop4_u.opclose;
8262 rfs4_state_t *sp;
8263 nfsstat4 status;
8264
9868 /*
9869 * Check to see if we have a downrev Solaris client, so that we
9870 * can send it a symlink instead of a referral.
9871 */
9872 int
9873 client_is_downrev(struct svc_req *req)
9874 {
9875 struct sockaddr *ca;
9876 rfs4_clntip_t *ci;
9877 bool_t create = FALSE;
9878 int is_downrev;
9879
9880 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
9881 ASSERT(ca);
9882 ci = rfs4_find_clntip(ca, &create);
9883 if (ci == NULL)
9884 return (0);
9885 is_downrev = ci->ri_no_referrals;
9886 rfs4_dbe_rele(ci->ri_dbe);
9887 return (is_downrev);
9888 }
|
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26 /*
27 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
28 * All Rights Reserved
29 */
30
31 /*
32 * Copyright 2019 Nexenta Systems, Inc.
33 * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
34 */
35
36 #include <sys/param.h>
37 #include <sys/types.h>
38 #include <sys/systm.h>
39 #include <sys/cred.h>
40 #include <sys/buf.h>
41 #include <sys/vfs.h>
42 #include <sys/vfs_opreg.h>
43 #include <sys/vnode.h>
44 #include <sys/uio.h>
45 #include <sys/errno.h>
46 #include <sys/sysmacros.h>
47 #include <sys/statvfs.h>
48 #include <sys/kmem.h>
49 #include <sys/dirent.h>
50 #include <sys/cmn_err.h>
51 #include <sys/debug.h>
52 #include <sys/systeminfo.h>
53 #include <sys/flock.h>
54 #include <sys/pathname.h>
55 #include <sys/nbmlock.h>
56 #include <sys/share.h>
57 #include <sys/atomic.h>
58 #include <sys/policy.h>
59 #include <sys/fem.h>
60 #include <sys/sdt.h>
61 #include <sys/ddi.h>
62 #include <sys/zone.h>
63 #include <sys/kstat.h>
64
65 #include <fs/fs_reparse.h>
66
67 #include <rpc/types.h>
68 #include <rpc/auth.h>
69 #include <rpc/rpcsec_gss.h>
70 #include <rpc/svc.h>
71
72 #include <nfs/nfs.h>
73 #include <nfs/nfssys.h>
74 #include <nfs/export.h>
75 #include <nfs/nfs_cmd.h>
76 #include <nfs/lm.h>
77 #include <nfs/nfs4.h>
78 #include <nfs/nfs4_drc.h>
79
80 #include <sys/strsubr.h>
81 #include <sys/strsun.h>
82
83 #include <inet/common.h>
84 #include <inet/ip.h>
85 #include <inet/ip6.h>
86
87 #include <sys/tsol/label.h>
88 #include <sys/tsol/tndb.h>
89
90 #define RFS4_MAXLOCK_TRIES 4 /* Try to get the lock this many times */
91 static int rfs4_maxlock_tries = RFS4_MAXLOCK_TRIES;
92 #define RFS4_LOCK_DELAY 10 /* Milliseconds */
93 static clock_t rfs4_lock_delay = RFS4_LOCK_DELAY;
94 extern struct svc_ops rdma_svc_ops;
95 extern int nfs_loaned_buffers;
96 /* End of Tunables */
97
98 static int rdma_setup_read_data4(READ4args *, READ4res *);
136 #define RFS4_MINLEN_RDDIR4 (4 + NFS4_VERIFIER_SIZE + 4 + RFS4_MINLEN_ENTRY4 + 4)
137 #define RFS4_MINLEN_RDDIR_BUF \
138 (DIRENT64_RECLEN(1) + DIRENT64_RECLEN(2) + DIRENT64_RECLEN(MAXNAMELEN))
139
140 /*
141 * It would be better to pad to 4 bytes since that's what XDR would do,
142 * but the dirents UFS gives us are already padded to 8, so just take
143 * what we're given. Dircount is only a hint anyway. Currently the
144 * solaris kernel is ASCII only, so there's no point in calling the
145 * UTF8 functions.
146 *
 * dirent64: name padded to provide 8 byte struct alignment
148 * d_ino(8) + d_off(8) + d_reclen(2) + d_name(namelen + null(1) + pad)
149 *
150 * cookie: uint64_t + utf8namelen: uint_t + utf8name padded to 8 bytes
151 *
152 */
153 #define DIRENT64_TO_DIRCOUNT(dp) \
154 (3 * BYTES_PER_XDR_UNIT + DIRENT64_NAMELEN((dp)->d_reclen))
155
156 zone_key_t rfs4_zone_key;
157
158 static sysid_t lockt_sysid; /* dummy sysid for all LOCKT calls */
159
160 u_longlong_t nfs4_srv_caller_id;
161 uint_t nfs4_srv_vkey = 0;
162
163 void rfs4_init_compound_state(struct compound_state *);
164
165 static void nullfree(caddr_t);
166 static void rfs4_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
167 struct compound_state *);
168 static void rfs4_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
169 struct compound_state *);
170 static void rfs4_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
171 struct compound_state *);
172 static void rfs4_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
173 struct compound_state *);
174 static void rfs4_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
175 struct compound_state *);
176 static void rfs4_op_create_free(nfs_resop4 *resop);
177 static void rfs4_op_delegreturn(nfs_argop4 *, nfs_resop4 *,
178 struct svc_req *, struct compound_state *);
179 static void rfs4_op_delegpurge(nfs_argop4 *, nfs_resop4 *,
180 struct svc_req *, struct compound_state *);
181 static void rfs4_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
182 struct compound_state *);
231 static void rfs4_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
232 struct compound_state *);
233 static void rfs4_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
234 struct compound_state *);
235 static void rfs4_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
236 struct compound_state *);
237 static void rfs4_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
238 struct compound_state *);
239 static void rfs4_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
240 struct compound_state *);
241 static void rfs4_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
242 struct compound_state *);
243 static void rfs4_op_setclientid(nfs_argop4 *, nfs_resop4 *,
244 struct svc_req *, struct compound_state *);
245 static void rfs4_op_setclientid_confirm(nfs_argop4 *, nfs_resop4 *,
246 struct svc_req *req, struct compound_state *);
247 static void rfs4_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
248 struct compound_state *);
249 static void rfs4_op_secinfo_free(nfs_resop4 *);
250
251 static nfsstat4 check_open_access(uint32_t, struct compound_state *,
252 struct svc_req *);
253 nfsstat4 rfs4_client_sysid(rfs4_client_t *, sysid_t *);
254 void rfs4_ss_clid(nfs4_srv_t *, rfs4_client_t *);
255
256
/*
 * translation table for attrs
 *
 * Set up by nfs4_ntov_table_init() and torn down by nfs4_ntov_table_free();
 * passed to do_rfs4_set_attrs().
 */
struct nfs4_ntov_table {
	union nfs4_attr_u *na;	/* NOTE(review): presumably attr values */
	uint8_t amap[NFS4_MAXNUM_ATTRS];  /* one byte per possible attribute */
	int attrcnt;		/* NOTE(review): presumably entries in use */
	bool_t vfsstat;		/* NOTE(review): meaning not visible here */
};
266
267 static void nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp);
268 static void nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
269 struct nfs4_svgetit_arg *sargp);
270
271 static nfsstat4 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp,
272 struct compound_state *cs, struct nfs4_svgetit_arg *sargp,
273 struct nfs4_ntov_table *ntovp, nfs4_attr_cmd_t cmd);
274
275 static void hanfsv4_failover(nfs4_srv_t *);
276
277 fem_t *deleg_rdops;
278 fem_t *deleg_wrops;
279
/*
 * NFS4 op dispatch table
 *
 * One entry describes one NFSv4 operation.  rfs4_compound() indexes
 * rfsv4disptab[] by opcode and calls dis_proc as
 * (*dis_proc)(argop, resop, req, cs); rfs4_compound_flagproc() ANDs
 * together the dis_flags of every op in a compound.
 */

struct rfsv4disp {
	void	(*dis_proc)();		/* proc to call */
	void	(*dis_resfree)();	/* frees space allocated by proc */
	int	dis_flags;		/* RPC_IDEMPOTENT, etc... */
	int	op_type;		/* operation type, see below */
};
290
/*
 * operation types; used primarily for the per-exportinfo kstat implementation
 */
#define	NFS4_OP_NOFH	0	/* The operation does not operate with any */
				/* particular filehandle; we cannot associate */
				/* it with any exportinfo. */

#define	NFS4_OP_CFH	1	/* The operation works with the current */
				/* filehandle; we associate the operation */
				/* with the exportinfo related to the current */
				/* filehandle (as set before the operation is */
				/* executed). */

#define	NFS4_OP_SFH	2	/* The operation works with the saved */
				/* filehandle; we associate the operation */
				/* with the exportinfo related to the saved */
				/* filehandle (as set before the operation is */
				/* executed). */

#define	NFS4_OP_POSTCFH	3	/* The operation ignores the current */
				/* filehandle, but sets the new current */
				/* filehandle instead; we associate the */
				/* operation with the exportinfo related to */
				/* the current filehandle as set after the */
				/* operation is successfully executed. Since */
				/* we do not know the particular exportinfo */
				/* (and thus the kstat) before the operation */
				/* is done, there is no simple way to */
				/* update some I/O kstat statistics related */
				/* to kstat_queue(9F). */
321
/*
 * Dispatch table, indexed by NFSv4 operation number.  The position of each
 * entry must match the opcode noted in the comment above it.  Opcodes 0-2
 * are not valid operations and are routed to rfs4_op_illegal.
 */
static struct rfsv4disp rfsv4disptab[] = {
	/*
	 * NFS VERSION 4
	 */

	/* RFS_NULL = 0 */
	{rfs4_op_illegal, nullfree, 0, NFS4_OP_NOFH},

	/* UNUSED = 1 */
	{rfs4_op_illegal, nullfree, 0, NFS4_OP_NOFH},

	/* UNUSED = 2 */
	{rfs4_op_illegal, nullfree, 0, NFS4_OP_NOFH},

	/* OP_ACCESS = 3 */
	{rfs4_op_access, nullfree, RPC_IDEMPOTENT, NFS4_OP_CFH},

	/* OP_CLOSE = 4 */
	{rfs4_op_close, nullfree, 0, NFS4_OP_CFH},

	/* OP_COMMIT = 5 */
	{rfs4_op_commit, nullfree, RPC_IDEMPOTENT, NFS4_OP_CFH},

	/* OP_CREATE = 6 */
	{rfs4_op_create, nullfree, 0, NFS4_OP_CFH},

	/* OP_DELEGPURGE = 7 */
	{rfs4_op_delegpurge, nullfree, 0, NFS4_OP_NOFH},

	/* OP_DELEGRETURN = 8 */
	{rfs4_op_delegreturn, nullfree, 0, NFS4_OP_CFH},

	/* OP_GETATTR = 9 */
	{rfs4_op_getattr, rfs4_op_getattr_free, RPC_IDEMPOTENT, NFS4_OP_CFH},

	/* OP_GETFH = 10 */
	{rfs4_op_getfh, rfs4_op_getfh_free, RPC_ALL, NFS4_OP_CFH},

	/* OP_LINK = 11 */
	{rfs4_op_link, nullfree, 0, NFS4_OP_CFH},

	/* OP_LOCK = 12 */
	{rfs4_op_lock, lock_denied_free, 0, NFS4_OP_CFH},

	/* OP_LOCKT = 13 */
	{rfs4_op_lockt, lock_denied_free, 0, NFS4_OP_CFH},

	/* OP_LOCKU = 14 */
	{rfs4_op_locku, nullfree, 0, NFS4_OP_CFH},

	/* OP_LOOKUP = 15 */
	{rfs4_op_lookup, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK),
	    NFS4_OP_CFH},

	/* OP_LOOKUPP = 16 */
	{rfs4_op_lookupp, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK),
	    NFS4_OP_CFH},

	/* OP_NVERIFY = 17 */
	{rfs4_op_nverify, nullfree, RPC_IDEMPOTENT, NFS4_OP_CFH},

	/* OP_OPEN = 18 */
	{rfs4_op_open, rfs4_free_reply, 0, NFS4_OP_CFH},

	/* OP_OPENATTR = 19 */
	{rfs4_op_openattr, nullfree, 0, NFS4_OP_CFH},

	/* OP_OPEN_CONFIRM = 20 */
	{rfs4_op_open_confirm, nullfree, 0, NFS4_OP_CFH},

	/* OP_OPEN_DOWNGRADE = 21 */
	{rfs4_op_open_downgrade, nullfree, 0, NFS4_OP_CFH},

	/* OP_PUTFH = 22 */
	{rfs4_op_putfh, nullfree, RPC_ALL, NFS4_OP_POSTCFH},

	/* OP_PUTPUBFH = 23 */
	{rfs4_op_putpubfh, nullfree, RPC_ALL, NFS4_OP_POSTCFH},

	/* OP_PUTROOTFH = 24 */
	{rfs4_op_putrootfh, nullfree, RPC_ALL, NFS4_OP_POSTCFH},

	/* OP_READ = 25 */
	{rfs4_op_read, rfs4_op_read_free, RPC_IDEMPOTENT, NFS4_OP_CFH},

	/* OP_READDIR = 26 */
	{rfs4_op_readdir, rfs4_op_readdir_free, RPC_IDEMPOTENT, NFS4_OP_CFH},

	/* OP_READLINK = 27 */
	{rfs4_op_readlink, rfs4_op_readlink_free, RPC_IDEMPOTENT, NFS4_OP_CFH},

	/* OP_REMOVE = 28 */
	{rfs4_op_remove, nullfree, 0, NFS4_OP_CFH},

	/* OP_RENAME = 29 */
	{rfs4_op_rename, nullfree, 0, NFS4_OP_CFH},

	/* OP_RENEW = 30 */
	{rfs4_op_renew, nullfree, 0, NFS4_OP_NOFH},

	/* OP_RESTOREFH = 31 */
	{rfs4_op_restorefh, nullfree, RPC_ALL, NFS4_OP_SFH},

	/* OP_SAVEFH = 32 */
	{rfs4_op_savefh, nullfree, RPC_ALL, NFS4_OP_CFH},

	/* OP_SECINFO = 33 */
	{rfs4_op_secinfo, rfs4_op_secinfo_free, 0, NFS4_OP_CFH},

	/* OP_SETATTR = 34 */
	{rfs4_op_setattr, nullfree, 0, NFS4_OP_CFH},

	/* OP_SETCLIENTID = 35 */
	{rfs4_op_setclientid, nullfree, 0, NFS4_OP_NOFH},

	/* OP_SETCLIENTID_CONFIRM = 36 */
	{rfs4_op_setclientid_confirm, nullfree, 0, NFS4_OP_NOFH},

	/* OP_VERIFY = 37 */
	{rfs4_op_verify, nullfree, RPC_IDEMPOTENT, NFS4_OP_CFH},

	/* OP_WRITE = 38 */
	{rfs4_op_write, nullfree, 0, NFS4_OP_CFH},

	/* OP_RELEASE_LOCKOWNER = 39 */
	{rfs4_op_release_lockowner, nullfree, 0, NFS4_OP_NOFH},
};
449
/*
 * Number of entries in rfsv4disptab[]; an opcode at or above this value has
 * no dispatch entry.
 */
static uint_t rfsv4disp_cnt = sizeof (rfsv4disptab) / sizeof (rfsv4disptab[0]);

/*
 * One past the last dispatch slot.  rfs4_compound() uses it as the index of
 * the rfsproccnt_v4_ptr[] counter bumped for out-of-range opcodes.
 */
#define	OP_ILLEGAL_IDX	(rfsv4disp_cnt)
453
454 #ifdef DEBUG
455
456 int rfs4_fillone_debug = 0;
457 int rfs4_no_stub_access = 1;
458 int rfs4_rddir_debug = 0;
459
460 static char *rfs4_op_string[] = {
461 "rfs4_op_null",
462 "rfs4_op_1 unused",
463 "rfs4_op_2 unused",
464 "rfs4_op_access",
465 "rfs4_op_close",
466 "rfs4_op_commit",
467 "rfs4_op_create",
485 "rfs4_op_putrootfh",
486 "rfs4_op_read",
487 "rfs4_op_readdir",
488 "rfs4_op_readlink",
489 "rfs4_op_remove",
490 "rfs4_op_rename",
491 "rfs4_op_renew",
492 "rfs4_op_restorefh",
493 "rfs4_op_savefh",
494 "rfs4_op_secinfo",
495 "rfs4_op_setattr",
496 "rfs4_op_setclientid",
497 "rfs4_op_setclient_confirm",
498 "rfs4_op_verify",
499 "rfs4_op_write",
500 "rfs4_op_release_lockowner",
501 "rfs4_op_illegal"
502 };
503 #endif
504
/* Prototypes declared locally rather than pulled in from a header. */
void rfs4_ss_chkclid(nfs4_srv_t *, rfs4_client_t *);

extern size_t strlcpy(char *dst, const char *src, size_t dstsize);

extern void rfs4_free_fs_locations4(fs_locations4 *);

/*
 * nextdp(dp) yields a pointer to the struct dirent64 that begins d_reclen
 * bytes past the start of dp.  Any earlier definition of nextdp is
 * discarded first so that this one is always the one in effect.
 */
#ifdef nextdp
#undef nextdp
#endif
#define nextdp(dp) ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
515
516 static const fs_operation_def_t nfs4_rd_deleg_tmpl[] = {
517 VOPNAME_OPEN, { .femop_open = deleg_rd_open },
518 VOPNAME_WRITE, { .femop_write = deleg_rd_write },
519 VOPNAME_SETATTR, { .femop_setattr = deleg_rd_setattr },
520 VOPNAME_RWLOCK, { .femop_rwlock = deleg_rd_rwlock },
521 VOPNAME_SPACE, { .femop_space = deleg_rd_space },
522 VOPNAME_SETSECATTR, { .femop_setsecattr = deleg_rd_setsecattr },
523 VOPNAME_VNEVENT, { .femop_vnevent = deleg_rd_vnevent },
524 NULL, NULL
525 };
526 static const fs_operation_def_t nfs4_wr_deleg_tmpl[] = {
527 VOPNAME_OPEN, { .femop_open = deleg_wr_open },
528 VOPNAME_READ, { .femop_read = deleg_wr_read },
529 VOPNAME_WRITE, { .femop_write = deleg_wr_write },
530 VOPNAME_SETATTR, { .femop_setattr = deleg_wr_setattr },
531 VOPNAME_RWLOCK, { .femop_rwlock = deleg_wr_rwlock },
532 VOPNAME_SPACE, { .femop_space = deleg_wr_space },
533 VOPNAME_SETSECATTR, { .femop_setsecattr = deleg_wr_setsecattr },
534 VOPNAME_VNEVENT, { .femop_vnevent = deleg_wr_vnevent },
535 NULL, NULL
536 };
537
538 /* ARGSUSED */
539 static void *
540 rfs4_zone_init(zoneid_t zoneid)
541 {
542 nfs4_srv_t *nsrv4;
543 timespec32_t verf;
544
545 nsrv4 = kmem_zalloc(sizeof (*nsrv4), KM_SLEEP);
546
547 /*
548 * The following algorithm attempts to find a unique verifier
549 * to be used as the write verifier returned from the server
550 * to the client. It is important that this verifier change
551 * whenever the server reboots. Of secondary importance, it
552 * is important for the verifier to be unique between two
553 * different servers.
554 *
555 * Thus, an attempt is made to use the system hostid and the
556 * current time in seconds when the nfssrv kernel module is
557 * loaded. It is assumed that an NFS server will not be able
558 * to boot and then to reboot in less than a second. If the
559 * hostid has not been set, then the current high resolution
560 * time is used. This will ensure different verifiers each
561 * time the server reboots and minimize the chances that two
562 * different servers will have the same verifier.
563 * XXX - this is broken on LP64 kernels.
564 */
565 verf.tv_sec = (time_t)zone_get_hostid(NULL);
566 if (verf.tv_sec != 0) {
567 verf.tv_nsec = gethrestime_sec();
568 } else {
569 timespec_t tverf;
570
571 gethrestime(&tverf);
572 verf.tv_sec = (time_t)tverf.tv_sec;
573 verf.tv_nsec = tverf.tv_nsec;
574 }
575 nsrv4->write4verf = *(uint64_t *)&verf;
576
577 /* Used to manage create/destroy of server state */
578 nsrv4->nfs4_server_state = NULL;
579 nsrv4->nfs4_cur_servinst = NULL;
580 nsrv4->nfs4_deleg_policy = SRV_NEVER_DELEGATE;
581 mutex_init(&nsrv4->deleg_lock, NULL, MUTEX_DEFAULT, NULL);
582 mutex_init(&nsrv4->state_lock, NULL, MUTEX_DEFAULT, NULL);
583 mutex_init(&nsrv4->servinst_lock, NULL, MUTEX_DEFAULT, NULL);
584 rw_init(&nsrv4->deleg_policy_lock, NULL, RW_DEFAULT, NULL);
585
586 return (nsrv4);
587 }
588
589 /* ARGSUSED */
590 static void
591 rfs4_zone_fini(zoneid_t zoneid, void *data)
592 {
593 nfs4_srv_t *nsrv4 = data;
594
595 mutex_destroy(&nsrv4->deleg_lock);
596 mutex_destroy(&nsrv4->state_lock);
597 mutex_destroy(&nsrv4->servinst_lock);
598 rw_destroy(&nsrv4->deleg_policy_lock);
599
600 kmem_free(nsrv4, sizeof (*nsrv4));
601 }
602
603 void
604 rfs4_srvrinit(void)
605 {
606 extern void rfs4_attr_init();
607
608 zone_key_create(&rfs4_zone_key, rfs4_zone_init, NULL, rfs4_zone_fini);
609
610 rfs4_attr_init();
611
612
613 if (fem_create("deleg_rdops", nfs4_rd_deleg_tmpl, &deleg_rdops) != 0) {
614 rfs4_disable_delegation();
615 } else if (fem_create("deleg_wrops", nfs4_wr_deleg_tmpl,
616 &deleg_wrops) != 0) {
617 rfs4_disable_delegation();
618 fem_free(deleg_rdops);
619 }
620
621 nfs4_srv_caller_id = fs_new_caller_id();
622 lockt_sysid = lm_alloc_sysidt();
623 vsd_create(&nfs4_srv_vkey, NULL);
624 rfs4_state_g_init();
625 }
626
627 void
628 rfs4_srvrfini(void)
629 {
630 if (lockt_sysid != LM_NOSYSID) {
631 lm_free_sysidt(lockt_sysid);
632 lockt_sysid = LM_NOSYSID;
633 }
634
635 rfs4_state_g_fini();
636
637 fem_free(deleg_rdops);
638 fem_free(deleg_wrops);
639
640 (void) zone_key_delete(rfs4_zone_key);
641 }
642
643 void
644 rfs4_do_server_start(int server_upordown,
645 int srv_delegation, int cluster_booted)
646 {
647 nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
648
649 /* Is this a warm start? */
650 if (server_upordown == NFS_SERVER_QUIESCED) {
651 cmn_err(CE_NOTE, "nfs4_srv: "
652 "server was previously quiesced; "
653 "existing NFSv4 state will be re-used");
654
655 /*
656 * HA-NFSv4: this is also the signal
657 * that a Resource Group failover has
658 * occurred.
659 */
660 if (cluster_booted)
661 hanfsv4_failover(nsrv4);
662 } else {
663 /* Cold start */
664 nsrv4->rfs4_start_time = 0;
665 rfs4_state_zone_init(nsrv4);
666 nsrv4->nfs4_drc = rfs4_init_drc(nfs4_drc_max,
667 nfs4_drc_hash);
668 }
669
670 /* Check if delegation is to be enabled */
671 if (srv_delegation != FALSE)
672 rfs4_set_deleg_policy(nsrv4, SRV_NORMAL_DELEGATE);
673 }
674
675 void
676 rfs4_init_compound_state(struct compound_state *cs)
677 {
678 bzero(cs, sizeof (*cs));
679 cs->cont = TRUE;
680 cs->access = CS_ACCESS_DENIED;
681 cs->deleg = FALSE;
682 cs->mandlock = FALSE;
683 cs->fh.nfs_fh4_val = cs->fhbuf;
684 cs->statusp = NULL;
685 }
686
687 void
688 rfs4_grace_start(rfs4_servinst_t *sip)
689 {
690 rw_enter(&sip->rwlock, RW_WRITER);
691 sip->start_time = (time_t)TICK_TO_SEC(ddi_get_lbolt());
692 sip->grace_period = rfs4_grace_period;
693 rw_exit(&sip->rwlock);
694 }
695
696 /*
697 * returns true if the instance's grace period has never been started
698 */
699 int
700 rfs4_servinst_grace_new(rfs4_servinst_t *sip)
701 {
702 time_t start_time;
703
704 rw_enter(&sip->rwlock, RW_READER);
719
720 rw_enter(&sip->rwlock, RW_READER);
721 grace_expiry = sip->start_time + sip->grace_period;
722 rw_exit(&sip->rwlock);
723
724 return (((time_t)TICK_TO_SEC(ddi_get_lbolt())) < grace_expiry);
725 }
726
727 int
728 rfs4_clnt_in_grace(rfs4_client_t *cp)
729 {
730 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
731
732 return (rfs4_servinst_in_grace(cp->rc_server_instance));
733 }
734
735 /*
736 * reset all currently active grace periods
737 */
738 void
739 rfs4_grace_reset_all(nfs4_srv_t *nsrv4)
740 {
741 rfs4_servinst_t *sip;
742
743 mutex_enter(&nsrv4->servinst_lock);
744 for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev)
745 if (rfs4_servinst_in_grace(sip))
746 rfs4_grace_start(sip);
747 mutex_exit(&nsrv4->servinst_lock);
748 }
749
750 /*
751 * start any new instances' grace periods
752 */
753 void
754 rfs4_grace_start_new(nfs4_srv_t *nsrv4)
755 {
756 rfs4_servinst_t *sip;
757
758 mutex_enter(&nsrv4->servinst_lock);
759 for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev)
760 if (rfs4_servinst_grace_new(sip))
761 rfs4_grace_start(sip);
762 mutex_exit(&nsrv4->servinst_lock);
763 }
764
765 static rfs4_dss_path_t *
766 rfs4_dss_newpath(nfs4_srv_t *nsrv4, rfs4_servinst_t *sip,
767 char *path, unsigned index)
768 {
769 size_t len;
770 rfs4_dss_path_t *dss_path;
771
772 dss_path = kmem_alloc(sizeof (rfs4_dss_path_t), KM_SLEEP);
773
774 /*
775 * Take a copy of the string, since the original may be overwritten.
776 * Sadly, no strdup() in the kernel.
777 */
778 /* allow for NUL */
779 len = strlen(path) + 1;
780 dss_path->path = kmem_alloc(len, KM_SLEEP);
781 (void) strlcpy(dss_path->path, path, len);
782
783 /* associate with servinst */
784 dss_path->sip = sip;
785 dss_path->index = index;
786
787 /*
788 * Add to list of served paths.
789 * No locking required, as we're only ever called at startup.
790 */
791 if (nsrv4->dss_pathlist == NULL) {
792 /* this is the first dss_path_t */
793
794 /* needed for insque/remque */
795 dss_path->next = dss_path->prev = dss_path;
796
797 nsrv4->dss_pathlist = dss_path;
798 } else {
799 insque(dss_path, nsrv4->dss_pathlist);
800 }
801
802 return (dss_path);
803 }
804
805 /*
806 * Create a new server instance, and make it the currently active instance.
807 * Note that starting the grace period too early will reduce the clients'
808 * recovery window.
809 */
810 void
811 rfs4_servinst_create(nfs4_srv_t *nsrv4, int start_grace,
812 int dss_npaths, char **dss_paths)
813 {
814 unsigned i;
815 rfs4_servinst_t *sip;
816 rfs4_oldstate_t *oldstate;
817
818 sip = kmem_alloc(sizeof (rfs4_servinst_t), KM_SLEEP);
819 rw_init(&sip->rwlock, NULL, RW_DEFAULT, NULL);
820
821 sip->start_time = (time_t)0;
822 sip->grace_period = (time_t)0;
823 sip->next = NULL;
824 sip->prev = NULL;
825
826 rw_init(&sip->oldstate_lock, NULL, RW_DEFAULT, NULL);
827 /*
828 * This initial dummy entry is required to setup for insque/remque.
829 * It must be skipped over whenever the list is traversed.
830 */
831 oldstate = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP);
832 /* insque/remque require initial list entry to be self-terminated */
833 oldstate->next = oldstate;
834 oldstate->prev = oldstate;
835 sip->oldstate = oldstate;
836
837
838 sip->dss_npaths = dss_npaths;
839 sip->dss_paths = kmem_alloc(dss_npaths *
840 sizeof (rfs4_dss_path_t *), KM_SLEEP);
841
842 for (i = 0; i < dss_npaths; i++) {
843 /* CSTYLED */
844 sip->dss_paths[i] = rfs4_dss_newpath(nsrv4, sip, dss_paths[i], i);
845 }
846
847 mutex_enter(&nsrv4->servinst_lock);
848 if (nsrv4->nfs4_cur_servinst != NULL) {
849 /* add to linked list */
850 sip->prev = nsrv4->nfs4_cur_servinst;
851 nsrv4->nfs4_cur_servinst->next = sip;
852 }
853 if (start_grace)
854 rfs4_grace_start(sip);
855 /* make the new instance "current" */
856 nsrv4->nfs4_cur_servinst = sip;
857
858 mutex_exit(&nsrv4->servinst_lock);
859 }
860
861 /*
862 * In future, we might add a rfs4_servinst_destroy(sip) but, for now, destroy
863 * all instances directly.
864 */
865 void
866 rfs4_servinst_destroy_all(nfs4_srv_t *nsrv4)
867 {
868 rfs4_servinst_t *sip, *prev, *current;
869 #ifdef DEBUG
870 int n = 0;
871 #endif
872
873 mutex_enter(&nsrv4->servinst_lock);
874 ASSERT(nsrv4->nfs4_cur_servinst != NULL);
875 current = nsrv4->nfs4_cur_servinst;
876 nsrv4->nfs4_cur_servinst = NULL;
877 for (sip = current; sip != NULL; sip = prev) {
878 prev = sip->prev;
879 rw_destroy(&sip->rwlock);
880 if (sip->oldstate)
881 kmem_free(sip->oldstate, sizeof (rfs4_oldstate_t));
882 if (sip->dss_paths)
883 kmem_free(sip->dss_paths,
884 sip->dss_npaths * sizeof (rfs4_dss_path_t *));
885 kmem_free(sip, sizeof (rfs4_servinst_t));
886 #ifdef DEBUG
887 n++;
888 #endif
889 }
890 mutex_exit(&nsrv4->servinst_lock);
891 }
892
893 /*
894 * Assign the current server instance to a client_t.
895 * Should be called with cp->rc_dbe held.
896 */
897 void
898 rfs4_servinst_assign(nfs4_srv_t *nsrv4, rfs4_client_t *cp,
899 rfs4_servinst_t *sip)
900 {
901 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
902
903 /*
904 * The lock ensures that if the current instance is in the process
905 * of changing, we will see the new one.
906 */
907 mutex_enter(&nsrv4->servinst_lock);
908 cp->rc_server_instance = sip;
909 mutex_exit(&nsrv4->servinst_lock);
910 }
911
912 rfs4_servinst_t *
913 rfs4_servinst(rfs4_client_t *cp)
914 {
915 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
916
917 return (cp->rc_server_instance);
918 }
919
920 /* ARGSUSED */
921 static void
922 nullfree(caddr_t resop)
923 {
924 }
925
926 /*
927 * This is a fall-through for invalid or not implemented (yet) ops
928 */
929 /* ARGSUSED */
950 }
951
952 /*
953 * Used by rfs4_op_secinfo to get the security information from the
954 * export structure associated with the component.
955 */
956 /* ARGSUSED */
957 static nfsstat4
958 do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
959 {
960 int error, different_export = 0;
961 vnode_t *dvp, *vp;
962 struct exportinfo *exi = NULL;
963 fid_t fid;
964 uint_t count, i;
965 secinfo4 *resok_val;
966 struct secinfo *secp;
967 seconfig_t *si;
968 bool_t did_traverse = FALSE;
969 int dotdot, walk;
970 nfs_export_t *ne = nfs_get_export();
971
972 dvp = cs->vp;
973 dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
974
975 /*
976 * If dotdotting, then need to check whether it's above the
977 * root of a filesystem, or above an export point.
978 */
979 if (dotdot) {
980
981 /*
982 * If dotdotting at the root of a filesystem, then
983 * need to traverse back to the mounted-on filesystem
984 * and do the dotdot lookup there.
985 */
986 if (cs->vp->v_flag & VROOT) {
987
988 /*
989 * If at the system root, then can
990 * go up no further.
991 */
992 if (VN_CMP(dvp, ZONE_ROOTVP()))
993 return (puterrno4(ENOENT));
994
995 /*
996 * Traverse back to the mounted-on filesystem
997 */
998 dvp = untraverse(cs->vp);
999
1000 /*
1001 * Set the different_export flag so we remember
1002 * to pick up a new exportinfo entry for
1003 * this new filesystem.
1004 */
1005 different_export = 1;
1006 } else {
1007
1008 /*
1009 * If dotdotting above an export point then set
1010 * the different_export to get new export info.
1011 */
1012 different_export = nfs_exported(cs->exi, cs->vp);
1089 exi = cs->exi;
1090 } else {
1091 VN_RELE(vp);
1092 return (puterrno4(EACCES));
1093 }
1094 }
1095 } else {
1096 exi = cs->exi;
1097 }
1098 ASSERT(exi != NULL);
1099
1100
1101 /*
1102 * Create the secinfo result based on the security information
1103 * from the exportinfo structure (exi).
1104 *
1105 * Return all flavors for a pseudo node.
1106 * For a real export node, return the flavor that the client
1107 * has access with.
1108 */
1109 ASSERT(RW_LOCK_HELD(&ne->exported_lock));
1110 if (PSEUDO(exi)) {
1111 count = exi->exi_export.ex_seccnt; /* total sec count */
1112 resok_val = kmem_alloc(count * sizeof (secinfo4), KM_SLEEP);
1113 secp = exi->exi_export.ex_secinfo;
1114
1115 for (i = 0; i < count; i++) {
1116 si = &secp[i].s_secinfo;
1117 resok_val[i].flavor = si->sc_rpcnum;
1118 if (resok_val[i].flavor == RPCSEC_GSS) {
1119 rpcsec_gss_info *info;
1120
1121 info = &resok_val[i].flavor_info;
1122 info->qop = si->sc_qop;
1123 info->service = (rpc_gss_svc_t)si->sc_service;
1124
1125 /* get oid opaque data */
1126 info->oid.sec_oid4_len =
1127 si->sc_gss_mech_type->length;
1128 info->oid.sec_oid4_val = kmem_alloc(
1129 si->sc_gss_mech_type->length, KM_SLEEP);
1452 if (is_system_labeled() && !admin_low_client)
1453 label_rele(tslabel);
1454
1455 *cs->statusp = resp->status = NFS4_OK;
1456 out:
1457 DTRACE_NFSV4_2(op__access__done, struct compound_state *, cs,
1458 ACCESS4res *, resp);
1459 }
1460
1461 /* ARGSUSED */
1462 static void
1463 rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1464 struct compound_state *cs)
1465 {
1466 COMMIT4args *args = &argop->nfs_argop4_u.opcommit;
1467 COMMIT4res *resp = &resop->nfs_resop4_u.opcommit;
1468 int error;
1469 vnode_t *vp = cs->vp;
1470 cred_t *cr = cs->cr;
1471 vattr_t va;
1472 nfs4_srv_t *nsrv4;
1473
1474 DTRACE_NFSV4_2(op__commit__start, struct compound_state *, cs,
1475 COMMIT4args *, args);
1476
1477 if (vp == NULL) {
1478 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1479 goto out;
1480 }
1481 if (cs->access == CS_ACCESS_DENIED) {
1482 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1483 goto out;
1484 }
1485
1486 if (args->offset + args->count < args->offset) {
1487 *cs->statusp = resp->status = NFS4ERR_INVAL;
1488 goto out;
1489 }
1490
1491 va.va_mask = AT_UID;
1492 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1509 resp->status = NFS4ERR_ISDIR;
1510 else
1511 resp->status = NFS4ERR_INVAL;
1512 *cs->statusp = resp->status;
1513 goto out;
1514 }
1515
1516 if (crgetuid(cr) != va.va_uid &&
1517 (error = VOP_ACCESS(vp, VWRITE, 0, cs->cr, NULL))) {
1518 *cs->statusp = resp->status = puterrno4(error);
1519 goto out;
1520 }
1521
1522 error = VOP_FSYNC(vp, FSYNC, cr, NULL);
1523
1524 if (error) {
1525 *cs->statusp = resp->status = puterrno4(error);
1526 goto out;
1527 }
1528
1529 nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
1530 *cs->statusp = resp->status = NFS4_OK;
1531 resp->writeverf = nsrv4->write4verf;
1532 out:
1533 DTRACE_NFSV4_2(op__commit__done, struct compound_state *, cs,
1534 COMMIT4res *, resp);
1535 }
1536
1537 /*
1538 * rfs4_op_mknod is called from rfs4_op_create after all initial verification
1539 * was completed. It does the nfsv4 create for special files.
1540 */
1541 /* ARGSUSED */
1542 static vnode_t *
1543 do_rfs4_op_mknod(CREATE4args *args, CREATE4res *resp, struct svc_req *req,
1544 struct compound_state *cs, vattr_t *vap, char *nm)
1545 {
1546 int error;
1547 cred_t *cr = cs->cr;
1548 vnode_t *dvp = cs->vp;
1549 vnode_t *vp = NULL;
1550 int mode;
1551 enum vcexcl excl;
2719 dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
2720
2721 /*
2722 * If dotdotting, then need to check whether it's
2723 * above the root of a filesystem, or above an
2724 * export point.
2725 */
2726 if (dotdot) {
2727
2728 /*
2729 * If dotdotting at the root of a filesystem, then
2730 * need to traverse back to the mounted-on filesystem
2731 * and do the dotdot lookup there.
2732 */
2733 if (cs->vp->v_flag & VROOT) {
2734
2735 /*
2736 * If at the system root, then can
2737 * go up no further.
2738 */
2739 if (VN_CMP(cs->vp, ZONE_ROOTVP()))
2740 return (puterrno4(ENOENT));
2741
2742 /*
2743 * Traverse back to the mounted-on filesystem
2744 */
2745 cs->vp = untraverse(cs->vp);
2746
2747 /*
2748 * Set the different_export flag so we remember
2749 * to pick up a new exportinfo entry for
2750 * this new filesystem.
2751 */
2752 different_export = 1;
2753 } else {
2754
2755 /*
2756 * If dotdotting above an export point then set
2757 * the different_export to get new export info.
2758 */
2759 different_export = nfs_exported(cs->exi, cs->vp);
3483 READDIR4res *resp = &resop->nfs_resop4_u.opreaddir;
3484
3485 if (resp->status == NFS4_OK && resp->mblk != NULL) {
3486 freeb(resp->mblk);
3487 resp->mblk = NULL;
3488 resp->data_len = 0;
3489 }
3490 }
3491
3492
3493 /* ARGSUSED */
3494 static void
3495 rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3496 struct compound_state *cs)
3497 {
3498 PUTPUBFH4res *resp = &resop->nfs_resop4_u.opputpubfh;
3499 int error;
3500 vnode_t *vp;
3501 struct exportinfo *exi, *sav_exi;
3502 nfs_fh4_fmt_t *fh_fmtp;
3503 nfs_export_t *ne = nfs_get_export();
3504
3505 DTRACE_NFSV4_1(op__putpubfh__start, struct compound_state *, cs);
3506
3507 if (cs->vp) {
3508 VN_RELE(cs->vp);
3509 cs->vp = NULL;
3510 }
3511
3512 if (cs->cr)
3513 crfree(cs->cr);
3514
3515 cs->cr = crdup(cs->basecr);
3516
3517 vp = ne->exi_public->exi_vp;
3518 if (vp == NULL) {
3519 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3520 goto out;
3521 }
3522
3523 error = makefh4(&cs->fh, vp, ne->exi_public);
3524 if (error != 0) {
3525 *cs->statusp = resp->status = puterrno4(error);
3526 goto out;
3527 }
3528 sav_exi = cs->exi;
3529 if (ne->exi_public == ne->exi_root) {
3530 /*
3531 * No filesystem is actually shared public, so we default
3532 * to exi_root. In this case, we must check whether root
3533 * is exported.
3534 */
3535 fh_fmtp = (nfs_fh4_fmt_t *)cs->fh.nfs_fh4_val;
3536
3537 /*
3538 * if root filesystem is exported, the exportinfo struct that we
3539 * should use is what checkexport4 returns, because root_exi is
3540 * actually a mostly empty struct.
3541 */
3542 exi = checkexport4(&fh_fmtp->fh4_fsid,
3543 (fid_t *)&fh_fmtp->fh4_xlen, NULL);
3544 cs->exi = ((exi != NULL) ? exi : ne->exi_public);
3545 } else {
3546 /*
3547 * it's a properly shared filesystem
3548 */
3549 cs->exi = ne->exi_public;
3550 }
3551
3552 if (is_system_labeled()) {
3553 bslabel_t *clabel;
3554
3555 ASSERT(req->rq_label != NULL);
3556 clabel = req->rq_label;
3557 DTRACE_PROBE2(tx__rfs4__log__info__opputpubfh__clabel, char *,
3558 "got client label from request(1)",
3559 struct svc_req *, req);
3560 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3561 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3562 cs->exi)) {
3563 *cs->statusp = resp->status =
3564 NFS4ERR_SERVERFAULT;
3565 goto out;
3566 }
3567 }
3568 }
3569
3604 rfs4_op_putfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3605 struct compound_state *cs)
3606 {
3607 PUTFH4args *args = &argop->nfs_argop4_u.opputfh;
3608 PUTFH4res *resp = &resop->nfs_resop4_u.opputfh;
3609 nfs_fh4_fmt_t *fh_fmtp;
3610
3611 DTRACE_NFSV4_2(op__putfh__start, struct compound_state *, cs,
3612 PUTFH4args *, args);
3613
3614 if (cs->vp) {
3615 VN_RELE(cs->vp);
3616 cs->vp = NULL;
3617 }
3618
3619 if (cs->cr) {
3620 crfree(cs->cr);
3621 cs->cr = NULL;
3622 }
3623
3624 if (args->object.nfs_fh4_len < NFS_FH4_LEN) {
3625 *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
3626 goto out;
3627 }
3628
3629 fh_fmtp = (nfs_fh4_fmt_t *)args->object.nfs_fh4_val;
3630 cs->exi = checkexport4(&fh_fmtp->fh4_fsid, (fid_t *)&fh_fmtp->fh4_xlen,
3631 NULL);
3632
3633 if (cs->exi == NULL) {
3634 *cs->statusp = resp->status = NFS4ERR_STALE;
3635 goto out;
3636 }
3637
3638 cs->cr = crdup(cs->basecr);
3639
3640 ASSERT(cs->cr != NULL);
3641
3642 if (! (cs->vp = nfs4_fhtovp(&args->object, cs->exi, &resp->status))) {
3643 *cs->statusp = resp->status;
3670 struct exportinfo *exi, *sav_exi;
3671
3672 DTRACE_NFSV4_1(op__putrootfh__start, struct compound_state *, cs);
3673
3674 if (cs->vp) {
3675 VN_RELE(cs->vp);
3676 cs->vp = NULL;
3677 }
3678
3679 if (cs->cr)
3680 crfree(cs->cr);
3681
3682 cs->cr = crdup(cs->basecr);
3683
3684 /*
3685 * Using rootdir, the system root vnode,
3686 * get its fid.
3687 */
3688 bzero(&fid, sizeof (fid));
3689 fid.fid_len = MAXFIDSZ;
3690 error = vop_fid_pseudo(ZONE_ROOTVP(), &fid);
3691 if (error != 0) {
3692 *cs->statusp = resp->status = puterrno4(error);
3693 goto out;
3694 }
3695
3696 /*
3697 * Then use the root fsid & fid it to find out if it's exported
3698 *
3699 * If the server root isn't exported directly, then
3700 * it should at least be a pseudo export based on
3701 * one or more exports further down in the server's
3702 * file tree.
3703 */
3704 exi = checkexport4(&ZONE_ROOTVP()->v_vfsp->vfs_fsid, &fid, NULL);
3705 if (exi == NULL || exi->exi_export.ex_flags & EX_PUBLIC) {
3706 NFS4_DEBUG(rfs4_debug,
3707 (CE_WARN, "rfs4_op_putrootfh: export check failure"));
3708 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3709 goto out;
3710 }
3711
3712 /*
3713 * Now make a filehandle based on the root
3714 * export and root vnode.
3715 */
3716 error = makefh4(&cs->fh, ZONE_ROOTVP(), exi);
3717 if (error != 0) {
3718 *cs->statusp = resp->status = puterrno4(error);
3719 goto out;
3720 }
3721
3722 sav_exi = cs->exi;
3723 cs->exi = exi;
3724
3725 VN_HOLD(ZONE_ROOTVP());
3726 cs->vp = ZONE_ROOTVP();
3727
3728 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3729 VN_RELE(cs->vp);
3730 cs->vp = NULL;
3731 cs->exi = sav_exi;
3732 goto out;
3733 }
3734
3735 *cs->statusp = resp->status = NFS4_OK;
3736 cs->deleg = FALSE;
3737 out:
3738 DTRACE_NFSV4_2(op__putrootfh__done, struct compound_state *, cs,
3739 PUTROOTFH4res *, resp);
3740 }
3741
3742 /*
3743 * set_rdattr_params sets up the variables used to manage what information
3744 * to get for each directory entry.
3745 */
3746 static nfsstat4
3747 set_rdattr_params(struct nfs4_svgetit_arg *sargp,
3748 bitmap4 attrs, bool_t *need_to_lookup)
3749 {
4320 }
4321 goto out;
4322 }
4323 NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
4324
4325 /* Actually do the REMOVE operation */
4326 if (vp->v_type == VDIR) {
4327 /*
4328 * Can't remove a directory that has a mounted-on filesystem.
4329 */
4330 if (vn_ismntpt(vp)) {
4331 error = EACCES;
4332 } else {
4333 /*
4334 * System V defines rmdir to return EEXIST,
4335 * not ENOTEMPTY, if the directory is not
4336 * empty. A System V NFS server needs to map
4337 * NFS4ERR_EXIST to NFS4ERR_NOTEMPTY to
4338 * transmit over the wire.
4339 */
4340 if ((error = VOP_RMDIR(dvp, name, ZONE_ROOTVP(), cs->cr,
4341 NULL, 0)) == EEXIST)
4342 error = ENOTEMPTY;
4343 }
4344 } else {
4345 if ((error = VOP_REMOVE(dvp, name, cs->cr, NULL, 0)) == 0 &&
4346 fp != NULL) {
4347 struct vattr va;
4348 vnode_t *tvp;
4349
4350 rfs4_dbe_lock(fp->rf_dbe);
4351 tvp = fp->rf_vp;
4352 if (tvp)
4353 VN_HOLD(tvp);
4354 rfs4_dbe_unlock(fp->rf_dbe);
4355
4356 if (tvp) {
4357 /*
4358 * This is va_seq safe because we are not
4359 * manipulating dvp.
4360 */
4432 DTRACE_NFSV4_2(op__remove__done, struct compound_state *, cs,
4433 REMOVE4res *, resp);
4434 }
4435
4436 /*
4437 * rename: args: SAVED_FH: from directory, CURRENT_FH: target directory,
4438 * oldname and newname.
4439 * res: status. If success - CURRENT_FH unchanged, return change_info
4440 * for both from and target directories.
4441 */
4442 /* ARGSUSED */
4443 static void
4444 rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4445 struct compound_state *cs)
4446 {
4447 RENAME4args *args = &argop->nfs_argop4_u.oprename;
4448 RENAME4res *resp = &resop->nfs_resop4_u.oprename;
4449 int error;
4450 vnode_t *odvp;
4451 vnode_t *ndvp;
4452 vnode_t *srcvp, *targvp, *tvp;
4453 struct vattr obdva, oidva, oadva;
4454 struct vattr nbdva, nidva, nadva;
4455 char *onm, *nnm;
4456 uint_t olen, nlen;
4457 rfs4_file_t *fp, *sfp;
4458 int in_crit_src, in_crit_targ;
4459 int fp_rele_grant_hold, sfp_rele_grant_hold;
4460 int unlinked;
4461 bslabel_t *clabel;
4462 struct sockaddr *ca;
4463 char *converted_onm = NULL;
4464 char *converted_nnm = NULL;
4465 nfsstat4 status;
4466
4467 DTRACE_NFSV4_2(op__rename__start, struct compound_state *, cs,
4468 RENAME4args *, args);
4469
4470 fp = sfp = NULL;
4471 srcvp = targvp = tvp = NULL;
4472 in_crit_src = in_crit_targ = 0;
4473 fp_rele_grant_hold = sfp_rele_grant_hold = 0;
4474 unlinked = 0;
4475
4476 /* CURRENT_FH: target directory */
4477 ndvp = cs->vp;
4478 if (ndvp == NULL) {
4479 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4480 goto out;
4481 }
4482
4483 /* SAVED_FH: from directory */
4484 odvp = cs->saved_vp;
4485 if (odvp == NULL) {
4486 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4487 goto out;
4488 }
4489
4490 if (cs->access == CS_ACCESS_DENIED) {
4491 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4492 goto out;
4493 }
4494
4627 kmem_free(onm, olen);
4628 if (nnm != converted_nnm)
4629 kmem_free(converted_nnm, MAXPATHLEN + 1);
4630 kmem_free(nnm, nlen);
4631 goto out;
4632 }
4633
4634 sfp_rele_grant_hold = 1;
4635
4636 /* Does the destination exist and a file and have a delegation? */
4637 if (fp = rfs4_lookup_and_findfile(ndvp, converted_nnm, &targvp,
4638 NULL, cs->cr)) {
4639 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4640 NULL)) {
4641 *cs->statusp = resp->status = NFS4ERR_DELAY;
4642 goto err_out;
4643 }
4644 }
4645 fp_rele_grant_hold = 1;
4646
4647 /* Check for NBMAND lock on both source and target */
4648 if (nbl_need_check(srcvp)) {
4649 nbl_start_crit(srcvp, RW_READER);
4650 in_crit_src = 1;
4651 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
4652 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4653 goto err_out;
4654 }
4655 }
4656
4657 if (targvp && nbl_need_check(targvp)) {
4658 nbl_start_crit(targvp, RW_READER);
4659 in_crit_targ = 1;
4660 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
4661 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4662 goto err_out;
4663 }
4664 }
4665
4666 /* Get source "before" change value */
4667 obdva.va_mask = AT_CTIME|AT_SEQ;
4668 error = VOP_GETATTR(odvp, &obdva, 0, cs->cr, NULL);
4669 if (!error) {
4670 nbdva.va_mask = AT_CTIME|AT_SEQ;
4671 error = VOP_GETATTR(ndvp, &nbdva, 0, cs->cr, NULL);
4672 }
4673 if (error) {
4674 *cs->statusp = resp->status = puterrno4(error);
4675 goto err_out;
4676 }
4677
4678 NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.before, obdva.va_ctime)
4679 NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.before, nbdva.va_ctime)
4680
4681 error = VOP_RENAME(odvp, converted_onm, ndvp, converted_nnm, cs->cr,
4682 NULL, 0);
4683
4684 /*
4685 * If target existed and was unlinked by VOP_RENAME, state will need
4686 * closed. To avoid deadlock, rfs4_close_all_state will be done after
4687 * any necessary nbl_end_crit on srcvp and tgtvp.
4688 */
4689 if (error == 0 && fp != NULL) {
4690 rfs4_dbe_lock(fp->rf_dbe);
4691 tvp = fp->rf_vp;
4692 if (tvp)
4693 VN_HOLD(tvp);
4694 rfs4_dbe_unlock(fp->rf_dbe);
4695
4696 if (tvp) {
4697 struct vattr va;
4698 va.va_mask = AT_NLINK;
4699
4700 if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4701 va.va_nlink == 0) {
4702 unlinked = 1;
4703
4704 /* DEBUG data */
4705 if ((srcvp == targvp) || (tvp != targvp)) {
4706 cmn_err(CE_WARN, "rfs4_op_rename: "
4707 "srcvp %p, targvp: %p, tvp: %p",
4708 (void *)srcvp, (void *)targvp,
4709 (void *)tvp);
4710 }
4711 } else {
4712 VN_RELE(tvp);
4713 }
4714 }
4715 }
4716 if (error == 0)
4717 vn_renamepath(ndvp, srcvp, nnm, nlen - 1);
4718
4719 if (in_crit_src)
4720 nbl_end_crit(srcvp);
4721 if (srcvp)
4722 VN_RELE(srcvp);
4723 if (in_crit_targ)
4724 nbl_end_crit(targvp);
4725 if (targvp)
4726 VN_RELE(targvp);
4727
4728 if (unlinked) {
4729 ASSERT(fp != NULL);
4730 ASSERT(tvp != NULL);
4731
4732 /* DEBUG data */
4733 if (RW_READ_HELD(&tvp->v_nbllock)) {
4734 cmn_err(CE_WARN, "rfs4_op_rename: "
4735 "RW_READ_HELD(%p)", (void *)tvp);
4736 }
4737
4738 /* The file is gone and so should the state */
4739 rfs4_close_all_state(fp);
4740 VN_RELE(tvp);
4741 }
4742
4743 if (sfp) {
4744 rfs4_clear_dont_grant(sfp);
4745 rfs4_file_rele(sfp);
4746 }
4747 if (fp) {
4748 rfs4_clear_dont_grant(fp);
4749 rfs4_file_rele(fp);
4750 }
4751
4752 if (converted_onm != onm)
4753 kmem_free(converted_onm, MAXPATHLEN + 1);
4754 kmem_free(onm, olen);
4755 if (converted_nnm != nnm)
4756 kmem_free(converted_nnm, MAXPATHLEN + 1);
4757 kmem_free(nnm, nlen);
4758
4759 /*
4760 * Get the initial "after" sequence number, if it fails, set to zero
4761 */
4762 oidva.va_mask = AT_SEQ;
5659 static void
5660 rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5661 struct compound_state *cs)
5662 {
5663 WRITE4args *args = &argop->nfs_argop4_u.opwrite;
5664 WRITE4res *resp = &resop->nfs_resop4_u.opwrite;
5665 int error;
5666 vnode_t *vp;
5667 struct vattr bva;
5668 u_offset_t rlimit;
5669 struct uio uio;
5670 struct iovec iov[MAX_IOVECS];
5671 struct iovec *iovp;
5672 int iovcnt;
5673 int ioflag;
5674 cred_t *savecred, *cr;
5675 bool_t *deleg = &cs->deleg;
5676 nfsstat4 stat;
5677 int in_crit = 0;
5678 caller_context_t ct;
5679 nfs4_srv_t *nsrv4;
5680
5681 DTRACE_NFSV4_2(op__write__start, struct compound_state *, cs,
5682 WRITE4args *, args);
5683
5684 vp = cs->vp;
5685 if (vp == NULL) {
5686 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5687 goto out;
5688 }
5689 if (cs->access == CS_ACCESS_DENIED) {
5690 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5691 goto out;
5692 }
5693
5694 cr = cs->cr;
5695
5696 if ((stat = rfs4_check_stateid(FWRITE, vp, &args->stateid, FALSE,
5697 deleg, TRUE, &ct)) != NFS4_OK) {
5698 *cs->statusp = resp->status = stat;
5699 goto out;
5730 goto out;
5731 }
5732
5733 if (vp->v_type != VREG) {
5734 *cs->statusp = resp->status =
5735 ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
5736 goto out;
5737 }
5738
5739 if (crgetuid(cr) != bva.va_uid &&
5740 (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct))) {
5741 *cs->statusp = resp->status = puterrno4(error);
5742 goto out;
5743 }
5744
5745 if (MANDLOCK(vp, bva.va_mode)) {
5746 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5747 goto out;
5748 }
5749
5750 nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
5751 if (args->data_len == 0) {
5752 *cs->statusp = resp->status = NFS4_OK;
5753 resp->count = 0;
5754 resp->committed = args->stable;
5755 resp->writeverf = nsrv4->write4verf;
5756 goto out;
5757 }
5758
5759 if (args->mblk != NULL) {
5760 mblk_t *m;
5761 uint_t bytes, round_len;
5762
5763 iovcnt = 0;
5764 bytes = 0;
5765 round_len = roundup(args->data_len, BYTES_PER_XDR_UNIT);
5766 for (m = args->mblk;
5767 m != NULL && bytes < round_len;
5768 m = m->b_cont) {
5769 iovcnt++;
5770 bytes += MBLKL(m);
5771 }
5772 #ifdef DEBUG
5773 /* should have ended on an mblk boundary */
5774 if (bytes != round_len) {
5775 printf("bytes=0x%x, round_len=0x%x, req len=0x%x\n",
5831 curthread->t_cred = cr;
5832 error = do_io(FWRITE, vp, &uio, ioflag, cr, &ct);
5833 curthread->t_cred = savecred;
5834
5835 if (iovp != iov)
5836 kmem_free(iovp, sizeof (*iovp) * iovcnt);
5837
5838 if (error) {
5839 *cs->statusp = resp->status = puterrno4(error);
5840 goto out;
5841 }
5842
5843 *cs->statusp = resp->status = NFS4_OK;
5844 resp->count = args->data_len - uio.uio_resid;
5845
5846 if (ioflag == 0)
5847 resp->committed = UNSTABLE4;
5848 else
5849 resp->committed = FILE_SYNC4;
5850
5851 resp->writeverf = nsrv4->write4verf;
5852
5853 out:
5854 if (in_crit)
5855 nbl_end_crit(vp);
5856
5857 DTRACE_NFSV4_2(op__write__done, struct compound_state *, cs,
5858 WRITE4res *, resp);
5859 }
5860
5861
5862 /* XXX put in a header file */
5863 extern int sec_svc_getcred(struct svc_req *, cred_t *, caddr_t *, int *);
5864
5865 void
5866 rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi,
5867 struct svc_req *req, cred_t *cr, int *rv)
5868 {
5869 uint_t i;
5870 struct compound_state cs;
5871 nfs4_srv_t *nsrv4;
5872 nfs_export_t *ne = nfs_get_export();
5873
5874 if (rv != NULL)
5875 *rv = 0;
5876 rfs4_init_compound_state(&cs);
5877 /*
5878 * Form a reply tag by copying over the request tag.
5879 */
5880 resp->tag.utf8string_val =
5881 kmem_alloc(args->tag.utf8string_len, KM_SLEEP);
5882 resp->tag.utf8string_len = args->tag.utf8string_len;
5883 bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
5884 resp->tag.utf8string_len);
5885
5886 cs.statusp = &resp->status;
5887 cs.req = req;
5888 resp->array = NULL;
5889 resp->array_len = 0;
5890
5891 /*
5892 * XXX for now, minorversion should be zero
5910
5911 cr = crget();
5912 ASSERT(cr != NULL);
5913
5914 if (sec_svc_getcred(req, cr, &cs.principal, &cs.nfsflavor) == 0) {
5915 DTRACE_NFSV4_2(compound__start, struct compound_state *,
5916 &cs, COMPOUND4args *, args);
5917 crfree(cr);
5918 DTRACE_NFSV4_2(compound__done, struct compound_state *,
5919 &cs, COMPOUND4res *, resp);
5920 svcerr_badcred(req->rq_xprt);
5921 if (rv != NULL)
5922 *rv = 1;
5923 return;
5924 }
5925 resp->array_len = args->array_len;
5926 resp->array = kmem_zalloc(args->array_len * sizeof (nfs_resop4),
5927 KM_SLEEP);
5928
5929 cs.basecr = cr;
5930 nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
5931
5932 DTRACE_NFSV4_2(compound__start, struct compound_state *, &cs,
5933 COMPOUND4args *, args);
5934
5935 /*
5936 * For now, NFS4 compound processing must be protected by
5937 * exported_lock because it can access more than one exportinfo
5938 * per compound and share/unshare can now change multiple
5939 * exinfo structs. The NFS2/3 code only refs 1 exportinfo
5940 * per proc (excluding public exinfo), and exi_count design
5941 * is sufficient to protect concurrent execution of NFS2/3
5942 * ops along with unexport. This lock will be removed as
5943 * part of the NFSv4 phase 2 namespace redesign work.
5944 */
5945 rw_enter(&ne->exported_lock, RW_READER);
5946
5947 /*
5948 * If this is the first compound we've seen, we need to start all
5949 * new instances' grace periods.
5950 */
5951 if (nsrv4->seen_first_compound == 0) {
5952 rfs4_grace_start_new(nsrv4);
5953 /*
5954 * This must be set after rfs4_grace_start_new(), otherwise
5955 * another thread could proceed past here before the former
5956 * is finished.
5957 */
5958 nsrv4->seen_first_compound = 1;
5959 }
5960
5961 for (i = 0; i < args->array_len && cs.cont; i++) {
5962 nfs_argop4 *argop;
5963 nfs_resop4 *resop;
5964 uint_t op;
5965
5966 argop = &args->array[i];
5967 resop = &resp->array[i];
5968 resop->resop = argop->argop;
5969 op = (uint_t)resop->resop;
5970
5971 if (op < rfsv4disp_cnt) {
5972 kstat_t *ksp = rfsprocio_v4_ptr[op];
5973 kstat_t *exi_ksp = NULL;
5974
5975 /*
5976 * Count the individual ops here; NULL and COMPOUND
5977 * are counted in common_dispatch()
5978 */
5979 rfsproccnt_v4_ptr[op].value.ui64++;
5980
5981 if (ksp != NULL) {
5982 mutex_enter(ksp->ks_lock);
5983 kstat_runq_enter(KSTAT_IO_PTR(ksp));
5984 mutex_exit(ksp->ks_lock);
5985 }
5986
5987 switch (rfsv4disptab[op].op_type) {
5988 case NFS4_OP_CFH:
5989 resop->exi = cs.exi;
5990 break;
5991 case NFS4_OP_SFH:
5992 resop->exi = cs.saved_exi;
5993 break;
5994 default:
5995 ASSERT(resop->exi == NULL);
5996 break;
5997 }
5998
5999 if (resop->exi != NULL) {
6000 exi_ksp = NULL;
6001 if (resop->exi->exi_kstats != NULL) {
6002 exi_ksp = exp_kstats_v4(
6003 resop->exi->exi_kstats, op);
6004 }
6005 if (exi_ksp != NULL) {
6006 mutex_enter(exi_ksp->ks_lock);
6007 kstat_runq_enter(KSTAT_IO_PTR(exi_ksp));
6008 mutex_exit(exi_ksp->ks_lock);
6009 }
6010 }
6011
6012 NFS4_DEBUG(rfs4_debug > 1,
6013 (CE_NOTE, "Executing %s", rfs4_op_string[op]));
6014 (*rfsv4disptab[op].dis_proc)(argop, resop, req, &cs);
6015 NFS4_DEBUG(rfs4_debug > 1, (CE_NOTE, "%s returned %d",
6016 rfs4_op_string[op], *cs.statusp));
6017 if (*cs.statusp != NFS4_OK)
6018 cs.cont = FALSE;
6019
6020 if (rfsv4disptab[op].op_type == NFS4_OP_POSTCFH &&
6021 *cs.statusp == NFS4_OK &&
6022 (resop->exi = cs.exi) != NULL) {
6023 exi_ksp = NULL;
6024 if (resop->exi->exi_kstats != NULL) {
6025 exi_ksp = exp_kstats_v4(
6026 resop->exi->exi_kstats, op);
6027 }
6028 }
6029
6030 if (exi_ksp != NULL) {
6031 mutex_enter(exi_ksp->ks_lock);
6032 KSTAT_IO_PTR(exi_ksp)->nwritten +=
6033 argop->opsize;
6034 KSTAT_IO_PTR(exi_ksp)->writes++;
6035 if (rfsv4disptab[op].op_type != NFS4_OP_POSTCFH)
6036 kstat_runq_exit(KSTAT_IO_PTR(exi_ksp));
6037 mutex_exit(exi_ksp->ks_lock);
6038 } else {
6039 resop->exi = NULL;
6040 }
6041
6042 if (ksp != NULL) {
6043 mutex_enter(ksp->ks_lock);
6044 kstat_runq_exit(KSTAT_IO_PTR(ksp));
6045 mutex_exit(ksp->ks_lock);
6046 }
6047 } else {
6048 /*
6049 * This is effectively dead code since XDR code
6050 * will have already returned BADXDR if op doesn't
6051 * decode to a legal value. This is only done for a
6052 * day when XDR code doesn't verify v4 opcodes.
6053 */
6054 op = OP_ILLEGAL;
6055 rfsproccnt_v4_ptr[OP_ILLEGAL_IDX].value.ui64++;
6056
6057 rfs4_op_illegal(argop, resop, req, &cs);
6058 cs.cont = FALSE;
6059 }
6060
6061 /*
6062 * The exi saved in the resop to be used for kstats update
6063 * once the opsize is calculated during XDR response encoding.
6064 * Put a hold on resop->exi so that it can't be destroyed.
6065 */
6066 if (resop->exi != NULL)
6067 exi_hold(resop->exi);
6068
6069 /*
6070 * If not at last op, and if we are to stop, then
6071 * compact the results array.
6072 */
6073 if ((i + 1) < args->array_len && !cs.cont) {
6074 nfs_resop4 *new_res = kmem_alloc(
6075 (i + 1) * sizeof (nfs_resop4), KM_SLEEP);
6076 bcopy(resp->array,
6077 new_res, (i + 1) * sizeof (nfs_resop4));
6078 kmem_free(resp->array,
6079 args->array_len * sizeof (nfs_resop4));
6080
6081 resp->array_len = i + 1;
6082 resp->array = new_res;
6083 }
6084 }
6085
6086 rw_exit(&ne->exported_lock);
6087
6088 /*
6089 * clear exportinfo and vnode fields from compound_state before dtrace
6090 * probe, to avoid tracing residual values for path and share path.
6091 */
6092 if (cs.vp)
6093 VN_RELE(cs.vp);
6094 if (cs.saved_vp)
6095 VN_RELE(cs.saved_vp);
6096 cs.exi = cs.saved_exi = NULL;
6097 cs.vp = cs.saved_vp = NULL;
6098
6099 DTRACE_NFSV4_2(compound__done, struct compound_state *, &cs,
6100 COMPOUND4res *, resp);
6101
6102 if (cs.saved_fh.nfs_fh4_val)
6103 kmem_free(cs.saved_fh.nfs_fh4_val, NFS4_FHSIZE);
6104
6105 if (cs.basecr)
6106 crfree(cs.basecr);
6107 if (cs.cr)
6108 crfree(cs.cr);
6109 /*
6110 * done with this compound request, free the label
6111 */
6112
6113 if (req->rq_label != NULL) {
6114 kmem_free(req->rq_label, sizeof (bslabel_t));
6115 req->rq_label = NULL;
6116 }
6117 }
6118
6119 /*
6120 * XXX because of what appears to be duplicate calls to rfs4_compound_free
6121 * XXX zero out the tag and array values. Need to investigate why the
6151 */
6152 void
6153 rfs4_compound_flagproc(COMPOUND4args *args, int *flagp)
6154 {
6155 int i;
6156 int flag = RPC_ALL;
6157
6158 for (i = 0; flag && i < args->array_len; i++) {
6159 uint_t op;
6160
6161 op = (uint_t)args->array[i].argop;
6162
6163 if (op < rfsv4disp_cnt)
6164 flag &= rfsv4disptab[op].dis_flags;
6165 else
6166 flag = 0;
6167 }
6168 *flagp = flag;
6169 }
6170
6171 /*
6172 * Update the kstats for the received requests.
6173 * Note: writes/nwritten are used to hold count and nbytes of requests received.
6174 *
6175 * Per export request statistics need to be updated during the compound request
6176 * processing (rfs4_compound()) as that is where it is known which exportinfo to
6177 * associate the kstats with.
6178 */
6179 void
6180 rfs4_compound_kstat_args(COMPOUND4args *args)
6181 {
6182 int i;
6183
6184 for (i = 0; i < args->array_len; i++) {
6185 uint_t op = (uint_t)args->array[i].argop;
6186
6187 if (op < rfsv4disp_cnt) {
6188 kstat_t *ksp = rfsprocio_v4_ptr[op];
6189
6190 if (ksp != NULL) {
6191 mutex_enter(ksp->ks_lock);
6192 KSTAT_IO_PTR(ksp)->nwritten +=
6193 args->array[i].opsize;
6194 KSTAT_IO_PTR(ksp)->writes++;
6195 mutex_exit(ksp->ks_lock);
6196 }
6197 }
6198 }
6199 }
6200
6201 /*
6202 * Update the kstats for the sent responses.
6203 * Note: reads/nread are used to hold count and nbytes of responses sent.
6204 *
6205 * Per export response statistics cannot be updated until here, after the
6206 * response send has generated the opsize (bytes sent) in the XDR encoding.
6207 * The exportinfo with which the kstats should be associated is thus saved
6208 * in the response structure (by rfs4_compound()) for use here. A hold is
6209 * placed on the exi to ensure it cannot be deleted before use. This hold
6210 * is released, and the exi set to NULL, here.
6211 */
6212 void
6213 rfs4_compound_kstat_res(COMPOUND4res *res)
6214 {
6215 int i;
6216 nfs_export_t *ne = nfs_get_export();
6217
6218 for (i = 0; i < res->array_len; i++) {
6219 uint_t op = (uint_t)res->array[i].resop;
6220
6221 if (op < rfsv4disp_cnt) {
6222 kstat_t *ksp = rfsprocio_v4_ptr[op];
6223 struct exportinfo *exi = res->array[i].exi;
6224
6225 if (ksp != NULL) {
6226 mutex_enter(ksp->ks_lock);
6227 KSTAT_IO_PTR(ksp)->nread +=
6228 res->array[i].opsize;
6229 KSTAT_IO_PTR(ksp)->reads++;
6230 mutex_exit(ksp->ks_lock);
6231 }
6232
6233 if (exi != NULL) {
6234 kstat_t *exi_ksp = NULL;
6235
6236 rw_enter(&ne->exported_lock, RW_READER);
6237
6238 if (exi->exi_kstats != NULL) {
6239 /*CSTYLED*/
6240 exi_ksp = exp_kstats_v4(exi->exi_kstats, op);
6241 }
6242 if (exi_ksp != NULL) {
6243 mutex_enter(exi_ksp->ks_lock);
6244 KSTAT_IO_PTR(exi_ksp)->nread +=
6245 res->array[i].opsize;
6246 KSTAT_IO_PTR(exi_ksp)->reads++;
6247 mutex_exit(exi_ksp->ks_lock);
6248 }
6249
6250 exi_rele(&exi);
6251 res->array[i].exi = NULL;
6252 rw_exit(&ne->exported_lock);
6253 }
6254 }
6255 }
6256 }
6257
6258 nfsstat4
6259 rfs4_client_sysid(rfs4_client_t *cp, sysid_t *sp)
6260 {
6261 nfsstat4 e;
6262
6263 rfs4_dbe_lock(cp->rc_dbe);
6264
6265 if (cp->rc_sysidt != LM_NOSYSID) {
6266 *sp = cp->rc_sysidt;
6267 e = NFS4_OK;
6268
6269 } else if ((cp->rc_sysidt = lm_alloc_sysidt()) != LM_NOSYSID) {
6270 *sp = cp->rc_sysidt;
6271 e = NFS4_OK;
6272
6273 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
6274 "rfs4_client_sysid: allocated 0x%x\n", *sp));
6275 } else
6276 e = NFS4ERR_DELAY;
6277
6872
6873 /* Check for mandatory locking and that the size gets set. */
6874 cva.va_mask = AT_MODE;
6875 if (setsize)
6876 cva.va_mask |= AT_SIZE;
6877
6878 /* Assume the worst */
6879 cs->mandlock = TRUE;
6880
6881 if (VOP_GETATTR(vp, &cva, 0, cs->cr, NULL) == 0) {
6882 cs->mandlock = MANDLOCK(cs->vp, cva.va_mode);
6883
6884 /*
6885 * Truncate the file if necessary; this would be
6886 * the case for create over an existing file.
6887 */
6888
6889 if (trunc) {
6890 int in_crit = 0;
6891 rfs4_file_t *fp;
6892 nfs4_srv_t *nsrv4;
6893 bool_t create = FALSE;
6894
6895 /*
6896 * We are writing over an existing file.
6897 * Check to see if we need to recall a delegation.
6898 */
6899 nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
6900 rfs4_hold_deleg_policy(nsrv4);
6901 if ((fp = rfs4_findfile(vp, NULL, &create)) != NULL) {
6902 if (rfs4_check_delegated_byfp(FWRITE, fp,
6903 (reqsize == 0), FALSE, FALSE, &clientid)) {
6904 rfs4_file_rele(fp);
6905 rfs4_rele_deleg_policy(nsrv4);
6906 VN_RELE(vp);
6907 *attrset = 0;
6908 return (NFS4ERR_DELAY);
6909 }
6910 rfs4_file_rele(fp);
6911 }
6912 rfs4_rele_deleg_policy(nsrv4);
6913
6914 if (nbl_need_check(vp)) {
6915 in_crit = 1;
6916
6917 ASSERT(reqsize == 0);
6918
6919 nbl_start_crit(vp, RW_READER);
6920 if (nbl_conflict(vp, NBL_WRITE, 0,
6921 cva.va_size, 0, NULL)) {
6922 in_crit = 0;
6923 nbl_end_crit(vp);
6924 VN_RELE(vp);
6925 *attrset = 0;
6926 return (NFS4ERR_ACCESS);
6927 }
6928 }
6929 ct.cc_sysid = 0;
6930 ct.cc_pid = 0;
6931 ct.cc_caller_id = nfs4_srv_caller_id;
6932 ct.cc_flags = CC_DONTBLOCK;
8450
8451 newcp->rc_cp_confirmed = cp_confirmed;
8452
8453 rfs4_client_rele(newcp);
8454
8455 out:
8456 DTRACE_NFSV4_2(op__setclientid__done, struct compound_state *, cs,
8457 SETCLIENTID4res *, res);
8458 }
8459
8460 /*ARGSUSED*/
8461 void
8462 rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
8463 struct svc_req *req, struct compound_state *cs)
8464 {
8465 SETCLIENTID_CONFIRM4args *args =
8466 &argop->nfs_argop4_u.opsetclientid_confirm;
8467 SETCLIENTID_CONFIRM4res *res =
8468 &resop->nfs_resop4_u.opsetclientid_confirm;
8469 rfs4_client_t *cp, *cptoclose = NULL;
8470 nfs4_srv_t *nsrv4;
8471
8472 DTRACE_NFSV4_2(op__setclientid__confirm__start,
8473 struct compound_state *, cs,
8474 SETCLIENTID_CONFIRM4args *, args);
8475
8476 nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
8477 *cs->statusp = res->status = NFS4_OK;
8478
8479 cp = rfs4_findclient_by_id(args->clientid, TRUE);
8480
8481 if (cp == NULL) {
8482 *cs->statusp = res->status =
8483 rfs4_check_clientid(&args->clientid, 1);
8484 goto out;
8485 }
8486
8487 if (!creds_ok(cp, req, cs)) {
8488 *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8489 rfs4_client_rele(cp);
8490 goto out;
8491 }
8492
8493 /* If the verifier doesn't match, the record doesn't match */
8494 if (cp->rc_confirm_verf != args->setclientid_confirm) {
8495 *cs->statusp = res->status = NFS4ERR_STALE_CLIENTID;
8496 rfs4_client_rele(cp);
8497 goto out;
8498 }
8499
8500 rfs4_dbe_lock(cp->rc_dbe);
8501 cp->rc_need_confirm = FALSE;
8502 if (cp->rc_cp_confirmed) {
8503 cptoclose = cp->rc_cp_confirmed;
8504 cptoclose->rc_ss_remove = 1;
8505 cp->rc_cp_confirmed = NULL;
8506 }
8507
8508 /*
8509 * Update the client's associated server instance, if it's changed
8510 * since the client was created.
8511 */
8512 if (rfs4_servinst(cp) != nsrv4->nfs4_cur_servinst)
8513 rfs4_servinst_assign(nsrv4, cp, nsrv4->nfs4_cur_servinst);
8514
8515 /*
8516 * Record clientid in stable storage.
8517 * Must be done after server instance has been assigned.
8518 */
8519 rfs4_ss_clid(nsrv4, cp);
8520
8521 rfs4_dbe_unlock(cp->rc_dbe);
8522
8523 if (cptoclose)
8524 /* don't need to rele, client_close does it */
8525 rfs4_client_close(cptoclose);
8526
8527 /* If needed, initiate CB_NULL call for callback path */
8528 rfs4_deleg_cb_check(cp);
8529 rfs4_update_lease(cp);
8530
8531 /*
8532 * Check to see if client can perform reclaims
8533 */
8534 rfs4_ss_chkclid(nsrv4, cp);
8535
8536 rfs4_client_rele(cp);
8537
8538 out:
8539 DTRACE_NFSV4_2(op__setclientid__confirm__done,
8540 struct compound_state *, cs,
8541 SETCLIENTID_CONFIRM4 *, res);
8542 }
8543
8544
8545 /*ARGSUSED*/
8546 void
8547 rfs4_op_close(nfs_argop4 *argop, nfs_resop4 *resop,
8548 struct svc_req *req, struct compound_state *cs)
8549 {
8550 CLOSE4args *args = &argop->nfs_argop4_u.opclose;
8551 CLOSE4res *resp = &resop->nfs_resop4_u.opclose;
8552 rfs4_state_t *sp;
8553 nfsstat4 status;
8554
10158 /*
10159 * Check to see if we have a downrev Solaris client, so that we
10160 * can send it a symlink instead of a referral.
10161 */
10162 int
10163 client_is_downrev(struct svc_req *req)
10164 {
10165 struct sockaddr *ca;
10166 rfs4_clntip_t *ci;
10167 bool_t create = FALSE;
10168 int is_downrev;
10169
10170 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
10171 ASSERT(ca);
10172 ci = rfs4_find_clntip(ca, &create);
10173 if (ci == NULL)
10174 return (0);
10175 is_downrev = ci->ri_no_referrals;
10176 rfs4_dbe_rele(ci->ri_dbe);
10177 return (is_downrev);
10178 }
10179
10180 /*
10181 * Do the main work of handling HA-NFSv4 Resource Group failover on
10182 * Sun Cluster.
10183 * We need to detect whether any RG admin paths have been added or removed,
10184 * and adjust resources accordingly.
10185 * Currently we're using a very inefficient algorithm, ~ 2 * O(n**2). In
10186 * order to scale, the list and array of paths need to be held in more
10187 * suitable data structures.
10188 */
10189 static void
10190 hanfsv4_failover(nfs4_srv_t *nsrv4)
10191 {
10192 int i, start_grace, numadded_paths = 0;
10193 char **added_paths = NULL;
10194 rfs4_dss_path_t *dss_path;
10195
10196 /*
10197 * Note: currently, dss_pathlist cannot be NULL, since
10198 * it will always include an entry for NFS4_DSS_VAR_DIR. If we
10199 * make the latter dynamically specified too, the following will
10200 * need to be adjusted.
10201 */
10202
10203 /*
10204 * First, look for removed paths: RGs that have been failed-over
10205 * away from this node.
10206 * Walk the "currently-serving" dss_pathlist and, for each
10207 * path, check if it is on the "passed-in" rfs4_dss_newpaths array
10208 * from nfsd. If not, that RG path has been removed.
10209 *
10210 * Note that nfsd has sorted rfs4_dss_newpaths for us, and removed
10211 * any duplicates.
10212 */
10213 dss_path = nsrv4->dss_pathlist;
10214 do {
10215 int found = 0;
10216 char *path = dss_path->path;
10217
10218 /* used only for non-HA so may not be removed */
10219 if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
10220 dss_path = dss_path->next;
10221 continue;
10222 }
10223
10224 for (i = 0; i < rfs4_dss_numnewpaths; i++) {
10225 int cmpret;
10226 char *newpath = rfs4_dss_newpaths[i];
10227
10228 /*
10229 * Since nfsd has sorted rfs4_dss_newpaths for us,
10230 * once the return from strcmp is negative we know
10231 * we've passed the point where "path" should be,
10232 * and can stop searching: "path" has been removed.
10233 */
10234 cmpret = strcmp(path, newpath);
10235 if (cmpret < 0)
10236 break;
10237 if (cmpret == 0) {
10238 found = 1;
10239 break;
10240 }
10241 }
10242
10243 if (found == 0) {
10244 unsigned index = dss_path->index;
10245 rfs4_servinst_t *sip = dss_path->sip;
10246 rfs4_dss_path_t *path_next = dss_path->next;
10247
10248 /*
10249 * This path has been removed.
10250 * We must clear out the servinst reference to
10251 * it, since it's now owned by another
10252 * node: we should not attempt to touch it.
10253 */
10254 ASSERT(dss_path == sip->dss_paths[index]);
10255 sip->dss_paths[index] = NULL;
10256
10257 /* remove from "currently-serving" list, and destroy */
10258 remque(dss_path);
10259 /* allow for NUL */
10260 kmem_free(dss_path->path, strlen(dss_path->path) + 1);
10261 kmem_free(dss_path, sizeof (rfs4_dss_path_t));
10262
10263 dss_path = path_next;
10264 } else {
10265 /* path was found; not removed */
10266 dss_path = dss_path->next;
10267 }
10268 } while (dss_path != nsrv4->dss_pathlist);
10269
10270 /*
10271 * Now, look for added paths: RGs that have been failed-over
10272 * to this node.
10273 * Walk the "passed-in" rfs4_dss_newpaths array from nfsd and,
10274 * for each path, check if it is on the "currently-serving"
10275 * dss_pathlist. If not, that RG path has been added.
10276 *
10277 * Note: we don't do duplicate detection here; nfsd does that for us.
10278 *
10279 * Note: numadded_paths <= rfs4_dss_numnewpaths, which gives us
10280 * an upper bound for the size needed for added_paths[numadded_paths].
10281 */
10282
10283 /* probably more space than we need, but guaranteed to be enough */
10284 if (rfs4_dss_numnewpaths > 0) {
10285 size_t sz = rfs4_dss_numnewpaths * sizeof (char *);
10286 added_paths = kmem_zalloc(sz, KM_SLEEP);
10287 }
10288
10289 /* walk the "passed-in" rfs4_dss_newpaths array from nfsd */
10290 for (i = 0; i < rfs4_dss_numnewpaths; i++) {
10291 int found = 0;
10292 char *newpath = rfs4_dss_newpaths[i];
10293
10294 dss_path = nsrv4->dss_pathlist;
10295 do {
10296 char *path = dss_path->path;
10297
10298 /* used only for non-HA */
10299 if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
10300 dss_path = dss_path->next;
10301 continue;
10302 }
10303
10304 if (strncmp(path, newpath, strlen(path)) == 0) {
10305 found = 1;
10306 break;
10307 }
10308
10309 dss_path = dss_path->next;
10310 } while (dss_path != nsrv4->dss_pathlist);
10311
10312 if (found == 0) {
10313 added_paths[numadded_paths] = newpath;
10314 numadded_paths++;
10315 }
10316 }
10317
10318 /* did we find any added paths? */
10319 if (numadded_paths > 0) {
10320
10321 /* create a new server instance, and start its grace period */
10322 start_grace = 1;
10323 /* CSTYLED */
10324 rfs4_servinst_create(nsrv4, start_grace, numadded_paths, added_paths);
10325
10326 /* read in the stable storage state from these paths */
10327 rfs4_dss_readstate(nsrv4, numadded_paths, added_paths);
10328
10329 /*
10330 * Multiple failovers during a grace period will cause
10331 * clients of the same resource group to be partitioned
10332 * into different server instances, with different
10333 * grace periods. Since clients of the same resource
10334 * group must be subject to the same grace period,
10335 * we need to reset all currently active grace periods.
10336 */
10337 rfs4_grace_reset_all(nsrv4);
10338 }
10339
10340 if (rfs4_dss_numnewpaths > 0)
10341 kmem_free(added_paths, rfs4_dss_numnewpaths * sizeof (char *));
10342 }
|