1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26 /*
27 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
28 * All Rights Reserved
29 */
30
31 /*
32 * Copyright 2019 Nexenta Systems, Inc.
33 * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
34 */
35
36 #include <sys/param.h>
37 #include <sys/types.h>
38 #include <sys/systm.h>
39 #include <sys/cred.h>
40 #include <sys/buf.h>
41 #include <sys/vfs.h>
42 #include <sys/vfs_opreg.h>
43 #include <sys/vnode.h>
44 #include <sys/uio.h>
45 #include <sys/errno.h>
46 #include <sys/sysmacros.h>
47 #include <sys/statvfs.h>
48 #include <sys/kmem.h>
49 #include <sys/dirent.h>
50 #include <sys/cmn_err.h>
51 #include <sys/debug.h>
52 #include <sys/systeminfo.h>
53 #include <sys/flock.h>
54 #include <sys/pathname.h>
55 #include <sys/nbmlock.h>
56 #include <sys/share.h>
57 #include <sys/atomic.h>
58 #include <sys/policy.h>
59 #include <sys/fem.h>
60 #include <sys/sdt.h>
61 #include <sys/ddi.h>
62 #include <sys/zone.h>
63 #include <sys/kstat.h>
64
65 #include <fs/fs_reparse.h>
66
67 #include <rpc/types.h>
68 #include <rpc/auth.h>
69 #include <rpc/rpcsec_gss.h>
70 #include <rpc/svc.h>
71
72 #include <nfs/nfs.h>
73 #include <nfs/nfssys.h>
74 #include <nfs/export.h>
75 #include <nfs/nfs_cmd.h>
76 #include <nfs/lm.h>
77 #include <nfs/nfs4.h>
78 #include <nfs/nfs4_drc.h>
79
80 #include <sys/strsubr.h>
81 #include <sys/strsun.h>
82
83 #include <inet/common.h>
84 #include <inet/ip.h>
85 #include <inet/ip6.h>
86
87 #include <sys/tsol/label.h>
88 #include <sys/tsol/tndb.h>
89
/*
 * Tunables.
 *
 * rfs4_maxlock_tries and rfs4_lock_delay are file-local variables seeded
 * from the compile-time defaults below.  Their consumers are not in this
 * part of the file; presumably the record-locking operations -- confirm
 * against the lock code before changing the defaults.
 */
#define	RFS4_MAXLOCK_TRIES 4	/* Try to get the lock this many times */
static int rfs4_maxlock_tries = RFS4_MAXLOCK_TRIES;
#define	RFS4_LOCK_DELAY 10	/* Milliseconds */
static clock_t rfs4_lock_delay = RFS4_LOCK_DELAY;
/* Defined elsewhere; only referenced from this file. */
extern struct svc_ops rdma_svc_ops;
extern int nfs_loaned_buffers;
/* End of Tunables */
97
98 static int rdma_setup_read_data4(READ4args *, READ4res *);
99
/*
 * Used to bump the stateid4.seqid value and show changes in the stateid.
 *
 * Pre-increments sp->bits.chgseq in place and evaluates to the new value.
 * The argument is expanded exactly once.
 */
#define	next_stateid(sp) (++(sp)->bits.chgseq)
104
105 /*
106 * RFS4_MINLEN_ENTRY4: XDR-encoded size of smallest possible dirent.
107 * This is used to return NFS4ERR_TOOSMALL when clients specify
108 * maxcount that isn't large enough to hold the smallest possible
109 * XDR encoded dirent.
110 *
111 * sizeof cookie (8 bytes) +
112 * sizeof name_len (4 bytes) +
113 * sizeof smallest (padded) name (4 bytes) +
114 * sizeof bitmap4_len (12 bytes) + NOTE: we always encode len=2 bm4
115 * sizeof attrlist4_len (4 bytes) +
116 * sizeof next boolean (4 bytes)
117 *
118 * RFS4_MINLEN_RDDIR4: XDR-encoded size of READDIR op reply containing
119 * the smallest possible entry4 (assumes no attrs requested).
120 * sizeof nfsstat4 (4 bytes) +
121 * sizeof verifier4 (8 bytes) +
122 * sizeof entry4list bool (4 bytes) +
123 * sizeof entry4 (36 bytes) +
124 * sizeof eof bool (4 bytes)
125 *
126 * RFS4_MINLEN_RDDIR_BUF: minimum length of buffer server will provide to
127 * VOP_READDIR. Its value is the size of the maximum possible dirent
128 * for solaris. The DIRENT64_RECLEN macro returns the size of dirent
129 * required for a given name length. MAXNAMELEN is the maximum
130 * filename length allowed in Solaris. The first two DIRENT64_RECLEN()
131 * macros are to allow for . and .. entries -- just a minor tweak to try
132 * and guarantee that buffer we give to VOP_READDIR will be large enough
133 * to hold ., .., and the largest possible solaris dirent64.
134 */
/* 8 + 4 + 4 + 12 + 4 + 4 bytes, per the entry4 breakdown above. */
#define	RFS4_MINLEN_ENTRY4 36
/* status + verifier + entry-list bool + one minimal entry4 + eof bool. */
#define	RFS4_MINLEN_RDDIR4 (4 + NFS4_VERIFIER_SIZE + 4 + RFS4_MINLEN_ENTRY4 + 4)
/* Room for ".", ".." and one dirent64 with a MAXNAMELEN name. */
#define	RFS4_MINLEN_RDDIR_BUF \
	(DIRENT64_RECLEN(1) + DIRENT64_RECLEN(2) + DIRENT64_RECLEN(MAXNAMELEN))
139
140 /*
141 * It would be better to pad to 4 bytes since that's what XDR would do,
142 * but the dirents UFS gives us are already padded to 8, so just take
143 * what we're given. Dircount is only a hint anyway. Currently the
144 * solaris kernel is ASCII only, so there's no point in calling the
145 * UTF8 functions.
146 *
147 * dirent64: named padded to provide 8 byte struct alignment
148 * d_ino(8) + d_off(8) + d_reclen(2) + d_name(namelen + null(1) + pad)
149 *
150 * cookie: uint64_t + utf8namelen: uint_t + utf8name padded to 8 bytes
151 *
152 */
/*
 * Dircount contribution of one dirent64: three XDR units (the cookie and
 * the name length described above) plus the name space derived from the
 * entry's d_reclen by DIRENT64_NAMELEN().
 */
#define	DIRENT64_TO_DIRCOUNT(dp) \
	(3 * BYTES_PER_XDR_UNIT + DIRENT64_NAMELEN((dp)->d_reclen))
155
/*
 * Zone key for the per-zone NFSv4 server state; registered in
 * rfs4_srvrinit() with rfs4_zone_init()/rfs4_zone_fini() and looked up
 * with zone_getspecific().
 */
zone_key_t rfs4_zone_key;

/* Allocated in rfs4_srvrinit(), released in rfs4_srvrfini(). */
static sysid_t lockt_sysid;		/* dummy sysid for all LOCKT calls */

/* Caller id obtained from fs_new_caller_id() in rfs4_srvrinit(). */
u_longlong_t nfs4_srv_caller_id;
/* Key filled in by vsd_create() in rfs4_srvrinit(). */
uint_t nfs4_srv_vkey = 0;
162
163 void rfs4_init_compound_state(struct compound_state *);
164
165 static void nullfree(caddr_t);
166 static void rfs4_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
167 struct compound_state *);
168 static void rfs4_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
169 struct compound_state *);
170 static void rfs4_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
171 struct compound_state *);
172 static void rfs4_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
173 struct compound_state *);
174 static void rfs4_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
175 struct compound_state *);
176 static void rfs4_op_create_free(nfs_resop4 *resop);
177 static void rfs4_op_delegreturn(nfs_argop4 *, nfs_resop4 *,
178 struct svc_req *, struct compound_state *);
179 static void rfs4_op_delegpurge(nfs_argop4 *, nfs_resop4 *,
180 struct svc_req *, struct compound_state *);
181 static void rfs4_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
182 struct compound_state *);
183 static void rfs4_op_getattr_free(nfs_resop4 *);
184 static void rfs4_op_getfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
185 struct compound_state *);
186 static void rfs4_op_getfh_free(nfs_resop4 *);
187 static void rfs4_op_illegal(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
188 struct compound_state *);
189 static void rfs4_op_link(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
190 struct compound_state *);
191 static void rfs4_op_lock(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
192 struct compound_state *);
193 static void lock_denied_free(nfs_resop4 *);
194 static void rfs4_op_locku(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
195 struct compound_state *);
196 static void rfs4_op_lockt(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
197 struct compound_state *);
198 static void rfs4_op_lookup(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
199 struct compound_state *);
200 static void rfs4_op_lookupp(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
201 struct compound_state *);
202 static void rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop,
203 struct svc_req *req, struct compound_state *cs);
204 static void rfs4_op_nverify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
205 struct compound_state *);
206 static void rfs4_op_open(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
207 struct compound_state *);
208 static void rfs4_op_open_confirm(nfs_argop4 *, nfs_resop4 *,
209 struct svc_req *, struct compound_state *);
210 static void rfs4_op_open_downgrade(nfs_argop4 *, nfs_resop4 *,
211 struct svc_req *, struct compound_state *);
212 static void rfs4_op_putfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
213 struct compound_state *);
214 static void rfs4_op_putpubfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
215 struct compound_state *);
216 static void rfs4_op_putrootfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
217 struct compound_state *);
218 static void rfs4_op_read(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
219 struct compound_state *);
220 static void rfs4_op_read_free(nfs_resop4 *);
221 static void rfs4_op_readdir_free(nfs_resop4 *resop);
222 static void rfs4_op_readlink(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
223 struct compound_state *);
224 static void rfs4_op_readlink_free(nfs_resop4 *);
225 static void rfs4_op_release_lockowner(nfs_argop4 *, nfs_resop4 *,
226 struct svc_req *, struct compound_state *);
227 static void rfs4_op_remove(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
228 struct compound_state *);
229 static void rfs4_op_rename(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
230 struct compound_state *);
231 static void rfs4_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
232 struct compound_state *);
233 static void rfs4_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
234 struct compound_state *);
235 static void rfs4_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
236 struct compound_state *);
237 static void rfs4_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
238 struct compound_state *);
239 static void rfs4_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
240 struct compound_state *);
241 static void rfs4_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
242 struct compound_state *);
243 static void rfs4_op_setclientid(nfs_argop4 *, nfs_resop4 *,
244 struct svc_req *, struct compound_state *);
245 static void rfs4_op_setclientid_confirm(nfs_argop4 *, nfs_resop4 *,
246 struct svc_req *req, struct compound_state *);
247 static void rfs4_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
248 struct compound_state *);
249 static void rfs4_op_secinfo_free(nfs_resop4 *);
250
251 static nfsstat4 check_open_access(uint32_t, struct compound_state *,
252 struct svc_req *);
253 nfsstat4 rfs4_client_sysid(rfs4_client_t *, sysid_t *);
254 void rfs4_ss_clid(nfs4_srv_t *, rfs4_client_t *);
255
256
/*
 * translation table for attrs
 *
 * Set up by nfs4_ntov_table_init() and released by nfs4_ntov_table_free().
 * NOTE(review): the field descriptions below are inferred from names and
 * types only; confirm against the users further down in the file.
 */
struct nfs4_ntov_table {
	union nfs4_attr_u *na;		/* presumably the attribute values */
	uint8_t amap[NFS4_MAXNUM_ATTRS]; /* presumably per-entry attr index */
	int attrcnt;			/* presumably number of valid entries */
	bool_t vfsstat;			/* presumably "vfs stats needed" flag */
};
266
267 static void nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp);
268 static void nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
269 struct nfs4_svgetit_arg *sargp);
270
271 static nfsstat4 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp,
272 struct compound_state *cs, struct nfs4_svgetit_arg *sargp,
273 struct nfs4_ntov_table *ntovp, nfs4_attr_cmd_t cmd);
274
275 static void hanfsv4_failover(nfs4_srv_t *);
276
/*
 * FEM operation vectors built by rfs4_srvrinit() from nfs4_rd_deleg_tmpl
 * and nfs4_wr_deleg_tmpl, and released by rfs4_srvrfini().
 */
fem_t *deleg_rdops;
fem_t *deleg_wrops;
279
/*
 * NFS4 op dispatch table
 *
 * One entry per operation number.  The function pointers are declared
 * without prototypes; the handlers stored in rfsv4disptab take
 * (nfs_argop4 *, nfs_resop4 *, struct svc_req *, struct compound_state *)
 * and the result-free routines take a single result pointer.
 */

struct rfsv4disp {
	void	(*dis_proc)();		/* proc to call */
	void	(*dis_resfree)();	/* frees space allocated by proc */
	int	dis_flags;		/* RPC_IDEMPOTENT, etc... */
	int	op_type;		/* operation type, see below */
};
290
/*
 * operation types; used primarily for the per-exportinfo kstat implementation
 */
#define	NFS4_OP_NOFH	0	/* The operation does not operate with any */
				/* particular filehandle; we cannot associate */
				/* it with any exportinfo. */

#define	NFS4_OP_CFH	1	/* The operation works with the current */
				/* filehandle; we associate the operation */
				/* with the exportinfo related to the current */
				/* filehandle (as set before the operation is */
				/* executed). */

#define	NFS4_OP_SFH	2	/* The operation works with the saved */
				/* filehandle; we associate the operation */
				/* with the exportinfo related to the saved */
				/* filehandle (as set before the operation is */
				/* executed). */

#define	NFS4_OP_POSTCFH	3	/* The operation ignores the current */
				/* filehandle, but sets the new current */
				/* filehandle instead; we associate the */
				/* operation with the exportinfo related to */
				/* the current filehandle as set after the */
				/* operation is successfully executed.  Since */
				/* we do not know the particular exportinfo */
				/* (and thus the kstat) before the operation */
				/* is done, there is no simple way to update */
				/* some I/O kstat statistics related to */
				/* kstat_queue(9F). */
321
/*
 * Dispatch table, indexed by NFSv4 operation number.  Entries must stay in
 * operation-number order; each is { handler, result-free routine, RPC
 * flags, operation type }.  Unused operation numbers point at
 * rfs4_op_illegal.
 */
static struct rfsv4disp rfsv4disptab[] = {
	/*
	 * NFS VERSION 4
	 */

	/* RFS_NULL = 0 */
	{rfs4_op_illegal, nullfree, 0, NFS4_OP_NOFH},

	/* UNUSED = 1 */
	{rfs4_op_illegal, nullfree, 0, NFS4_OP_NOFH},

	/* UNUSED = 2 */
	{rfs4_op_illegal, nullfree, 0, NFS4_OP_NOFH},

	/* OP_ACCESS = 3 */
	{rfs4_op_access, nullfree, RPC_IDEMPOTENT, NFS4_OP_CFH},

	/* OP_CLOSE = 4 */
	{rfs4_op_close, nullfree, 0, NFS4_OP_CFH},

	/* OP_COMMIT = 5 */
	{rfs4_op_commit, nullfree, RPC_IDEMPOTENT, NFS4_OP_CFH},

	/* OP_CREATE = 6 */
	{rfs4_op_create, nullfree, 0, NFS4_OP_CFH},

	/* OP_DELEGPURGE = 7 */
	{rfs4_op_delegpurge, nullfree, 0, NFS4_OP_NOFH},

	/* OP_DELEGRETURN = 8 */
	{rfs4_op_delegreturn, nullfree, 0, NFS4_OP_CFH},

	/* OP_GETATTR = 9 */
	{rfs4_op_getattr, rfs4_op_getattr_free, RPC_IDEMPOTENT, NFS4_OP_CFH},

	/* OP_GETFH = 10 */
	{rfs4_op_getfh, rfs4_op_getfh_free, RPC_ALL, NFS4_OP_CFH},

	/* OP_LINK = 11 */
	{rfs4_op_link, nullfree, 0, NFS4_OP_CFH},

	/* OP_LOCK = 12 */
	{rfs4_op_lock, lock_denied_free, 0, NFS4_OP_CFH},

	/* OP_LOCKT = 13 */
	{rfs4_op_lockt, lock_denied_free, 0, NFS4_OP_CFH},

	/* OP_LOCKU = 14 */
	{rfs4_op_locku, nullfree, 0, NFS4_OP_CFH},

	/* OP_LOOKUP = 15 */
	{rfs4_op_lookup, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK),
	    NFS4_OP_CFH},

	/* OP_LOOKUPP = 16 */
	{rfs4_op_lookupp, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK),
	    NFS4_OP_CFH},

	/* OP_NVERIFY = 17 */
	{rfs4_op_nverify, nullfree, RPC_IDEMPOTENT, NFS4_OP_CFH},

	/* OP_OPEN = 18 */
	{rfs4_op_open, rfs4_free_reply, 0, NFS4_OP_CFH},

	/* OP_OPENATTR = 19 */
	{rfs4_op_openattr, nullfree, 0, NFS4_OP_CFH},

	/* OP_OPEN_CONFIRM = 20 */
	{rfs4_op_open_confirm, nullfree, 0, NFS4_OP_CFH},

	/* OP_OPEN_DOWNGRADE = 21 */
	{rfs4_op_open_downgrade, nullfree, 0, NFS4_OP_CFH},

	/* OP_PUTFH = 22 */
	{rfs4_op_putfh, nullfree, RPC_ALL, NFS4_OP_POSTCFH},

	/* OP_PUTPUBFH = 23 */
	{rfs4_op_putpubfh, nullfree, RPC_ALL, NFS4_OP_POSTCFH},

	/* OP_PUTROOTFH = 24 */
	{rfs4_op_putrootfh, nullfree, RPC_ALL, NFS4_OP_POSTCFH},

	/* OP_READ = 25 */
	{rfs4_op_read, rfs4_op_read_free, RPC_IDEMPOTENT, NFS4_OP_CFH},

	/* OP_READDIR = 26 */
	{rfs4_op_readdir, rfs4_op_readdir_free, RPC_IDEMPOTENT, NFS4_OP_CFH},

	/* OP_READLINK = 27 */
	{rfs4_op_readlink, rfs4_op_readlink_free, RPC_IDEMPOTENT, NFS4_OP_CFH},

	/* OP_REMOVE = 28 */
	{rfs4_op_remove, nullfree, 0, NFS4_OP_CFH},

	/* OP_RENAME = 29 */
	{rfs4_op_rename, nullfree, 0, NFS4_OP_CFH},

	/* OP_RENEW = 30 */
	{rfs4_op_renew, nullfree, 0, NFS4_OP_NOFH},

	/* OP_RESTOREFH = 31 */
	{rfs4_op_restorefh, nullfree, RPC_ALL, NFS4_OP_SFH},

	/* OP_SAVEFH = 32 */
	{rfs4_op_savefh, nullfree, RPC_ALL, NFS4_OP_CFH},

	/* OP_SECINFO = 33 */
	{rfs4_op_secinfo, rfs4_op_secinfo_free, 0, NFS4_OP_CFH},

	/* OP_SETATTR = 34 */
	{rfs4_op_setattr, nullfree, 0, NFS4_OP_CFH},

	/* OP_SETCLIENTID = 35 */
	{rfs4_op_setclientid, nullfree, 0, NFS4_OP_NOFH},

	/* OP_SETCLIENTID_CONFIRM = 36 */
	{rfs4_op_setclientid_confirm, nullfree, 0, NFS4_OP_NOFH},

	/* OP_VERIFY = 37 */
	{rfs4_op_verify, nullfree, RPC_IDEMPOTENT, NFS4_OP_CFH},

	/* OP_WRITE = 38 */
	{rfs4_op_write, nullfree, 0, NFS4_OP_CFH},

	/* OP_RELEASE_LOCKOWNER = 39 */
	{rfs4_op_release_lockowner, nullfree, 0, NFS4_OP_NOFH},
};
449
/* Number of entries in rfsv4disptab. */
static uint_t rfsv4disp_cnt = sizeof (rfsv4disptab) / sizeof (rfsv4disptab[0]);

/*
 * Index used for illegal operations: one past the last dispatch table
 * entry.  The DEBUG-only rfs4_op_string[] table carries a matching extra
 * trailing entry, "rfs4_op_illegal".
 */
#define	OP_ILLEGAL_IDX (rfsv4disp_cnt)
453
454 #ifdef DEBUG
455
/*
 * DEBUG-only global switches.  Their consumers are not in this part of
 * the file.  NOTE(review): meanings are presumed from the names only --
 * confirm before relying on them.
 */
int rfs4_fillone_debug = 0;
int rfs4_no_stub_access = 1;
int rfs4_rddir_debug = 0;
459
/*
 * Handler names, indexed by operation number in the same order as
 * rfsv4disptab[], followed by one extra entry for illegal operations
 * (index OP_ILLEGAL_IDX).
 */
static char *rfs4_op_string[] = {
	"rfs4_op_null",
	"rfs4_op_1 unused",
	"rfs4_op_2 unused",
	"rfs4_op_access",
	"rfs4_op_close",
	"rfs4_op_commit",
	"rfs4_op_create",
	"rfs4_op_delegpurge",
	"rfs4_op_delegreturn",
	"rfs4_op_getattr",
	"rfs4_op_getfh",
	"rfs4_op_link",
	"rfs4_op_lock",
	"rfs4_op_lockt",
	"rfs4_op_locku",
	"rfs4_op_lookup",
	"rfs4_op_lookupp",
	"rfs4_op_nverify",
	"rfs4_op_open",
	"rfs4_op_openattr",
	"rfs4_op_open_confirm",
	"rfs4_op_open_downgrade",
	"rfs4_op_putfh",
	"rfs4_op_putpubfh",
	"rfs4_op_putrootfh",
	"rfs4_op_read",
	"rfs4_op_readdir",
	"rfs4_op_readlink",
	"rfs4_op_remove",
	"rfs4_op_rename",
	"rfs4_op_renew",
	"rfs4_op_restorefh",
	"rfs4_op_savefh",
	"rfs4_op_secinfo",
	"rfs4_op_setattr",
	"rfs4_op_setclientid",
	/* was "rfs4_op_setclient_confirm"; now matches the handler's name */
	"rfs4_op_setclientid_confirm",
	"rfs4_op_verify",
	"rfs4_op_write",
	"rfs4_op_release_lockowner",
	"rfs4_op_illegal"
};
503 #endif
504
505 void rfs4_ss_chkclid(nfs4_srv_t *, rfs4_client_t *);
506
507 extern size_t strlcpy(char *dst, const char *src, size_t dstsize);
508
509 extern void rfs4_free_fs_locations4(fs_locations4 *);
510
/*
 * Step from one dirent64 to the next in a buffer by advancing d_reclen
 * bytes.  Any definition of nextdp picked up from a header is replaced.
 */
#ifdef nextdp
#undef nextdp
#endif
#define	nextdp(dp)	((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
515
/*
 * FEM template handed to fem_create() as "deleg_rdops" in rfs4_srvrinit():
 * the vnode operations routed to the deleg_rd_*() monitors.  The list is
 * terminated by the NULL, NULL pair.
 */
static const fs_operation_def_t nfs4_rd_deleg_tmpl[] = {
	VOPNAME_OPEN,		{ .femop_open = deleg_rd_open },
	VOPNAME_WRITE,		{ .femop_write = deleg_rd_write },
	VOPNAME_SETATTR,	{ .femop_setattr = deleg_rd_setattr },
	VOPNAME_RWLOCK,		{ .femop_rwlock = deleg_rd_rwlock },
	VOPNAME_SPACE,		{ .femop_space = deleg_rd_space },
	VOPNAME_SETSECATTR,	{ .femop_setsecattr = deleg_rd_setsecattr },
	VOPNAME_VNEVENT,	{ .femop_vnevent = deleg_rd_vnevent },
	NULL,			NULL
};
/*
 * FEM template handed to fem_create() as "deleg_wrops" in rfs4_srvrinit().
 * Same operations as the read template, plus VOPNAME_READ.
 */
static const fs_operation_def_t nfs4_wr_deleg_tmpl[] = {
	VOPNAME_OPEN,		{ .femop_open = deleg_wr_open },
	VOPNAME_READ,		{ .femop_read = deleg_wr_read },
	VOPNAME_WRITE,		{ .femop_write = deleg_wr_write },
	VOPNAME_SETATTR,	{ .femop_setattr = deleg_wr_setattr },
	VOPNAME_RWLOCK,		{ .femop_rwlock = deleg_wr_rwlock },
	VOPNAME_SPACE,		{ .femop_space = deleg_wr_space },
	VOPNAME_SETSECATTR,	{ .femop_setsecattr = deleg_wr_setsecattr },
	VOPNAME_VNEVENT,	{ .femop_vnevent = deleg_wr_vnevent },
	NULL,			NULL
};
537
538 /* ARGSUSED */
539 static void *
540 rfs4_zone_init(zoneid_t zoneid)
541 {
542 nfs4_srv_t *nsrv4;
543 timespec32_t verf;
544
545 nsrv4 = kmem_zalloc(sizeof (*nsrv4), KM_SLEEP);
546
547 /*
548 * The following algorithm attempts to find a unique verifier
549 * to be used as the write verifier returned from the server
550 * to the client. It is important that this verifier change
551 * whenever the server reboots. Of secondary importance, it
552 * is important for the verifier to be unique between two
553 * different servers.
554 *
555 * Thus, an attempt is made to use the system hostid and the
556 * current time in seconds when the nfssrv kernel module is
557 * loaded. It is assumed that an NFS server will not be able
558 * to boot and then to reboot in less than a second. If the
559 * hostid has not been set, then the current high resolution
560 * time is used. This will ensure different verifiers each
561 * time the server reboots and minimize the chances that two
562 * different servers will have the same verifier.
563 * XXX - this is broken on LP64 kernels.
564 */
565 verf.tv_sec = (time_t)zone_get_hostid(NULL);
566 if (verf.tv_sec != 0) {
567 verf.tv_nsec = gethrestime_sec();
568 } else {
569 timespec_t tverf;
570
571 gethrestime(&tverf);
572 verf.tv_sec = (time_t)tverf.tv_sec;
573 verf.tv_nsec = tverf.tv_nsec;
574 }
575 nsrv4->write4verf = *(uint64_t *)&verf;
576
577 /* Used to manage create/destroy of server state */
578 nsrv4->nfs4_server_state = NULL;
579 nsrv4->nfs4_cur_servinst = NULL;
580 nsrv4->nfs4_deleg_policy = SRV_NEVER_DELEGATE;
581 mutex_init(&nsrv4->deleg_lock, NULL, MUTEX_DEFAULT, NULL);
582 mutex_init(&nsrv4->state_lock, NULL, MUTEX_DEFAULT, NULL);
583 mutex_init(&nsrv4->servinst_lock, NULL, MUTEX_DEFAULT, NULL);
584 rw_init(&nsrv4->deleg_policy_lock, NULL, RW_DEFAULT, NULL);
585
586 return (nsrv4);
587 }
588
589 /* ARGSUSED */
590 static void
591 rfs4_zone_fini(zoneid_t zoneid, void *data)
592 {
593 nfs4_srv_t *nsrv4 = data;
594
595 mutex_destroy(&nsrv4->deleg_lock);
596 mutex_destroy(&nsrv4->state_lock);
597 mutex_destroy(&nsrv4->servinst_lock);
598 rw_destroy(&nsrv4->deleg_policy_lock);
599
600 kmem_free(nsrv4, sizeof (*nsrv4));
601 }
602
603 void
604 rfs4_srvrinit(void)
605 {
606 extern void rfs4_attr_init();
607
608 zone_key_create(&rfs4_zone_key, rfs4_zone_init, NULL, rfs4_zone_fini);
609
610 rfs4_attr_init();
611
612
613 if (fem_create("deleg_rdops", nfs4_rd_deleg_tmpl, &deleg_rdops) != 0) {
614 rfs4_disable_delegation();
615 } else if (fem_create("deleg_wrops", nfs4_wr_deleg_tmpl,
616 &deleg_wrops) != 0) {
617 rfs4_disable_delegation();
618 fem_free(deleg_rdops);
619 }
620
621 nfs4_srv_caller_id = fs_new_caller_id();
622 lockt_sysid = lm_alloc_sysidt();
623 vsd_create(&nfs4_srv_vkey, NULL);
624 rfs4_state_g_init();
625 }
626
627 void
628 rfs4_srvrfini(void)
629 {
630 if (lockt_sysid != LM_NOSYSID) {
631 lm_free_sysidt(lockt_sysid);
632 lockt_sysid = LM_NOSYSID;
633 }
634
635 rfs4_state_g_fini();
636
637 fem_free(deleg_rdops);
638 fem_free(deleg_wrops);
639
640 (void) zone_key_delete(rfs4_zone_key);
641 }
642
643 void
644 rfs4_do_server_start(int server_upordown,
645 int srv_delegation, int cluster_booted)
646 {
647 nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
648
649 /* Is this a warm start? */
650 if (server_upordown == NFS_SERVER_QUIESCED) {
651 cmn_err(CE_NOTE, "nfs4_srv: "
652 "server was previously quiesced; "
653 "existing NFSv4 state will be re-used");
654
655 /*
656 * HA-NFSv4: this is also the signal
657 * that a Resource Group failover has
658 * occurred.
659 */
660 if (cluster_booted)
661 hanfsv4_failover(nsrv4);
662 } else {
663 /* Cold start */
664 nsrv4->rfs4_start_time = 0;
665 rfs4_state_zone_init(nsrv4);
666 nsrv4->nfs4_drc = rfs4_init_drc(nfs4_drc_max,
667 nfs4_drc_hash);
668 }
669
670 /* Check if delegation is to be enabled */
671 if (srv_delegation != FALSE)
672 rfs4_set_deleg_policy(nsrv4, SRV_NORMAL_DELEGATE);
673 }
674
675 void
676 rfs4_init_compound_state(struct compound_state *cs)
677 {
678 bzero(cs, sizeof (*cs));
679 cs->cont = TRUE;
680 cs->access = CS_ACCESS_DENIED;
681 cs->deleg = FALSE;
682 cs->mandlock = FALSE;
683 cs->fh.nfs_fh4_val = cs->fhbuf;
684 cs->statusp = NULL;
685 }
686
687 void
688 rfs4_grace_start(rfs4_servinst_t *sip)
689 {
690 rw_enter(&sip->rwlock, RW_WRITER);
691 sip->start_time = (time_t)TICK_TO_SEC(ddi_get_lbolt());
692 sip->grace_period = rfs4_grace_period;
693 rw_exit(&sip->rwlock);
694 }
695
696 /*
697 * returns true if the instance's grace period has never been started
698 */
699 int
700 rfs4_servinst_grace_new(rfs4_servinst_t *sip)
701 {
702 time_t start_time;
703
704 rw_enter(&sip->rwlock, RW_READER);
705 start_time = sip->start_time;
706 rw_exit(&sip->rwlock);
707
708 return (start_time == 0);
709 }
710
711 /*
712 * Indicates if server instance is within the
713 * grace period.
714 */
715 int
716 rfs4_servinst_in_grace(rfs4_servinst_t *sip)
717 {
718 time_t grace_expiry;
719
720 rw_enter(&sip->rwlock, RW_READER);
721 grace_expiry = sip->start_time + sip->grace_period;
722 rw_exit(&sip->rwlock);
723
724 return (((time_t)TICK_TO_SEC(ddi_get_lbolt())) < grace_expiry);
725 }
726
727 int
728 rfs4_clnt_in_grace(rfs4_client_t *cp)
729 {
730 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
731
732 return (rfs4_servinst_in_grace(cp->rc_server_instance));
733 }
734
735 /*
736 * reset all currently active grace periods
737 */
738 void
739 rfs4_grace_reset_all(nfs4_srv_t *nsrv4)
740 {
741 rfs4_servinst_t *sip;
742
743 mutex_enter(&nsrv4->servinst_lock);
744 for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev)
745 if (rfs4_servinst_in_grace(sip))
746 rfs4_grace_start(sip);
747 mutex_exit(&nsrv4->servinst_lock);
748 }
749
750 /*
751 * start any new instances' grace periods
752 */
753 void
754 rfs4_grace_start_new(nfs4_srv_t *nsrv4)
755 {
756 rfs4_servinst_t *sip;
757
758 mutex_enter(&nsrv4->servinst_lock);
759 for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev)
760 if (rfs4_servinst_grace_new(sip))
761 rfs4_grace_start(sip);
762 mutex_exit(&nsrv4->servinst_lock);
763 }
764
765 static rfs4_dss_path_t *
766 rfs4_dss_newpath(nfs4_srv_t *nsrv4, rfs4_servinst_t *sip,
767 char *path, unsigned index)
768 {
769 size_t len;
770 rfs4_dss_path_t *dss_path;
771
772 dss_path = kmem_alloc(sizeof (rfs4_dss_path_t), KM_SLEEP);
773
774 /*
775 * Take a copy of the string, since the original may be overwritten.
776 * Sadly, no strdup() in the kernel.
777 */
778 /* allow for NUL */
779 len = strlen(path) + 1;
780 dss_path->path = kmem_alloc(len, KM_SLEEP);
781 (void) strlcpy(dss_path->path, path, len);
782
783 /* associate with servinst */
784 dss_path->sip = sip;
785 dss_path->index = index;
786
787 /*
788 * Add to list of served paths.
789 * No locking required, as we're only ever called at startup.
790 */
791 if (nsrv4->dss_pathlist == NULL) {
792 /* this is the first dss_path_t */
793
794 /* needed for insque/remque */
795 dss_path->next = dss_path->prev = dss_path;
796
797 nsrv4->dss_pathlist = dss_path;
798 } else {
799 insque(dss_path, nsrv4->dss_pathlist);
800 }
801
802 return (dss_path);
803 }
804
805 /*
806 * Create a new server instance, and make it the currently active instance.
807 * Note that starting the grace period too early will reduce the clients'
808 * recovery window.
809 */
810 void
811 rfs4_servinst_create(nfs4_srv_t *nsrv4, int start_grace,
812 int dss_npaths, char **dss_paths)
813 {
814 unsigned i;
815 rfs4_servinst_t *sip;
816 rfs4_oldstate_t *oldstate;
817
818 sip = kmem_alloc(sizeof (rfs4_servinst_t), KM_SLEEP);
819 rw_init(&sip->rwlock, NULL, RW_DEFAULT, NULL);
820
821 sip->start_time = (time_t)0;
822 sip->grace_period = (time_t)0;
823 sip->next = NULL;
824 sip->prev = NULL;
825
826 rw_init(&sip->oldstate_lock, NULL, RW_DEFAULT, NULL);
827 /*
828 * This initial dummy entry is required to setup for insque/remque.
829 * It must be skipped over whenever the list is traversed.
830 */
831 oldstate = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP);
832 /* insque/remque require initial list entry to be self-terminated */
833 oldstate->next = oldstate;
834 oldstate->prev = oldstate;
835 sip->oldstate = oldstate;
836
837
838 sip->dss_npaths = dss_npaths;
839 sip->dss_paths = kmem_alloc(dss_npaths *
840 sizeof (rfs4_dss_path_t *), KM_SLEEP);
841
842 for (i = 0; i < dss_npaths; i++) {
843 /* CSTYLED */
844 sip->dss_paths[i] = rfs4_dss_newpath(nsrv4, sip, dss_paths[i], i);
845 }
846
847 mutex_enter(&nsrv4->servinst_lock);
848 if (nsrv4->nfs4_cur_servinst != NULL) {
849 /* add to linked list */
850 sip->prev = nsrv4->nfs4_cur_servinst;
851 nsrv4->nfs4_cur_servinst->next = sip;
852 }
853 if (start_grace)
854 rfs4_grace_start(sip);
855 /* make the new instance "current" */
856 nsrv4->nfs4_cur_servinst = sip;
857
858 mutex_exit(&nsrv4->servinst_lock);
859 }
860
861 /*
862 * In future, we might add a rfs4_servinst_destroy(sip) but, for now, destroy
863 * all instances directly.
864 */
865 void
866 rfs4_servinst_destroy_all(nfs4_srv_t *nsrv4)
867 {
868 rfs4_servinst_t *sip, *prev, *current;
869 #ifdef DEBUG
870 int n = 0;
871 #endif
872
873 mutex_enter(&nsrv4->servinst_lock);
874 ASSERT(nsrv4->nfs4_cur_servinst != NULL);
875 current = nsrv4->nfs4_cur_servinst;
876 nsrv4->nfs4_cur_servinst = NULL;
877 for (sip = current; sip != NULL; sip = prev) {
878 prev = sip->prev;
879 rw_destroy(&sip->rwlock);
880 if (sip->oldstate)
881 kmem_free(sip->oldstate, sizeof (rfs4_oldstate_t));
882 if (sip->dss_paths)
883 kmem_free(sip->dss_paths,
884 sip->dss_npaths * sizeof (rfs4_dss_path_t *));
885 kmem_free(sip, sizeof (rfs4_servinst_t));
886 #ifdef DEBUG
887 n++;
888 #endif
889 }
890 mutex_exit(&nsrv4->servinst_lock);
891 }
892
893 /*
894 * Assign the current server instance to a client_t.
895 * Should be called with cp->rc_dbe held.
896 */
897 void
898 rfs4_servinst_assign(nfs4_srv_t *nsrv4, rfs4_client_t *cp,
899 rfs4_servinst_t *sip)
900 {
901 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
902
903 /*
904 * The lock ensures that if the current instance is in the process
905 * of changing, we will see the new one.
906 */
907 mutex_enter(&nsrv4->servinst_lock);
908 cp->rc_server_instance = sip;
909 mutex_exit(&nsrv4->servinst_lock);
910 }
911
912 rfs4_servinst_t *
913 rfs4_servinst(rfs4_client_t *cp)
914 {
915 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
916
917 return (cp->rc_server_instance);
918 }
919
/*
 * Result-free routine that does nothing; used in rfsv4disptab for
 * operations that have no result storage to free.
 */
/* ARGSUSED */
static void
nullfree(caddr_t resop)
{
}
925
926 /*
927 * This is a fall-through for invalid or not implemented (yet) ops
928 */
929 /* ARGSUSED */
930 static void
931 rfs4_op_inval(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
932 struct compound_state *cs)
933 {
934 *cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_INVAL;
935 }
936
937 /*
938 * Check if the security flavor, nfsnum, is in the flavor_list.
939 */
940 bool_t
941 in_flavor_list(int nfsnum, int *flavor_list, int count)
942 {
943 int i;
944
945 for (i = 0; i < count; i++) {
946 if (nfsnum == flavor_list[i])
947 return (TRUE);
948 }
949 return (FALSE);
950 }
951
952 /*
953 * Used by rfs4_op_secinfo to get the security information from the
954 * export structure associated with the component.
955 */
956 /* ARGSUSED */
957 static nfsstat4
958 do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
959 {
960 int error, different_export = 0;
961 vnode_t *dvp, *vp;
962 struct exportinfo *exi = NULL;
963 fid_t fid;
964 uint_t count, i;
965 secinfo4 *resok_val;
966 struct secinfo *secp;
967 seconfig_t *si;
968 bool_t did_traverse = FALSE;
969 int dotdot, walk;
970 nfs_export_t *ne = nfs_get_export();
971
972 dvp = cs->vp;
973 dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
974
975 /*
976 * If dotdotting, then need to check whether it's above the
977 * root of a filesystem, or above an export point.
978 */
979 if (dotdot) {
980
981 /*
982 * If dotdotting at the root of a filesystem, then
983 * need to traverse back to the mounted-on filesystem
984 * and do the dotdot lookup there.
985 */
986 if (cs->vp->v_flag & VROOT) {
987
988 /*
989 * If at the system root, then can
990 * go up no further.
991 */
992 if (VN_CMP(dvp, ZONE_ROOTVP()))
993 return (puterrno4(ENOENT));
994
995 /*
996 * Traverse back to the mounted-on filesystem
997 */
998 dvp = untraverse(cs->vp);
999
1000 /*
1001 * Set the different_export flag so we remember
1002 * to pick up a new exportinfo entry for
1003 * this new filesystem.
1004 */
1005 different_export = 1;
1006 } else {
1007
1008 /*
1009 * If dotdotting above an export point then set
1010 * the different_export to get new export info.
1011 */
1012 different_export = nfs_exported(cs->exi, cs->vp);
1013 }
1014 }
1015
1016 /*
1017 * Get the vnode for the component "nm".
1018 */
1019 error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cs->cr,
1020 NULL, NULL, NULL);
1021 if (error)
1022 return (puterrno4(error));
1023
1024 /*
1025 * If the vnode is in a pseudo filesystem, or if the security flavor
1026 * used in the request is valid but not an explicitly shared flavor,
1027 * or the access bit indicates that this is a limited access,
1028 * check whether this vnode is visible.
1029 */
1030 if (!different_export &&
1031 (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
1032 cs->access & CS_ACCESS_LIMITED)) {
1033 if (! nfs_visible(cs->exi, vp, &different_export)) {
1034 VN_RELE(vp);
1035 return (puterrno4(ENOENT));
1036 }
1037 }
1038
1039 /*
1040 * If it's a mountpoint, then traverse it.
1041 */
1042 if (vn_ismntpt(vp)) {
1043 if ((error = traverse(&vp)) != 0) {
1044 VN_RELE(vp);
1045 return (puterrno4(error));
1046 }
1047 /* remember that we had to traverse mountpoint */
1048 did_traverse = TRUE;
1049 different_export = 1;
1050 } else if (vp->v_vfsp != dvp->v_vfsp) {
1051 /*
1052 * If vp isn't a mountpoint and the vfs ptrs aren't the same,
1053 * then vp is probably an LOFS object. We don't need the
1054 * realvp, we just need to know that we might have crossed
1055 * a server fs boundary and need to call checkexport4.
1056 * (LOFS lookup hides server fs mountpoints, and actually calls
1057 * traverse)
1058 */
1059 different_export = 1;
1060 }
1061
1062 /*
1063 * Get the export information for it.
1064 */
1065 if (different_export) {
1066
1067 bzero(&fid, sizeof (fid));
1068 fid.fid_len = MAXFIDSZ;
1069 error = vop_fid_pseudo(vp, &fid);
1070 if (error) {
1071 VN_RELE(vp);
1072 return (puterrno4(error));
1073 }
1074
1075 if (dotdot)
1076 exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
1077 else
1078 exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
1079
1080 if (exi == NULL) {
1081 if (did_traverse == TRUE) {
1082 /*
1083 * If this vnode is a mounted-on vnode,
1084 * but the mounted-on file system is not
1085 * exported, send back the secinfo for
1086 * the exported node that the mounted-on
1087 * vnode lives in.
1088 */
1089 exi = cs->exi;
1090 } else {
1091 VN_RELE(vp);
1092 return (puterrno4(EACCES));
1093 }
1094 }
1095 } else {
1096 exi = cs->exi;
1097 }
1098 ASSERT(exi != NULL);
1099
1100
1101 /*
1102 * Create the secinfo result based on the security information
1103 * from the exportinfo structure (exi).
1104 *
1105 * Return all flavors for a pseudo node.
1106 * For a real export node, return the flavor that the client
1107 * has access with.
1108 */
1109 ASSERT(RW_LOCK_HELD(&ne->exported_lock));
1110 if (PSEUDO(exi)) {
1111 count = exi->exi_export.ex_seccnt; /* total sec count */
1112 resok_val = kmem_alloc(count * sizeof (secinfo4), KM_SLEEP);
1113 secp = exi->exi_export.ex_secinfo;
1114
1115 for (i = 0; i < count; i++) {
1116 si = &secp[i].s_secinfo;
1117 resok_val[i].flavor = si->sc_rpcnum;
1118 if (resok_val[i].flavor == RPCSEC_GSS) {
1119 rpcsec_gss_info *info;
1120
1121 info = &resok_val[i].flavor_info;
1122 info->qop = si->sc_qop;
1123 info->service = (rpc_gss_svc_t)si->sc_service;
1124
1125 /* get oid opaque data */
1126 info->oid.sec_oid4_len =
1127 si->sc_gss_mech_type->length;
1128 info->oid.sec_oid4_val = kmem_alloc(
1129 si->sc_gss_mech_type->length, KM_SLEEP);
1130 bcopy(
1131 si->sc_gss_mech_type->elements,
1132 info->oid.sec_oid4_val,
1133 info->oid.sec_oid4_len);
1134 }
1135 }
1136 resp->SECINFO4resok_len = count;
1137 resp->SECINFO4resok_val = resok_val;
1138 } else {
1139 int ret_cnt = 0, k = 0;
1140 int *flavor_list;
1141
1142 count = exi->exi_export.ex_seccnt; /* total sec count */
1143 secp = exi->exi_export.ex_secinfo;
1144
1145 flavor_list = kmem_alloc(count * sizeof (int), KM_SLEEP);
1146 /* find out which flavors to return */
1147 for (i = 0; i < count; i ++) {
1148 int access, flavor, perm;
1149
1150 flavor = secp[i].s_secinfo.sc_nfsnum;
1151 perm = secp[i].s_flags;
1152
1153 access = nfsauth4_secinfo_access(exi, cs->req,
1154 flavor, perm, cs->basecr);
1155
1156 if (! (access & NFSAUTH_DENIED) &&
1157 ! (access & NFSAUTH_WRONGSEC)) {
1158 flavor_list[ret_cnt] = flavor;
1159 ret_cnt++;
1160 }
1161 }
1162
1163 /* Create the returning SECINFO value */
1164 resok_val = kmem_alloc(ret_cnt * sizeof (secinfo4), KM_SLEEP);
1165
1166 for (i = 0; i < count; i++) {
1167 /*
1168 * If the flavor is in the flavor list,
1169 * fill in resok_val.
1170 */
1171 si = &secp[i].s_secinfo;
1172 if (in_flavor_list(si->sc_nfsnum,
1173 flavor_list, ret_cnt)) {
1174 resok_val[k].flavor = si->sc_rpcnum;
1175 if (resok_val[k].flavor == RPCSEC_GSS) {
1176 rpcsec_gss_info *info;
1177
1178 info = &resok_val[k].flavor_info;
1179 info->qop = si->sc_qop;
1180 info->service = (rpc_gss_svc_t)
1181 si->sc_service;
1182
1183 /* get oid opaque data */
1184 info->oid.sec_oid4_len =
1185 si->sc_gss_mech_type->length;
1186 info->oid.sec_oid4_val = kmem_alloc(
1187 si->sc_gss_mech_type->length,
1188 KM_SLEEP);
1189 bcopy(si->sc_gss_mech_type->elements,
1190 info->oid.sec_oid4_val,
1191 info->oid.sec_oid4_len);
1192 }
1193 k++;
1194 }
1195 if (k >= ret_cnt)
1196 break;
1197 }
1198 resp->SECINFO4resok_len = ret_cnt;
1199 resp->SECINFO4resok_val = resok_val;
1200 kmem_free(flavor_list, count * sizeof (int));
1201 }
1202
1203 VN_RELE(vp);
1204 return (NFS4_OK);
1205 }
1206
1207 /*
1208 * SECINFO (Operation 33): Obtain required security information on
1209 * the component name in the format of (security-mechanism-oid, qop, service)
1210 * triplets.
1211 */
1212 /* ARGSUSED */
1213 static void
1214 rfs4_op_secinfo(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1215 struct compound_state *cs)
1216 {
1217 SECINFO4args *args = &argop->nfs_argop4_u.opsecinfo;
1218 SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1219 utf8string *utfnm = &args->name;
1220 uint_t len;
1221 char *nm;
1222 struct sockaddr *ca;
1223 char *name = NULL;
1224 nfsstat4 status = NFS4_OK;
1225
1226 DTRACE_NFSV4_2(op__secinfo__start, struct compound_state *, cs,
1227 SECINFO4args *, args);
1228
1229 /*
1230 * Current file handle (cfh) should have been set before getting
1231 * into this function. If not, return error.
1232 */
1233 if (cs->vp == NULL) {
1234 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1235 goto out;
1236 }
1237
1238 if (cs->vp->v_type != VDIR) {
1239 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1240 goto out;
1241 }
1242
1243 /*
1244 * Verify the component name. If failed, error out, but
1245 * do not error out if the component name is a "..".
1246 * SECINFO will return its parents secinfo data for SECINFO "..".
1247 */
1248 status = utf8_dir_verify(utfnm);
1249 if (status != NFS4_OK) {
1250 if (utfnm->utf8string_len != 2 ||
1251 utfnm->utf8string_val[0] != '.' ||
1252 utfnm->utf8string_val[1] != '.') {
1253 *cs->statusp = resp->status = status;
1254 goto out;
1255 }
1256 }
1257
1258 nm = utf8_to_str(utfnm, &len, NULL);
1259 if (nm == NULL) {
1260 *cs->statusp = resp->status = NFS4ERR_INVAL;
1261 goto out;
1262 }
1263
1264 if (len > MAXNAMELEN) {
1265 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1266 kmem_free(nm, len);
1267 goto out;
1268 }
1269
1270 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1271 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1272 MAXPATHLEN + 1);
1273
1274 if (name == NULL) {
1275 *cs->statusp = resp->status = NFS4ERR_INVAL;
1276 kmem_free(nm, len);
1277 goto out;
1278 }
1279
1280
1281 *cs->statusp = resp->status = do_rfs4_op_secinfo(cs, name, resp);
1282
1283 if (name != nm)
1284 kmem_free(name, MAXPATHLEN + 1);
1285 kmem_free(nm, len);
1286
1287 out:
1288 DTRACE_NFSV4_2(op__secinfo__done, struct compound_state *, cs,
1289 SECINFO4res *, resp);
1290 }
1291
1292 /*
1293 * Free SECINFO result.
1294 */
1295 /* ARGSUSED */
1296 static void
1297 rfs4_op_secinfo_free(nfs_resop4 *resop)
1298 {
1299 SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1300 int count, i;
1301 secinfo4 *resok_val;
1302
1303 /* If this is not an Ok result, nothing to free. */
1304 if (resp->status != NFS4_OK) {
1305 return;
1306 }
1307
1308 count = resp->SECINFO4resok_len;
1309 resok_val = resp->SECINFO4resok_val;
1310
1311 for (i = 0; i < count; i++) {
1312 if (resok_val[i].flavor == RPCSEC_GSS) {
1313 rpcsec_gss_info *info;
1314
1315 info = &resok_val[i].flavor_info;
1316 kmem_free(info->oid.sec_oid4_val,
1317 info->oid.sec_oid4_len);
1318 }
1319 }
1320 kmem_free(resok_val, count * sizeof (secinfo4));
1321 resp->SECINFO4resok_len = 0;
1322 resp->SECINFO4resok_val = NULL;
1323 }
1324
1325 /* ARGSUSED */
1326 static void
1327 rfs4_op_access(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1328 struct compound_state *cs)
1329 {
1330 ACCESS4args *args = &argop->nfs_argop4_u.opaccess;
1331 ACCESS4res *resp = &resop->nfs_resop4_u.opaccess;
1332 int error;
1333 vnode_t *vp;
1334 struct vattr va;
1335 int checkwriteperm;
1336 cred_t *cr = cs->cr;
1337 bslabel_t *clabel, *slabel;
1338 ts_label_t *tslabel;
1339 boolean_t admin_low_client;
1340
1341 DTRACE_NFSV4_2(op__access__start, struct compound_state *, cs,
1342 ACCESS4args *, args);
1343
1344 #if 0 /* XXX allow access even if !cs->access. Eventually only pseudo fs */
1345 if (cs->access == CS_ACCESS_DENIED) {
1346 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1347 goto out;
1348 }
1349 #endif
1350 if (cs->vp == NULL) {
1351 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1352 goto out;
1353 }
1354
1355 ASSERT(cr != NULL);
1356
1357 vp = cs->vp;
1358
1359 /*
1360 * If the file system is exported read only, it is not appropriate
1361 * to check write permissions for regular files and directories.
1362 * Special files are interpreted by the client, so the underlying
1363 * permissions are sent back to the client for interpretation.
1364 */
1365 if (rdonly4(req, cs) &&
1366 (vp->v_type == VREG || vp->v_type == VDIR))
1367 checkwriteperm = 0;
1368 else
1369 checkwriteperm = 1;
1370
1371 /*
1372 * XXX
1373 * We need the mode so that we can correctly determine access
1374 * permissions relative to a mandatory lock file. Access to
1375 * mandatory lock files is denied on the server, so it might
1376 * as well be reflected to the server during the open.
1377 */
1378 va.va_mask = AT_MODE;
1379 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1380 if (error) {
1381 *cs->statusp = resp->status = puterrno4(error);
1382 goto out;
1383 }
1384 resp->access = 0;
1385 resp->supported = 0;
1386
1387 if (is_system_labeled()) {
1388 ASSERT(req->rq_label != NULL);
1389 clabel = req->rq_label;
1390 DTRACE_PROBE2(tx__rfs4__log__info__opaccess__clabel, char *,
1391 "got client label from request(1)",
1392 struct svc_req *, req);
1393 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1394 if ((tslabel = nfs_getflabel(vp, cs->exi)) == NULL) {
1395 *cs->statusp = resp->status = puterrno4(EACCES);
1396 goto out;
1397 }
1398 slabel = label2bslabel(tslabel);
1399 DTRACE_PROBE3(tx__rfs4__log__info__opaccess__slabel,
1400 char *, "got server label(1) for vp(2)",
1401 bslabel_t *, slabel, vnode_t *, vp);
1402
1403 admin_low_client = B_FALSE;
1404 } else
1405 admin_low_client = B_TRUE;
1406 }
1407
1408 if (args->access & ACCESS4_READ) {
1409 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
1410 if (!error && !MANDLOCK(vp, va.va_mode) &&
1411 (!is_system_labeled() || admin_low_client ||
1412 bldominates(clabel, slabel)))
1413 resp->access |= ACCESS4_READ;
1414 resp->supported |= ACCESS4_READ;
1415 }
1416 if ((args->access & ACCESS4_LOOKUP) && vp->v_type == VDIR) {
1417 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1418 if (!error && (!is_system_labeled() || admin_low_client ||
1419 bldominates(clabel, slabel)))
1420 resp->access |= ACCESS4_LOOKUP;
1421 resp->supported |= ACCESS4_LOOKUP;
1422 }
1423 if (checkwriteperm &&
1424 (args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND))) {
1425 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1426 if (!error && !MANDLOCK(vp, va.va_mode) &&
1427 (!is_system_labeled() || admin_low_client ||
1428 blequal(clabel, slabel)))
1429 resp->access |=
1430 (args->access & (ACCESS4_MODIFY | ACCESS4_EXTEND));
1431 resp->supported |=
1432 resp->access & (ACCESS4_MODIFY | ACCESS4_EXTEND);
1433 }
1434
1435 if (checkwriteperm &&
1436 (args->access & ACCESS4_DELETE) && vp->v_type == VDIR) {
1437 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1438 if (!error && (!is_system_labeled() || admin_low_client ||
1439 blequal(clabel, slabel)))
1440 resp->access |= ACCESS4_DELETE;
1441 resp->supported |= ACCESS4_DELETE;
1442 }
1443 if (args->access & ACCESS4_EXECUTE && vp->v_type != VDIR) {
1444 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1445 if (!error && !MANDLOCK(vp, va.va_mode) &&
1446 (!is_system_labeled() || admin_low_client ||
1447 bldominates(clabel, slabel)))
1448 resp->access |= ACCESS4_EXECUTE;
1449 resp->supported |= ACCESS4_EXECUTE;
1450 }
1451
1452 if (is_system_labeled() && !admin_low_client)
1453 label_rele(tslabel);
1454
1455 *cs->statusp = resp->status = NFS4_OK;
1456 out:
1457 DTRACE_NFSV4_2(op__access__done, struct compound_state *, cs,
1458 ACCESS4res *, resp);
1459 }
1460
1461 /* ARGSUSED */
1462 static void
1463 rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1464 struct compound_state *cs)
1465 {
1466 COMMIT4args *args = &argop->nfs_argop4_u.opcommit;
1467 COMMIT4res *resp = &resop->nfs_resop4_u.opcommit;
1468 int error;
1469 vnode_t *vp = cs->vp;
1470 cred_t *cr = cs->cr;
1471 vattr_t va;
1472 nfs4_srv_t *nsrv4;
1473
1474 DTRACE_NFSV4_2(op__commit__start, struct compound_state *, cs,
1475 COMMIT4args *, args);
1476
1477 if (vp == NULL) {
1478 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1479 goto out;
1480 }
1481 if (cs->access == CS_ACCESS_DENIED) {
1482 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1483 goto out;
1484 }
1485
1486 if (args->offset + args->count < args->offset) {
1487 *cs->statusp = resp->status = NFS4ERR_INVAL;
1488 goto out;
1489 }
1490
1491 va.va_mask = AT_UID;
1492 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1493
1494 /*
1495 * If we can't get the attributes, then we can't do the
1496 * right access checking. So, we'll fail the request.
1497 */
1498 if (error) {
1499 *cs->statusp = resp->status = puterrno4(error);
1500 goto out;
1501 }
1502 if (rdonly4(req, cs)) {
1503 *cs->statusp = resp->status = NFS4ERR_ROFS;
1504 goto out;
1505 }
1506
1507 if (vp->v_type != VREG) {
1508 if (vp->v_type == VDIR)
1509 resp->status = NFS4ERR_ISDIR;
1510 else
1511 resp->status = NFS4ERR_INVAL;
1512 *cs->statusp = resp->status;
1513 goto out;
1514 }
1515
1516 if (crgetuid(cr) != va.va_uid &&
1517 (error = VOP_ACCESS(vp, VWRITE, 0, cs->cr, NULL))) {
1518 *cs->statusp = resp->status = puterrno4(error);
1519 goto out;
1520 }
1521
1522 error = VOP_FSYNC(vp, FSYNC, cr, NULL);
1523
1524 if (error) {
1525 *cs->statusp = resp->status = puterrno4(error);
1526 goto out;
1527 }
1528
1529 nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
1530 *cs->statusp = resp->status = NFS4_OK;
1531 resp->writeverf = nsrv4->write4verf;
1532 out:
1533 DTRACE_NFSV4_2(op__commit__done, struct compound_state *, cs,
1534 COMMIT4res *, resp);
1535 }
1536
1537 /*
1538 * rfs4_op_mknod is called from rfs4_op_create after all initial verification
1539 * was completed. It does the nfsv4 create for special files.
1540 */
1541 /* ARGSUSED */
1542 static vnode_t *
1543 do_rfs4_op_mknod(CREATE4args *args, CREATE4res *resp, struct svc_req *req,
1544 struct compound_state *cs, vattr_t *vap, char *nm)
1545 {
1546 int error;
1547 cred_t *cr = cs->cr;
1548 vnode_t *dvp = cs->vp;
1549 vnode_t *vp = NULL;
1550 int mode;
1551 enum vcexcl excl;
1552
1553 switch (args->type) {
1554 case NF4CHR:
1555 case NF4BLK:
1556 if (secpolicy_sys_devices(cr) != 0) {
1557 *cs->statusp = resp->status = NFS4ERR_PERM;
1558 return (NULL);
1559 }
1560 if (args->type == NF4CHR)
1561 vap->va_type = VCHR;
1562 else
1563 vap->va_type = VBLK;
1564 vap->va_rdev = makedevice(args->ftype4_u.devdata.specdata1,
1565 args->ftype4_u.devdata.specdata2);
1566 vap->va_mask |= AT_RDEV;
1567 break;
1568 case NF4SOCK:
1569 vap->va_type = VSOCK;
1570 break;
1571 case NF4FIFO:
1572 vap->va_type = VFIFO;
1573 break;
1574 default:
1575 *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1576 return (NULL);
1577 }
1578
1579 /*
1580 * Must specify the mode.
1581 */
1582 if (!(vap->va_mask & AT_MODE)) {
1583 *cs->statusp = resp->status = NFS4ERR_INVAL;
1584 return (NULL);
1585 }
1586
1587 excl = EXCL;
1588
1589 mode = 0;
1590
1591 error = VOP_CREATE(dvp, nm, vap, excl, mode, &vp, cr, 0, NULL, NULL);
1592 if (error) {
1593 *cs->statusp = resp->status = puterrno4(error);
1594 return (NULL);
1595 }
1596 return (vp);
1597 }
1598
1599 /*
1600 * nfsv4 create is used to create non-regular files. For regular files,
1601 * use nfsv4 open.
1602 */
1603 /* ARGSUSED */
1604 static void
1605 rfs4_op_create(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1606 struct compound_state *cs)
1607 {
1608 CREATE4args *args = &argop->nfs_argop4_u.opcreate;
1609 CREATE4res *resp = &resop->nfs_resop4_u.opcreate;
1610 int error;
1611 struct vattr bva, iva, iva2, ava, *vap;
1612 cred_t *cr = cs->cr;
1613 vnode_t *dvp = cs->vp;
1614 vnode_t *vp = NULL;
1615 vnode_t *realvp;
1616 char *nm, *lnm;
1617 uint_t len, llen;
1618 int syncval = 0;
1619 struct nfs4_svgetit_arg sarg;
1620 struct nfs4_ntov_table ntov;
1621 struct statvfs64 sb;
1622 nfsstat4 status;
1623 struct sockaddr *ca;
1624 char *name = NULL;
1625 char *lname = NULL;
1626
1627 DTRACE_NFSV4_2(op__create__start, struct compound_state *, cs,
1628 CREATE4args *, args);
1629
1630 resp->attrset = 0;
1631
1632 if (dvp == NULL) {
1633 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1634 goto out;
1635 }
1636
1637 /*
1638 * If there is an unshared filesystem mounted on this vnode,
1639 * do not allow to create an object in this directory.
1640 */
1641 if (vn_ismntpt(dvp)) {
1642 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1643 goto out;
1644 }
1645
1646 /* Verify that type is correct */
1647 switch (args->type) {
1648 case NF4LNK:
1649 case NF4BLK:
1650 case NF4CHR:
1651 case NF4SOCK:
1652 case NF4FIFO:
1653 case NF4DIR:
1654 break;
1655 default:
1656 *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1657 goto out;
1658 };
1659
1660 if (cs->access == CS_ACCESS_DENIED) {
1661 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1662 goto out;
1663 }
1664 if (dvp->v_type != VDIR) {
1665 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1666 goto out;
1667 }
1668 status = utf8_dir_verify(&args->objname);
1669 if (status != NFS4_OK) {
1670 *cs->statusp = resp->status = status;
1671 goto out;
1672 }
1673
1674 if (rdonly4(req, cs)) {
1675 *cs->statusp = resp->status = NFS4ERR_ROFS;
1676 goto out;
1677 }
1678
1679 /*
1680 * Name of newly created object
1681 */
1682 nm = utf8_to_fn(&args->objname, &len, NULL);
1683 if (nm == NULL) {
1684 *cs->statusp = resp->status = NFS4ERR_INVAL;
1685 goto out;
1686 }
1687
1688 if (len > MAXNAMELEN) {
1689 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1690 kmem_free(nm, len);
1691 goto out;
1692 }
1693
1694 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1695 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1696 MAXPATHLEN + 1);
1697
1698 if (name == NULL) {
1699 *cs->statusp = resp->status = NFS4ERR_INVAL;
1700 kmem_free(nm, len);
1701 goto out;
1702 }
1703
1704 resp->attrset = 0;
1705
1706 sarg.sbp = &sb;
1707 sarg.is_referral = B_FALSE;
1708 nfs4_ntov_table_init(&ntov);
1709
1710 status = do_rfs4_set_attrs(&resp->attrset,
1711 &args->createattrs, cs, &sarg, &ntov, NFS4ATTR_SETIT);
1712
1713 if (sarg.vap->va_mask == 0 && status == NFS4_OK)
1714 status = NFS4ERR_INVAL;
1715
1716 if (status != NFS4_OK) {
1717 *cs->statusp = resp->status = status;
1718 if (name != nm)
1719 kmem_free(name, MAXPATHLEN + 1);
1720 kmem_free(nm, len);
1721 nfs4_ntov_table_free(&ntov, &sarg);
1722 resp->attrset = 0;
1723 goto out;
1724 }
1725
1726 /* Get "before" change value */
1727 bva.va_mask = AT_CTIME|AT_SEQ|AT_MODE;
1728 error = VOP_GETATTR(dvp, &bva, 0, cr, NULL);
1729 if (error) {
1730 *cs->statusp = resp->status = puterrno4(error);
1731 if (name != nm)
1732 kmem_free(name, MAXPATHLEN + 1);
1733 kmem_free(nm, len);
1734 nfs4_ntov_table_free(&ntov, &sarg);
1735 resp->attrset = 0;
1736 goto out;
1737 }
1738 NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bva.va_ctime)
1739
1740 vap = sarg.vap;
1741
1742 /*
1743 * Set the default initial values for attributes when the parent
1744 * directory does not have the VSUID/VSGID bit set and they have
1745 * not been specified in createattrs.
1746 */
1747 if (!(bva.va_mode & VSUID) && (vap->va_mask & AT_UID) == 0) {
1748 vap->va_uid = crgetuid(cr);
1749 vap->va_mask |= AT_UID;
1750 }
1751 if (!(bva.va_mode & VSGID) && (vap->va_mask & AT_GID) == 0) {
1752 vap->va_gid = crgetgid(cr);
1753 vap->va_mask |= AT_GID;
1754 }
1755
1756 vap->va_mask |= AT_TYPE;
1757 switch (args->type) {
1758 case NF4DIR:
1759 vap->va_type = VDIR;
1760 if ((vap->va_mask & AT_MODE) == 0) {
1761 vap->va_mode = 0700; /* default: owner rwx only */
1762 vap->va_mask |= AT_MODE;
1763 }
1764 error = VOP_MKDIR(dvp, name, vap, &vp, cr, NULL, 0, NULL);
1765 if (error)
1766 break;
1767
1768 /*
1769 * Get the initial "after" sequence number, if it fails,
1770 * set to zero
1771 */
1772 iva.va_mask = AT_SEQ;
1773 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1774 iva.va_seq = 0;
1775 break;
1776 case NF4LNK:
1777 vap->va_type = VLNK;
1778 if ((vap->va_mask & AT_MODE) == 0) {
1779 vap->va_mode = 0700; /* default: owner rwx only */
1780 vap->va_mask |= AT_MODE;
1781 }
1782
1783 /*
1784 * symlink names must be treated as data
1785 */
1786 lnm = utf8_to_str((utf8string *)&args->ftype4_u.linkdata,
1787 &llen, NULL);
1788
1789 if (lnm == NULL) {
1790 *cs->statusp = resp->status = NFS4ERR_INVAL;
1791 if (name != nm)
1792 kmem_free(name, MAXPATHLEN + 1);
1793 kmem_free(nm, len);
1794 nfs4_ntov_table_free(&ntov, &sarg);
1795 resp->attrset = 0;
1796 goto out;
1797 }
1798
1799 if (llen > MAXPATHLEN) {
1800 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1801 if (name != nm)
1802 kmem_free(name, MAXPATHLEN + 1);
1803 kmem_free(nm, len);
1804 kmem_free(lnm, llen);
1805 nfs4_ntov_table_free(&ntov, &sarg);
1806 resp->attrset = 0;
1807 goto out;
1808 }
1809
1810 lname = nfscmd_convname(ca, cs->exi, lnm,
1811 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
1812
1813 if (lname == NULL) {
1814 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
1815 if (name != nm)
1816 kmem_free(name, MAXPATHLEN + 1);
1817 kmem_free(nm, len);
1818 kmem_free(lnm, llen);
1819 nfs4_ntov_table_free(&ntov, &sarg);
1820 resp->attrset = 0;
1821 goto out;
1822 }
1823
1824 error = VOP_SYMLINK(dvp, name, vap, lname, cr, NULL, 0);
1825 if (lname != lnm)
1826 kmem_free(lname, MAXPATHLEN + 1);
1827 kmem_free(lnm, llen);
1828 if (error)
1829 break;
1830
1831 /*
1832 * Get the initial "after" sequence number, if it fails,
1833 * set to zero
1834 */
1835 iva.va_mask = AT_SEQ;
1836 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1837 iva.va_seq = 0;
1838
1839 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
1840 NULL, NULL, NULL);
1841 if (error)
1842 break;
1843
1844 /*
1845 * va_seq is not safe over VOP calls, check it again
1846 * if it has changed zero out iva to force atomic = FALSE.
1847 */
1848 iva2.va_mask = AT_SEQ;
1849 if (VOP_GETATTR(dvp, &iva2, 0, cs->cr, NULL) ||
1850 iva2.va_seq != iva.va_seq)
1851 iva.va_seq = 0;
1852 break;
1853 default:
1854 /*
1855 * probably a special file.
1856 */
1857 if ((vap->va_mask & AT_MODE) == 0) {
1858 vap->va_mode = 0600; /* default: owner rw only */
1859 vap->va_mask |= AT_MODE;
1860 }
1861 syncval = FNODSYNC;
1862 /*
1863 * We know this will only generate one VOP call
1864 */
1865 vp = do_rfs4_op_mknod(args, resp, req, cs, vap, name);
1866
1867 if (vp == NULL) {
1868 if (name != nm)
1869 kmem_free(name, MAXPATHLEN + 1);
1870 kmem_free(nm, len);
1871 nfs4_ntov_table_free(&ntov, &sarg);
1872 resp->attrset = 0;
1873 goto out;
1874 }
1875
1876 /*
1877 * Get the initial "after" sequence number, if it fails,
1878 * set to zero
1879 */
1880 iva.va_mask = AT_SEQ;
1881 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1882 iva.va_seq = 0;
1883
1884 break;
1885 }
1886 if (name != nm)
1887 kmem_free(name, MAXPATHLEN + 1);
1888 kmem_free(nm, len);
1889
1890 if (error) {
1891 *cs->statusp = resp->status = puterrno4(error);
1892 }
1893
1894 /*
1895 * Force modified data and metadata out to stable storage.
1896 */
1897 (void) VOP_FSYNC(dvp, 0, cr, NULL);
1898
1899 if (resp->status != NFS4_OK) {
1900 if (vp != NULL)
1901 VN_RELE(vp);
1902 nfs4_ntov_table_free(&ntov, &sarg);
1903 resp->attrset = 0;
1904 goto out;
1905 }
1906
1907 /*
1908 * Finish setup of cinfo response, "before" value already set.
1909 * Get "after" change value, if it fails, simply return the
1910 * before value.
1911 */
1912 ava.va_mask = AT_CTIME|AT_SEQ;
1913 if (VOP_GETATTR(dvp, &ava, 0, cr, NULL)) {
1914 ava.va_ctime = bva.va_ctime;
1915 ava.va_seq = 0;
1916 }
1917 NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, ava.va_ctime);
1918
1919 /*
1920 * True verification that object was created with correct
1921 * attrs is impossible. The attrs could have been changed
1922 * immediately after object creation. If attributes did
1923 * not verify, the only recourse for the server is to
1924 * destroy the object. Maybe if some attrs (like gid)
1925 * are set incorrectly, the object should be destroyed;
1926 * however, seems bad as a default policy. Do we really
1927 * want to destroy an object over one of the times not
1928 * verifying correctly? For these reasons, the server
1929 * currently sets bits in attrset for createattrs
1930 * that were set; however, no verification is done.
1931 *
1932 * vmask_to_nmask accounts for vattr bits set on create
1933 * [do_rfs4_set_attrs() only sets resp bits for
1934 * non-vattr/vfs bits.]
1935 * Mask off any bits set by default so as not to return
1936 * more attrset bits than were requested in createattrs
1937 */
1938 nfs4_vmask_to_nmask(sarg.vap->va_mask, &resp->attrset);
1939 resp->attrset &= args->createattrs.attrmask;
1940 nfs4_ntov_table_free(&ntov, &sarg);
1941
1942 error = makefh4(&cs->fh, vp, cs->exi);
1943 if (error) {
1944 *cs->statusp = resp->status = puterrno4(error);
1945 }
1946
1947 /*
1948 * The cinfo.atomic = TRUE only if we got no errors, we have
1949 * non-zero va_seq's, and it has incremented by exactly one
1950 * during the creation and it didn't change during the VOP_LOOKUP
1951 * or VOP_FSYNC.
1952 */
1953 if (!error && bva.va_seq && iva.va_seq && ava.va_seq &&
1954 iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
1955 resp->cinfo.atomic = TRUE;
1956 else
1957 resp->cinfo.atomic = FALSE;
1958
1959 /*
1960 * Force modified metadata out to stable storage.
1961 *
1962 * if a underlying vp exists, pass it to VOP_FSYNC
1963 */
1964 if (VOP_REALVP(vp, &realvp, NULL) == 0)
1965 (void) VOP_FSYNC(realvp, syncval, cr, NULL);
1966 else
1967 (void) VOP_FSYNC(vp, syncval, cr, NULL);
1968
1969 if (resp->status != NFS4_OK) {
1970 VN_RELE(vp);
1971 goto out;
1972 }
1973 if (cs->vp)
1974 VN_RELE(cs->vp);
1975
1976 cs->vp = vp;
1977 *cs->statusp = resp->status = NFS4_OK;
1978 out:
1979 DTRACE_NFSV4_2(op__create__done, struct compound_state *, cs,
1980 CREATE4res *, resp);
1981 }
1982
1983 /*ARGSUSED*/
1984 static void
1985 rfs4_op_delegpurge(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1986 struct compound_state *cs)
1987 {
1988 DTRACE_NFSV4_2(op__delegpurge__start, struct compound_state *, cs,
1989 DELEGPURGE4args *, &argop->nfs_argop4_u.opdelegpurge);
1990
1991 rfs4_op_inval(argop, resop, req, cs);
1992
1993 DTRACE_NFSV4_2(op__delegpurge__done, struct compound_state *, cs,
1994 DELEGPURGE4res *, &resop->nfs_resop4_u.opdelegpurge);
1995 }
1996
1997 /*ARGSUSED*/
1998 static void
1999 rfs4_op_delegreturn(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2000 struct compound_state *cs)
2001 {
2002 DELEGRETURN4args *args = &argop->nfs_argop4_u.opdelegreturn;
2003 DELEGRETURN4res *resp = &resop->nfs_resop4_u.opdelegreturn;
2004 rfs4_deleg_state_t *dsp;
2005 nfsstat4 status;
2006
2007 DTRACE_NFSV4_2(op__delegreturn__start, struct compound_state *, cs,
2008 DELEGRETURN4args *, args);
2009
2010 status = rfs4_get_deleg_state(&args->deleg_stateid, &dsp);
2011 resp->status = *cs->statusp = status;
2012 if (status != NFS4_OK)
2013 goto out;
2014
2015 /* Ensure specified filehandle matches */
2016 if (cs->vp != dsp->rds_finfo->rf_vp) {
2017 resp->status = *cs->statusp = NFS4ERR_BAD_STATEID;
2018 } else
2019 rfs4_return_deleg(dsp, FALSE);
2020
2021 rfs4_update_lease(dsp->rds_client);
2022
2023 rfs4_deleg_state_rele(dsp);
2024 out:
2025 DTRACE_NFSV4_2(op__delegreturn__done, struct compound_state *, cs,
2026 DELEGRETURN4res *, resp);
2027 }
2028
2029 /*
2030 * Check to see if a given "flavor" is an explicitly shared flavor.
2031 * The assumption of this routine is the "flavor" is already a valid
2032 * flavor in the secinfo list of "exi".
2033 *
2034 * e.g.
2035 * # share -o sec=flavor1 /export
2036 * # share -o sec=flavor2 /export/home
2037 *
2038 * flavor2 is not an explicitly shared flavor for /export,
2039 * however it is in the secinfo list for /export thru the
2040 * server namespace setup.
2041 */
2042 int
2043 is_exported_sec(int flavor, struct exportinfo *exi)
2044 {
2045 int i;
2046 struct secinfo *sp;
2047
2048 sp = exi->exi_export.ex_secinfo;
2049 for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
2050 if (flavor == sp[i].s_secinfo.sc_nfsnum ||
2051 sp[i].s_secinfo.sc_nfsnum == AUTH_NONE) {
2052 return (SEC_REF_EXPORTED(&sp[i]));
2053 }
2054 }
2055
2056 /* Should not reach this point based on the assumption */
2057 return (0);
2058 }
2059
2060 /*
2061 * Check if the security flavor used in the request matches what is
2062 * required at the export point or at the root pseudo node (exi_root).
2063 *
2064 * returns 1 if there's a match or if exported with AUTH_NONE; 0 otherwise.
2065 *
2066 */
2067 static int
2068 secinfo_match_or_authnone(struct compound_state *cs)
2069 {
2070 int i;
2071 struct secinfo *sp;
2072
2073 /*
2074 * Check cs->nfsflavor (from the request) against
2075 * the current export data in cs->exi.
2076 */
2077 sp = cs->exi->exi_export.ex_secinfo;
2078 for (i = 0; i < cs->exi->exi_export.ex_seccnt; i++) {
2079 if (cs->nfsflavor == sp[i].s_secinfo.sc_nfsnum ||
2080 sp[i].s_secinfo.sc_nfsnum == AUTH_NONE)
2081 return (1);
2082 }
2083
2084 return (0);
2085 }
2086
2087 /*
2088 * Check the access authority for the client and return the correct error.
2089 */
2090 nfsstat4
2091 call_checkauth4(struct compound_state *cs, struct svc_req *req)
2092 {
2093 int authres;
2094
2095 /*
2096 * First, check if the security flavor used in the request
2097 * are among the flavors set in the server namespace.
2098 */
2099 if (!secinfo_match_or_authnone(cs)) {
2100 *cs->statusp = NFS4ERR_WRONGSEC;
2101 return (*cs->statusp);
2102 }
2103
2104 authres = checkauth4(cs, req);
2105
2106 if (authres > 0) {
2107 *cs->statusp = NFS4_OK;
2108 if (! (cs->access & CS_ACCESS_LIMITED))
2109 cs->access = CS_ACCESS_OK;
2110 } else if (authres == 0) {
2111 *cs->statusp = NFS4ERR_ACCESS;
2112 } else if (authres == -2) {
2113 *cs->statusp = NFS4ERR_WRONGSEC;
2114 } else {
2115 *cs->statusp = NFS4ERR_DELAY;
2116 }
2117 return (*cs->statusp);
2118 }
2119
2120 /*
2121 * bitmap4_to_attrmask is called by getattr and readdir.
2122 * It sets up the vattr mask and determines whether vfsstat call is needed
2123 * based on the input bitmap.
2124 * Returns nfsv4 status.
2125 */
2126 static nfsstat4
2127 bitmap4_to_attrmask(bitmap4 breq, struct nfs4_svgetit_arg *sargp)
2128 {
2129 int i;
2130 uint_t va_mask;
2131 struct statvfs64 *sbp = sargp->sbp;
2132
2133 sargp->sbp = NULL;
2134 sargp->flag = 0;
2135 sargp->rdattr_error = NFS4_OK;
2136 sargp->mntdfid_set = FALSE;
2137 if (sargp->cs->vp)
2138 sargp->xattr = get_fh4_flag(&sargp->cs->fh,
2139 FH4_ATTRDIR | FH4_NAMEDATTR);
2140 else
2141 sargp->xattr = 0;
2142
2143 /*
2144 * Set rdattr_error_req to true if return error per
2145 * failed entry rather than fail the readdir.
2146 */
2147 if (breq & FATTR4_RDATTR_ERROR_MASK)
2148 sargp->rdattr_error_req = 1;
2149 else
2150 sargp->rdattr_error_req = 0;
2151
2152 /*
2153 * generate the va_mask
2154 * Handle the easy cases first
2155 */
2156 switch (breq) {
2157 case NFS4_NTOV_ATTR_MASK:
2158 sargp->vap->va_mask = NFS4_NTOV_ATTR_AT_MASK;
2159 return (NFS4_OK);
2160
2161 case NFS4_FS_ATTR_MASK:
2162 sargp->vap->va_mask = NFS4_FS_ATTR_AT_MASK;
2163 sargp->sbp = sbp;
2164 return (NFS4_OK);
2165
2166 case NFS4_NTOV_ATTR_CACHE_MASK:
2167 sargp->vap->va_mask = NFS4_NTOV_ATTR_CACHE_AT_MASK;
2168 return (NFS4_OK);
2169
2170 case FATTR4_LEASE_TIME_MASK:
2171 sargp->vap->va_mask = 0;
2172 return (NFS4_OK);
2173
2174 default:
2175 va_mask = 0;
2176 for (i = 0; i < nfs4_ntov_map_size; i++) {
2177 if ((breq & nfs4_ntov_map[i].fbit) &&
2178 nfs4_ntov_map[i].vbit)
2179 va_mask |= nfs4_ntov_map[i].vbit;
2180 }
2181
2182 /*
2183 * Check is vfsstat is needed
2184 */
2185 if (breq & NFS4_FS_ATTR_MASK)
2186 sargp->sbp = sbp;
2187
2188 sargp->vap->va_mask = va_mask;
2189 return (NFS4_OK);
2190 }
2191 /* NOTREACHED */
2192 }
2193
2194 /*
2195 * bitmap4_get_sysattrs is called by getattr and readdir.
2196 * It calls both VOP_GETATTR and VFS_STATVFS calls to get the attrs.
2197 * Returns nfsv4 status.
2198 */
2199 static nfsstat4
2200 bitmap4_get_sysattrs(struct nfs4_svgetit_arg *sargp)
2201 {
2202 int error;
2203 struct compound_state *cs = sargp->cs;
2204 vnode_t *vp = cs->vp;
2205
2206 if (sargp->sbp != NULL) {
2207 if (error = VFS_STATVFS(vp->v_vfsp, sargp->sbp)) {
2208 sargp->sbp = NULL; /* to identify error */
2209 return (puterrno4(error));
2210 }
2211 }
2212
2213 return (rfs4_vop_getattr(vp, sargp->vap, 0, cs->cr));
2214 }
2215
2216 static void
2217 nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp)
2218 {
2219 ntovp->na = kmem_zalloc(sizeof (union nfs4_attr_u) * nfs4_ntov_map_size,
2220 KM_SLEEP);
2221 ntovp->attrcnt = 0;
2222 ntovp->vfsstat = FALSE;
2223 }
2224
2225 static void
2226 nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
2227 struct nfs4_svgetit_arg *sargp)
2228 {
2229 int i;
2230 union nfs4_attr_u *na;
2231 uint8_t *amap;
2232
2233 /*
2234 * XXX Should do the same checks for whether the bit is set
2235 */
2236 for (i = 0, na = ntovp->na, amap = ntovp->amap;
2237 i < ntovp->attrcnt; i++, na++, amap++) {
2238 (void) (*nfs4_ntov_map[*amap].sv_getit)(
2239 NFS4ATTR_FREEIT, sargp, na);
2240 }
2241 if ((sargp->op == NFS4ATTR_SETIT) || (sargp->op == NFS4ATTR_VERIT)) {
2242 /*
2243 * xdr_free for getattr will be done later
2244 */
2245 for (i = 0, na = ntovp->na, amap = ntovp->amap;
2246 i < ntovp->attrcnt; i++, na++, amap++) {
2247 xdr_free(nfs4_ntov_map[*amap].xfunc, (caddr_t)na);
2248 }
2249 }
2250 kmem_free(ntovp->na, sizeof (union nfs4_attr_u) * nfs4_ntov_map_size);
2251 }
2252
/*
 * do_rfs4_op_getattr gets the system attrs and converts into fattr4.
 *
 *	breq	bitmap of the attributes requested
 *	fattrp	result: attrmask records which attributes were produced,
 *		attrlist4/attrlist4_len hold their XDR encoding
 *	sargp	attribute state passed to every sv_getit routine
 *
 * Returns nfsv4 status.  Note that when an attribute fetch fails and
 * rdattr_error was not requested, the function jumps straight to the
 * cleanup code without writing attrlist4 or attrlist4_len.
 */
static nfsstat4
do_rfs4_op_getattr(bitmap4 breq, fattr4 *fattrp,
    struct nfs4_svgetit_arg *sargp)
{
	int error = 0;
	int i, k;
	struct nfs4_ntov_table ntov;
	XDR xdr;
	ulong_t xdr_size;
	char *xdr_attrs;
	nfsstat4 status = NFS4_OK;
	/* remembered so a change made by the fetch loop can be detected */
	nfsstat4 prev_rdattr_error = sargp->rdattr_error;
	union nfs4_attr_u *na;
	uint8_t *amap;

	sargp->op = NFS4ATTR_GETIT;
	sargp->flag = 0;

	fattrp->attrmask = 0;
	/* if no bits requested, then return empty fattr4 */
	if (breq == 0) {
		fattrp->attrlist4_len = 0;
		fattrp->attrlist4 = NULL;
		return (NFS4_OK);
	}

	/*
	 * return NFS4ERR_INVAL when client requests write-only attrs
	 */
	if (breq & (FATTR4_TIME_ACCESS_SET_MASK | FATTR4_TIME_MODIFY_SET_MASK))
		return (NFS4ERR_INVAL);

	/*
	 * na and amap advance together: na[j] holds the value of the j-th
	 * attribute collected and amap[j] the index of its map entry.
	 */
	nfs4_ntov_table_init(&ntov);
	na = ntov.na;
	amap = ntov.amap;

	/*
	 * Now loop to get or verify the attrs
	 */
	for (i = 0; i < nfs4_ntov_map_size; i++) {
		if (breq & nfs4_ntov_map[i].fbit) {
			/* attributes reported as unsupported are skipped */
			if ((*nfs4_ntov_map[i].sv_getit)(
			    NFS4ATTR_SUPPORTED, sargp, NULL) == 0) {

				error = (*nfs4_ntov_map[i].sv_getit)(
				    NFS4ATTR_GETIT, sargp, na);

				/*
				 * Possible error values:
				 * >0 if sv_getit failed to
				 * get the attr; 0 if succeeded;
				 * <0 if rdattr_error and the
				 * attribute cannot be returned.
				 */
				if (error && !(sargp->rdattr_error_req))
					goto done;
				/*
				 * If error then just for entry
				 */
				if (error == 0) {
					fattrp->attrmask |=
					    nfs4_ntov_map[i].fbit;
					*amap++ =
					    (uint8_t)nfs4_ntov_map[i].nval;
					na++;
					(ntov.attrcnt)++;
				} else if ((error > 0) &&
				    (sargp->rdattr_error == NFS4_OK)) {
					/* only the first failure is kept */
					sargp->rdattr_error = puterrno4(error);
				}
				/* the error was absorbed into rdattr_error */
				error = 0;
			}
		}
	}

	/*
	 * If rdattr_error was set after the return value for it was assigned,
	 * update it.
	 */
	if (prev_rdattr_error != sargp->rdattr_error) {
		na = ntov.na;
		amap = ntov.amap;
		for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
			k = *amap;
			if (k < FATTR4_RDATTR_ERROR) {
				continue;
			}
			/*
			 * First entry at or past FATTR4_RDATTR_ERROR:
			 * re-fetch it if it is that attribute, then stop
			 * scanning either way.
			 */
			if ((k == FATTR4_RDATTR_ERROR) &&
			    ((*nfs4_ntov_map[k].sv_getit)(
			    NFS4ATTR_SUPPORTED, sargp, NULL) == 0)) {

				(void) (*nfs4_ntov_map[k].sv_getit)(
				    NFS4ATTR_GETIT, sargp, na);
			}
			break;
		}
	}

	/* first pass: total encoded size of the collected attributes */
	xdr_size = 0;
	na = ntov.na;
	amap = ntov.amap;
	for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
		xdr_size += xdr_sizeof(nfs4_ntov_map[*amap].xfunc, na);
	}

	fattrp->attrlist4_len = xdr_size;
	if (xdr_size) {
		/* freed by rfs4_op_getattr_free() */
		fattrp->attrlist4 = xdr_attrs = kmem_zalloc(xdr_size, KM_SLEEP);

		xdrmem_create(&xdr, xdr_attrs, xdr_size, XDR_ENCODE);

		/* second pass: encode each attribute into the buffer */
		na = ntov.na;
		amap = ntov.amap;
		for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
			if (!(*nfs4_ntov_map[*amap].xfunc)(&xdr, na)) {
				DTRACE_PROBE1(nfss__e__getattr4_encfail,
				    int, *amap);
				status = NFS4ERR_SERVERFAULT;
				break;
			}
		}
		/* xdrmem_destroy(&xdrs); */	/* NO-OP */
	} else {
		fattrp->attrlist4 = NULL;
	}
done:

	/* reached on success and on the early fetch-error path alike */
	nfs4_ntov_table_free(&ntov, sargp);

	if (error != 0)
		status = puterrno4(error);

	return (status);
}
2391
2392 /* ARGSUSED */
2393 static void
2394 rfs4_op_getattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2395 struct compound_state *cs)
2396 {
2397 GETATTR4args *args = &argop->nfs_argop4_u.opgetattr;
2398 GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2399 struct nfs4_svgetit_arg sarg;
2400 struct statvfs64 sb;
2401 nfsstat4 status;
2402
2403 DTRACE_NFSV4_2(op__getattr__start, struct compound_state *, cs,
2404 GETATTR4args *, args);
2405
2406 if (cs->vp == NULL) {
2407 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2408 goto out;
2409 }
2410
2411 if (cs->access == CS_ACCESS_DENIED) {
2412 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2413 goto out;
2414 }
2415
2416 sarg.sbp = &sb;
2417 sarg.cs = cs;
2418 sarg.is_referral = B_FALSE;
2419
2420 status = bitmap4_to_attrmask(args->attr_request, &sarg);
2421 if (status == NFS4_OK) {
2422
2423 status = bitmap4_get_sysattrs(&sarg);
2424 if (status == NFS4_OK) {
2425
2426 /* Is this a referral? */
2427 if (vn_is_nfs_reparse(cs->vp, cs->cr)) {
2428 /* Older V4 Solaris client sees a link */
2429 if (client_is_downrev(req))
2430 sarg.vap->va_type = VLNK;
2431 else
2432 sarg.is_referral = B_TRUE;
2433 }
2434
2435 status = do_rfs4_op_getattr(args->attr_request,
2436 &resp->obj_attributes, &sarg);
2437 }
2438 }
2439 *cs->statusp = resp->status = status;
2440 out:
2441 DTRACE_NFSV4_2(op__getattr__done, struct compound_state *, cs,
2442 GETATTR4res *, resp);
2443 }
2444
2445 static void
2446 rfs4_op_getattr_free(nfs_resop4 *resop)
2447 {
2448 GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2449
2450 nfs4_fattr4_free(&resp->obj_attributes);
2451 }
2452
2453 /* ARGSUSED */
2454 static void
2455 rfs4_op_getfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2456 struct compound_state *cs)
2457 {
2458 GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2459
2460 DTRACE_NFSV4_1(op__getfh__start, struct compound_state *, cs);
2461
2462 if (cs->vp == NULL) {
2463 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2464 goto out;
2465 }
2466 if (cs->access == CS_ACCESS_DENIED) {
2467 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2468 goto out;
2469 }
2470
2471 /* check for reparse point at the share point */
2472 if (cs->exi->exi_moved || vn_is_nfs_reparse(cs->exi->exi_vp, cs->cr)) {
2473 /* it's all bad */
2474 cs->exi->exi_moved = 1;
2475 *cs->statusp = resp->status = NFS4ERR_MOVED;
2476 DTRACE_PROBE2(nfs4serv__func__referral__shared__moved,
2477 vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2478 return;
2479 }
2480
2481 /* check for reparse point at vp */
2482 if (vn_is_nfs_reparse(cs->vp, cs->cr) && !client_is_downrev(req)) {
2483 /* it's not all bad */
2484 *cs->statusp = resp->status = NFS4ERR_MOVED;
2485 DTRACE_PROBE2(nfs4serv__func__referral__moved,
2486 vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2487 return;
2488 }
2489
2490 resp->object.nfs_fh4_val =
2491 kmem_alloc(cs->fh.nfs_fh4_len, KM_SLEEP);
2492 nfs_fh4_copy(&cs->fh, &resp->object);
2493 *cs->statusp = resp->status = NFS4_OK;
2494 out:
2495 DTRACE_NFSV4_2(op__getfh__done, struct compound_state *, cs,
2496 GETFH4res *, resp);
2497 }
2498
2499 static void
2500 rfs4_op_getfh_free(nfs_resop4 *resop)
2501 {
2502 GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2503
2504 if (resp->status == NFS4_OK &&
2505 resp->object.nfs_fh4_val != NULL) {
2506 kmem_free(resp->object.nfs_fh4_val, resp->object.nfs_fh4_len);
2507 resp->object.nfs_fh4_val = NULL;
2508 resp->object.nfs_fh4_len = 0;
2509 }
2510 }
2511
2512 /*
2513 * illegal: args: void
2514 * res : status (NFS4ERR_OP_ILLEGAL)
2515 */
2516 /* ARGSUSED */
2517 static void
2518 rfs4_op_illegal(nfs_argop4 *argop, nfs_resop4 *resop,
2519 struct svc_req *req, struct compound_state *cs)
2520 {
2521 ILLEGAL4res *resp = &resop->nfs_resop4_u.opillegal;
2522
2523 resop->resop = OP_ILLEGAL;
2524 *cs->statusp = resp->status = NFS4ERR_OP_ILLEGAL;
2525 }
2526
2527 /*
2528 * link: args: SAVED_FH: file, CURRENT_FH: target directory
2529 * res: status. If success - CURRENT_FH unchanged, return change_info
2530 */
2531 /* ARGSUSED */
2532 static void
2533 rfs4_op_link(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2534 struct compound_state *cs)
2535 {
2536 LINK4args *args = &argop->nfs_argop4_u.oplink;
2537 LINK4res *resp = &resop->nfs_resop4_u.oplink;
2538 int error;
2539 vnode_t *vp;
2540 vnode_t *dvp;
2541 struct vattr bdva, idva, adva;
2542 char *nm;
2543 uint_t len;
2544 struct sockaddr *ca;
2545 char *name = NULL;
2546 nfsstat4 status;
2547
2548 DTRACE_NFSV4_2(op__link__start, struct compound_state *, cs,
2549 LINK4args *, args);
2550
2551 /* SAVED_FH: source object */
2552 vp = cs->saved_vp;
2553 if (vp == NULL) {
2554 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2555 goto out;
2556 }
2557
2558 /* CURRENT_FH: target directory */
2559 dvp = cs->vp;
2560 if (dvp == NULL) {
2561 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2562 goto out;
2563 }
2564
2565 /*
2566 * If there is a non-shared filesystem mounted on this vnode,
2567 * do not allow to link any file in this directory.
2568 */
2569 if (vn_ismntpt(dvp)) {
2570 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2571 goto out;
2572 }
2573
2574 if (cs->access == CS_ACCESS_DENIED) {
2575 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2576 goto out;
2577 }
2578
2579 /* Check source object's type validity */
2580 if (vp->v_type == VDIR) {
2581 *cs->statusp = resp->status = NFS4ERR_ISDIR;
2582 goto out;
2583 }
2584
2585 /* Check target directory's type */
2586 if (dvp->v_type != VDIR) {
2587 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2588 goto out;
2589 }
2590
2591 if (cs->saved_exi != cs->exi) {
2592 *cs->statusp = resp->status = NFS4ERR_XDEV;
2593 goto out;
2594 }
2595
2596 status = utf8_dir_verify(&args->newname);
2597 if (status != NFS4_OK) {
2598 *cs->statusp = resp->status = status;
2599 goto out;
2600 }
2601
2602 nm = utf8_to_fn(&args->newname, &len, NULL);
2603 if (nm == NULL) {
2604 *cs->statusp = resp->status = NFS4ERR_INVAL;
2605 goto out;
2606 }
2607
2608 if (len > MAXNAMELEN) {
2609 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2610 kmem_free(nm, len);
2611 goto out;
2612 }
2613
2614 if (rdonly4(req, cs)) {
2615 *cs->statusp = resp->status = NFS4ERR_ROFS;
2616 kmem_free(nm, len);
2617 goto out;
2618 }
2619
2620 /* Get "before" change value */
2621 bdva.va_mask = AT_CTIME|AT_SEQ;
2622 error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
2623 if (error) {
2624 *cs->statusp = resp->status = puterrno4(error);
2625 kmem_free(nm, len);
2626 goto out;
2627 }
2628
2629 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2630 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
2631 MAXPATHLEN + 1);
2632
2633 if (name == NULL) {
2634 *cs->statusp = resp->status = NFS4ERR_INVAL;
2635 kmem_free(nm, len);
2636 goto out;
2637 }
2638
2639 NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
2640
2641 error = VOP_LINK(dvp, vp, name, cs->cr, NULL, 0);
2642
2643 if (nm != name)
2644 kmem_free(name, MAXPATHLEN + 1);
2645 kmem_free(nm, len);
2646
2647 /*
2648 * Get the initial "after" sequence number, if it fails, set to zero
2649 */
2650 idva.va_mask = AT_SEQ;
2651 if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
2652 idva.va_seq = 0;
2653
2654 /*
2655 * Force modified data and metadata out to stable storage.
2656 */
2657 (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
2658 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
2659
2660 if (error) {
2661 *cs->statusp = resp->status = puterrno4(error);
2662 goto out;
2663 }
2664
2665 /*
2666 * Get "after" change value, if it fails, simply return the
2667 * before value.
2668 */
2669 adva.va_mask = AT_CTIME|AT_SEQ;
2670 if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
2671 adva.va_ctime = bdva.va_ctime;
2672 adva.va_seq = 0;
2673 }
2674
2675 NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
2676
2677 /*
2678 * The cinfo.atomic = TRUE only if we have
2679 * non-zero va_seq's, and it has incremented by exactly one
2680 * during the VOP_LINK and it didn't change during the VOP_FSYNC.
2681 */
2682 if (bdva.va_seq && idva.va_seq && adva.va_seq &&
2683 idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
2684 resp->cinfo.atomic = TRUE;
2685 else
2686 resp->cinfo.atomic = FALSE;
2687
2688 *cs->statusp = resp->status = NFS4_OK;
2689 out:
2690 DTRACE_NFSV4_2(op__link__done, struct compound_state *, cs,
2691 LINK4res *, resp);
2692 }
2693
2694 /*
2695 * Used by rfs4_op_lookup and rfs4_op_lookupp to do the actual work.
2696 */
2697
2698 /* ARGSUSED */
2699 static nfsstat4
2700 do_rfs4_op_lookup(char *nm, struct svc_req *req, struct compound_state *cs)
2701 {
2702 int error;
2703 int different_export = 0;
2704 vnode_t *vp, *pre_tvp = NULL, *oldvp = NULL;
2705 struct exportinfo *exi = NULL, *pre_exi = NULL;
2706 nfsstat4 stat;
2707 fid_t fid;
2708 int attrdir, dotdot, walk;
2709 bool_t is_newvp = FALSE;
2710
2711 if (cs->vp->v_flag & V_XATTRDIR) {
2712 attrdir = 1;
2713 ASSERT(get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2714 } else {
2715 attrdir = 0;
2716 ASSERT(! get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2717 }
2718
2719 dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
2720
2721 /*
2722 * If dotdotting, then need to check whether it's
2723 * above the root of a filesystem, or above an
2724 * export point.
2725 */
2726 if (dotdot) {
2727
2728 /*
2729 * If dotdotting at the root of a filesystem, then
2730 * need to traverse back to the mounted-on filesystem
2731 * and do the dotdot lookup there.
2732 */
2733 if (cs->vp->v_flag & VROOT) {
2734
2735 /*
2736 * If at the system root, then can
2737 * go up no further.
2738 */
2739 if (VN_CMP(cs->vp, ZONE_ROOTVP()))
2740 return (puterrno4(ENOENT));
2741
2742 /*
2743 * Traverse back to the mounted-on filesystem
2744 */
2745 cs->vp = untraverse(cs->vp);
2746
2747 /*
2748 * Set the different_export flag so we remember
2749 * to pick up a new exportinfo entry for
2750 * this new filesystem.
2751 */
2752 different_export = 1;
2753 } else {
2754
2755 /*
2756 * If dotdotting above an export point then set
2757 * the different_export to get new export info.
2758 */
2759 different_export = nfs_exported(cs->exi, cs->vp);
2760 }
2761 }
2762
2763 error = VOP_LOOKUP(cs->vp, nm, &vp, NULL, 0, NULL, cs->cr,
2764 NULL, NULL, NULL);
2765 if (error)
2766 return (puterrno4(error));
2767
2768 /*
2769 * If the vnode is in a pseudo filesystem, check whether it is visible.
2770 *
2771 * XXX if the vnode is a symlink and it is not visible in
2772 * a pseudo filesystem, return ENOENT (not following symlink).
2773 * V4 client can not mount such symlink. This is a regression
2774 * from V2/V3.
2775 *
2776 * In the same exported filesystem, if the security flavor used
2777 * is not an explicitly shared flavor, limit the view to the visible
2778 * list entries only. This is not a WRONGSEC case because it's already
2779 * checked via PUTROOTFH/PUTPUBFH or PUTFH.
2780 */
2781 if (!different_export &&
2782 (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
2783 cs->access & CS_ACCESS_LIMITED)) {
2784 if (! nfs_visible(cs->exi, vp, &different_export)) {
2785 VN_RELE(vp);
2786 return (puterrno4(ENOENT));
2787 }
2788 }
2789
2790 /*
2791 * If it's a mountpoint, then traverse it.
2792 */
2793 if (vn_ismntpt(vp)) {
2794 pre_exi = cs->exi; /* save pre-traversed exportinfo */
2795 pre_tvp = vp; /* save pre-traversed vnode */
2796
2797 /*
2798 * hold pre_tvp to counteract rele by traverse. We will
2799 * need pre_tvp below if checkexport4 fails
2800 */
2801 VN_HOLD(pre_tvp);
2802 if ((error = traverse(&vp)) != 0) {
2803 VN_RELE(vp);
2804 VN_RELE(pre_tvp);
2805 return (puterrno4(error));
2806 }
2807 different_export = 1;
2808 } else if (vp->v_vfsp != cs->vp->v_vfsp) {
2809 /*
2810 * The vfsp comparison is to handle the case where
2811 * a LOFS mount is shared. lo_lookup traverses mount points,
2812 * and NFS is unaware of local fs transistions because
2813 * v_vfsmountedhere isn't set. For this special LOFS case,
2814 * the dir and the obj returned by lookup will have different
2815 * vfs ptrs.
2816 */
2817 different_export = 1;
2818 }
2819
2820 if (different_export) {
2821
2822 bzero(&fid, sizeof (fid));
2823 fid.fid_len = MAXFIDSZ;
2824 error = vop_fid_pseudo(vp, &fid);
2825 if (error) {
2826 VN_RELE(vp);
2827 if (pre_tvp)
2828 VN_RELE(pre_tvp);
2829 return (puterrno4(error));
2830 }
2831
2832 if (dotdot)
2833 exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
2834 else
2835 exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
2836
2837 if (exi == NULL) {
2838 if (pre_tvp) {
2839 /*
2840 * If this vnode is a mounted-on vnode,
2841 * but the mounted-on file system is not
2842 * exported, send back the filehandle for
2843 * the mounted-on vnode, not the root of
2844 * the mounted-on file system.
2845 */
2846 VN_RELE(vp);
2847 vp = pre_tvp;
2848 exi = pre_exi;
2849 } else {
2850 VN_RELE(vp);
2851 return (puterrno4(EACCES));
2852 }
2853 } else if (pre_tvp) {
2854 /* we're done with pre_tvp now. release extra hold */
2855 VN_RELE(pre_tvp);
2856 }
2857
2858 cs->exi = exi;
2859
2860 /*
2861 * Now we do a checkauth4. The reason is that
2862 * this client/user may not have access to the new
2863 * exported file system, and if they do,
2864 * the client/user may be mapped to a different uid.
2865 *
2866 * We start with a new cr, because the checkauth4 done
2867 * in the PUT*FH operation over wrote the cred's uid,
2868 * gid, etc, and we want the real thing before calling
2869 * checkauth4()
2870 */
2871 crfree(cs->cr);
2872 cs->cr = crdup(cs->basecr);
2873
2874 oldvp = cs->vp;
2875 cs->vp = vp;
2876 is_newvp = TRUE;
2877
2878 stat = call_checkauth4(cs, req);
2879 if (stat != NFS4_OK) {
2880 VN_RELE(cs->vp);
2881 cs->vp = oldvp;
2882 return (stat);
2883 }
2884 }
2885
2886 /*
2887 * After various NFS checks, do a label check on the path
2888 * component. The label on this path should either be the
2889 * global zone's label or a zone's label. We are only
2890 * interested in the zone's label because exported files
2891 * in global zone is accessible (though read-only) to
2892 * clients. The exportability/visibility check is already
2893 * done before reaching this code.
2894 */
2895 if (is_system_labeled()) {
2896 bslabel_t *clabel;
2897
2898 ASSERT(req->rq_label != NULL);
2899 clabel = req->rq_label;
2900 DTRACE_PROBE2(tx__rfs4__log__info__oplookup__clabel, char *,
2901 "got client label from request(1)", struct svc_req *, req);
2902
2903 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2904 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
2905 cs->exi)) {
2906 error = EACCES;
2907 goto err_out;
2908 }
2909 } else {
2910 /*
2911 * We grant access to admin_low label clients
2912 * only if the client is trusted, i.e. also
2913 * running Solaris Trusted Extension.
2914 */
2915 struct sockaddr *ca;
2916 int addr_type;
2917 void *ipaddr;
2918 tsol_tpc_t *tp;
2919
2920 ca = (struct sockaddr *)svc_getrpccaller(
2921 req->rq_xprt)->buf;
2922 if (ca->sa_family == AF_INET) {
2923 addr_type = IPV4_VERSION;
2924 ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
2925 } else if (ca->sa_family == AF_INET6) {
2926 addr_type = IPV6_VERSION;
2927 ipaddr = &((struct sockaddr_in6 *)
2928 ca)->sin6_addr;
2929 }
2930 tp = find_tpc(ipaddr, addr_type, B_FALSE);
2931 if (tp == NULL || tp->tpc_tp.tp_doi !=
2932 l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
2933 SUN_CIPSO) {
2934 if (tp != NULL)
2935 TPC_RELE(tp);
2936 error = EACCES;
2937 goto err_out;
2938 }
2939 TPC_RELE(tp);
2940 }
2941 }
2942
2943 error = makefh4(&cs->fh, vp, cs->exi);
2944
2945 err_out:
2946 if (error) {
2947 if (is_newvp) {
2948 VN_RELE(cs->vp);
2949 cs->vp = oldvp;
2950 } else
2951 VN_RELE(vp);
2952 return (puterrno4(error));
2953 }
2954
2955 if (!is_newvp) {
2956 if (cs->vp)
2957 VN_RELE(cs->vp);
2958 cs->vp = vp;
2959 } else if (oldvp)
2960 VN_RELE(oldvp);
2961
2962 /*
2963 * if did lookup on attrdir and didn't lookup .., set named
2964 * attr fh flag
2965 */
2966 if (attrdir && ! dotdot)
2967 set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
2968
2969 /* Assume false for now, open proc will set this */
2970 cs->mandlock = FALSE;
2971
2972 return (NFS4_OK);
2973 }
2974
2975 /* ARGSUSED */
2976 static void
2977 rfs4_op_lookup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2978 struct compound_state *cs)
2979 {
2980 LOOKUP4args *args = &argop->nfs_argop4_u.oplookup;
2981 LOOKUP4res *resp = &resop->nfs_resop4_u.oplookup;
2982 char *nm;
2983 uint_t len;
2984 struct sockaddr *ca;
2985 char *name = NULL;
2986 nfsstat4 status;
2987
2988 DTRACE_NFSV4_2(op__lookup__start, struct compound_state *, cs,
2989 LOOKUP4args *, args);
2990
2991 if (cs->vp == NULL) {
2992 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2993 goto out;
2994 }
2995
2996 if (cs->vp->v_type == VLNK) {
2997 *cs->statusp = resp->status = NFS4ERR_SYMLINK;
2998 goto out;
2999 }
3000
3001 if (cs->vp->v_type != VDIR) {
3002 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
3003 goto out;
3004 }
3005
3006 status = utf8_dir_verify(&args->objname);
3007 if (status != NFS4_OK) {
3008 *cs->statusp = resp->status = status;
3009 goto out;
3010 }
3011
3012 nm = utf8_to_str(&args->objname, &len, NULL);
3013 if (nm == NULL) {
3014 *cs->statusp = resp->status = NFS4ERR_INVAL;
3015 goto out;
3016 }
3017
3018 if (len > MAXNAMELEN) {
3019 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
3020 kmem_free(nm, len);
3021 goto out;
3022 }
3023
3024 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3025 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
3026 MAXPATHLEN + 1);
3027
3028 if (name == NULL) {
3029 *cs->statusp = resp->status = NFS4ERR_INVAL;
3030 kmem_free(nm, len);
3031 goto out;
3032 }
3033
3034 *cs->statusp = resp->status = do_rfs4_op_lookup(name, req, cs);
3035
3036 if (name != nm)
3037 kmem_free(name, MAXPATHLEN + 1);
3038 kmem_free(nm, len);
3039
3040 out:
3041 DTRACE_NFSV4_2(op__lookup__done, struct compound_state *, cs,
3042 LOOKUP4res *, resp);
3043 }
3044
3045 /* ARGSUSED */
3046 static void
3047 rfs4_op_lookupp(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3048 struct compound_state *cs)
3049 {
3050 LOOKUPP4res *resp = &resop->nfs_resop4_u.oplookupp;
3051
3052 DTRACE_NFSV4_1(op__lookupp__start, struct compound_state *, cs);
3053
3054 if (cs->vp == NULL) {
3055 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3056 goto out;
3057 }
3058
3059 if (cs->vp->v_type != VDIR) {
3060 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
3061 goto out;
3062 }
3063
3064 *cs->statusp = resp->status = do_rfs4_op_lookup("..", req, cs);
3065
3066 /*
3067 * From NFSV4 Specification, LOOKUPP should not check for
3068 * NFS4ERR_WRONGSEC. Retrun NFS4_OK instead.
3069 */
3070 if (resp->status == NFS4ERR_WRONGSEC) {
3071 *cs->statusp = resp->status = NFS4_OK;
3072 }
3073
3074 out:
3075 DTRACE_NFSV4_2(op__lookupp__done, struct compound_state *, cs,
3076 LOOKUPP4res *, resp);
3077 }
3078
3079
3080 /*ARGSUSED2*/
3081 static void
3082 rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3083 struct compound_state *cs)
3084 {
3085 OPENATTR4args *args = &argop->nfs_argop4_u.opopenattr;
3086 OPENATTR4res *resp = &resop->nfs_resop4_u.opopenattr;
3087 vnode_t *avp = NULL;
3088 int lookup_flags = LOOKUP_XATTR, error;
3089 int exp_ro = 0;
3090
3091 DTRACE_NFSV4_2(op__openattr__start, struct compound_state *, cs,
3092 OPENATTR4args *, args);
3093
3094 if (cs->vp == NULL) {
3095 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3096 goto out;
3097 }
3098
3099 if ((cs->vp->v_vfsp->vfs_flag & VFS_XATTR) == 0 &&
3100 !vfs_has_feature(cs->vp->v_vfsp, VFSFT_SYSATTR_VIEWS)) {
3101 *cs->statusp = resp->status = puterrno4(ENOTSUP);
3102 goto out;
3103 }
3104
3105 /*
3106 * If file system supports passing ACE mask to VOP_ACCESS then
3107 * check for ACE_READ_NAMED_ATTRS, otherwise do legacy checks
3108 */
3109
3110 if (vfs_has_feature(cs->vp->v_vfsp, VFSFT_ACEMASKONACCESS))
3111 error = VOP_ACCESS(cs->vp, ACE_READ_NAMED_ATTRS,
3112 V_ACE_MASK, cs->cr, NULL);
3113 else
3114 error = ((VOP_ACCESS(cs->vp, VREAD, 0, cs->cr, NULL) != 0) &&
3115 (VOP_ACCESS(cs->vp, VWRITE, 0, cs->cr, NULL) != 0) &&
3116 (VOP_ACCESS(cs->vp, VEXEC, 0, cs->cr, NULL) != 0));
3117
3118 if (error) {
3119 *cs->statusp = resp->status = puterrno4(EACCES);
3120 goto out;
3121 }
3122
3123 /*
3124 * The CREATE_XATTR_DIR VOP flag cannot be specified if
3125 * the file system is exported read-only -- regardless of
3126 * createdir flag. Otherwise the attrdir would be created
3127 * (assuming server fs isn't mounted readonly locally). If
3128 * VOP_LOOKUP returns ENOENT in this case, the error will
3129 * be translated into EROFS. ENOSYS is mapped to ENOTSUP
3130 * because specfs has no VOP_LOOKUP op, so the macro would
3131 * return ENOSYS. EINVAL is returned by all (current)
3132 * Solaris file system implementations when any of their
3133 * restrictions are violated (xattr(dir) can't have xattrdir).
3134 * Returning NOTSUPP is more appropriate in this case
3135 * because the object will never be able to have an attrdir.
3136 */
3137 if (args->createdir && ! (exp_ro = rdonly4(req, cs)))
3138 lookup_flags |= CREATE_XATTR_DIR;
3139
3140 error = VOP_LOOKUP(cs->vp, "", &avp, NULL, lookup_flags, NULL, cs->cr,
3141 NULL, NULL, NULL);
3142
3143 if (error) {
3144 if (error == ENOENT && args->createdir && exp_ro)
3145 *cs->statusp = resp->status = puterrno4(EROFS);
3146 else if (error == EINVAL || error == ENOSYS)
3147 *cs->statusp = resp->status = puterrno4(ENOTSUP);
3148 else
3149 *cs->statusp = resp->status = puterrno4(error);
3150 goto out;
3151 }
3152
3153 ASSERT(avp->v_flag & V_XATTRDIR);
3154
3155 error = makefh4(&cs->fh, avp, cs->exi);
3156
3157 if (error) {
3158 VN_RELE(avp);
3159 *cs->statusp = resp->status = puterrno4(error);
3160 goto out;
3161 }
3162
3163 VN_RELE(cs->vp);
3164 cs->vp = avp;
3165
3166 /*
3167 * There is no requirement for an attrdir fh flag
3168 * because the attrdir has a vnode flag to distinguish
3169 * it from regular (non-xattr) directories. The
3170 * FH4_ATTRDIR flag is set for future sanity checks.
3171 */
3172 set_fh4_flag(&cs->fh, FH4_ATTRDIR);
3173 *cs->statusp = resp->status = NFS4_OK;
3174
3175 out:
3176 DTRACE_NFSV4_2(op__openattr__done, struct compound_state *, cs,
3177 OPENATTR4res *, resp);
3178 }
3179
3180 static int
3181 do_io(int direction, vnode_t *vp, struct uio *uio, int ioflag, cred_t *cred,
3182 caller_context_t *ct)
3183 {
3184 int error;
3185 int i;
3186 clock_t delaytime;
3187
3188 delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
3189
3190 /*
3191 * Don't block on mandatory locks. If this routine returns
3192 * EAGAIN, the caller should return NFS4ERR_LOCKED.
3193 */
3194 uio->uio_fmode = FNONBLOCK;
3195
3196 for (i = 0; i < rfs4_maxlock_tries; i++) {
3197
3198
3199 if (direction == FREAD) {
3200 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, ct);
3201 error = VOP_READ(vp, uio, ioflag, cred, ct);
3202 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, ct);
3203 } else {
3204 (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, ct);
3205 error = VOP_WRITE(vp, uio, ioflag, cred, ct);
3206 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, ct);
3207 }
3208
3209 if (error != EAGAIN)
3210 break;
3211
3212 if (i < rfs4_maxlock_tries - 1) {
3213 delay(delaytime);
3214 delaytime *= 2;
3215 }
3216 }
3217
3218 return (error);
3219 }
3220
3221 /* ARGSUSED */
3222 static void
3223 rfs4_op_read(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3224 struct compound_state *cs)
3225 {
3226 READ4args *args = &argop->nfs_argop4_u.opread;
3227 READ4res *resp = &resop->nfs_resop4_u.opread;
3228 int error;
3229 int verror;
3230 vnode_t *vp;
3231 struct vattr va;
3232 struct iovec iov, *iovp = NULL;
3233 int iovcnt;
3234 struct uio uio;
3235 u_offset_t offset;
3236 bool_t *deleg = &cs->deleg;
3237 nfsstat4 stat;
3238 int in_crit = 0;
3239 mblk_t *mp = NULL;
3240 int alloc_err = 0;
3241 int rdma_used = 0;
3242 int loaned_buffers;
3243 caller_context_t ct;
3244 struct uio *uiop;
3245
3246 DTRACE_NFSV4_2(op__read__start, struct compound_state *, cs,
3247 READ4args, args);
3248
3249 vp = cs->vp;
3250 if (vp == NULL) {
3251 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3252 goto out;
3253 }
3254 if (cs->access == CS_ACCESS_DENIED) {
3255 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3256 goto out;
3257 }
3258
3259 if ((stat = rfs4_check_stateid(FREAD, vp, &args->stateid, FALSE,
3260 deleg, TRUE, &ct)) != NFS4_OK) {
3261 *cs->statusp = resp->status = stat;
3262 goto out;
3263 }
3264
3265 /*
3266 * Enter the critical region before calling VOP_RWLOCK
3267 * to avoid a deadlock with write requests.
3268 */
3269 if (nbl_need_check(vp)) {
3270 nbl_start_crit(vp, RW_READER);
3271 in_crit = 1;
3272 if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
3273 &ct)) {
3274 *cs->statusp = resp->status = NFS4ERR_LOCKED;
3275 goto out;
3276 }
3277 }
3278
3279 if (args->wlist) {
3280 if (args->count > clist_len(args->wlist)) {
3281 *cs->statusp = resp->status = NFS4ERR_INVAL;
3282 goto out;
3283 }
3284 rdma_used = 1;
3285 }
3286
3287 /* use loaned buffers for TCP */
3288 loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;
3289
3290 va.va_mask = AT_MODE|AT_SIZE|AT_UID;
3291 verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3292
3293 /*
3294 * If we can't get the attributes, then we can't do the
3295 * right access checking. So, we'll fail the request.
3296 */
3297 if (verror) {
3298 *cs->statusp = resp->status = puterrno4(verror);
3299 goto out;
3300 }
3301
3302 if (vp->v_type != VREG) {
3303 *cs->statusp = resp->status =
3304 ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
3305 goto out;
3306 }
3307
3308 if (crgetuid(cs->cr) != va.va_uid &&
3309 (error = VOP_ACCESS(vp, VREAD, 0, cs->cr, &ct)) &&
3310 (error = VOP_ACCESS(vp, VEXEC, 0, cs->cr, &ct))) {
3311 *cs->statusp = resp->status = puterrno4(error);
3312 goto out;
3313 }
3314
3315 if (MANDLOCK(vp, va.va_mode)) { /* XXX - V4 supports mand locking */
3316 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3317 goto out;
3318 }
3319
3320 offset = args->offset;
3321 if (offset >= va.va_size) {
3322 *cs->statusp = resp->status = NFS4_OK;
3323 resp->eof = TRUE;
3324 resp->data_len = 0;
3325 resp->data_val = NULL;
3326 resp->mblk = NULL;
3327 /* RDMA */
3328 resp->wlist = args->wlist;
3329 resp->wlist_len = resp->data_len;
3330 *cs->statusp = resp->status = NFS4_OK;
3331 if (resp->wlist)
3332 clist_zero_len(resp->wlist);
3333 goto out;
3334 }
3335
3336 if (args->count == 0) {
3337 *cs->statusp = resp->status = NFS4_OK;
3338 resp->eof = FALSE;
3339 resp->data_len = 0;
3340 resp->data_val = NULL;
3341 resp->mblk = NULL;
3342 /* RDMA */
3343 resp->wlist = args->wlist;
3344 resp->wlist_len = resp->data_len;
3345 if (resp->wlist)
3346 clist_zero_len(resp->wlist);
3347 goto out;
3348 }
3349
3350 /*
3351 * Do not allocate memory more than maximum allowed
3352 * transfer size
3353 */
3354 if (args->count > rfs4_tsize(req))
3355 args->count = rfs4_tsize(req);
3356
3357 if (loaned_buffers) {
3358 uiop = (uio_t *)rfs_setup_xuio(vp);
3359 ASSERT(uiop != NULL);
3360 uiop->uio_segflg = UIO_SYSSPACE;
3361 uiop->uio_loffset = args->offset;
3362 uiop->uio_resid = args->count;
3363
3364 /* Jump to do the read if successful */
3365 if (!VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cs->cr, &ct)) {
3366 /*
3367 * Need to hold the vnode until after VOP_RETZCBUF()
3368 * is called.
3369 */
3370 VN_HOLD(vp);
3371 goto doio_read;
3372 }
3373
3374 DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
3375 uiop->uio_loffset, int, uiop->uio_resid);
3376
3377 uiop->uio_extflg = 0;
3378
3379 /* failure to setup for zero copy */
3380 rfs_free_xuio((void *)uiop);
3381 loaned_buffers = 0;
3382 }
3383
3384 /*
3385 * If returning data via RDMA Write, then grab the chunk list. If we
3386 * aren't returning READ data w/RDMA_WRITE, then grab a mblk.
3387 */
3388 if (rdma_used) {
3389 mp = NULL;
3390 (void) rdma_get_wchunk(req, &iov, args->wlist);
3391 uio.uio_iov = &iov;
3392 uio.uio_iovcnt = 1;
3393 } else {
3394 /*
3395 * mp will contain the data to be sent out in the read reply.
3396 * It will be freed after the reply has been sent.
3397 */
3398 mp = rfs_read_alloc(args->count, &iovp, &iovcnt);
3399 ASSERT(mp != NULL);
3400 ASSERT(alloc_err == 0);
3401 uio.uio_iov = iovp;
3402 uio.uio_iovcnt = iovcnt;
3403 }
3404
3405 uio.uio_segflg = UIO_SYSSPACE;
3406 uio.uio_extflg = UIO_COPY_CACHED;
3407 uio.uio_loffset = args->offset;
3408 uio.uio_resid = args->count;
3409 uiop = &uio;
3410
3411 doio_read:
3412 error = do_io(FREAD, vp, uiop, 0, cs->cr, &ct);
3413
3414 va.va_mask = AT_SIZE;
3415 verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3416
3417 if (error) {
3418 if (mp)
3419 freemsg(mp);
3420 *cs->statusp = resp->status = puterrno4(error);
3421 goto out;
3422 }
3423
3424 /* make mblk using zc buffers */
3425 if (loaned_buffers) {
3426 mp = uio_to_mblk(uiop);
3427 ASSERT(mp != NULL);
3428 }
3429
3430 *cs->statusp = resp->status = NFS4_OK;
3431
3432 ASSERT(uiop->uio_resid >= 0);
3433 resp->data_len = args->count - uiop->uio_resid;
3434 if (mp) {
3435 resp->data_val = (char *)mp->b_datap->db_base;
3436 rfs_rndup_mblks(mp, resp->data_len, loaned_buffers);
3437 } else {
3438 resp->data_val = (caddr_t)iov.iov_base;
3439 }
3440
3441 resp->mblk = mp;
3442
3443 if (!verror && offset + resp->data_len == va.va_size)
3444 resp->eof = TRUE;
3445 else
3446 resp->eof = FALSE;
3447
3448 if (rdma_used) {
3449 if (!rdma_setup_read_data4(args, resp)) {
3450 *cs->statusp = resp->status = NFS4ERR_INVAL;
3451 }
3452 } else {
3453 resp->wlist = NULL;
3454 }
3455
3456 out:
3457 if (in_crit)
3458 nbl_end_crit(vp);
3459
3460 if (iovp != NULL)
3461 kmem_free(iovp, iovcnt * sizeof (struct iovec));
3462
3463 DTRACE_NFSV4_2(op__read__done, struct compound_state *, cs,
3464 READ4res *, resp);
3465 }
3466
3467 static void
3468 rfs4_op_read_free(nfs_resop4 *resop)
3469 {
3470 READ4res *resp = &resop->nfs_resop4_u.opread;
3471
3472 if (resp->status == NFS4_OK && resp->mblk != NULL) {
3473 freemsg(resp->mblk);
3474 resp->mblk = NULL;
3475 resp->data_val = NULL;
3476 resp->data_len = 0;
3477 }
3478 }
3479
3480 static void
3481 rfs4_op_readdir_free(nfs_resop4 * resop)
3482 {
3483 READDIR4res *resp = &resop->nfs_resop4_u.opreaddir;
3484
3485 if (resp->status == NFS4_OK && resp->mblk != NULL) {
3486 freeb(resp->mblk);
3487 resp->mblk = NULL;
3488 resp->data_len = 0;
3489 }
3490 }
3491
3492
3493 /* ARGSUSED */
3494 static void
3495 rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3496 struct compound_state *cs)
3497 {
3498 PUTPUBFH4res *resp = &resop->nfs_resop4_u.opputpubfh;
3499 int error;
3500 vnode_t *vp;
3501 struct exportinfo *exi, *sav_exi;
3502 nfs_fh4_fmt_t *fh_fmtp;
3503 nfs_export_t *ne = nfs_get_export();
3504
3505 DTRACE_NFSV4_1(op__putpubfh__start, struct compound_state *, cs);
3506
3507 if (cs->vp) {
3508 VN_RELE(cs->vp);
3509 cs->vp = NULL;
3510 }
3511
3512 if (cs->cr)
3513 crfree(cs->cr);
3514
3515 cs->cr = crdup(cs->basecr);
3516
3517 vp = ne->exi_public->exi_vp;
3518 if (vp == NULL) {
3519 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3520 goto out;
3521 }
3522
3523 error = makefh4(&cs->fh, vp, ne->exi_public);
3524 if (error != 0) {
3525 *cs->statusp = resp->status = puterrno4(error);
3526 goto out;
3527 }
3528 sav_exi = cs->exi;
3529 if (ne->exi_public == ne->exi_root) {
3530 /*
3531 * No filesystem is actually shared public, so we default
3532 * to exi_root. In this case, we must check whether root
3533 * is exported.
3534 */
3535 fh_fmtp = (nfs_fh4_fmt_t *)cs->fh.nfs_fh4_val;
3536
3537 /*
3538 * if root filesystem is exported, the exportinfo struct that we
3539 * should use is what checkexport4 returns, because root_exi is
3540 * actually a mostly empty struct.
3541 */
3542 exi = checkexport4(&fh_fmtp->fh4_fsid,
3543 (fid_t *)&fh_fmtp->fh4_xlen, NULL);
3544 cs->exi = ((exi != NULL) ? exi : ne->exi_public);
3545 } else {
3546 /*
3547 * it's a properly shared filesystem
3548 */
3549 cs->exi = ne->exi_public;
3550 }
3551
3552 if (is_system_labeled()) {
3553 bslabel_t *clabel;
3554
3555 ASSERT(req->rq_label != NULL);
3556 clabel = req->rq_label;
3557 DTRACE_PROBE2(tx__rfs4__log__info__opputpubfh__clabel, char *,
3558 "got client label from request(1)",
3559 struct svc_req *, req);
3560 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3561 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3562 cs->exi)) {
3563 *cs->statusp = resp->status =
3564 NFS4ERR_SERVERFAULT;
3565 goto out;
3566 }
3567 }
3568 }
3569
3570 VN_HOLD(vp);
3571 cs->vp = vp;
3572
3573 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3574 VN_RELE(cs->vp);
3575 cs->vp = NULL;
3576 cs->exi = sav_exi;
3577 goto out;
3578 }
3579
3580 *cs->statusp = resp->status = NFS4_OK;
3581 out:
3582 DTRACE_NFSV4_2(op__putpubfh__done, struct compound_state *, cs,
3583 PUTPUBFH4res *, resp);
3584 }
3585
3586 /*
3587 * XXX - issue with put*fh operations. Suppose /export/home is exported.
3588 * Suppose an NFS client goes to mount /export/home/joe. If /export, home,
3589 * or joe have restrictive search permissions, then we shouldn't let
3590 * the client get a file handle. This is easy to enforce. However, we
3591 * don't know what security flavor should be used until we resolve the
3592 * path name. Another complication is uid mapping. If root is
3593 * the user, then it will be mapped to the anonymous user by default,
3594 * but we won't know that till we've resolved the path name. And we won't
3595 * know what the anonymous user is.
3596 * Luckily, SECINFO is specified to take a full filename.
 * So what we will have to do in rfs4_op_lookup is check that the flavor of
3598 * the target object matches that of the request, and if root was the
3599 * caller, check for the root= and anon= options, and if necessary,
3600 * repeat the lookup using the right cred_t. But that's not done yet.
3601 */
3602 /* ARGSUSED */
3603 static void
3604 rfs4_op_putfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3605 struct compound_state *cs)
3606 {
3607 PUTFH4args *args = &argop->nfs_argop4_u.opputfh;
3608 PUTFH4res *resp = &resop->nfs_resop4_u.opputfh;
3609 nfs_fh4_fmt_t *fh_fmtp;
3610
3611 DTRACE_NFSV4_2(op__putfh__start, struct compound_state *, cs,
3612 PUTFH4args *, args);
3613
3614 if (cs->vp) {
3615 VN_RELE(cs->vp);
3616 cs->vp = NULL;
3617 }
3618
3619 if (cs->cr) {
3620 crfree(cs->cr);
3621 cs->cr = NULL;
3622 }
3623
3624 if (args->object.nfs_fh4_len < NFS_FH4_LEN) {
3625 *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
3626 goto out;
3627 }
3628
3629 fh_fmtp = (nfs_fh4_fmt_t *)args->object.nfs_fh4_val;
3630 cs->exi = checkexport4(&fh_fmtp->fh4_fsid, (fid_t *)&fh_fmtp->fh4_xlen,
3631 NULL);
3632
3633 if (cs->exi == NULL) {
3634 *cs->statusp = resp->status = NFS4ERR_STALE;
3635 goto out;
3636 }
3637
3638 cs->cr = crdup(cs->basecr);
3639
3640 ASSERT(cs->cr != NULL);
3641
3642 if (! (cs->vp = nfs4_fhtovp(&args->object, cs->exi, &resp->status))) {
3643 *cs->statusp = resp->status;
3644 goto out;
3645 }
3646
3647 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3648 VN_RELE(cs->vp);
3649 cs->vp = NULL;
3650 goto out;
3651 }
3652
3653 nfs_fh4_copy(&args->object, &cs->fh);
3654 *cs->statusp = resp->status = NFS4_OK;
3655 cs->deleg = FALSE;
3656
3657 out:
3658 DTRACE_NFSV4_2(op__putfh__done, struct compound_state *, cs,
3659 PUTFH4res *, resp);
3660 }
3661
3662 /* ARGSUSED */
3663 static void
3664 rfs4_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3665 struct compound_state *cs)
3666 {
3667 PUTROOTFH4res *resp = &resop->nfs_resop4_u.opputrootfh;
3668 int error;
3669 fid_t fid;
3670 struct exportinfo *exi, *sav_exi;
3671
3672 DTRACE_NFSV4_1(op__putrootfh__start, struct compound_state *, cs);
3673
3674 if (cs->vp) {
3675 VN_RELE(cs->vp);
3676 cs->vp = NULL;
3677 }
3678
3679 if (cs->cr)
3680 crfree(cs->cr);
3681
3682 cs->cr = crdup(cs->basecr);
3683
3684 /*
3685 * Using rootdir, the system root vnode,
3686 * get its fid.
3687 */
3688 bzero(&fid, sizeof (fid));
3689 fid.fid_len = MAXFIDSZ;
3690 error = vop_fid_pseudo(ZONE_ROOTVP(), &fid);
3691 if (error != 0) {
3692 *cs->statusp = resp->status = puterrno4(error);
3693 goto out;
3694 }
3695
3696 /*
3697 * Then use the root fsid & fid it to find out if it's exported
3698 *
3699 * If the server root isn't exported directly, then
3700 * it should at least be a pseudo export based on
3701 * one or more exports further down in the server's
3702 * file tree.
3703 */
3704 exi = checkexport4(&ZONE_ROOTVP()->v_vfsp->vfs_fsid, &fid, NULL);
3705 if (exi == NULL || exi->exi_export.ex_flags & EX_PUBLIC) {
3706 NFS4_DEBUG(rfs4_debug,
3707 (CE_WARN, "rfs4_op_putrootfh: export check failure"));
3708 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3709 goto out;
3710 }
3711
3712 /*
3713 * Now make a filehandle based on the root
3714 * export and root vnode.
3715 */
3716 error = makefh4(&cs->fh, ZONE_ROOTVP(), exi);
3717 if (error != 0) {
3718 *cs->statusp = resp->status = puterrno4(error);
3719 goto out;
3720 }
3721
3722 sav_exi = cs->exi;
3723 cs->exi = exi;
3724
3725 VN_HOLD(ZONE_ROOTVP());
3726 cs->vp = ZONE_ROOTVP();
3727
3728 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3729 VN_RELE(cs->vp);
3730 cs->vp = NULL;
3731 cs->exi = sav_exi;
3732 goto out;
3733 }
3734
3735 *cs->statusp = resp->status = NFS4_OK;
3736 cs->deleg = FALSE;
3737 out:
3738 DTRACE_NFSV4_2(op__putrootfh__done, struct compound_state *, cs,
3739 PUTROOTFH4res *, resp);
3740 }
3741
3742 /*
3743 * set_rdattr_params sets up the variables used to manage what information
3744 * to get for each directory entry.
3745 */
3746 static nfsstat4
3747 set_rdattr_params(struct nfs4_svgetit_arg *sargp,
3748 bitmap4 attrs, bool_t *need_to_lookup)
3749 {
3750 uint_t va_mask;
3751 nfsstat4 status;
3752 bitmap4 objbits;
3753
3754 status = bitmap4_to_attrmask(attrs, sargp);
3755 if (status != NFS4_OK) {
3756 /*
3757 * could not even figure attr mask
3758 */
3759 return (status);
3760 }
3761 va_mask = sargp->vap->va_mask;
3762
3763 /*
3764 * dirent's d_ino is always correct value for mounted_on_fileid.
3765 * mntdfid_set is set once here, but mounted_on_fileid is
3766 * set in main dirent processing loop for each dirent.
3767 * The mntdfid_set is a simple optimization that lets the
3768 * server attr code avoid work when caller is readdir.
3769 */
3770 sargp->mntdfid_set = TRUE;
3771
3772 /*
3773 * Lookup entry only if client asked for any of the following:
3774 * a) vattr attrs
3775 * b) vfs attrs
3776 * c) attrs w/per-object scope requested (change, filehandle, etc)
3777 * other than mounted_on_fileid (which we can take from dirent)
3778 */
3779 objbits = attrs ? attrs & NFS4_VP_ATTR_MASK : 0;
3780
3781 if (va_mask || sargp->sbp || (objbits & ~FATTR4_MOUNTED_ON_FILEID_MASK))
3782 *need_to_lookup = TRUE;
3783 else
3784 *need_to_lookup = FALSE;
3785
3786 if (sargp->sbp == NULL)
3787 return (NFS4_OK);
3788
3789 /*
3790 * If filesystem attrs are requested, get them now from the
3791 * directory vp, as most entries will have same filesystem. The only
3792 * exception are mounted over entries but we handle
3793 * those as we go (XXX mounted over detection not yet implemented).
3794 */
3795 sargp->vap->va_mask = 0; /* to avoid VOP_GETATTR */
3796 status = bitmap4_get_sysattrs(sargp);
3797 sargp->vap->va_mask = va_mask;
3798
3799 if ((status != NFS4_OK) && sargp->rdattr_error_req) {
3800 /*
3801 * Failed to get filesystem attributes.
3802 * Return a rdattr_error for each entry, but don't fail.
3803 * However, don't get any obj-dependent attrs.
3804 */
3805 sargp->rdattr_error = status; /* for rdattr_error */
3806 *need_to_lookup = FALSE;
3807 /*
3808 * At least get fileid for regular readdir output
3809 */
3810 sargp->vap->va_mask &= AT_NODEID;
3811 status = NFS4_OK;
3812 }
3813
3814 return (status);
3815 }
3816
3817 /*
3818 * readlink: args: CURRENT_FH.
3819 * res: status. If success - CURRENT_FH unchanged, return linktext.
3820 */
3821
3822 /* ARGSUSED */
3823 static void
3824 rfs4_op_readlink(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3825 struct compound_state *cs)
3826 {
3827 READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3828 int error;
3829 vnode_t *vp;
3830 struct iovec iov;
3831 struct vattr va;
3832 struct uio uio;
3833 char *data;
3834 struct sockaddr *ca;
3835 char *name = NULL;
3836 int is_referral;
3837
3838 DTRACE_NFSV4_1(op__readlink__start, struct compound_state *, cs);
3839
3840 /* CURRENT_FH: directory */
3841 vp = cs->vp;
3842 if (vp == NULL) {
3843 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3844 goto out;
3845 }
3846
3847 if (cs->access == CS_ACCESS_DENIED) {
3848 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3849 goto out;
3850 }
3851
3852 /* Is it a referral? */
3853 if (vn_is_nfs_reparse(vp, cs->cr) && client_is_downrev(req)) {
3854
3855 is_referral = 1;
3856
3857 } else {
3858
3859 is_referral = 0;
3860
3861 if (vp->v_type == VDIR) {
3862 *cs->statusp = resp->status = NFS4ERR_ISDIR;
3863 goto out;
3864 }
3865
3866 if (vp->v_type != VLNK) {
3867 *cs->statusp = resp->status = NFS4ERR_INVAL;
3868 goto out;
3869 }
3870
3871 }
3872
3873 va.va_mask = AT_MODE;
3874 error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
3875 if (error) {
3876 *cs->statusp = resp->status = puterrno4(error);
3877 goto out;
3878 }
3879
3880 if (MANDLOCK(vp, va.va_mode)) {
3881 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3882 goto out;
3883 }
3884
3885 data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
3886
3887 if (is_referral) {
3888 char *s;
3889 size_t strsz;
3890
3891 /* Get an artificial symlink based on a referral */
3892 s = build_symlink(vp, cs->cr, &strsz);
3893 global_svstat_ptr[4][NFS_REFERLINKS].value.ui64++;
3894 DTRACE_PROBE2(nfs4serv__func__referral__reflink,
3895 vnode_t *, vp, char *, s);
3896 if (s == NULL)
3897 error = EINVAL;
3898 else {
3899 error = 0;
3900 (void) strlcpy(data, s, MAXPATHLEN + 1);
3901 kmem_free(s, strsz);
3902 }
3903
3904 } else {
3905
3906 iov.iov_base = data;
3907 iov.iov_len = MAXPATHLEN;
3908 uio.uio_iov = &iov;
3909 uio.uio_iovcnt = 1;
3910 uio.uio_segflg = UIO_SYSSPACE;
3911 uio.uio_extflg = UIO_COPY_CACHED;
3912 uio.uio_loffset = 0;
3913 uio.uio_resid = MAXPATHLEN;
3914
3915 error = VOP_READLINK(vp, &uio, cs->cr, NULL);
3916
3917 if (!error)
3918 *(data + MAXPATHLEN - uio.uio_resid) = '\0';
3919 }
3920
3921 if (error) {
3922 kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3923 *cs->statusp = resp->status = puterrno4(error);
3924 goto out;
3925 }
3926
3927 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3928 name = nfscmd_convname(ca, cs->exi, data, NFSCMD_CONV_OUTBOUND,
3929 MAXPATHLEN + 1);
3930
3931 if (name == NULL) {
3932 /*
3933 * Even though the conversion failed, we return
3934 * something. We just don't translate it.
3935 */
3936 name = data;
3937 }
3938
3939 /*
3940 * treat link name as data
3941 */
3942 (void) str_to_utf8(name, (utf8string *)&resp->link);
3943
3944 if (name != data)
3945 kmem_free(name, MAXPATHLEN + 1);
3946 kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3947 *cs->statusp = resp->status = NFS4_OK;
3948
3949 out:
3950 DTRACE_NFSV4_2(op__readlink__done, struct compound_state *, cs,
3951 READLINK4res *, resp);
3952 }
3953
3954 static void
3955 rfs4_op_readlink_free(nfs_resop4 *resop)
3956 {
3957 READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3958 utf8string *symlink = (utf8string *)&resp->link;
3959
3960 if (symlink->utf8string_val) {
3961 UTF8STRING_FREE(*symlink)
3962 }
3963 }
3964
3965 /*
3966 * release_lockowner:
3967 * Release any state associated with the supplied
3968 * lockowner. Note if any lo_state is holding locks we will not
3969 * rele that lo_state and thus the lockowner will not be destroyed.
3970 * A client using lock after the lock owner stateid has been released
3971 * will suffer the consequence of NFS4ERR_BAD_STATEID and would have
3972 * to reissue the lock with new_lock_owner set to TRUE.
3973 * args: lock_owner
3974 * res: status
3975 */
3976 /* ARGSUSED */
3977 static void
3978 rfs4_op_release_lockowner(nfs_argop4 *argop, nfs_resop4 *resop,
3979 struct svc_req *req, struct compound_state *cs)
3980 {
3981 RELEASE_LOCKOWNER4args *ap = &argop->nfs_argop4_u.oprelease_lockowner;
3982 RELEASE_LOCKOWNER4res *resp = &resop->nfs_resop4_u.oprelease_lockowner;
3983 rfs4_lockowner_t *lo;
3984 rfs4_openowner_t *oo;
3985 rfs4_state_t *sp;
3986 rfs4_lo_state_t *lsp;
3987 rfs4_client_t *cp;
3988 bool_t create = FALSE;
3989 locklist_t *llist;
3990 sysid_t sysid;
3991
3992 DTRACE_NFSV4_2(op__release__lockowner__start, struct compound_state *,
3993 cs, RELEASE_LOCKOWNER4args *, ap);
3994
3995 /* Make sure there is a clientid around for this request */
3996 cp = rfs4_findclient_by_id(ap->lock_owner.clientid, FALSE);
3997
3998 if (cp == NULL) {
3999 *cs->statusp = resp->status =
4000 rfs4_check_clientid(&ap->lock_owner.clientid, 0);
4001 goto out;
4002 }
4003 rfs4_client_rele(cp);
4004
4005 lo = rfs4_findlockowner(&ap->lock_owner, &create);
4006 if (lo == NULL) {
4007 *cs->statusp = resp->status = NFS4_OK;
4008 goto out;
4009 }
4010 ASSERT(lo->rl_client != NULL);
4011
4012 /*
4013 * Check for EXPIRED client. If so will reap state with in a lease
4014 * period or on next set_clientid_confirm step
4015 */
4016 if (rfs4_lease_expired(lo->rl_client)) {
4017 rfs4_lockowner_rele(lo);
4018 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
4019 goto out;
4020 }
4021
4022 /*
4023 * If no sysid has been assigned, then no locks exist; just return.
4024 */
4025 rfs4_dbe_lock(lo->rl_client->rc_dbe);
4026 if (lo->rl_client->rc_sysidt == LM_NOSYSID) {
4027 rfs4_lockowner_rele(lo);
4028 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
4029 goto out;
4030 }
4031
4032 sysid = lo->rl_client->rc_sysidt;
4033 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
4034
4035 /*
4036 * Mark the lockowner invalid.
4037 */
4038 rfs4_dbe_hide(lo->rl_dbe);
4039
4040 /*
4041 * sysid-pid pair should now not be used since the lockowner is
4042 * invalid. If the client were to instantiate the lockowner again
4043 * it would be assigned a new pid. Thus we can get the list of
4044 * current locks.
4045 */
4046
4047 llist = flk_get_active_locks(sysid, lo->rl_pid);
4048 /* If we are still holding locks fail */
4049 if (llist != NULL) {
4050
4051 *cs->statusp = resp->status = NFS4ERR_LOCKS_HELD;
4052
4053 flk_free_locklist(llist);
4054 /*
4055 * We need to unhide the lockowner so the client can
4056 * try it again. The bad thing here is if the client
4057 * has a logic error that took it here in the first place
4058 * they probably have lost accounting of the locks that it
4059 * is holding. So we may have dangling state until the
4060 * open owner state is reaped via close. One scenario
4061 * that could possibly occur is that the client has
4062 * sent the unlock request(s) in separate threads
4063 * and has not waited for the replies before sending the
4064 * RELEASE_LOCKOWNER request. Presumably, it would expect
4065 * and deal appropriately with NFS4ERR_LOCKS_HELD, by
4066 * reissuing the request.
4067 */
4068 rfs4_dbe_unhide(lo->rl_dbe);
4069 rfs4_lockowner_rele(lo);
4070 goto out;
4071 }
4072
4073 /*
4074 * For the corresponding client we need to check each open
4075 * owner for any opens that have lockowner state associated
4076 * with this lockowner.
4077 */
4078
4079 rfs4_dbe_lock(lo->rl_client->rc_dbe);
4080 for (oo = list_head(&lo->rl_client->rc_openownerlist); oo != NULL;
4081 oo = list_next(&lo->rl_client->rc_openownerlist, oo)) {
4082
4083 rfs4_dbe_lock(oo->ro_dbe);
4084 for (sp = list_head(&oo->ro_statelist); sp != NULL;
4085 sp = list_next(&oo->ro_statelist, sp)) {
4086
4087 rfs4_dbe_lock(sp->rs_dbe);
4088 for (lsp = list_head(&sp->rs_lostatelist);
4089 lsp != NULL;
4090 lsp = list_next(&sp->rs_lostatelist, lsp)) {
4091 if (lsp->rls_locker == lo) {
4092 rfs4_dbe_lock(lsp->rls_dbe);
4093 rfs4_dbe_invalidate(lsp->rls_dbe);
4094 rfs4_dbe_unlock(lsp->rls_dbe);
4095 }
4096 }
4097 rfs4_dbe_unlock(sp->rs_dbe);
4098 }
4099 rfs4_dbe_unlock(oo->ro_dbe);
4100 }
4101 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
4102
4103 rfs4_lockowner_rele(lo);
4104
4105 *cs->statusp = resp->status = NFS4_OK;
4106
4107 out:
4108 DTRACE_NFSV4_2(op__release__lockowner__done, struct compound_state *,
4109 cs, RELEASE_LOCKOWNER4res *, resp);
4110 }
4111
4112 /*
4113 * short utility function to lookup a file and recall the delegation
4114 */
4115 static rfs4_file_t *
4116 rfs4_lookup_and_findfile(vnode_t *dvp, char *nm, vnode_t **vpp,
4117 int *lkup_error, cred_t *cr)
4118 {
4119 vnode_t *vp;
4120 rfs4_file_t *fp = NULL;
4121 bool_t fcreate = FALSE;
4122 int error;
4123
4124 if (vpp)
4125 *vpp = NULL;
4126
4127 if ((error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cr, NULL, NULL,
4128 NULL)) == 0) {
4129 if (vp->v_type == VREG)
4130 fp = rfs4_findfile(vp, NULL, &fcreate);
4131 if (vpp)
4132 *vpp = vp;
4133 else
4134 VN_RELE(vp);
4135 }
4136
4137 if (lkup_error)
4138 *lkup_error = error;
4139
4140 return (fp);
4141 }
4142
4143 /*
4144 * remove: args: CURRENT_FH: directory; name.
4145 * res: status. If success - CURRENT_FH unchanged, return change_info
4146 * for directory.
4147 */
4148 /* ARGSUSED */
4149 static void
4150 rfs4_op_remove(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4151 struct compound_state *cs)
4152 {
4153 REMOVE4args *args = &argop->nfs_argop4_u.opremove;
4154 REMOVE4res *resp = &resop->nfs_resop4_u.opremove;
4155 int error;
4156 vnode_t *dvp, *vp;
4157 struct vattr bdva, idva, adva;
4158 char *nm;
4159 uint_t len;
4160 rfs4_file_t *fp;
4161 int in_crit = 0;
4162 bslabel_t *clabel;
4163 struct sockaddr *ca;
4164 char *name = NULL;
4165 nfsstat4 status;
4166
4167 DTRACE_NFSV4_2(op__remove__start, struct compound_state *, cs,
4168 REMOVE4args *, args);
4169
4170 /* CURRENT_FH: directory */
4171 dvp = cs->vp;
4172 if (dvp == NULL) {
4173 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4174 goto out;
4175 }
4176
4177 if (cs->access == CS_ACCESS_DENIED) {
4178 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4179 goto out;
4180 }
4181
4182 /*
4183 * If there is an unshared filesystem mounted on this vnode,
4184 * Do not allow to remove anything in this directory.
4185 */
4186 if (vn_ismntpt(dvp)) {
4187 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4188 goto out;
4189 }
4190
4191 if (dvp->v_type != VDIR) {
4192 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4193 goto out;
4194 }
4195
4196 status = utf8_dir_verify(&args->target);
4197 if (status != NFS4_OK) {
4198 *cs->statusp = resp->status = status;
4199 goto out;
4200 }
4201
4202 /*
4203 * Lookup the file so that we can check if it's a directory
4204 */
4205 nm = utf8_to_fn(&args->target, &len, NULL);
4206 if (nm == NULL) {
4207 *cs->statusp = resp->status = NFS4ERR_INVAL;
4208 goto out;
4209 }
4210
4211 if (len > MAXNAMELEN) {
4212 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4213 kmem_free(nm, len);
4214 goto out;
4215 }
4216
4217 if (rdonly4(req, cs)) {
4218 *cs->statusp = resp->status = NFS4ERR_ROFS;
4219 kmem_free(nm, len);
4220 goto out;
4221 }
4222
4223 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4224 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
4225 MAXPATHLEN + 1);
4226
4227 if (name == NULL) {
4228 *cs->statusp = resp->status = NFS4ERR_INVAL;
4229 kmem_free(nm, len);
4230 goto out;
4231 }
4232
4233 /*
4234 * Lookup the file to determine type and while we are see if
4235 * there is a file struct around and check for delegation.
4236 * We don't need to acquire va_seq before this lookup, if
4237 * it causes an update, cinfo.before will not match, which will
4238 * trigger a cache flush even if atomic is TRUE.
4239 */
4240 if (fp = rfs4_lookup_and_findfile(dvp, name, &vp, &error, cs->cr)) {
4241 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4242 NULL)) {
4243 VN_RELE(vp);
4244 rfs4_file_rele(fp);
4245 *cs->statusp = resp->status = NFS4ERR_DELAY;
4246 if (nm != name)
4247 kmem_free(name, MAXPATHLEN + 1);
4248 kmem_free(nm, len);
4249 goto out;
4250 }
4251 }
4252
4253 /* Didn't find anything to remove */
4254 if (vp == NULL) {
4255 *cs->statusp = resp->status = error;
4256 if (nm != name)
4257 kmem_free(name, MAXPATHLEN + 1);
4258 kmem_free(nm, len);
4259 goto out;
4260 }
4261
4262 if (nbl_need_check(vp)) {
4263 nbl_start_crit(vp, RW_READER);
4264 in_crit = 1;
4265 if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0, NULL)) {
4266 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4267 if (nm != name)
4268 kmem_free(name, MAXPATHLEN + 1);
4269 kmem_free(nm, len);
4270 nbl_end_crit(vp);
4271 VN_RELE(vp);
4272 if (fp) {
4273 rfs4_clear_dont_grant(fp);
4274 rfs4_file_rele(fp);
4275 }
4276 goto out;
4277 }
4278 }
4279
4280 /* check label before allowing removal */
4281 if (is_system_labeled()) {
4282 ASSERT(req->rq_label != NULL);
4283 clabel = req->rq_label;
4284 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
4285 "got client label from request(1)",
4286 struct svc_req *, req);
4287 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4288 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4289 cs->exi)) {
4290 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4291 if (name != nm)
4292 kmem_free(name, MAXPATHLEN + 1);
4293 kmem_free(nm, len);
4294 if (in_crit)
4295 nbl_end_crit(vp);
4296 VN_RELE(vp);
4297 if (fp) {
4298 rfs4_clear_dont_grant(fp);
4299 rfs4_file_rele(fp);
4300 }
4301 goto out;
4302 }
4303 }
4304 }
4305
4306 /* Get dir "before" change value */
4307 bdva.va_mask = AT_CTIME|AT_SEQ;
4308 error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
4309 if (error) {
4310 *cs->statusp = resp->status = puterrno4(error);
4311 if (nm != name)
4312 kmem_free(name, MAXPATHLEN + 1);
4313 kmem_free(nm, len);
4314 if (in_crit)
4315 nbl_end_crit(vp);
4316 VN_RELE(vp);
4317 if (fp) {
4318 rfs4_clear_dont_grant(fp);
4319 rfs4_file_rele(fp);
4320 }
4321 goto out;
4322 }
4323 NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
4324
4325 /* Actually do the REMOVE operation */
4326 if (vp->v_type == VDIR) {
4327 /*
4328 * Can't remove a directory that has a mounted-on filesystem.
4329 */
4330 if (vn_ismntpt(vp)) {
4331 error = EACCES;
4332 } else {
4333 /*
4334 * System V defines rmdir to return EEXIST,
4335 * not ENOTEMPTY, if the directory is not
4336 * empty. A System V NFS server needs to map
4337 * NFS4ERR_EXIST to NFS4ERR_NOTEMPTY to
4338 * transmit over the wire.
4339 */
4340 if ((error = VOP_RMDIR(dvp, name, ZONE_ROOTVP(), cs->cr,
4341 NULL, 0)) == EEXIST)
4342 error = ENOTEMPTY;
4343 }
4344 } else {
4345 if ((error = VOP_REMOVE(dvp, name, cs->cr, NULL, 0)) == 0 &&
4346 fp != NULL) {
4347 struct vattr va;
4348 vnode_t *tvp;
4349
4350 rfs4_dbe_lock(fp->rf_dbe);
4351 tvp = fp->rf_vp;
4352 if (tvp)
4353 VN_HOLD(tvp);
4354 rfs4_dbe_unlock(fp->rf_dbe);
4355
4356 if (tvp) {
4357 /*
4358 * This is va_seq safe because we are not
4359 * manipulating dvp.
4360 */
4361 va.va_mask = AT_NLINK;
4362 if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4363 va.va_nlink == 0) {
4364 /* Remove state on file remove */
4365 if (in_crit) {
4366 nbl_end_crit(vp);
4367 in_crit = 0;
4368 }
4369 rfs4_close_all_state(fp);
4370 }
4371 VN_RELE(tvp);
4372 }
4373 }
4374 }
4375
4376 if (in_crit)
4377 nbl_end_crit(vp);
4378 VN_RELE(vp);
4379
4380 if (fp) {
4381 rfs4_clear_dont_grant(fp);
4382 rfs4_file_rele(fp);
4383 }
4384 if (nm != name)
4385 kmem_free(name, MAXPATHLEN + 1);
4386 kmem_free(nm, len);
4387
4388 if (error) {
4389 *cs->statusp = resp->status = puterrno4(error);
4390 goto out;
4391 }
4392
4393 /*
4394 * Get the initial "after" sequence number, if it fails, set to zero
4395 */
4396 idva.va_mask = AT_SEQ;
4397 if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
4398 idva.va_seq = 0;
4399
4400 /*
4401 * Force modified data and metadata out to stable storage.
4402 */
4403 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
4404
4405 /*
4406 * Get "after" change value, if it fails, simply return the
4407 * before value.
4408 */
4409 adva.va_mask = AT_CTIME|AT_SEQ;
4410 if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
4411 adva.va_ctime = bdva.va_ctime;
4412 adva.va_seq = 0;
4413 }
4414
4415 NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
4416
4417 /*
4418 * The cinfo.atomic = TRUE only if we have
4419 * non-zero va_seq's, and it has incremented by exactly one
4420 * during the VOP_REMOVE/RMDIR and it didn't change during
4421 * the VOP_FSYNC.
4422 */
4423 if (bdva.va_seq && idva.va_seq && adva.va_seq &&
4424 idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
4425 resp->cinfo.atomic = TRUE;
4426 else
4427 resp->cinfo.atomic = FALSE;
4428
4429 *cs->statusp = resp->status = NFS4_OK;
4430
4431 out:
4432 DTRACE_NFSV4_2(op__remove__done, struct compound_state *, cs,
4433 REMOVE4res *, resp);
4434 }
4435
4436 /*
4437 * rename: args: SAVED_FH: from directory, CURRENT_FH: target directory,
4438 * oldname and newname.
4439 * res: status. If success - CURRENT_FH unchanged, return change_info
4440 * for both from and target directories.
4441 */
4442 /* ARGSUSED */
4443 static void
4444 rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4445 struct compound_state *cs)
4446 {
4447 RENAME4args *args = &argop->nfs_argop4_u.oprename;
4448 RENAME4res *resp = &resop->nfs_resop4_u.oprename;
4449 int error;
4450 vnode_t *odvp;
4451 vnode_t *ndvp;
4452 vnode_t *srcvp, *targvp, *tvp;
4453 struct vattr obdva, oidva, oadva;
4454 struct vattr nbdva, nidva, nadva;
4455 char *onm, *nnm;
4456 uint_t olen, nlen;
4457 rfs4_file_t *fp, *sfp;
4458 int in_crit_src, in_crit_targ;
4459 int fp_rele_grant_hold, sfp_rele_grant_hold;
4460 int unlinked;
4461 bslabel_t *clabel;
4462 struct sockaddr *ca;
4463 char *converted_onm = NULL;
4464 char *converted_nnm = NULL;
4465 nfsstat4 status;
4466
4467 DTRACE_NFSV4_2(op__rename__start, struct compound_state *, cs,
4468 RENAME4args *, args);
4469
4470 fp = sfp = NULL;
4471 srcvp = targvp = tvp = NULL;
4472 in_crit_src = in_crit_targ = 0;
4473 fp_rele_grant_hold = sfp_rele_grant_hold = 0;
4474 unlinked = 0;
4475
4476 /* CURRENT_FH: target directory */
4477 ndvp = cs->vp;
4478 if (ndvp == NULL) {
4479 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4480 goto out;
4481 }
4482
4483 /* SAVED_FH: from directory */
4484 odvp = cs->saved_vp;
4485 if (odvp == NULL) {
4486 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4487 goto out;
4488 }
4489
4490 if (cs->access == CS_ACCESS_DENIED) {
4491 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4492 goto out;
4493 }
4494
4495 /*
4496 * If there is an unshared filesystem mounted on this vnode,
4497 * do not allow to rename objects in this directory.
4498 */
4499 if (vn_ismntpt(odvp)) {
4500 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4501 goto out;
4502 }
4503
4504 /*
4505 * If there is an unshared filesystem mounted on this vnode,
4506 * do not allow to rename to this directory.
4507 */
4508 if (vn_ismntpt(ndvp)) {
4509 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4510 goto out;
4511 }
4512
4513 if (odvp->v_type != VDIR || ndvp->v_type != VDIR) {
4514 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4515 goto out;
4516 }
4517
4518 if (cs->saved_exi != cs->exi) {
4519 *cs->statusp = resp->status = NFS4ERR_XDEV;
4520 goto out;
4521 }
4522
4523 status = utf8_dir_verify(&args->oldname);
4524 if (status != NFS4_OK) {
4525 *cs->statusp = resp->status = status;
4526 goto out;
4527 }
4528
4529 status = utf8_dir_verify(&args->newname);
4530 if (status != NFS4_OK) {
4531 *cs->statusp = resp->status = status;
4532 goto out;
4533 }
4534
4535 onm = utf8_to_fn(&args->oldname, &olen, NULL);
4536 if (onm == NULL) {
4537 *cs->statusp = resp->status = NFS4ERR_INVAL;
4538 goto out;
4539 }
4540 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4541 nlen = MAXPATHLEN + 1;
4542 converted_onm = nfscmd_convname(ca, cs->exi, onm, NFSCMD_CONV_INBOUND,
4543 nlen);
4544
4545 if (converted_onm == NULL) {
4546 *cs->statusp = resp->status = NFS4ERR_INVAL;
4547 kmem_free(onm, olen);
4548 goto out;
4549 }
4550
4551 nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4552 if (nnm == NULL) {
4553 *cs->statusp = resp->status = NFS4ERR_INVAL;
4554 if (onm != converted_onm)
4555 kmem_free(converted_onm, MAXPATHLEN + 1);
4556 kmem_free(onm, olen);
4557 goto out;
4558 }
4559 converted_nnm = nfscmd_convname(ca, cs->exi, nnm, NFSCMD_CONV_INBOUND,
4560 MAXPATHLEN + 1);
4561
4562 if (converted_nnm == NULL) {
4563 *cs->statusp = resp->status = NFS4ERR_INVAL;
4564 kmem_free(nnm, nlen);
4565 nnm = NULL;
4566 if (onm != converted_onm)
4567 kmem_free(converted_onm, MAXPATHLEN + 1);
4568 kmem_free(onm, olen);
4569 goto out;
4570 }
4571
4572
4573 if (olen > MAXNAMELEN || nlen > MAXNAMELEN) {
4574 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4575 kmem_free(onm, olen);
4576 kmem_free(nnm, nlen);
4577 goto out;
4578 }
4579
4580
4581 if (rdonly4(req, cs)) {
4582 *cs->statusp = resp->status = NFS4ERR_ROFS;
4583 if (onm != converted_onm)
4584 kmem_free(converted_onm, MAXPATHLEN + 1);
4585 kmem_free(onm, olen);
4586 if (nnm != converted_nnm)
4587 kmem_free(converted_nnm, MAXPATHLEN + 1);
4588 kmem_free(nnm, nlen);
4589 goto out;
4590 }
4591
4592 /* check label of the target dir */
4593 if (is_system_labeled()) {
4594 ASSERT(req->rq_label != NULL);
4595 clabel = req->rq_label;
4596 DTRACE_PROBE2(tx__rfs4__log__info__oprename__clabel, char *,
4597 "got client label from request(1)",
4598 struct svc_req *, req);
4599 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4600 if (!do_rfs_label_check(clabel, ndvp,
4601 EQUALITY_CHECK, cs->exi)) {
4602 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4603 goto err_out;
4604 }
4605 }
4606 }
4607
4608 /*
4609 * Is the source a file and have a delegation?
4610 * We don't need to acquire va_seq before these lookups, if
4611 * it causes an update, cinfo.before will not match, which will
4612 * trigger a cache flush even if atomic is TRUE.
4613 */
4614 if (sfp = rfs4_lookup_and_findfile(odvp, converted_onm, &srcvp,
4615 &error, cs->cr)) {
4616 if (rfs4_check_delegated_byfp(FWRITE, sfp, TRUE, TRUE, TRUE,
4617 NULL)) {
4618 *cs->statusp = resp->status = NFS4ERR_DELAY;
4619 goto err_out;
4620 }
4621 }
4622
4623 if (srcvp == NULL) {
4624 *cs->statusp = resp->status = puterrno4(error);
4625 if (onm != converted_onm)
4626 kmem_free(converted_onm, MAXPATHLEN + 1);
4627 kmem_free(onm, olen);
4628 if (nnm != converted_nnm)
4629 kmem_free(converted_nnm, MAXPATHLEN + 1);
4630 kmem_free(nnm, nlen);
4631 goto out;
4632 }
4633
4634 sfp_rele_grant_hold = 1;
4635
4636 /* Does the destination exist and a file and have a delegation? */
4637 if (fp = rfs4_lookup_and_findfile(ndvp, converted_nnm, &targvp,
4638 NULL, cs->cr)) {
4639 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4640 NULL)) {
4641 *cs->statusp = resp->status = NFS4ERR_DELAY;
4642 goto err_out;
4643 }
4644 }
4645 fp_rele_grant_hold = 1;
4646
4647 /* Check for NBMAND lock on both source and target */
4648 if (nbl_need_check(srcvp)) {
4649 nbl_start_crit(srcvp, RW_READER);
4650 in_crit_src = 1;
4651 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
4652 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4653 goto err_out;
4654 }
4655 }
4656
4657 if (targvp && nbl_need_check(targvp)) {
4658 nbl_start_crit(targvp, RW_READER);
4659 in_crit_targ = 1;
4660 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
4661 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4662 goto err_out;
4663 }
4664 }
4665
4666 /* Get source "before" change value */
4667 obdva.va_mask = AT_CTIME|AT_SEQ;
4668 error = VOP_GETATTR(odvp, &obdva, 0, cs->cr, NULL);
4669 if (!error) {
4670 nbdva.va_mask = AT_CTIME|AT_SEQ;
4671 error = VOP_GETATTR(ndvp, &nbdva, 0, cs->cr, NULL);
4672 }
4673 if (error) {
4674 *cs->statusp = resp->status = puterrno4(error);
4675 goto err_out;
4676 }
4677
4678 NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.before, obdva.va_ctime)
4679 NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.before, nbdva.va_ctime)
4680
4681 error = VOP_RENAME(odvp, converted_onm, ndvp, converted_nnm, cs->cr,
4682 NULL, 0);
4683
4684 /*
4685 * If target existed and was unlinked by VOP_RENAME, state will need
4686 * closed. To avoid deadlock, rfs4_close_all_state will be done after
4687 * any necessary nbl_end_crit on srcvp and tgtvp.
4688 */
4689 if (error == 0 && fp != NULL) {
4690 rfs4_dbe_lock(fp->rf_dbe);
4691 tvp = fp->rf_vp;
4692 if (tvp)
4693 VN_HOLD(tvp);
4694 rfs4_dbe_unlock(fp->rf_dbe);
4695
4696 if (tvp) {
4697 struct vattr va;
4698 va.va_mask = AT_NLINK;
4699
4700 if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4701 va.va_nlink == 0) {
4702 unlinked = 1;
4703
4704 /* DEBUG data */
4705 if ((srcvp == targvp) || (tvp != targvp)) {
4706 cmn_err(CE_WARN, "rfs4_op_rename: "
4707 "srcvp %p, targvp: %p, tvp: %p",
4708 (void *)srcvp, (void *)targvp,
4709 (void *)tvp);
4710 }
4711 } else {
4712 VN_RELE(tvp);
4713 }
4714 }
4715 }
4716 if (error == 0)
4717 vn_renamepath(ndvp, srcvp, nnm, nlen - 1);
4718
4719 if (in_crit_src)
4720 nbl_end_crit(srcvp);
4721 if (srcvp)
4722 VN_RELE(srcvp);
4723 if (in_crit_targ)
4724 nbl_end_crit(targvp);
4725 if (targvp)
4726 VN_RELE(targvp);
4727
4728 if (unlinked) {
4729 ASSERT(fp != NULL);
4730 ASSERT(tvp != NULL);
4731
4732 /* DEBUG data */
4733 if (RW_READ_HELD(&tvp->v_nbllock)) {
4734 cmn_err(CE_WARN, "rfs4_op_rename: "
4735 "RW_READ_HELD(%p)", (void *)tvp);
4736 }
4737
4738 /* The file is gone and so should the state */
4739 rfs4_close_all_state(fp);
4740 VN_RELE(tvp);
4741 }
4742
4743 if (sfp) {
4744 rfs4_clear_dont_grant(sfp);
4745 rfs4_file_rele(sfp);
4746 }
4747 if (fp) {
4748 rfs4_clear_dont_grant(fp);
4749 rfs4_file_rele(fp);
4750 }
4751
4752 if (converted_onm != onm)
4753 kmem_free(converted_onm, MAXPATHLEN + 1);
4754 kmem_free(onm, olen);
4755 if (converted_nnm != nnm)
4756 kmem_free(converted_nnm, MAXPATHLEN + 1);
4757 kmem_free(nnm, nlen);
4758
4759 /*
4760 * Get the initial "after" sequence number, if it fails, set to zero
4761 */
4762 oidva.va_mask = AT_SEQ;
4763 if (VOP_GETATTR(odvp, &oidva, 0, cs->cr, NULL))
4764 oidva.va_seq = 0;
4765
4766 nidva.va_mask = AT_SEQ;
4767 if (VOP_GETATTR(ndvp, &nidva, 0, cs->cr, NULL))
4768 nidva.va_seq = 0;
4769
4770 /*
4771 * Force modified data and metadata out to stable storage.
4772 */
4773 (void) VOP_FSYNC(odvp, 0, cs->cr, NULL);
4774 (void) VOP_FSYNC(ndvp, 0, cs->cr, NULL);
4775
4776 if (error) {
4777 *cs->statusp = resp->status = puterrno4(error);
4778 goto out;
4779 }
4780
4781 /*
4782 * Get "after" change values, if it fails, simply return the
4783 * before value.
4784 */
4785 oadva.va_mask = AT_CTIME|AT_SEQ;
4786 if (VOP_GETATTR(odvp, &oadva, 0, cs->cr, NULL)) {
4787 oadva.va_ctime = obdva.va_ctime;
4788 oadva.va_seq = 0;
4789 }
4790
4791 nadva.va_mask = AT_CTIME|AT_SEQ;
4792 if (VOP_GETATTR(odvp, &nadva, 0, cs->cr, NULL)) {
4793 nadva.va_ctime = nbdva.va_ctime;
4794 nadva.va_seq = 0;
4795 }
4796
4797 NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.after, oadva.va_ctime)
4798 NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.after, nadva.va_ctime)
4799
4800 /*
4801 * The cinfo.atomic = TRUE only if we have
4802 * non-zero va_seq's, and it has incremented by exactly one
4803 * during the VOP_RENAME and it didn't change during the VOP_FSYNC.
4804 */
4805 if (obdva.va_seq && oidva.va_seq && oadva.va_seq &&
4806 oidva.va_seq == (obdva.va_seq + 1) && oidva.va_seq == oadva.va_seq)
4807 resp->source_cinfo.atomic = TRUE;
4808 else
4809 resp->source_cinfo.atomic = FALSE;
4810
4811 if (nbdva.va_seq && nidva.va_seq && nadva.va_seq &&
4812 nidva.va_seq == (nbdva.va_seq + 1) && nidva.va_seq == nadva.va_seq)
4813 resp->target_cinfo.atomic = TRUE;
4814 else
4815 resp->target_cinfo.atomic = FALSE;
4816
4817 #ifdef VOLATILE_FH_TEST
4818 {
4819 extern void add_volrnm_fh(struct exportinfo *, vnode_t *);
4820
4821 /*
4822 * Add the renamed file handle to the volatile rename list
4823 */
4824 if (cs->exi->exi_export.ex_flags & EX_VOLRNM) {
4825 /* file handles may expire on rename */
4826 vnode_t *vp;
4827
4828 nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4829 /*
4830 * Already know that nnm will be a valid string
4831 */
4832 error = VOP_LOOKUP(ndvp, nnm, &vp, NULL, 0, NULL, cs->cr,
4833 NULL, NULL, NULL);
4834 kmem_free(nnm, nlen);
4835 if (!error) {
4836 add_volrnm_fh(cs->exi, vp);
4837 VN_RELE(vp);
4838 }
4839 }
4840 }
4841 #endif /* VOLATILE_FH_TEST */
4842
4843 *cs->statusp = resp->status = NFS4_OK;
4844 out:
4845 DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4846 RENAME4res *, resp);
4847 return;
4848
4849 err_out:
4850 if (onm != converted_onm)
4851 kmem_free(converted_onm, MAXPATHLEN + 1);
4852 if (onm != NULL)
4853 kmem_free(onm, olen);
4854 if (nnm != converted_nnm)
4855 kmem_free(converted_nnm, MAXPATHLEN + 1);
4856 if (nnm != NULL)
4857 kmem_free(nnm, nlen);
4858
4859 if (in_crit_src) nbl_end_crit(srcvp);
4860 if (in_crit_targ) nbl_end_crit(targvp);
4861 if (targvp) VN_RELE(targvp);
4862 if (srcvp) VN_RELE(srcvp);
4863 if (sfp) {
4864 if (sfp_rele_grant_hold) rfs4_clear_dont_grant(sfp);
4865 rfs4_file_rele(sfp);
4866 }
4867 if (fp) {
4868 if (fp_rele_grant_hold) rfs4_clear_dont_grant(fp);
4869 rfs4_file_rele(fp);
4870 }
4871
4872 DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4873 RENAME4res *, resp);
4874 }
4875
4876 /* ARGSUSED */
4877 static void
4878 rfs4_op_renew(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4879 struct compound_state *cs)
4880 {
4881 RENEW4args *args = &argop->nfs_argop4_u.oprenew;
4882 RENEW4res *resp = &resop->nfs_resop4_u.oprenew;
4883 rfs4_client_t *cp;
4884
4885 DTRACE_NFSV4_2(op__renew__start, struct compound_state *, cs,
4886 RENEW4args *, args);
4887
4888 if ((cp = rfs4_findclient_by_id(args->clientid, FALSE)) == NULL) {
4889 *cs->statusp = resp->status =
4890 rfs4_check_clientid(&args->clientid, 0);
4891 goto out;
4892 }
4893
4894 if (rfs4_lease_expired(cp)) {
4895 rfs4_client_rele(cp);
4896 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
4897 goto out;
4898 }
4899
4900 rfs4_update_lease(cp);
4901
4902 mutex_enter(cp->rc_cbinfo.cb_lock);
4903 if (cp->rc_cbinfo.cb_notified_of_cb_path_down == FALSE) {
4904 cp->rc_cbinfo.cb_notified_of_cb_path_down = TRUE;
4905 *cs->statusp = resp->status = NFS4ERR_CB_PATH_DOWN;
4906 } else {
4907 *cs->statusp = resp->status = NFS4_OK;
4908 }
4909 mutex_exit(cp->rc_cbinfo.cb_lock);
4910
4911 rfs4_client_rele(cp);
4912
4913 out:
4914 DTRACE_NFSV4_2(op__renew__done, struct compound_state *, cs,
4915 RENEW4res *, resp);
4916 }
4917
4918 /* ARGSUSED */
4919 static void
4920 rfs4_op_restorefh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
4921 struct compound_state *cs)
4922 {
4923 RESTOREFH4res *resp = &resop->nfs_resop4_u.oprestorefh;
4924
4925 DTRACE_NFSV4_1(op__restorefh__start, struct compound_state *, cs);
4926
4927 /* No need to check cs->access - we are not accessing any object */
4928 if ((cs->saved_vp == NULL) || (cs->saved_fh.nfs_fh4_val == NULL)) {
4929 *cs->statusp = resp->status = NFS4ERR_RESTOREFH;
4930 goto out;
4931 }
4932 if (cs->vp != NULL) {
4933 VN_RELE(cs->vp);
4934 }
4935 cs->vp = cs->saved_vp;
4936 cs->saved_vp = NULL;
4937 cs->exi = cs->saved_exi;
4938 nfs_fh4_copy(&cs->saved_fh, &cs->fh);
4939 *cs->statusp = resp->status = NFS4_OK;
4940 cs->deleg = FALSE;
4941
4942 out:
4943 DTRACE_NFSV4_2(op__restorefh__done, struct compound_state *, cs,
4944 RESTOREFH4res *, resp);
4945 }
4946
4947 /* ARGSUSED */
4948 static void
4949 rfs4_op_savefh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4950 struct compound_state *cs)
4951 {
4952 SAVEFH4res *resp = &resop->nfs_resop4_u.opsavefh;
4953
4954 DTRACE_NFSV4_1(op__savefh__start, struct compound_state *, cs);
4955
4956 /* No need to check cs->access - we are not accessing any object */
4957 if (cs->vp == NULL) {
4958 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4959 goto out;
4960 }
4961 if (cs->saved_vp != NULL) {
4962 VN_RELE(cs->saved_vp);
4963 }
4964 cs->saved_vp = cs->vp;
4965 VN_HOLD(cs->saved_vp);
4966 cs->saved_exi = cs->exi;
4967 /*
4968 * since SAVEFH is fairly rare, don't alloc space for its fh
4969 * unless necessary.
4970 */
4971 if (cs->saved_fh.nfs_fh4_val == NULL) {
4972 cs->saved_fh.nfs_fh4_val = kmem_alloc(NFS4_FHSIZE, KM_SLEEP);
4973 }
4974 nfs_fh4_copy(&cs->fh, &cs->saved_fh);
4975 *cs->statusp = resp->status = NFS4_OK;
4976
4977 out:
4978 DTRACE_NFSV4_2(op__savefh__done, struct compound_state *, cs,
4979 SAVEFH4res *, resp);
4980 }
4981
4982 /*
4983 * rfs4_verify_attr is called when nfsv4 Setattr failed, but we wish to
4984 * return the bitmap of attrs that were set successfully. It is also
4985 * called by Verify/Nverify to test the vattr/vfsstat attrs. It should
4986 * always be called only after rfs4_do_set_attrs().
4987 *
4988 * Verify that the attributes are same as the expected ones. sargp->vap
4989 * and sargp->sbp contain the input attributes as translated from fattr4.
4990 *
4991 * This function verifies only the attrs that correspond to a vattr or
4992 * vfsstat struct. That is because of the extra step needed to get the
4993 * corresponding system structs. Other attributes have already been set or
4994 * verified by do_rfs4_set_attrs.
4995 *
4996 * Return 0 if all attrs match, -1 if some don't, error if error processing.
4997 */
4998 static int
4999 rfs4_verify_attr(struct nfs4_svgetit_arg *sargp,
5000 bitmap4 *resp, struct nfs4_ntov_table *ntovp)
5001 {
5002 int error, ret_error = 0;
5003 int i, k;
5004 uint_t sva_mask = sargp->vap->va_mask;
5005 uint_t vbit;
5006 union nfs4_attr_u *na;
5007 uint8_t *amap;
5008 bool_t getsb = ntovp->vfsstat;
5009
5010 if (sva_mask != 0) {
5011 /*
5012 * Okay to overwrite sargp->vap because we verify based
5013 * on the incoming values.
5014 */
5015 ret_error = VOP_GETATTR(sargp->cs->vp, sargp->vap, 0,
5016 sargp->cs->cr, NULL);
5017 if (ret_error) {
5018 if (resp == NULL)
5019 return (ret_error);
5020 /*
5021 * Must return bitmap of successful attrs
5022 */
5023 sva_mask = 0; /* to prevent checking vap later */
5024 } else {
5025 /*
5026 * Some file systems clobber va_mask. it is probably
5027 * wrong of them to do so, nonethless we practice
5028 * defensive coding.
5029 * See bug id 4276830.
5030 */
5031 sargp->vap->va_mask = sva_mask;
5032 }
5033 }
5034
5035 if (getsb) {
5036 /*
5037 * Now get the superblock and loop on the bitmap, as there is
5038 * no simple way of translating from superblock to bitmap4.
5039 */
5040 ret_error = VFS_STATVFS(sargp->cs->vp->v_vfsp, sargp->sbp);
5041 if (ret_error) {
5042 if (resp == NULL)
5043 goto errout;
5044 getsb = FALSE;
5045 }
5046 }
5047
5048 /*
5049 * Now loop and verify each attribute which getattr returned
5050 * whether it's the same as the input.
5051 */
5052 if (resp == NULL && !getsb && (sva_mask == 0))
5053 goto errout;
5054
5055 na = ntovp->na;
5056 amap = ntovp->amap;
5057 k = 0;
5058 for (i = 0; i < ntovp->attrcnt; i++, na++, amap++) {
5059 k = *amap;
5060 ASSERT(nfs4_ntov_map[k].nval == k);
5061 vbit = nfs4_ntov_map[k].vbit;
5062
5063 /*
5064 * If vattr attribute but VOP_GETATTR failed, or it's
5065 * superblock attribute but VFS_STATVFS failed, skip
5066 */
5067 if (vbit) {
5068 if ((vbit & sva_mask) == 0)
5069 continue;
5070 } else if (!(getsb && nfs4_ntov_map[k].vfsstat)) {
5071 continue;
5072 }
5073 error = (*nfs4_ntov_map[k].sv_getit)(NFS4ATTR_VERIT, sargp, na);
5074 if (resp != NULL) {
5075 if (error)
5076 ret_error = -1; /* not all match */
5077 else /* update response bitmap */
5078 *resp |= nfs4_ntov_map[k].fbit;
5079 continue;
5080 }
5081 if (error) {
5082 ret_error = -1; /* not all match */
5083 break;
5084 }
5085 }
5086 errout:
5087 return (ret_error);
5088 }
5089
5090 /*
5091 * Decode the attribute to be set/verified. If the attr requires a sys op
5092 * (VOP_GETATTR, VFS_VFSSTAT), and the request is to verify, then don't
5093 * call the sv_getit function for it, because the sys op hasn't yet been done.
5094 * Return 0 for success, error code if failed.
5095 *
5096 * Note: the decoded arg is not freed here but in nfs4_ntov_table_free.
5097 */
5098 static int
5099 decode_fattr4_attr(nfs4_attr_cmd_t cmd, struct nfs4_svgetit_arg *sargp,
5100 int k, XDR *xdrp, bitmap4 *resp_bval, union nfs4_attr_u *nap)
5101 {
5102 int error = 0;
5103 bool_t set_later;
5104
5105 sargp->vap->va_mask |= nfs4_ntov_map[k].vbit;
5106
5107 if ((*nfs4_ntov_map[k].xfunc)(xdrp, nap)) {
5108 set_later = nfs4_ntov_map[k].vbit || nfs4_ntov_map[k].vfsstat;
5109 /*
5110 * don't verify yet if a vattr or sb dependent attr,
5111 * because we don't have their sys values yet.
5112 * Will be done later.
5113 */
5114 if (! (set_later && (cmd == NFS4ATTR_VERIT))) {
5115 /*
5116 * ACLs are a special case, since setting the MODE
5117 * conflicts with setting the ACL. We delay setting
5118 * the ACL until all other attributes have been set.
5119 * The ACL gets set in do_rfs4_op_setattr().
5120 */
5121 if (nfs4_ntov_map[k].fbit != FATTR4_ACL_MASK) {
5122 error = (*nfs4_ntov_map[k].sv_getit)(cmd,
5123 sargp, nap);
5124 if (error) {
5125 xdr_free(nfs4_ntov_map[k].xfunc,
5126 (caddr_t)nap);
5127 }
5128 }
5129 }
5130 } else {
5131 #ifdef DEBUG
5132 cmn_err(CE_NOTE, "decode_fattr4_attr: error "
5133 "decoding attribute %d\n", k);
5134 #endif
5135 error = EINVAL;
5136 }
5137 if (!error && resp_bval && !set_later) {
5138 *resp_bval |= nfs4_ntov_map[k].fbit;
5139 }
5140
5141 return (error);
5142 }
5143
5144 /*
5145 * Set vattr based on incoming fattr4 attrs - used by setattr.
5146 * Set response mask. Ignore any values that are not writable vattr attrs.
5147 */
5148 static nfsstat4
5149 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5150 struct nfs4_svgetit_arg *sargp, struct nfs4_ntov_table *ntovp,
5151 nfs4_attr_cmd_t cmd)
5152 {
5153 int error = 0;
5154 int i;
5155 char *attrs = fattrp->attrlist4;
5156 uint32_t attrslen = fattrp->attrlist4_len;
5157 XDR xdr;
5158 nfsstat4 status = NFS4_OK;
5159 vnode_t *vp = cs->vp;
5160 union nfs4_attr_u *na;
5161 uint8_t *amap;
5162
5163 #ifndef lint
5164 /*
5165 * Make sure that maximum attribute number can be expressed as an
5166 * 8 bit quantity.
5167 */
5168 ASSERT(NFS4_MAXNUM_ATTRS <= (UINT8_MAX + 1));
5169 #endif
5170
5171 if (vp == NULL) {
5172 if (resp)
5173 *resp = 0;
5174 return (NFS4ERR_NOFILEHANDLE);
5175 }
5176 if (cs->access == CS_ACCESS_DENIED) {
5177 if (resp)
5178 *resp = 0;
5179 return (NFS4ERR_ACCESS);
5180 }
5181
5182 sargp->op = cmd;
5183 sargp->cs = cs;
5184 sargp->flag = 0; /* may be set later */
5185 sargp->vap->va_mask = 0;
5186 sargp->rdattr_error = NFS4_OK;
5187 sargp->rdattr_error_req = FALSE;
5188 /* sargp->sbp is set by the caller */
5189
5190 xdrmem_create(&xdr, attrs, attrslen, XDR_DECODE);
5191
5192 na = ntovp->na;
5193 amap = ntovp->amap;
5194
5195 /*
5196 * The following loop iterates on the nfs4_ntov_map checking
5197 * if the fbit is set in the requested bitmap.
5198 * If set then we process the arguments using the
5199 * rfs4_fattr4 conversion functions to populate the setattr
5200 * vattr and va_mask. Any settable attrs that are not using vattr
5201 * will be set in this loop.
5202 */
5203 for (i = 0; i < nfs4_ntov_map_size; i++) {
5204 if (!(fattrp->attrmask & nfs4_ntov_map[i].fbit)) {
5205 continue;
5206 }
5207 /*
5208 * If setattr, must be a writable attr.
5209 * If verify/nverify, must be a readable attr.
5210 */
5211 if ((error = (*nfs4_ntov_map[i].sv_getit)(
5212 NFS4ATTR_SUPPORTED, sargp, NULL)) != 0) {
5213 /*
5214 * Client tries to set/verify an
5215 * unsupported attribute, tries to set
5216 * a read only attr or verify a write
5217 * only one - error!
5218 */
5219 break;
5220 }
5221 /*
5222 * Decode the attribute to set/verify
5223 */
5224 error = decode_fattr4_attr(cmd, sargp, nfs4_ntov_map[i].nval,
5225 &xdr, resp ? resp : NULL, na);
5226 if (error)
5227 break;
5228 *amap++ = (uint8_t)nfs4_ntov_map[i].nval;
5229 na++;
5230 (ntovp->attrcnt)++;
5231 if (nfs4_ntov_map[i].vfsstat)
5232 ntovp->vfsstat = TRUE;
5233 }
5234
5235 if (error != 0)
5236 status = (error == ENOTSUP ? NFS4ERR_ATTRNOTSUPP :
5237 puterrno4(error));
5238 /* xdrmem_destroy(&xdrs); */ /* NO-OP */
5239 return (status);
5240 }
5241
5242 static nfsstat4
5243 do_rfs4_op_setattr(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5244 stateid4 *stateid)
5245 {
5246 int error = 0;
5247 struct nfs4_svgetit_arg sarg;
5248 bool_t trunc;
5249
5250 nfsstat4 status = NFS4_OK;
5251 cred_t *cr = cs->cr;
5252 vnode_t *vp = cs->vp;
5253 struct nfs4_ntov_table ntov;
5254 struct statvfs64 sb;
5255 struct vattr bva;
5256 struct flock64 bf;
5257 int in_crit = 0;
5258 uint_t saved_mask = 0;
5259 caller_context_t ct;
5260
5261 *resp = 0;
5262 sarg.sbp = &sb;
5263 sarg.is_referral = B_FALSE;
5264 nfs4_ntov_table_init(&ntov);
5265 status = do_rfs4_set_attrs(resp, fattrp, cs, &sarg, &ntov,
5266 NFS4ATTR_SETIT);
5267 if (status != NFS4_OK) {
5268 /*
5269 * failed set attrs
5270 */
5271 goto done;
5272 }
5273 if ((sarg.vap->va_mask == 0) &&
5274 (! (fattrp->attrmask & FATTR4_ACL_MASK))) {
5275 /*
5276 * no further work to be done
5277 */
5278 goto done;
5279 }
5280
5281 /*
5282 * If we got a request to set the ACL and the MODE, only
5283 * allow changing VSUID, VSGID, and VSVTX. Attempting
5284 * to change any other bits, along with setting an ACL,
5285 * gives NFS4ERR_INVAL.
5286 */
5287 if ((fattrp->attrmask & FATTR4_ACL_MASK) &&
5288 (fattrp->attrmask & FATTR4_MODE_MASK)) {
5289 vattr_t va;
5290
5291 va.va_mask = AT_MODE;
5292 error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
5293 if (error) {
5294 status = puterrno4(error);
5295 goto done;
5296 }
5297 if ((sarg.vap->va_mode ^ va.va_mode) &
5298 ~(VSUID | VSGID | VSVTX)) {
5299 status = NFS4ERR_INVAL;
5300 goto done;
5301 }
5302 }
5303
5304 /* Check stateid only if size has been set */
5305 if (sarg.vap->va_mask & AT_SIZE) {
5306 trunc = (sarg.vap->va_size == 0);
5307 status = rfs4_check_stateid(FWRITE, cs->vp, stateid,
5308 trunc, &cs->deleg, sarg.vap->va_mask & AT_SIZE, &ct);
5309 if (status != NFS4_OK)
5310 goto done;
5311 } else {
5312 ct.cc_sysid = 0;
5313 ct.cc_pid = 0;
5314 ct.cc_caller_id = nfs4_srv_caller_id;
5315 ct.cc_flags = CC_DONTBLOCK;
5316 }
5317
5318 /* XXX start of possible race with delegations */
5319
5320 /*
5321 * We need to specially handle size changes because it is
5322 * possible for the client to create a file with read-only
5323 * modes, but with the file opened for writing. If the client
5324 * then tries to set the file size, e.g. ftruncate(3C),
5325 * fcntl(F_FREESP), the normal access checking done in
5326 * VOP_SETATTR would prevent the client from doing it even though
5327 * it should be allowed to do so. To get around this, we do the
5328 * access checking for ourselves and use VOP_SPACE which doesn't
5329 * do the access checking.
5330 * Also the client should not be allowed to change the file
5331 * size if there is a conflicting non-blocking mandatory lock in
5332 * the region of the change.
5333 */
5334 if (vp->v_type == VREG && (sarg.vap->va_mask & AT_SIZE)) {
5335 u_offset_t offset;
5336 ssize_t length;
5337
5338 /*
5339 * ufs_setattr clears AT_SIZE from vap->va_mask, but
5340 * before returning, sarg.vap->va_mask is used to
5341 * generate the setattr reply bitmap. We also clear
5342 * AT_SIZE below before calling VOP_SPACE. For both
5343 * of these cases, the va_mask needs to be saved here
5344 * and restored after calling VOP_SETATTR.
5345 */
5346 saved_mask = sarg.vap->va_mask;
5347
5348 /*
5349 * Check any possible conflict due to NBMAND locks.
5350 * Get into critical region before VOP_GETATTR, so the
5351 * size attribute is valid when checking conflicts.
5352 */
5353 if (nbl_need_check(vp)) {
5354 nbl_start_crit(vp, RW_READER);
5355 in_crit = 1;
5356 }
5357
5358 bva.va_mask = AT_UID|AT_SIZE;
5359 if (error = VOP_GETATTR(vp, &bva, 0, cr, &ct)) {
5360 status = puterrno4(error);
5361 goto done;
5362 }
5363
5364 if (in_crit) {
5365 if (sarg.vap->va_size < bva.va_size) {
5366 offset = sarg.vap->va_size;
5367 length = bva.va_size - sarg.vap->va_size;
5368 } else {
5369 offset = bva.va_size;
5370 length = sarg.vap->va_size - bva.va_size;
5371 }
5372 if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
5373 &ct)) {
5374 status = NFS4ERR_LOCKED;
5375 goto done;
5376 }
5377 }
5378
5379 if (crgetuid(cr) == bva.va_uid) {
5380 sarg.vap->va_mask &= ~AT_SIZE;
5381 bf.l_type = F_WRLCK;
5382 bf.l_whence = 0;
5383 bf.l_start = (off64_t)sarg.vap->va_size;
5384 bf.l_len = 0;
5385 bf.l_sysid = 0;
5386 bf.l_pid = 0;
5387 error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
5388 (offset_t)sarg.vap->va_size, cr, &ct);
5389 }
5390 }
5391
5392 if (!error && sarg.vap->va_mask != 0)
5393 error = VOP_SETATTR(vp, sarg.vap, sarg.flag, cr, &ct);
5394
5395 /* restore va_mask -- ufs_setattr clears AT_SIZE */
5396 if (saved_mask & AT_SIZE)
5397 sarg.vap->va_mask |= AT_SIZE;
5398
5399 /*
5400 * If an ACL was being set, it has been delayed until now,
5401 * in order to set the mode (via the VOP_SETATTR() above) first.
5402 */
5403 if ((! error) && (fattrp->attrmask & FATTR4_ACL_MASK)) {
5404 int i;
5405
5406 for (i = 0; i < NFS4_MAXNUM_ATTRS; i++)
5407 if (ntov.amap[i] == FATTR4_ACL)
5408 break;
5409 if (i < NFS4_MAXNUM_ATTRS) {
5410 error = (*nfs4_ntov_map[FATTR4_ACL].sv_getit)(
5411 NFS4ATTR_SETIT, &sarg, &ntov.na[i]);
5412 if (error == 0) {
5413 *resp |= FATTR4_ACL_MASK;
5414 } else if (error == ENOTSUP) {
5415 (void) rfs4_verify_attr(&sarg, resp, &ntov);
5416 status = NFS4ERR_ATTRNOTSUPP;
5417 goto done;
5418 }
5419 } else {
5420 NFS4_DEBUG(rfs4_debug,
5421 (CE_NOTE, "do_rfs4_op_setattr: "
5422 "unable to find ACL in fattr4"));
5423 error = EINVAL;
5424 }
5425 }
5426
5427 if (error) {
5428 /* check if a monitor detected a delegation conflict */
5429 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
5430 status = NFS4ERR_DELAY;
5431 else
5432 status = puterrno4(error);
5433
5434 /*
5435 * Set the response bitmap when setattr failed.
5436 * If VOP_SETATTR partially succeeded, test by doing a
5437 * VOP_GETATTR on the object and comparing the data
5438 * to the setattr arguments.
5439 */
5440 (void) rfs4_verify_attr(&sarg, resp, &ntov);
5441 } else {
5442 /*
5443 * Force modified metadata out to stable storage.
5444 */
5445 (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
5446 /*
5447 * Set response bitmap
5448 */
5449 nfs4_vmask_to_nmask_set(sarg.vap->va_mask, resp);
5450 }
5451
5452 /* Return early and already have a NFSv4 error */
5453 done:
5454 /*
5455 * Except for nfs4_vmask_to_nmask_set(), vattr --> fattr
5456 * conversion sets both readable and writeable NFS4 attrs
5457 * for AT_MTIME and AT_ATIME. The line below masks out
5458 * unrequested attrs from the setattr result bitmap. This
5459 * is placed after the done: label to catch the ATTRNOTSUP
5460 * case.
5461 */
5462 *resp &= fattrp->attrmask;
5463
5464 if (in_crit)
5465 nbl_end_crit(vp);
5466
5467 nfs4_ntov_table_free(&ntov, &sarg);
5468
5469 return (status);
5470 }
5471
5472 /* ARGSUSED */
5473 static void
5474 rfs4_op_setattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5475 struct compound_state *cs)
5476 {
5477 SETATTR4args *args = &argop->nfs_argop4_u.opsetattr;
5478 SETATTR4res *resp = &resop->nfs_resop4_u.opsetattr;
5479 bslabel_t *clabel;
5480
5481 DTRACE_NFSV4_2(op__setattr__start, struct compound_state *, cs,
5482 SETATTR4args *, args);
5483
5484 if (cs->vp == NULL) {
5485 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5486 goto out;
5487 }
5488
5489 /*
5490 * If there is an unshared filesystem mounted on this vnode,
5491 * do not allow to setattr on this vnode.
5492 */
5493 if (vn_ismntpt(cs->vp)) {
5494 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5495 goto out;
5496 }
5497
5498 resp->attrsset = 0;
5499
5500 if (rdonly4(req, cs)) {
5501 *cs->statusp = resp->status = NFS4ERR_ROFS;
5502 goto out;
5503 }
5504
5505 /* check label before setting attributes */
5506 if (is_system_labeled()) {
5507 ASSERT(req->rq_label != NULL);
5508 clabel = req->rq_label;
5509 DTRACE_PROBE2(tx__rfs4__log__info__opsetattr__clabel, char *,
5510 "got client label from request(1)",
5511 struct svc_req *, req);
5512 if (!blequal(&l_admin_low->tsl_label, clabel)) {
5513 if (!do_rfs_label_check(clabel, cs->vp,
5514 EQUALITY_CHECK, cs->exi)) {
5515 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5516 goto out;
5517 }
5518 }
5519 }
5520
5521 *cs->statusp = resp->status =
5522 do_rfs4_op_setattr(&resp->attrsset, &args->obj_attributes, cs,
5523 &args->stateid);
5524
5525 out:
5526 DTRACE_NFSV4_2(op__setattr__done, struct compound_state *, cs,
5527 SETATTR4res *, resp);
5528 }
5529
5530 /* ARGSUSED */
5531 static void
5532 rfs4_op_verify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5533 struct compound_state *cs)
5534 {
5535 /*
5536 * verify and nverify are exactly the same, except that nverify
5537 * succeeds when some argument changed, and verify succeeds when
5538 * when none changed.
5539 */
5540
5541 VERIFY4args *args = &argop->nfs_argop4_u.opverify;
5542 VERIFY4res *resp = &resop->nfs_resop4_u.opverify;
5543
5544 int error;
5545 struct nfs4_svgetit_arg sarg;
5546 struct statvfs64 sb;
5547 struct nfs4_ntov_table ntov;
5548
5549 DTRACE_NFSV4_2(op__verify__start, struct compound_state *, cs,
5550 VERIFY4args *, args);
5551
5552 if (cs->vp == NULL) {
5553 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5554 goto out;
5555 }
5556
5557 sarg.sbp = &sb;
5558 sarg.is_referral = B_FALSE;
5559 nfs4_ntov_table_init(&ntov);
5560 resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5561 &sarg, &ntov, NFS4ATTR_VERIT);
5562 if (resp->status != NFS4_OK) {
5563 /*
5564 * do_rfs4_set_attrs will try to verify systemwide attrs,
5565 * so could return -1 for "no match".
5566 */
5567 if (resp->status == -1)
5568 resp->status = NFS4ERR_NOT_SAME;
5569 goto done;
5570 }
5571 error = rfs4_verify_attr(&sarg, NULL, &ntov);
5572 switch (error) {
5573 case 0:
5574 resp->status = NFS4_OK;
5575 break;
5576 case -1:
5577 resp->status = NFS4ERR_NOT_SAME;
5578 break;
5579 default:
5580 resp->status = puterrno4(error);
5581 break;
5582 }
5583 done:
5584 *cs->statusp = resp->status;
5585 nfs4_ntov_table_free(&ntov, &sarg);
5586 out:
5587 DTRACE_NFSV4_2(op__verify__done, struct compound_state *, cs,
5588 VERIFY4res *, resp);
5589 }
5590
5591 /* ARGSUSED */
5592 static void
5593 rfs4_op_nverify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5594 struct compound_state *cs)
5595 {
5596 /*
5597 * verify and nverify are exactly the same, except that nverify
5598 * succeeds when some argument changed, and verify succeeds when
5599 * when none changed.
5600 */
5601
5602 NVERIFY4args *args = &argop->nfs_argop4_u.opnverify;
5603 NVERIFY4res *resp = &resop->nfs_resop4_u.opnverify;
5604
5605 int error;
5606 struct nfs4_svgetit_arg sarg;
5607 struct statvfs64 sb;
5608 struct nfs4_ntov_table ntov;
5609
5610 DTRACE_NFSV4_2(op__nverify__start, struct compound_state *, cs,
5611 NVERIFY4args *, args);
5612
5613 if (cs->vp == NULL) {
5614 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5615 DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5616 NVERIFY4res *, resp);
5617 return;
5618 }
5619 sarg.sbp = &sb;
5620 sarg.is_referral = B_FALSE;
5621 nfs4_ntov_table_init(&ntov);
5622 resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5623 &sarg, &ntov, NFS4ATTR_VERIT);
5624 if (resp->status != NFS4_OK) {
5625 /*
5626 * do_rfs4_set_attrs will try to verify systemwide attrs,
5627 * so could return -1 for "no match".
5628 */
5629 if (resp->status == -1)
5630 resp->status = NFS4_OK;
5631 goto done;
5632 }
5633 error = rfs4_verify_attr(&sarg, NULL, &ntov);
5634 switch (error) {
5635 case 0:
5636 resp->status = NFS4ERR_SAME;
5637 break;
5638 case -1:
5639 resp->status = NFS4_OK;
5640 break;
5641 default:
5642 resp->status = puterrno4(error);
5643 break;
5644 }
5645 done:
5646 *cs->statusp = resp->status;
5647 nfs4_ntov_table_free(&ntov, &sarg);
5648
5649 DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5650 NVERIFY4res *, resp);
5651 }
5652
/*
 * Number of iovec entries rfs4_op_write() keeps on its stack.  A write
 * whose mblk chain needs more entries than this uses a kmem_alloc()ed
 * iovec array instead.
 *
 * XXX - This should live in an NFS header file.
 */
#define	MAX_IOVECS	12
5657
/*
 * WRITE: write args->data_len bytes at args->offset to the vnode of the
 * current filehandle (cs->vp).
 *
 * The outcome is stored in both resp->status and *cs->statusp.  On
 * success resp->count, resp->committed and resp->writeverf are filled in.
 *
 * Failure statuses produced directly by this function:
 *	NFS4ERR_NOFILEHANDLE	no current filehandle
 *	NFS4ERR_ACCESS		cs->access is CS_ACCESS_DENIED, or the file
 *				has mandatory locking enabled (MANDLOCK)
 *	NFS4ERR_LOCKED		nbl_conflict() reports a conflict
 *	NFS4ERR_ROFS		rdonly4() says the export is read-only
 *	NFS4ERR_ISDIR/INVAL	target is a directory / other non-regular
 *	NFS4ERR_INVAL		args->stable is not a known stable_how value
 * Anything else comes from rfs4_check_stateid() or from puterrno4() applied
 * to a VOP_GETATTR/VOP_ACCESS/do_io error.
 */
/* ARGSUSED */
static void
rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
    struct compound_state *cs)
{
	WRITE4args *args = &argop->nfs_argop4_u.opwrite;
	WRITE4res *resp = &resop->nfs_resop4_u.opwrite;
	int error;
	vnode_t *vp;
	struct vattr bva;
	u_offset_t rlimit;
	struct uio uio;
	struct iovec iov[MAX_IOVECS];
	struct iovec *iovp;
	int iovcnt;
	int ioflag;
	cred_t *savecred, *cr;
	bool_t *deleg = &cs->deleg;
	nfsstat4 stat;
	int in_crit = 0;	/* set once nbl_start_crit() has been called */
	caller_context_t ct;
	nfs4_srv_t *nsrv4;

	DTRACE_NFSV4_2(op__write__start, struct compound_state *, cs,
	    WRITE4args *, args);

	vp = cs->vp;
	if (vp == NULL) {
		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
		goto out;
	}
	if (cs->access == CS_ACCESS_DENIED) {
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		goto out;
	}

	cr = cs->cr;

	/* Validate the stateid for write access; this also fills in ct. */
	if ((stat = rfs4_check_stateid(FWRITE, vp, &args->stateid, FALSE,
	    deleg, TRUE, &ct)) != NFS4_OK) {
		*cs->statusp = resp->status = stat;
		goto out;
	}

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		if (nbl_conflict(vp, NBL_WRITE,
		    args->offset, args->data_len, 0, &ct)) {
			*cs->statusp = resp->status = NFS4ERR_LOCKED;
			goto out;
		}
	}

	bva.va_mask = AT_MODE | AT_UID;
	error = VOP_GETATTR(vp, &bva, 0, cr, &ct);

	/*
	 * If we can't get the attributes, then we can't do the
	 * right access checking.  So, we'll fail the request.
	 */
	if (error) {
		*cs->statusp = resp->status = puterrno4(error);
		goto out;
	}

	if (rdonly4(req, cs)) {
		*cs->statusp = resp->status = NFS4ERR_ROFS;
		goto out;
	}

	/* Only regular files may be written. */
	if (vp->v_type != VREG) {
		*cs->statusp = resp->status =
		    ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
		goto out;
	}

	/* The VWRITE access check is skipped when the caller owns the file. */
	if (crgetuid(cr) != bva.va_uid &&
	    (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct))) {
		*cs->statusp = resp->status = puterrno4(error);
		goto out;
	}

	if (MANDLOCK(vp, bva.va_mode)) {
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		goto out;
	}

	nsrv4 = zone_getspecific(rfs4_zone_key, curzone);

	/* A zero-length write succeeds without touching the file. */
	if (args->data_len == 0) {
		*cs->statusp = resp->status = NFS4_OK;
		resp->count = 0;
		resp->committed = args->stable;
		resp->writeverf = nsrv4->write4verf;
		goto out;
	}

	/*
	 * Build the iovec list describing the data.  The data arrives in
	 * one of three forms: an mblk chain (args->mblk), a single chunk
	 * referenced through args->rlist, or a flat buffer (args->data_val).
	 */
	if (args->mblk != NULL) {
		mblk_t *m;
		uint_t bytes, round_len;

		/*
		 * Count the mblks needed to cover the XDR-rounded length so
		 * we know how many iovec entries are required.
		 */
		iovcnt = 0;
		bytes = 0;
		round_len = roundup(args->data_len, BYTES_PER_XDR_UNIT);
		for (m = args->mblk;
		    m != NULL && bytes < round_len;
		    m = m->b_cont) {
			iovcnt++;
			bytes += MBLKL(m);
		}
#ifdef DEBUG
		/* should have ended on an mblk boundary */
		if (bytes != round_len) {
			printf("bytes=0x%x, round_len=0x%x, req len=0x%x\n",
			    bytes, round_len, args->data_len);
			printf("args=%p, args->mblk=%p, m=%p", (void *)args,
			    (void *)args->mblk, (void *)m);
			ASSERT(bytes == round_len);
		}
#endif
		/* Use the on-stack array when it is large enough. */
		if (iovcnt <= MAX_IOVECS) {
			iovp = iov;
		} else {
			iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
		}
		mblk_to_iov(args->mblk, iovcnt, iovp);
	} else if (args->rlist != NULL) {
		iovcnt = 1;
		iovp = iov;
		iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
		iovp->iov_len = args->data_len;
	} else {
		iovcnt = 1;
		iovp = iov;
		iovp->iov_base = args->data_val;
		iovp->iov_len = args->data_len;
	}

	uio.uio_iov = iovp;
	uio.uio_iovcnt = iovcnt;

	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_extflg = UIO_COPY_DEFAULT;
	uio.uio_loffset = args->offset;
	uio.uio_resid = args->data_len;
	uio.uio_llimit = curproc->p_fsz_ctl;
	/* Clamp the transfer to what remains below the file size limit. */
	rlimit = uio.uio_llimit - args->offset;
	if (rlimit < (u_offset_t)uio.uio_resid)
		uio.uio_resid = (int)rlimit;

	/* Map the requested stability level onto I/O flags. */
	if (args->stable == UNSTABLE4)
		ioflag = 0;
	else if (args->stable == FILE_SYNC4)
		ioflag = FSYNC;
	else if (args->stable == DATA_SYNC4)
		ioflag = FDSYNC;
	else {
		/* Unknown stable_how: release a heap iovec before failing. */
		if (iovp != iov)
			kmem_free(iovp, sizeof (*iovp) * iovcnt);
		*cs->statusp = resp->status = NFS4ERR_INVAL;
		goto out;
	}

	/*
	 * We're changing creds because VM may fault and we need
	 * the cred of the current thread to be used if quota
	 * checking is enabled.
	 */
	savecred = curthread->t_cred;
	curthread->t_cred = cr;
	error = do_io(FWRITE, vp, &uio, ioflag, cr, &ct);
	curthread->t_cred = savecred;

	/* iovp points at iov[] unless it was kmem_alloc()ed above. */
	if (iovp != iov)
		kmem_free(iovp, sizeof (*iovp) * iovcnt);

	if (error) {
		*cs->statusp = resp->status = puterrno4(error);
		goto out;
	}

	*cs->statusp = resp->status = NFS4_OK;
	resp->count = args->data_len - uio.uio_resid;

	/* Any synchronous request is reported back as FILE_SYNC4. */
	if (ioflag == 0)
		resp->committed = UNSTABLE4;
	else
		resp->committed = FILE_SYNC4;

	resp->writeverf = nsrv4->write4verf;

out:
	/* Leave the nbmand critical region if we entered it. */
	if (in_crit)
		nbl_end_crit(vp);

	DTRACE_NFSV4_2(op__write__done, struct compound_state *, cs,
	    WRITE4res *, resp);
}
5860
5861
/*
 * Prototype kept here until it is moved to a header.  rfs4_compound()
 * calls this with the request, a freshly allocated cred, and pointers to
 * the compound state's principal and nfsflavor fields; a return value of 0
 * is treated there as a bad credential.
 *
 * XXX put in a header file
 */
extern int sec_svc_getcred(struct svc_req *, cred_t *, caddr_t *, int *);
5864
/*
 * Execute one NFSv4 COMPOUND request.
 *
 *	args	decoded request (tag, minorversion, array of operations)
 *	resp	result to fill in; the reply tag and the results array are
 *		allocated here
 *	exi	asserted to be NULL once past the early-return checks
 *	req	the RPC request
 *	cr	asserted to be NULL once past the early-return checks; a
 *		fresh cred is obtained with crget() and filled in by
 *		sec_svc_getcred()
 *	rv	optional; set to 0 on entry and to 1 when the credential
 *		could not be obtained (svcerr_badcred() has then been sent)
 *
 * Operations are dispatched in order through rfsv4disptab while cs.cont
 * stays set.  It is cleared here as soon as an operation leaves a status
 * other than NFS4_OK in *cs.statusp, or when the opcode is out of range.
 * When processing stops early, the results array is reallocated to hold
 * only the operations actually executed.
 *
 * Per-op and per-export kstats are updated around each dispatch, and a
 * hold is taken on the exportinfo saved in each result so that the
 * response-side kstat update can still use it later.
 */
void
rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi,
    struct svc_req *req, cred_t *cr, int *rv)
{
	uint_t i;
	struct compound_state cs;
	nfs4_srv_t *nsrv4;
	nfs_export_t *ne = nfs_get_export();

	if (rv != NULL)
		*rv = 0;
	rfs4_init_compound_state(&cs);
	/*
	 * Form a reply tag by copying over the request tag.
	 */
	resp->tag.utf8string_val =
	    kmem_alloc(args->tag.utf8string_len, KM_SLEEP);
	resp->tag.utf8string_len = args->tag.utf8string_len;
	bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
	    resp->tag.utf8string_len);

	cs.statusp = &resp->status;
	cs.req = req;
	resp->array = NULL;
	resp->array_len = 0;

	/*
	 * XXX for now, minorversion should be zero
	 */
	if (args->minorversion != NFS4_MINORVERSION) {
		DTRACE_NFSV4_2(compound__start, struct compound_state *,
		    &cs, COMPOUND4args *, args);
		resp->status = NFS4ERR_MINOR_VERS_MISMATCH;
		DTRACE_NFSV4_2(compound__done, struct compound_state *,
		    &cs, COMPOUND4res *, resp);
		return;
	}

	/* An empty compound trivially succeeds. */
	if (args->array_len == 0) {
		resp->status = NFS4_OK;
		return;
	}

	ASSERT(exi == NULL);
	ASSERT(cr == NULL);

	cr = crget();
	ASSERT(cr != NULL);

	/* A return of 0 means no usable credential: reject the request. */
	if (sec_svc_getcred(req, cr, &cs.principal, &cs.nfsflavor) == 0) {
		DTRACE_NFSV4_2(compound__start, struct compound_state *,
		    &cs, COMPOUND4args *, args);
		crfree(cr);
		DTRACE_NFSV4_2(compound__done, struct compound_state *,
		    &cs, COMPOUND4res *, resp);
		svcerr_badcred(req->rq_xprt);
		if (rv != NULL)
			*rv = 1;
		return;
	}
	/* One (zeroed) result slot per requested operation. */
	resp->array_len = args->array_len;
	resp->array = kmem_zalloc(args->array_len * sizeof (nfs_resop4),
	    KM_SLEEP);

	cs.basecr = cr;
	nsrv4 = zone_getspecific(rfs4_zone_key, curzone);

	DTRACE_NFSV4_2(compound__start, struct compound_state *, &cs,
	    COMPOUND4args *, args);

	/*
	 * For now, NFS4 compound processing must be protected by
	 * exported_lock because it can access more than one exportinfo
	 * per compound and share/unshare can now change multiple
	 * exinfo structs.  The NFS2/3 code only refs 1 exportinfo
	 * per proc (excluding public exinfo), and exi_count design
	 * is sufficient to protect concurrent execution of NFS2/3
	 * ops along with unexport.  This lock will be removed as
	 * part of the NFSv4 phase 2 namespace redesign work.
	 */
	rw_enter(&ne->exported_lock, RW_READER);

	/*
	 * If this is the first compound we've seen, we need to start all
	 * new instances' grace periods.
	 */
	if (nsrv4->seen_first_compound == 0) {
		rfs4_grace_start_new(nsrv4);
		/*
		 * This must be set after rfs4_grace_start_new(), otherwise
		 * another thread could proceed past here before the former
		 * is finished.
		 */
		nsrv4->seen_first_compound = 1;
	}

	for (i = 0; i < args->array_len && cs.cont; i++) {
		nfs_argop4 *argop;
		nfs_resop4 *resop;
		uint_t op;

		argop = &args->array[i];
		resop = &resp->array[i];
		resop->resop = argop->argop;
		op = (uint_t)resop->resop;

		if (op < rfsv4disp_cnt) {
			kstat_t *ksp = rfsprocio_v4_ptr[op];
			kstat_t *exi_ksp = NULL;

			/*
			 * Count the individual ops here; NULL and COMPOUND
			 * are counted in common_dispatch()
			 */
			rfsproccnt_v4_ptr[op].value.ui64++;

			if (ksp != NULL) {
				mutex_enter(ksp->ks_lock);
				kstat_runq_enter(KSTAT_IO_PTR(ksp));
				mutex_exit(ksp->ks_lock);
			}

			/*
			 * Pick the export this op is accounted against
			 * before it runs: the current filehandle's export
			 * for NFS4_OP_CFH ops, the saved filehandle's
			 * export for NFS4_OP_SFH ops, none otherwise.
			 */
			switch (rfsv4disptab[op].op_type) {
			case NFS4_OP_CFH:
				resop->exi = cs.exi;
				break;
			case NFS4_OP_SFH:
				resop->exi = cs.saved_exi;
				break;
			default:
				ASSERT(resop->exi == NULL);
				break;
			}

			if (resop->exi != NULL) {
				exi_ksp = NULL;
				if (resop->exi->exi_kstats != NULL) {
					exi_ksp = exp_kstats_v4(
					    resop->exi->exi_kstats, op);
				}
				if (exi_ksp != NULL) {
					mutex_enter(exi_ksp->ks_lock);
					kstat_runq_enter(KSTAT_IO_PTR(exi_ksp));
					mutex_exit(exi_ksp->ks_lock);
				}
			}

			NFS4_DEBUG(rfs4_debug > 1,
			    (CE_NOTE, "Executing %s", rfs4_op_string[op]));
			(*rfsv4disptab[op].dis_proc)(argop, resop, req, &cs);
			NFS4_DEBUG(rfs4_debug > 1, (CE_NOTE, "%s returned %d",
			    rfs4_op_string[op], *cs.statusp));
			/* Any failure ends processing of the compound. */
			if (*cs.statusp != NFS4_OK)
				cs.cont = FALSE;

			/*
			 * NFS4_OP_POSTCFH ops are accounted against the
			 * export that is current after a successful
			 * dispatch.
			 */
			if (rfsv4disptab[op].op_type == NFS4_OP_POSTCFH &&
			    *cs.statusp == NFS4_OK &&
			    (resop->exi = cs.exi) != NULL) {
				exi_ksp = NULL;
				if (resop->exi->exi_kstats != NULL) {
					exi_ksp = exp_kstats_v4(
					    resop->exi->exi_kstats, op);
				}
			}

			/*
			 * Record the received request against the export.
			 * No runq enter was done for NFS4_OP_POSTCFH ops
			 * (their export was unknown before dispatch), so
			 * the matching runq exit is skipped for them.  With
			 * no per-export kstat, forget the export so no hold
			 * is taken on it below.
			 */
			if (exi_ksp != NULL) {
				mutex_enter(exi_ksp->ks_lock);
				KSTAT_IO_PTR(exi_ksp)->nwritten +=
				    argop->opsize;
				KSTAT_IO_PTR(exi_ksp)->writes++;
				if (rfsv4disptab[op].op_type != NFS4_OP_POSTCFH)
					kstat_runq_exit(KSTAT_IO_PTR(exi_ksp));
				mutex_exit(exi_ksp->ks_lock);
			} else {
				resop->exi = NULL;
			}

			if (ksp != NULL) {
				mutex_enter(ksp->ks_lock);
				kstat_runq_exit(KSTAT_IO_PTR(ksp));
				mutex_exit(ksp->ks_lock);
			}
		} else {
			/*
			 * This is effectively dead code since XDR code
			 * will have already returned BADXDR if op doesn't
			 * decode to legal value.  This only done for a
			 * day when XDR code doesn't verify v4 opcodes.
			 */
			op = OP_ILLEGAL;
			rfsproccnt_v4_ptr[OP_ILLEGAL_IDX].value.ui64++;

			rfs4_op_illegal(argop, resop, req, &cs);
			cs.cont = FALSE;
		}

		/*
		 * The exi saved in the resop to be used for kstats update
		 * once the opsize is calculated during XDR response encoding.
		 * Put a hold on resop->exi so that it can't be destroyed.
		 */
		if (resop->exi != NULL)
			exi_hold(resop->exi);

		/*
		 * If not at last op, and if we are to stop, then
		 * compact the results array.
		 */
		if ((i + 1) < args->array_len && !cs.cont) {
			nfs_resop4 *new_res = kmem_alloc(
			    (i + 1) * sizeof (nfs_resop4), KM_SLEEP);
			bcopy(resp->array,
			    new_res, (i + 1) * sizeof (nfs_resop4));
			kmem_free(resp->array,
			    args->array_len * sizeof (nfs_resop4));

			resp->array_len =  i + 1;
			resp->array = new_res;
		}
	}

	rw_exit(&ne->exported_lock);

	/*
	 * clear exportinfo and vnode fields from compound_state before dtrace
	 * probe, to avoid tracing residual values for path and share path.
	 */
	if (cs.vp)
		VN_RELE(cs.vp);
	if (cs.saved_vp)
		VN_RELE(cs.saved_vp);
	cs.exi = cs.saved_exi = NULL;
	cs.vp = cs.saved_vp = NULL;

	DTRACE_NFSV4_2(compound__done, struct compound_state *, &cs,
	    COMPOUND4res *, resp);

	/* Release the remaining per-compound resources. */
	if (cs.saved_fh.nfs_fh4_val)
		kmem_free(cs.saved_fh.nfs_fh4_val, NFS4_FHSIZE);

	if (cs.basecr)
		crfree(cs.basecr);
	if (cs.cr)
		crfree(cs.cr);
	/*
	 * done with this compound request, free the label
	 */

	if (req->rq_label != NULL) {
		kmem_free(req->rq_label, sizeof (bslabel_t));
		req->rq_label = NULL;
	}
}
6118
6119 /*
6120 * XXX because of what appears to be duplicate calls to rfs4_compound_free
6121 * XXX zero out the tag and array values. Need to investigate why the
6122 * XXX calls occur, but at least prevent the panic for now.
6123 */
6124 void
6125 rfs4_compound_free(COMPOUND4res *resp)
6126 {
6127 uint_t i;
6128
6129 if (resp->tag.utf8string_val) {
6130 UTF8STRING_FREE(resp->tag)
6131 }
6132
6133 for (i = 0; i < resp->array_len; i++) {
6134 nfs_resop4 *resop;
6135 uint_t op;
6136
6137 resop = &resp->array[i];
6138 op = (uint_t)resop->resop;
6139 if (op < rfsv4disp_cnt) {
6140 (*rfsv4disptab[op].dis_resfree)(resop);
6141 }
6142 }
6143 if (resp->array != NULL) {
6144 kmem_free(resp->array, resp->array_len * sizeof (nfs_resop4));
6145 }
6146 }
6147
6148 /*
6149 * Process the value of the compound request rpc flags, as a bit-AND
6150 * of the individual per-op flags (idempotent, allowork, publicfh_ok)
6151 */
6152 void
6153 rfs4_compound_flagproc(COMPOUND4args *args, int *flagp)
6154 {
6155 int i;
6156 int flag = RPC_ALL;
6157
6158 for (i = 0; flag && i < args->array_len; i++) {
6159 uint_t op;
6160
6161 op = (uint_t)args->array[i].argop;
6162
6163 if (op < rfsv4disp_cnt)
6164 flag &= rfsv4disptab[op].dis_flags;
6165 else
6166 flag = 0;
6167 }
6168 *flagp = flag;
6169 }
6170
6171 /*
6172 * Update the kstats for the received requests.
6173 * Note: writes/nwritten are used to hold count and nbytes of requests received.
6174 *
6175 * Per export request statistics need to be updated during the compound request
6176 * processing (rfs4_compound()) as that is where it is known which exportinfo to
6177 * associate the kstats with.
6178 */
6179 void
6180 rfs4_compound_kstat_args(COMPOUND4args *args)
6181 {
6182 int i;
6183
6184 for (i = 0; i < args->array_len; i++) {
6185 uint_t op = (uint_t)args->array[i].argop;
6186
6187 if (op < rfsv4disp_cnt) {
6188 kstat_t *ksp = rfsprocio_v4_ptr[op];
6189
6190 if (ksp != NULL) {
6191 mutex_enter(ksp->ks_lock);
6192 KSTAT_IO_PTR(ksp)->nwritten +=
6193 args->array[i].opsize;
6194 KSTAT_IO_PTR(ksp)->writes++;
6195 mutex_exit(ksp->ks_lock);
6196 }
6197 }
6198 }
6199 }
6200
6201 /*
6202 * Update the kstats for the sent responses.
6203 * Note: reads/nread are used to hold count and nbytes of responses sent.
6204 *
6205 * Per export response statistics cannot be updated until here, after the
6206 * response send has generated the opsize (bytes sent) in the XDR encoding.
6207 * The exportinfo with which the kstats should be associated is thus saved
6208 * in the response structure (by rfs4_compound()) for use here. A hold is
6209 * placed on the exi to ensure it cannot be deleted before use. This hold
6210 * is released, and the exi set to NULL, here.
6211 */
6212 void
6213 rfs4_compound_kstat_res(COMPOUND4res *res)
6214 {
6215 int i;
6216 nfs_export_t *ne = nfs_get_export();
6217
6218 for (i = 0; i < res->array_len; i++) {
6219 uint_t op = (uint_t)res->array[i].resop;
6220
6221 if (op < rfsv4disp_cnt) {
6222 kstat_t *ksp = rfsprocio_v4_ptr[op];
6223 struct exportinfo *exi = res->array[i].exi;
6224
6225 if (ksp != NULL) {
6226 mutex_enter(ksp->ks_lock);
6227 KSTAT_IO_PTR(ksp)->nread +=
6228 res->array[i].opsize;
6229 KSTAT_IO_PTR(ksp)->reads++;
6230 mutex_exit(ksp->ks_lock);
6231 }
6232
6233 if (exi != NULL) {
6234 kstat_t *exi_ksp = NULL;
6235
6236 rw_enter(&ne->exported_lock, RW_READER);
6237
6238 if (exi->exi_kstats != NULL) {
6239 /*CSTYLED*/
6240 exi_ksp = exp_kstats_v4(exi->exi_kstats, op);
6241 }
6242 if (exi_ksp != NULL) {
6243 mutex_enter(exi_ksp->ks_lock);
6244 KSTAT_IO_PTR(exi_ksp)->nread +=
6245 res->array[i].opsize;
6246 KSTAT_IO_PTR(exi_ksp)->reads++;
6247 mutex_exit(exi_ksp->ks_lock);
6248 }
6249
6250 exi_rele(&exi);
6251 res->array[i].exi = NULL;
6252 rw_exit(&ne->exported_lock);
6253 }
6254 }
6255 }
6256 }
6257
6258 nfsstat4
6259 rfs4_client_sysid(rfs4_client_t *cp, sysid_t *sp)
6260 {
6261 nfsstat4 e;
6262
6263 rfs4_dbe_lock(cp->rc_dbe);
6264
6265 if (cp->rc_sysidt != LM_NOSYSID) {
6266 *sp = cp->rc_sysidt;
6267 e = NFS4_OK;
6268
6269 } else if ((cp->rc_sysidt = lm_alloc_sysidt()) != LM_NOSYSID) {
6270 *sp = cp->rc_sysidt;
6271 e = NFS4_OK;
6272
6273 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
6274 "rfs4_client_sysid: allocated 0x%x\n", *sp));
6275 } else
6276 e = NFS4ERR_DELAY;
6277
6278 rfs4_dbe_unlock(cp->rc_dbe);
6279 return (e);
6280 }
6281
#if defined(DEBUG) && ! defined(lint)
/*
 * Debug helper: log a one-line description of a record lock request.
 *
 *	str		caller-supplied prefix for the message
 *	operation	the fcntl-style command (F_GETLK, F_SETLK,
 *			F_SETLK_NBMAND); anything else prints as F_UNKNOWN
 *	flk		the lock being described; l_whence must be 0
 *
 * A zero l_len is printed as all ones.
 */
static void
lock_print(char *str, int operation, struct flock64 *flk)
{
	char *op, *type;

	switch (operation) {
	case F_GETLK: op = "F_GETLK";
		break;
	case F_SETLK: op = "F_SETLK";
		break;
	case F_SETLK_NBMAND: op = "F_SETLK_NBMAND";
		break;
	default: op = "F_UNKNOWN";
		break;
	}
	switch (flk->l_type) {
	case F_UNLCK: type = "F_UNLCK";
		break;
	case F_RDLCK: type = "F_RDLCK";
		break;
	case F_WRLCK: type = "F_WRLCK";
		break;
	default: type = "F_UNKNOWN";
		break;
	}

	ASSERT(flk->l_whence == 0);
	cmn_err(CE_NOTE, "%s:  %s, type = %s, off = %llx len = %llx pid = %d",
	    str, op, type, (longlong_t)flk->l_start,
	    flk->l_len ? (longlong_t)flk->l_len : ~0LL, flk->l_pid);
}

/*
 * Call lock_print(s, t, f) when d is non-zero.  Wrapped in do/while (0)
 * so the macro behaves as a single statement and cannot capture an
 * "else" that follows it at the call site.
 */
#define	LOCK_PRINT(d, s, t, f)			\
	do {					\
		if (d)				\
			lock_print(s, t, f);	\
	} while (0)
#else
/* Non-debug builds: lock tracing compiles away to nothing. */
#define	LOCK_PRINT(d, s, t, f)
#endif
6318
6319 /*ARGSUSED*/
6320 static bool_t
6321 creds_ok(cred_set_t cr_set, struct svc_req *req, struct compound_state *cs)
6322 {
6323 return (TRUE);
6324 }
6325
6326 /*
6327 * Look up the pathname using the vp in cs as the directory vnode.
6328 * cs->vp will be the vnode for the file on success
6329 */
6330
6331 static nfsstat4
6332 rfs4_lookup(component4 *component, struct svc_req *req,
6333 struct compound_state *cs)
6334 {
6335 char *nm;
6336 uint32_t len;
6337 nfsstat4 status;
6338 struct sockaddr *ca;
6339 char *name;
6340
6341 if (cs->vp == NULL) {
6342 return (NFS4ERR_NOFILEHANDLE);
6343 }
6344 if (cs->vp->v_type != VDIR) {
6345 return (NFS4ERR_NOTDIR);
6346 }
6347
6348 status = utf8_dir_verify(component);
6349 if (status != NFS4_OK)
6350 return (status);
6351
6352 nm = utf8_to_fn(component, &len, NULL);
6353 if (nm == NULL) {
6354 return (NFS4ERR_INVAL);
6355 }
6356
6357 if (len > MAXNAMELEN) {
6358 kmem_free(nm, len);
6359 return (NFS4ERR_NAMETOOLONG);
6360 }
6361
6362 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6363 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6364 MAXPATHLEN + 1);
6365
6366 if (name == NULL) {
6367 kmem_free(nm, len);
6368 return (NFS4ERR_INVAL);
6369 }
6370
6371 status = do_rfs4_op_lookup(name, req, cs);
6372
6373 if (name != nm)
6374 kmem_free(name, MAXPATHLEN + 1);
6375
6376 kmem_free(nm, len);
6377
6378 return (status);
6379 }
6380
6381 static nfsstat4
6382 rfs4_lookupfile(component4 *component, struct svc_req *req,
6383 struct compound_state *cs, uint32_t access, change_info4 *cinfo)
6384 {
6385 nfsstat4 status;
6386 vnode_t *dvp = cs->vp;
6387 vattr_t bva, ava, fva;
6388 int error;
6389
6390 /* Get "before" change value */
6391 bva.va_mask = AT_CTIME|AT_SEQ;
6392 error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6393 if (error)
6394 return (puterrno4(error));
6395
6396 /* rfs4_lookup may VN_RELE directory */
6397 VN_HOLD(dvp);
6398
6399 status = rfs4_lookup(component, req, cs);
6400 if (status != NFS4_OK) {
6401 VN_RELE(dvp);
6402 return (status);
6403 }
6404
6405 /*
6406 * Get "after" change value, if it fails, simply return the
6407 * before value.
6408 */
6409 ava.va_mask = AT_CTIME|AT_SEQ;
6410 if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6411 ava.va_ctime = bva.va_ctime;
6412 ava.va_seq = 0;
6413 }
6414 VN_RELE(dvp);
6415
6416 /*
6417 * Validate the file is a file
6418 */
6419 fva.va_mask = AT_TYPE|AT_MODE;
6420 error = VOP_GETATTR(cs->vp, &fva, 0, cs->cr, NULL);
6421 if (error)
6422 return (puterrno4(error));
6423
6424 if (fva.va_type != VREG) {
6425 if (fva.va_type == VDIR)
6426 return (NFS4ERR_ISDIR);
6427 if (fva.va_type == VLNK)
6428 return (NFS4ERR_SYMLINK);
6429 return (NFS4ERR_INVAL);
6430 }
6431
6432 NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime);
6433 NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6434
6435 /*
6436 * It is undefined if VOP_LOOKUP will change va_seq, so
6437 * cinfo.atomic = TRUE only if we have
6438 * non-zero va_seq's, and they have not changed.
6439 */
6440 if (bva.va_seq && ava.va_seq && ava.va_seq == bva.va_seq)
6441 cinfo->atomic = TRUE;
6442 else
6443 cinfo->atomic = FALSE;
6444
6445 /* Check for mandatory locking */
6446 cs->mandlock = MANDLOCK(cs->vp, fva.va_mode);
6447 return (check_open_access(access, cs, req));
6448 }
6449
6450 static nfsstat4
6451 create_vnode(vnode_t *dvp, char *nm, vattr_t *vap, createmode4 mode,
6452 cred_t *cr, vnode_t **vpp, bool_t *created)
6453 {
6454 int error;
6455 nfsstat4 status = NFS4_OK;
6456 vattr_t va;
6457
6458 tryagain:
6459
6460 /*
6461 * The file open mode used is VWRITE. If the client needs
6462 * some other semantic, then it should do the access checking
6463 * itself. It would have been nice to have the file open mode
6464 * passed as part of the arguments.
6465 */
6466
6467 *created = TRUE;
6468 error = VOP_CREATE(dvp, nm, vap, EXCL, VWRITE, vpp, cr, 0, NULL, NULL);
6469
6470 if (error) {
6471 *created = FALSE;
6472
6473 /*
6474 * If we got something other than file already exists
6475 * then just return this error. Otherwise, we got
6476 * EEXIST. If we were doing a GUARDED create, then
6477 * just return this error. Otherwise, we need to
6478 * make sure that this wasn't a duplicate of an
6479 * exclusive create request.
6480 *
6481 * The assumption is made that a non-exclusive create
6482 * request will never return EEXIST.
6483 */
6484
6485 if (error != EEXIST || mode == GUARDED4) {
6486 status = puterrno4(error);
6487 return (status);
6488 }
6489 error = VOP_LOOKUP(dvp, nm, vpp, NULL, 0, NULL, cr,
6490 NULL, NULL, NULL);
6491
6492 if (error) {
6493 /*
6494 * We couldn't find the file that we thought that
6495 * we just created. So, we'll just try creating
6496 * it again.
6497 */
6498 if (error == ENOENT)
6499 goto tryagain;
6500
6501 status = puterrno4(error);
6502 return (status);
6503 }
6504
6505 if (mode == UNCHECKED4) {
6506 /* existing object must be regular file */
6507 if ((*vpp)->v_type != VREG) {
6508 if ((*vpp)->v_type == VDIR)
6509 status = NFS4ERR_ISDIR;
6510 else if ((*vpp)->v_type == VLNK)
6511 status = NFS4ERR_SYMLINK;
6512 else
6513 status = NFS4ERR_INVAL;
6514 VN_RELE(*vpp);
6515 return (status);
6516 }
6517
6518 return (NFS4_OK);
6519 }
6520
6521 /* Check for duplicate request */
6522 va.va_mask = AT_MTIME;
6523 error = VOP_GETATTR(*vpp, &va, 0, cr, NULL);
6524 if (!error) {
6525 /* We found the file */
6526 const timestruc_t *mtime = &vap->va_mtime;
6527
6528 if (va.va_mtime.tv_sec != mtime->tv_sec ||
6529 va.va_mtime.tv_nsec != mtime->tv_nsec) {
6530 /* but its not our creation */
6531 VN_RELE(*vpp);
6532 return (NFS4ERR_EXIST);
6533 }
6534 *created = TRUE; /* retrans of create == created */
6535 return (NFS4_OK);
6536 }
6537 VN_RELE(*vpp);
6538 return (NFS4ERR_EXIST);
6539 }
6540
6541 return (NFS4_OK);
6542 }
6543
6544 static nfsstat4
6545 check_open_access(uint32_t access, struct compound_state *cs,
6546 struct svc_req *req)
6547 {
6548 int error;
6549 vnode_t *vp;
6550 bool_t readonly;
6551 cred_t *cr = cs->cr;
6552
6553 /* For now we don't allow mandatory locking as per V2/V3 */
6554 if (cs->access == CS_ACCESS_DENIED || cs->mandlock) {
6555 return (NFS4ERR_ACCESS);
6556 }
6557
6558 vp = cs->vp;
6559 ASSERT(cr != NULL && vp->v_type == VREG);
6560
6561 /*
6562 * If the file system is exported read only and we are trying
6563 * to open for write, then return NFS4ERR_ROFS
6564 */
6565
6566 readonly = rdonly4(req, cs);
6567
6568 if ((access & OPEN4_SHARE_ACCESS_WRITE) && readonly)
6569 return (NFS4ERR_ROFS);
6570
6571 if (access & OPEN4_SHARE_ACCESS_READ) {
6572 if ((VOP_ACCESS(vp, VREAD, 0, cr, NULL) != 0) &&
6573 (VOP_ACCESS(vp, VEXEC, 0, cr, NULL) != 0)) {
6574 return (NFS4ERR_ACCESS);
6575 }
6576 }
6577
6578 if (access & OPEN4_SHARE_ACCESS_WRITE) {
6579 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
6580 if (error)
6581 return (NFS4ERR_ACCESS);
6582 }
6583
6584 return (NFS4_OK);
6585 }
6586
6587 static nfsstat4
6588 rfs4_createfile(OPEN4args *args, struct svc_req *req, struct compound_state *cs,
6589 change_info4 *cinfo, bitmap4 *attrset, clientid4 clientid)
6590 {
6591 struct nfs4_svgetit_arg sarg;
6592 struct nfs4_ntov_table ntov;
6593
6594 bool_t ntov_table_init = FALSE;
6595 struct statvfs64 sb;
6596 nfsstat4 status;
6597 vnode_t *vp;
6598 vattr_t bva, ava, iva, cva, *vap;
6599 vnode_t *dvp;
6600 timespec32_t *mtime;
6601 char *nm = NULL;
6602 uint_t buflen;
6603 bool_t created;
6604 bool_t setsize = FALSE;
6605 len_t reqsize;
6606 int error;
6607 bool_t trunc;
6608 caller_context_t ct;
6609 component4 *component;
6610 bslabel_t *clabel;
6611 struct sockaddr *ca;
6612 char *name = NULL;
6613
6614 sarg.sbp = &sb;
6615 sarg.is_referral = B_FALSE;
6616
6617 dvp = cs->vp;
6618
6619 /* Check if the file system is read only */
6620 if (rdonly4(req, cs))
6621 return (NFS4ERR_ROFS);
6622
6623 /* check the label of including directory */
6624 if (is_system_labeled()) {
6625 ASSERT(req->rq_label != NULL);
6626 clabel = req->rq_label;
6627 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
6628 "got client label from request(1)",
6629 struct svc_req *, req);
6630 if (!blequal(&l_admin_low->tsl_label, clabel)) {
6631 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
6632 cs->exi)) {
6633 return (NFS4ERR_ACCESS);
6634 }
6635 }
6636 }
6637
6638 /*
6639 * Get the last component of path name in nm. cs will reference
6640 * the including directory on success.
6641 */
6642 component = &args->open_claim4_u.file;
6643 status = utf8_dir_verify(component);
6644 if (status != NFS4_OK)
6645 return (status);
6646
6647 nm = utf8_to_fn(component, &buflen, NULL);
6648
6649 if (nm == NULL)
6650 return (NFS4ERR_RESOURCE);
6651
6652 if (buflen > MAXNAMELEN) {
6653 kmem_free(nm, buflen);
6654 return (NFS4ERR_NAMETOOLONG);
6655 }
6656
6657 bva.va_mask = AT_TYPE|AT_CTIME|AT_SEQ;
6658 error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6659 if (error) {
6660 kmem_free(nm, buflen);
6661 return (puterrno4(error));
6662 }
6663
6664 if (bva.va_type != VDIR) {
6665 kmem_free(nm, buflen);
6666 return (NFS4ERR_NOTDIR);
6667 }
6668
6669 NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime)
6670
6671 switch (args->mode) {
6672 case GUARDED4:
6673 /*FALLTHROUGH*/
6674 case UNCHECKED4:
6675 nfs4_ntov_table_init(&ntov);
6676 ntov_table_init = TRUE;
6677
6678 *attrset = 0;
6679 status = do_rfs4_set_attrs(attrset,
6680 &args->createhow4_u.createattrs,
6681 cs, &sarg, &ntov, NFS4ATTR_SETIT);
6682
6683 if (status == NFS4_OK && (sarg.vap->va_mask & AT_TYPE) &&
6684 sarg.vap->va_type != VREG) {
6685 if (sarg.vap->va_type == VDIR)
6686 status = NFS4ERR_ISDIR;
6687 else if (sarg.vap->va_type == VLNK)
6688 status = NFS4ERR_SYMLINK;
6689 else
6690 status = NFS4ERR_INVAL;
6691 }
6692
6693 if (status != NFS4_OK) {
6694 kmem_free(nm, buflen);
6695 nfs4_ntov_table_free(&ntov, &sarg);
6696 *attrset = 0;
6697 return (status);
6698 }
6699
6700 vap = sarg.vap;
6701 vap->va_type = VREG;
6702 vap->va_mask |= AT_TYPE;
6703
6704 if ((vap->va_mask & AT_MODE) == 0) {
6705 vap->va_mask |= AT_MODE;
6706 vap->va_mode = (mode_t)0600;
6707 }
6708
6709 if (vap->va_mask & AT_SIZE) {
6710
6711 /* Disallow create with a non-zero size */
6712
6713 if ((reqsize = sarg.vap->va_size) != 0) {
6714 kmem_free(nm, buflen);
6715 nfs4_ntov_table_free(&ntov, &sarg);
6716 *attrset = 0;
6717 return (NFS4ERR_INVAL);
6718 }
6719 setsize = TRUE;
6720 }
6721 break;
6722
6723 case EXCLUSIVE4:
6724 /* prohibit EXCL create of named attributes */
6725 if (dvp->v_flag & V_XATTRDIR) {
6726 kmem_free(nm, buflen);
6727 *attrset = 0;
6728 return (NFS4ERR_INVAL);
6729 }
6730
6731 cva.va_mask = AT_TYPE | AT_MTIME | AT_MODE;
6732 cva.va_type = VREG;
6733 /*
6734 * Ensure no time overflows. Assumes underlying
6735 * filesystem supports at least 32 bits.
6736 * Truncate nsec to usec resolution to allow valid
6737 * compares even if the underlying filesystem truncates.
6738 */
6739 mtime = (timespec32_t *)&args->createhow4_u.createverf;
6740 cva.va_mtime.tv_sec = mtime->tv_sec % TIME32_MAX;
6741 cva.va_mtime.tv_nsec = (mtime->tv_nsec / 1000) * 1000;
6742 cva.va_mode = (mode_t)0;
6743 vap = &cva;
6744
6745 /*
6746 * For EXCL create, attrset is set to the server attr
6747 * used to cache the client's verifier.
6748 */
6749 *attrset = FATTR4_TIME_MODIFY_MASK;
6750 break;
6751 }
6752
6753 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6754 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6755 MAXPATHLEN + 1);
6756
6757 if (name == NULL) {
6758 kmem_free(nm, buflen);
6759 return (NFS4ERR_SERVERFAULT);
6760 }
6761
6762 status = create_vnode(dvp, name, vap, args->mode,
6763 cs->cr, &vp, &created);
6764 if (nm != name)
6765 kmem_free(name, MAXPATHLEN + 1);
6766 kmem_free(nm, buflen);
6767
6768 if (status != NFS4_OK) {
6769 if (ntov_table_init)
6770 nfs4_ntov_table_free(&ntov, &sarg);
6771 *attrset = 0;
6772 return (status);
6773 }
6774
6775 trunc = (setsize && !created);
6776
6777 if (args->mode != EXCLUSIVE4) {
6778 bitmap4 createmask = args->createhow4_u.createattrs.attrmask;
6779
6780 /*
6781 * True verification that object was created with correct
6782 * attrs is impossible. The attrs could have been changed
6783 * immediately after object creation. If attributes did
6784 * not verify, the only recourse for the server is to
6785 * destroy the object. Maybe if some attrs (like gid)
6786 * are set incorrectly, the object should be destroyed;
6787 * however, seems bad as a default policy. Do we really
6788 * want to destroy an object over one of the times not
6789 * verifying correctly? For these reasons, the server
6790 * currently sets bits in attrset for createattrs
6791 * that were set; however, no verification is done.
6792 *
6793 * vmask_to_nmask accounts for vattr bits set on create
6794 * [do_rfs4_set_attrs() only sets resp bits for
6795 * non-vattr/vfs bits.]
6796 * Mask off any bits we set by default so as not to return
6797 * more attrset bits than were requested in createattrs
6798 */
6799 if (created) {
6800 nfs4_vmask_to_nmask(sarg.vap->va_mask, attrset);
6801 *attrset &= createmask;
6802 } else {
6803 /*
6804 * We did not create the vnode (we tried but it
6805 * already existed). In this case, the only createattr
6806 * that the spec allows the server to set is size,
6807 * and even then, it can only be set if it is 0.
6808 */
6809 *attrset = 0;
6810 if (trunc)
6811 *attrset = FATTR4_SIZE_MASK;
6812 }
6813 }
6814 if (ntov_table_init)
6815 nfs4_ntov_table_free(&ntov, &sarg);
6816
6817 /*
6818 * Get the initial "after" sequence number, if it fails,
6819 * set to zero, time to before.
6820 */
6821 iva.va_mask = AT_CTIME|AT_SEQ;
6822 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL)) {
6823 iva.va_seq = 0;
6824 iva.va_ctime = bva.va_ctime;
6825 }
6826
6827 /*
6828 * create_vnode attempts to create the file exclusive,
6829 * if it already exists the VOP_CREATE will fail and
6830 * may not increase va_seq. It is atomic if
6831 * we haven't changed the directory, but if it has changed
6832 * we don't know what changed it.
6833 */
6834 if (!created) {
6835 if (bva.va_seq && iva.va_seq &&
6836 bva.va_seq == iva.va_seq)
6837 cinfo->atomic = TRUE;
6838 else
6839 cinfo->atomic = FALSE;
6840 NFS4_SET_FATTR4_CHANGE(cinfo->after, iva.va_ctime);
6841 } else {
6842 /*
6843 * The entry was created, we need to sync the
6844 * directory metadata.
6845 */
6846 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
6847
6848 /*
6849 * Get "after" change value, if it fails, simply return the
6850 * before value.
6851 */
6852 ava.va_mask = AT_CTIME|AT_SEQ;
6853 if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6854 ava.va_ctime = bva.va_ctime;
6855 ava.va_seq = 0;
6856 }
6857
6858 NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6859
6860 /*
6861 * The cinfo->atomic = TRUE only if we have
6862 * non-zero va_seq's, and it has incremented by exactly one
6863 * during the create_vnode and it didn't
6864 * change during the VOP_FSYNC.
6865 */
6866 if (bva.va_seq && iva.va_seq && ava.va_seq &&
6867 iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
6868 cinfo->atomic = TRUE;
6869 else
6870 cinfo->atomic = FALSE;
6871 }
6872
6873 /* Check for mandatory locking and that the size gets set. */
6874 cva.va_mask = AT_MODE;
6875 if (setsize)
6876 cva.va_mask |= AT_SIZE;
6877
6878 /* Assume the worst */
6879 cs->mandlock = TRUE;
6880
6881 if (VOP_GETATTR(vp, &cva, 0, cs->cr, NULL) == 0) {
6882 cs->mandlock = MANDLOCK(cs->vp, cva.va_mode);
6883
6884 /*
6885 * Truncate the file if necessary; this would be
6886 * the case for create over an existing file.
6887 */
6888
6889 if (trunc) {
6890 int in_crit = 0;
6891 rfs4_file_t *fp;
6892 nfs4_srv_t *nsrv4;
6893 bool_t create = FALSE;
6894
6895 /*
6896 * We are writing over an existing file.
6897 * Check to see if we need to recall a delegation.
6898 */
6899 nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
6900 rfs4_hold_deleg_policy(nsrv4);
6901 if ((fp = rfs4_findfile(vp, NULL, &create)) != NULL) {
6902 if (rfs4_check_delegated_byfp(FWRITE, fp,
6903 (reqsize == 0), FALSE, FALSE, &clientid)) {
6904 rfs4_file_rele(fp);
6905 rfs4_rele_deleg_policy(nsrv4);
6906 VN_RELE(vp);
6907 *attrset = 0;
6908 return (NFS4ERR_DELAY);
6909 }
6910 rfs4_file_rele(fp);
6911 }
6912 rfs4_rele_deleg_policy(nsrv4);
6913
6914 if (nbl_need_check(vp)) {
6915 in_crit = 1;
6916
6917 ASSERT(reqsize == 0);
6918
6919 nbl_start_crit(vp, RW_READER);
6920 if (nbl_conflict(vp, NBL_WRITE, 0,
6921 cva.va_size, 0, NULL)) {
6922 in_crit = 0;
6923 nbl_end_crit(vp);
6924 VN_RELE(vp);
6925 *attrset = 0;
6926 return (NFS4ERR_ACCESS);
6927 }
6928 }
6929 ct.cc_sysid = 0;
6930 ct.cc_pid = 0;
6931 ct.cc_caller_id = nfs4_srv_caller_id;
6932 ct.cc_flags = CC_DONTBLOCK;
6933
6934 cva.va_mask = AT_SIZE;
6935 cva.va_size = reqsize;
6936 (void) VOP_SETATTR(vp, &cva, 0, cs->cr, &ct);
6937 if (in_crit)
6938 nbl_end_crit(vp);
6939 }
6940 }
6941
6942 error = makefh4(&cs->fh, vp, cs->exi);
6943
6944 /*
6945 * Force modified data and metadata out to stable storage.
6946 */
6947 (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
6948
6949 if (error) {
6950 VN_RELE(vp);
6951 *attrset = 0;
6952 return (puterrno4(error));
6953 }
6954
6955 /* if parent dir is attrdir, set namedattr fh flag */
6956 if (dvp->v_flag & V_XATTRDIR)
6957 set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
6958
6959 if (cs->vp)
6960 VN_RELE(cs->vp);
6961
6962 cs->vp = vp;
6963
6964 /*
6965 * if we did not create the file, we will need to check
6966 * the access bits on the file
6967 */
6968
6969 if (!created) {
6970 if (setsize)
6971 args->share_access |= OPEN4_SHARE_ACCESS_WRITE;
6972 status = check_open_access(args->share_access, cs, req);
6973 if (status != NFS4_OK)
6974 *attrset = 0;
6975 }
6976 return (status);
6977 }
6978
/*
 * rfs4_do_open: common back end for the OPEN claim handlers.
 *
 * On entry cs->vp / cs->fh identify the file to be opened.  The routine:
 *   - finds (creating if needed) the file structure for cs->vp and the
 *     open state for the (oo, file) pair;
 *   - obtains the client's sysid;
 *   - if the request adds share access or deny bits, calls rfs4_share();
 *   - if rfs4_check_recall() reports a conflict, starts a recall, delays,
 *     and re-checks once;
 *   - calls VOP_OPEN() for a first open (unless deleg_cur is set),
 *     otherwise vn_open_upgrade() with only the newly added access bits;
 *   - updates the per-state and per-file access/deny bookkeeping, asks
 *     rfs4_grant_delegation() for a delegation, advances the stateid and
 *     fills in resp.
 *
 * Parameters:
 *   cs        - compound state; cs->vp, cs->fh and cs->cr are read,
 *               cs->deleg is written.
 *   req       - unused here.
 *   oo        - open owner on whose behalf the open is done.
 *   deleg     - delegation request type passed to rfs4_grant_delegation().
 *   access    - OPEN4_SHARE_ACCESS_* bits requested.
 *   deny      - OPEN4_SHARE_DENY_* bits requested.
 *   resp      - result; resp->status is always set, resp->stateid and
 *               resp->delegation are set on success.
 *   deleg_cur - non-zero to skip VOP_OPEN() and only do the upgrade.
 *
 * The result is reported only through resp->status; *cs->statusp is not
 * touched here.
 *
 * NOTE(review): the rf_file_rwlock taken by rfs4_findfile_withlock() is
 * explicitly dropped here only when the state lookup fails; presumably
 * rfs4_findstate_by_owner_file() drops it on success -- confirm.
 */
/*ARGSUSED*/
static void
rfs4_do_open(struct compound_state *cs, struct svc_req *req,
    rfs4_openowner_t *oo, delegreq_t deleg,
    uint32_t access, uint32_t deny,
    OPEN4res *resp, int deleg_cur)
{
	/* XXX Currently not using req */
	rfs4_state_t *sp;
	rfs4_file_t *fp;
	bool_t screate = TRUE;
	bool_t fcreate = TRUE;
	uint32_t open_a, share_a;
	uint32_t open_d, share_d;
	rfs4_deleg_state_t *dsp;
	sysid_t sysid;
	nfsstat4 status;
	caller_context_t ct;
	int fflags = 0;
	int recall = 0;
	int err;
	int first_open;

	/* get the file struct and hold a lock on it during initial open */
	fp = rfs4_findfile_withlock(cs->vp, &cs->fh, &fcreate);
	if (fp == NULL) {
		resp->status = NFS4ERR_RESOURCE;
		DTRACE_PROBE1(nfss__e__do__open1, nfsstat4, resp->status);
		return;
	}

	sp = rfs4_findstate_by_owner_file(oo, fp, &screate);
	if (sp == NULL) {
		resp->status = NFS4ERR_RESOURCE;
		DTRACE_PROBE1(nfss__e__do__open2, nfsstat4, resp->status);
		/* No need to keep any reference */
		rw_exit(&fp->rf_file_rwlock);
		rfs4_file_rele(fp);
		return;
	}

	/* try to get the sysid before continuing */
	if ((status = rfs4_client_sysid(oo->ro_client, &sysid)) != NFS4_OK) {
		resp->status = status;
		rfs4_file_rele(fp);
		/* Not a fully formed open; "close" it */
		if (screate == TRUE)
			rfs4_state_close(sp, FALSE, FALSE, cs->cr);
		rfs4_state_rele(sp);
		return;
	}

	/* Calculate the fflags for this OPEN. */
	if (access & OPEN4_SHARE_ACCESS_READ)
		fflags |= FREAD;
	if (access & OPEN4_SHARE_ACCESS_WRITE)
		fflags |= FWRITE;

	rfs4_dbe_lock(sp->rs_dbe);

	/*
	 * Calculate the new deny and access mode that this open is adding to
	 * the file for this open owner;
	 */
	open_d = (deny & ~sp->rs_open_deny);
	open_a = (access & ~sp->rs_open_access);

	/*
	 * Calculate the new share access and share deny modes that this open
	 * is adding to the file for this open owner;
	 */
	share_a = (access & ~sp->rs_share_access);
	share_d = (deny & ~sp->rs_share_deny);

	/* No access bits recorded on the state yet means this is a new open */
	first_open = (sp->rs_open_access & OPEN4_SHARE_ACCESS_BOTH) == 0;

	/*
	 * Check to see the client has already sent an open for this
	 * open owner on this file with the same share/deny modes.
	 * If so, we don't need to check for a conflict and we don't
	 * need to add another shrlock.  If not, then we need to
	 * check for conflicts in deny and access before checking for
	 * conflicts in delegation.  We don't want to recall a
	 * delegation based on an open that will eventually fail based
	 * on shares modes.
	 */

	if (share_a || share_d) {
		if ((err = rfs4_share(sp, access, deny)) != 0) {
			rfs4_dbe_unlock(sp->rs_dbe);
			/* the rfs4_share() result is returned as the status */
			resp->status = err;

			rfs4_file_rele(fp);
			/* Not a fully formed open; "close" it */
			if (screate == TRUE)
				rfs4_state_close(sp, FALSE, FALSE, cs->cr);
			rfs4_state_rele(sp);
			return;
		}
	}

	/* Lock order used throughout: state dbe first, then file dbe */
	rfs4_dbe_lock(fp->rf_dbe);

	/*
	 * Check to see if this file is delegated and if so, if a
	 * recall needs to be done.
	 */
	if (rfs4_check_recall(sp, access)) {
		/* Drop both locks across the recall and the delay */
		rfs4_dbe_unlock(fp->rf_dbe);
		rfs4_dbe_unlock(sp->rs_dbe);
		rfs4_recall_deleg(fp, FALSE, sp->rs_owner->ro_client);
		delay(NFS4_DELEGATION_CONFLICT_DELAY);
		rfs4_dbe_lock(sp->rs_dbe);

		/* if state closed while lock was dropped */
		if (sp->rs_closed) {
			/* presumably backs out the rfs4_share() done above */
			if (share_a || share_d)
				(void) rfs4_unshare(sp);
			rfs4_dbe_unlock(sp->rs_dbe);
			rfs4_file_rele(fp);
			/* Not a fully formed open; "close" it */
			if (screate == TRUE)
				rfs4_state_close(sp, FALSE, FALSE, cs->cr);
			rfs4_state_rele(sp);
			resp->status = NFS4ERR_OLD_STATEID;
			return;
		}

		rfs4_dbe_lock(fp->rf_dbe);
		/* Let's see if the delegation was returned */
		if (rfs4_check_recall(sp, access)) {
			/* Still conflicting: tell the client to retry later */
			rfs4_dbe_unlock(fp->rf_dbe);
			if (share_a || share_d)
				(void) rfs4_unshare(sp);
			rfs4_dbe_unlock(sp->rs_dbe);
			rfs4_file_rele(fp);
			rfs4_update_lease(sp->rs_owner->ro_client);

			/* Not a fully formed open; "close" it */
			if (screate == TRUE)
				rfs4_state_close(sp, FALSE, FALSE, cs->cr);
			rfs4_state_rele(sp);
			resp->status = NFS4ERR_DELAY;
			return;
		}
	}
	/*
	 * the share check passed and any delegation conflict has been
	 * taken care of, now call vop_open.
	 * if this is the first open then call vop_open with fflags.
	 * if not, call vn_open_upgrade with just the upgrade flags.
	 *
	 * if the file has been opened already, it will have the current
	 * access mode in the state struct.  if it has no share access, then
	 * this is a new open.
	 *
	 * However, if this is open with CLAIM_DELEGATE_CUR, then don't
	 * call VOP_OPEN(), just do the open upgrade.
	 */
	if (first_open && !deleg_cur) {
		ct.cc_sysid = sysid;
		ct.cc_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
		ct.cc_caller_id = nfs4_srv_caller_id;
		ct.cc_flags = CC_DONTBLOCK;
		err = VOP_OPEN(&cs->vp, fflags, cs->cr, &ct);
		if (err) {
			rfs4_dbe_unlock(fp->rf_dbe);
			if (share_a || share_d)
				(void) rfs4_unshare(sp);
			rfs4_dbe_unlock(sp->rs_dbe);
			rfs4_file_rele(fp);

			/* Not a fully formed open; "close" it */
			if (screate == TRUE)
				rfs4_state_close(sp, FALSE, FALSE, cs->cr);
			rfs4_state_rele(sp);
			/* check if a monitor detected a delegation conflict */
			if (err == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
				resp->status = NFS4ERR_DELAY;
			else
				resp->status = NFS4ERR_SERVERFAULT;
			return;
		}
	} else { /* open upgrade */
		/*
		 * calculate the fflags for the new mode that is being added
		 * by this upgrade.
		 */
		fflags = 0;
		if (open_a & OPEN4_SHARE_ACCESS_READ)
			fflags |= FREAD;
		if (open_a & OPEN4_SHARE_ACCESS_WRITE)
			fflags |= FWRITE;
		vn_open_upgrade(cs->vp, fflags);
	}
	/* Record the access/deny bits now held by this open state */
	sp->rs_open_access |= access;
	sp->rs_open_deny |= deny;

	/* Per-file counters are bumped only for bits newly added here */
	if (open_d & OPEN4_SHARE_DENY_READ)
		fp->rf_deny_read++;
	if (open_d & OPEN4_SHARE_DENY_WRITE)
		fp->rf_deny_write++;
	fp->rf_share_deny |= deny;

	if (open_a & OPEN4_SHARE_ACCESS_READ)
		fp->rf_access_read++;
	if (open_a & OPEN4_SHARE_ACCESS_WRITE)
		fp->rf_access_write++;
	fp->rf_share_access |= access;

	/*
	 * Check for delegation here. if the deleg argument is not
	 * DELEG_ANY, then this is a reclaim from a client and
	 * we must honor the delegation requested. If necessary we can
	 * set the recall flag.
	 */

	dsp = rfs4_grant_delegation(deleg, sp, &recall);

	/* Remember in the compound state whether the file is write delegated */
	cs->deleg = (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE);

	next_stateid(&sp->rs_stateid);

	resp->stateid = sp->rs_stateid.stateid;

	rfs4_dbe_unlock(fp->rf_dbe);
	rfs4_dbe_unlock(sp->rs_dbe);

	/* A non-NULL dsp is described in the reply and then released */
	if (dsp) {
		rfs4_set_deleg_response(dsp, &resp->delegation, NULL, recall);
		rfs4_deleg_state_rele(dsp);
	}

	rfs4_file_rele(fp);
	rfs4_state_rele(sp);

	resp->status = NFS4_OK;
}
7217
7218 /*ARGSUSED*/
7219 static void
7220 rfs4_do_opennull(struct compound_state *cs, struct svc_req *req,
7221 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7222 {
7223 change_info4 *cinfo = &resp->cinfo;
7224 bitmap4 *attrset = &resp->attrset;
7225
7226 if (args->opentype == OPEN4_NOCREATE)
7227 resp->status = rfs4_lookupfile(&args->open_claim4_u.file,
7228 req, cs, args->share_access, cinfo);
7229 else {
7230 /* inhibit delegation grants during exclusive create */
7231
7232 if (args->mode == EXCLUSIVE4)
7233 rfs4_disable_delegation();
7234
7235 resp->status = rfs4_createfile(args, req, cs, cinfo, attrset,
7236 oo->ro_client->rc_clientid);
7237 }
7238
7239 if (resp->status == NFS4_OK) {
7240
7241 /* cs->vp cs->fh now reference the desired file */
7242
7243 rfs4_do_open(cs, req, oo,
7244 oo->ro_need_confirm ? DELEG_NONE : DELEG_ANY,
7245 args->share_access, args->share_deny, resp, 0);
7246
7247 /*
7248 * If rfs4_createfile set attrset, we must
7249 * clear this attrset before the response is copied.
7250 */
7251 if (resp->status != NFS4_OK && resp->attrset) {
7252 resp->attrset = 0;
7253 }
7254 }
7255 else
7256 *cs->statusp = resp->status;
7257
7258 if (args->mode == EXCLUSIVE4)
7259 rfs4_enable_delegation();
7260 }
7261
7262 /*ARGSUSED*/
7263 static void
7264 rfs4_do_openprev(struct compound_state *cs, struct svc_req *req,
7265 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7266 {
7267 change_info4 *cinfo = &resp->cinfo;
7268 vattr_t va;
7269 vtype_t v_type = cs->vp->v_type;
7270 int error = 0;
7271
7272 /* Verify that we have a regular file */
7273 if (v_type != VREG) {
7274 if (v_type == VDIR)
7275 resp->status = NFS4ERR_ISDIR;
7276 else if (v_type == VLNK)
7277 resp->status = NFS4ERR_SYMLINK;
7278 else
7279 resp->status = NFS4ERR_INVAL;
7280 return;
7281 }
7282
7283 va.va_mask = AT_MODE|AT_UID;
7284 error = VOP_GETATTR(cs->vp, &va, 0, cs->cr, NULL);
7285 if (error) {
7286 resp->status = puterrno4(error);
7287 return;
7288 }
7289
7290 cs->mandlock = MANDLOCK(cs->vp, va.va_mode);
7291
7292 /*
7293 * Check if we have access to the file, Note the the file
7294 * could have originally been open UNCHECKED or GUARDED
7295 * with mode bits that will now fail, but there is nothing
7296 * we can really do about that except in the case that the
7297 * owner of the file is the one requesting the open.
7298 */
7299 if (crgetuid(cs->cr) != va.va_uid) {
7300 resp->status = check_open_access(args->share_access, cs, req);
7301 if (resp->status != NFS4_OK) {
7302 return;
7303 }
7304 }
7305
7306 /*
7307 * cinfo on a CLAIM_PREVIOUS is undefined, initialize to zero
7308 */
7309 cinfo->before = 0;
7310 cinfo->after = 0;
7311 cinfo->atomic = FALSE;
7312
7313 rfs4_do_open(cs, req, oo,
7314 NFS4_DELEG4TYPE2REQTYPE(args->open_claim4_u.delegate_type),
7315 args->share_access, args->share_deny, resp, 0);
7316 }
7317
7318 static void
7319 rfs4_do_opendelcur(struct compound_state *cs, struct svc_req *req,
7320 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7321 {
7322 int error;
7323 nfsstat4 status;
7324 stateid4 stateid =
7325 args->open_claim4_u.delegate_cur_info.delegate_stateid;
7326 rfs4_deleg_state_t *dsp;
7327
7328 /*
7329 * Find the state info from the stateid and confirm that the
7330 * file is delegated. If the state openowner is the same as
7331 * the supplied openowner we're done. If not, get the file
7332 * info from the found state info. Use that file info to
7333 * create the state for this lock owner. Note solaris doen't
7334 * really need the pathname to find the file. We may want to
7335 * lookup the pathname and make sure that the vp exist and
7336 * matches the vp in the file structure. However it is
7337 * possible that the pathname nolonger exists (local process
7338 * unlinks the file), so this may not be that useful.
7339 */
7340
7341 status = rfs4_get_deleg_state(&stateid, &dsp);
7342 if (status != NFS4_OK) {
7343 resp->status = status;
7344 return;
7345 }
7346
7347 ASSERT(dsp->rds_finfo->rf_dinfo.rd_dtype != OPEN_DELEGATE_NONE);
7348
7349 /*
7350 * New lock owner, create state. Since this was probably called
7351 * in response to a CB_RECALL we set deleg to DELEG_NONE
7352 */
7353
7354 ASSERT(cs->vp != NULL);
7355 VN_RELE(cs->vp);
7356 VN_HOLD(dsp->rds_finfo->rf_vp);
7357 cs->vp = dsp->rds_finfo->rf_vp;
7358
7359 if (error = makefh4(&cs->fh, cs->vp, cs->exi)) {
7360 rfs4_deleg_state_rele(dsp);
7361 *cs->statusp = resp->status = puterrno4(error);
7362 return;
7363 }
7364
7365 /* Mark progress for delegation returns */
7366 dsp->rds_finfo->rf_dinfo.rd_time_lastwrite = gethrestime_sec();
7367 rfs4_deleg_state_rele(dsp);
7368 rfs4_do_open(cs, req, oo, DELEG_NONE,
7369 args->share_access, args->share_deny, resp, 1);
7370 }
7371
7372 /*ARGSUSED*/
7373 static void
7374 rfs4_do_opendelprev(struct compound_state *cs, struct svc_req *req,
7375 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7376 {
7377 /*
7378 * Lookup the pathname, it must already exist since this file
7379 * was delegated.
7380 *
7381 * Find the file and state info for this vp and open owner pair.
7382 * check that they are in fact delegated.
7383 * check that the state access and deny modes are the same.
7384 *
7385 * Return the delgation possibly seting the recall flag.
7386 */
7387 rfs4_file_t *fp;
7388 rfs4_state_t *sp;
7389 bool_t create = FALSE;
7390 bool_t dcreate = FALSE;
7391 rfs4_deleg_state_t *dsp;
7392 nfsace4 *ace;
7393
7394 /* Note we ignore oflags */
7395 resp->status = rfs4_lookupfile(&args->open_claim4_u.file_delegate_prev,
7396 req, cs, args->share_access, &resp->cinfo);
7397
7398 if (resp->status != NFS4_OK) {
7399 return;
7400 }
7401
7402 /* get the file struct and hold a lock on it during initial open */
7403 fp = rfs4_findfile_withlock(cs->vp, NULL, &create);
7404 if (fp == NULL) {
7405 resp->status = NFS4ERR_RESOURCE;
7406 DTRACE_PROBE1(nfss__e__do_opendelprev1, nfsstat4, resp->status);
7407 return;
7408 }
7409
7410 sp = rfs4_findstate_by_owner_file(oo, fp, &create);
7411 if (sp == NULL) {
7412 resp->status = NFS4ERR_SERVERFAULT;
7413 DTRACE_PROBE1(nfss__e__do_opendelprev2, nfsstat4, resp->status);
7414 rw_exit(&fp->rf_file_rwlock);
7415 rfs4_file_rele(fp);
7416 return;
7417 }
7418
7419 rfs4_dbe_lock(sp->rs_dbe);
7420 rfs4_dbe_lock(fp->rf_dbe);
7421 if (args->share_access != sp->rs_share_access ||
7422 args->share_deny != sp->rs_share_deny ||
7423 sp->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
7424 NFS4_DEBUG(rfs4_debug,
7425 (CE_NOTE, "rfs4_do_opendelprev: state mixup"));
7426 rfs4_dbe_unlock(fp->rf_dbe);
7427 rfs4_dbe_unlock(sp->rs_dbe);
7428 rfs4_file_rele(fp);
7429 rfs4_state_rele(sp);
7430 resp->status = NFS4ERR_SERVERFAULT;
7431 return;
7432 }
7433 rfs4_dbe_unlock(fp->rf_dbe);
7434 rfs4_dbe_unlock(sp->rs_dbe);
7435
7436 dsp = rfs4_finddeleg(sp, &dcreate);
7437 if (dsp == NULL) {
7438 rfs4_state_rele(sp);
7439 rfs4_file_rele(fp);
7440 resp->status = NFS4ERR_SERVERFAULT;
7441 return;
7442 }
7443
7444 next_stateid(&sp->rs_stateid);
7445
7446 resp->stateid = sp->rs_stateid.stateid;
7447
7448 resp->delegation.delegation_type = dsp->rds_dtype;
7449
7450 if (dsp->rds_dtype == OPEN_DELEGATE_READ) {
7451 open_read_delegation4 *rv =
7452 &resp->delegation.open_delegation4_u.read;
7453
7454 rv->stateid = dsp->rds_delegid.stateid;
7455 rv->recall = FALSE; /* no policy in place to set to TRUE */
7456 ace = &rv->permissions;
7457 } else {
7458 open_write_delegation4 *rv =
7459 &resp->delegation.open_delegation4_u.write;
7460
7461 rv->stateid = dsp->rds_delegid.stateid;
7462 rv->recall = FALSE; /* no policy in place to set to TRUE */
7463 ace = &rv->permissions;
7464 rv->space_limit.limitby = NFS_LIMIT_SIZE;
7465 rv->space_limit.nfs_space_limit4_u.filesize = UINT64_MAX;
7466 }
7467
7468 /* XXX For now */
7469 ace->type = ACE4_ACCESS_ALLOWED_ACE_TYPE;
7470 ace->flag = 0;
7471 ace->access_mask = 0;
7472 ace->who.utf8string_len = 0;
7473 ace->who.utf8string_val = 0;
7474
7475 rfs4_deleg_state_rele(dsp);
7476 rfs4_state_rele(sp);
7477 rfs4_file_rele(fp);
7478 }
7479
/*
 * Result of a sequence id check (see rfs4_check_seqid()).
 */
typedef enum {
	/* request carries the next expected sequence id */
	NFS4_CHKSEQ_OKAY = 0,
	/* same sequence id and same operation as the last request */
	NFS4_CHKSEQ_REPLAY,
	/* any other sequence id */
	NFS4_CHKSEQ_BAD
} rfs4_chkseq_t;
7485
7486 /*
7487 * Generic function for sequence number checks.
7488 */
7489 static rfs4_chkseq_t
7490 rfs4_check_seqid(seqid4 seqid, nfs_resop4 *lastop,
7491 seqid4 rqst_seq, nfs_resop4 *resop, bool_t copyres)
7492 {
7493 /* Same sequence ids and matching operations? */
7494 if (seqid == rqst_seq && resop->resop == lastop->resop) {
7495 if (copyres == TRUE) {
7496 rfs4_free_reply(resop);
7497 rfs4_copy_reply(resop, lastop);
7498 }
7499 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
7500 "Replayed SEQID %d\n", seqid));
7501 return (NFS4_CHKSEQ_REPLAY);
7502 }
7503
7504 /* If the incoming sequence is not the next expected then it is bad */
7505 if (rqst_seq != seqid + 1) {
7506 if (rqst_seq == seqid) {
7507 NFS4_DEBUG(rfs4_debug,
7508 (CE_NOTE, "BAD SEQID: Replayed sequence id "
7509 "but last op was %d current op is %d\n",
7510 lastop->resop, resop->resop));
7511 return (NFS4_CHKSEQ_BAD);
7512 }
7513 NFS4_DEBUG(rfs4_debug,
7514 (CE_NOTE, "BAD SEQID: got %u expecting %u\n",
7515 rqst_seq, seqid));
7516 return (NFS4_CHKSEQ_BAD);
7517 }
7518
7519 /* Everything okay -- next expected */
7520 return (NFS4_CHKSEQ_OKAY);
7521 }
7522
7523
7524 static rfs4_chkseq_t
7525 rfs4_check_open_seqid(seqid4 seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7526 {
7527 rfs4_chkseq_t rc;
7528
7529 rfs4_dbe_lock(op->ro_dbe);
7530 rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply, seqid, resop,
7531 TRUE);
7532 rfs4_dbe_unlock(op->ro_dbe);
7533
7534 if (rc == NFS4_CHKSEQ_OKAY)
7535 rfs4_update_lease(op->ro_client);
7536
7537 return (rc);
7538 }
7539
7540 static rfs4_chkseq_t
7541 rfs4_check_olo_seqid(seqid4 olo_seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7542 {
7543 rfs4_chkseq_t rc;
7544
7545 rfs4_dbe_lock(op->ro_dbe);
7546 rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply,
7547 olo_seqid, resop, FALSE);
7548 rfs4_dbe_unlock(op->ro_dbe);
7549
7550 return (rc);
7551 }
7552
7553 static rfs4_chkseq_t
7554 rfs4_check_lock_seqid(seqid4 seqid, rfs4_lo_state_t *lsp, nfs_resop4 *resop)
7555 {
7556 rfs4_chkseq_t rc = NFS4_CHKSEQ_OKAY;
7557
7558 rfs4_dbe_lock(lsp->rls_dbe);
7559 if (!lsp->rls_skip_seqid_check)
7560 rc = rfs4_check_seqid(lsp->rls_seqid, &lsp->rls_reply, seqid,
7561 resop, TRUE);
7562 rfs4_dbe_unlock(lsp->rls_dbe);
7563
7564 return (rc);
7565 }
7566
/*
 * rfs4_op_open: handler for the OPEN operation.
 *
 * Checks, in order: that a current filehandle is set, that the client id
 * in the open owner is known and its lease has not expired, that an open
 * owner can be found or created, and (for an owner that already existed
 * and is not in the postponed-confirm state) that the request's sequence
 * id is acceptable.  It then applies the grace-period rules for the claim
 * type, rejects opens on a mount point and invalid share_access /
 * share_deny values, and dispatches on the claim type to one of the
 * rfs4_do_open*() helpers.
 *
 * After the helper returns, the switch at "out" decides from
 * resp->status whether the open owner's saved reply and sequence id are
 * updated.  For a replayed request, the current filehandle and vnode
 * are restored from the filehandle saved with the original reply.
 *
 * Parameters:
 *   argop - operation arguments (opopen member used).
 *   resop - operation result (opopen member filled in).
 *   req   - RPC request, passed to the claim helpers.
 *   cs    - compound state; cs->vp, cs->fh and cs->exi may be replaced.
 *
 * The final status is stored in resp->status and *cs->statusp.
 *
 * Exit labels:
 *   out    - releases the client reference, then runs the sequence id
 *            bookkeeping and falls into finish.
 *   finish - stores the status, leaves the owner's ro_sw and releases
 *            the open owner.
 *   end    - fires the "done" probe only; used before an open owner is
 *            held.
 */
static void
rfs4_op_open(nfs_argop4 *argop, nfs_resop4 *resop,
    struct svc_req *req, struct compound_state *cs)
{
	OPEN4args *args = &argop->nfs_argop4_u.opopen;
	OPEN4res *resp = &resop->nfs_resop4_u.opopen;
	open_owner4 *owner = &args->owner;
	open_claim_type4 claim = args->claim;
	rfs4_client_t *cp;
	rfs4_openowner_t *oo;
	bool_t create;
	bool_t replay = FALSE;
	int can_reclaim;

	DTRACE_NFSV4_2(op__open__start, struct compound_state *, cs,
	    OPEN4args *, args);

	if (cs->vp == NULL) {
		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
		goto end;
	}

	/*
	 * Need to check clientid and lease expiration first based on
	 * error ordering and incrementing sequence id.
	 */
	cp = rfs4_findclient_by_id(owner->clientid, FALSE);
	if (cp == NULL) {
		*cs->statusp = resp->status =
		    rfs4_check_clientid(&owner->clientid, 0);
		goto end;
	}

	if (rfs4_lease_expired(cp)) {
		rfs4_client_close(cp);
		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
		goto end;
	}
	/* Sampled once here; used below for the CLAIM_PREVIOUS grace check */
	can_reclaim = cp->rc_can_reclaim;

	/*
	 * Find the open_owner for use from this point forward.  Take
	 * care in updating the sequence id based on the type of error
	 * being returned.
	 */
retry:
	create = TRUE;
	oo = rfs4_findopenowner(owner, &create, args->seqid);
	if (oo == NULL) {
		*cs->statusp = resp->status = NFS4ERR_RESOURCE;
		rfs4_client_rele(cp);
		goto end;
	}

	/* Hold off access to the sequence space while the open is done */
	rfs4_sw_enter(&oo->ro_sw);

	/*
	 * If the open_owner existed before at the server, then check
	 * the sequence id.
	 */
	if (!create && !oo->ro_postpone_confirm) {
		switch (rfs4_check_open_seqid(args->seqid, oo, resop)) {
		case NFS4_CHKSEQ_BAD:
			/*
			 * An unconfirmed owner seen with a larger sequence
			 * id: free its opens and start over with the owner
			 * lookup.
			 */
			if ((args->seqid > oo->ro_open_seqid) &&
			    oo->ro_need_confirm) {
				rfs4_free_opens(oo, TRUE, FALSE);
				rfs4_sw_exit(&oo->ro_sw);
				rfs4_openowner_rele(oo);
				goto retry;
			}
			resp->status = NFS4ERR_BAD_SEQID;
			goto out;
		case NFS4_CHKSEQ_REPLAY: /* replay of previous request */
			replay = TRUE;
			goto out;
		default:
			break;
		}

		/*
		 * Sequence was ok and open owner exists
		 * check to see if we have yet to see an
		 * open_confirm.
		 */
		if (oo->ro_need_confirm) {
			rfs4_free_opens(oo, TRUE, FALSE);
			rfs4_sw_exit(&oo->ro_sw);
			rfs4_openowner_rele(oo);
			goto retry;
		}
	}
	/* Grace only applies to regular-type OPENs */
	if (rfs4_clnt_in_grace(cp) &&
	    (claim == CLAIM_NULL || claim == CLAIM_DELEGATE_CUR)) {
		*cs->statusp = resp->status = NFS4ERR_GRACE;
		goto out;
	}

	/*
	 * If previous state at the server existed then can_reclaim
	 * will be set. If not reply NFS4ERR_NO_GRACE to the
	 * client.
	 */
	if (rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS && !can_reclaim) {
		*cs->statusp = resp->status = NFS4ERR_NO_GRACE;
		goto out;
	}


	/*
	 * Reject the open if the client has missed the grace period
	 */
	if (!rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS) {
		*cs->statusp = resp->status = NFS4ERR_NO_GRACE;
		goto out;
	}

	/* Couple of up-front bookkeeping items */
	if (oo->ro_need_confirm) {
		/*
		 * If this is a reclaim OPEN then we should not ask
		 * for a confirmation of the open_owner per the
		 * protocol specification.
		 */
		if (claim == CLAIM_PREVIOUS)
			oo->ro_need_confirm = FALSE;
		else
			resp->rflags |= OPEN4_RESULT_CONFIRM;
	}
	resp->rflags |= OPEN4_RESULT_LOCKTYPE_POSIX;

	/*
	 * If there is an unshared filesystem mounted on this vnode,
	 * do not allow to open/create in this directory.
	 */
	if (vn_ismntpt(cs->vp)) {
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		goto out;
	}

	/*
	 * access must READ, WRITE, or BOTH.  No access is invalid.
	 * deny can be READ, WRITE, BOTH, or NONE.
	 * bits not defined for access/deny are invalid.
	 */
	if (! (args->share_access & OPEN4_SHARE_ACCESS_BOTH) ||
	    (args->share_access & ~OPEN4_SHARE_ACCESS_BOTH) ||
	    (args->share_deny & ~OPEN4_SHARE_DENY_BOTH)) {
		*cs->statusp = resp->status = NFS4ERR_INVAL;
		goto out;
	}


	/*
	 * make sure attrset is zero before response is built.
	 */
	resp->attrset = 0;

	/* Each helper reports its result in resp->status */
	switch (claim) {
	case CLAIM_NULL:
		rfs4_do_opennull(cs, req, args, oo, resp);
		break;
	case CLAIM_PREVIOUS:
		rfs4_do_openprev(cs, req, args, oo, resp);
		break;
	case CLAIM_DELEGATE_CUR:
		rfs4_do_opendelcur(cs, req, args, oo, resp);
		break;
	case CLAIM_DELEGATE_PREV:
		rfs4_do_opendelprev(cs, req, args, oo, resp);
		break;
	default:
		resp->status = NFS4ERR_INVAL;
		break;
	}

out:
	rfs4_client_rele(cp);

	/* Catch sequence id handling here to make it a little easier */
	switch (resp->status) {
	case NFS4ERR_BADXDR:
	case NFS4ERR_BAD_SEQID:
	case NFS4ERR_BAD_STATEID:
	case NFS4ERR_NOFILEHANDLE:
	case NFS4ERR_RESOURCE:
	case NFS4ERR_STALE_CLIENTID:
	case NFS4ERR_STALE_STATEID:
		/*
		 * The protocol states that if any of these errors are
		 * being returned, the sequence id should not be
		 * incremented.  Any other return requires an
		 * increment.
		 */
		break;
	default:
		/* Always update the lease in this case */
		rfs4_update_lease(oo->ro_client);

		/* Regular response - copy the result */
		if (!replay)
			rfs4_update_open_resp(oo, resop, &cs->fh);

		/*
		 * REPLAY case: Only if the previous response was OK
		 * do we copy the filehandle.  If not OK, no
		 * filehandle to copy.
		 */
		if (replay == TRUE &&
		    resp->status == NFS4_OK &&
		    oo->ro_reply_fh.nfs_fh4_val) {
			/*
			 * If this is a replay, we must restore the
			 * current filehandle/vp to that of what was
			 * returned originally.  Try our best to do
			 * it.
			 */
			nfs_fh4_fmt_t *fh_fmtp =
			    (nfs_fh4_fmt_t *)oo->ro_reply_fh.nfs_fh4_val;

			cs->exi = checkexport4(&fh_fmtp->fh4_fsid,
			    (fid_t *)&fh_fmtp->fh4_xlen, NULL);

			if (cs->exi == NULL) {
				resp->status = NFS4ERR_STALE;
				goto finish;
			}

			VN_RELE(cs->vp);

			/* on failure nfs4_fhtovp() stores the error in status */
			cs->vp = nfs4_fhtovp(&oo->ro_reply_fh, cs->exi,
			    &resp->status);

			if (cs->vp == NULL)
				goto finish;

			nfs_fh4_copy(&oo->ro_reply_fh, &cs->fh);
		}

		/*
		 * If this was a replay, no need to update the
		 * sequence id.  If the open_owner was not created on
		 * this pass, then update.  The first use of an
		 * open_owner will not bump the sequence id.
		 */
		if (replay == FALSE && !create)
			rfs4_update_open_sequence(oo);
		/*
		 * If the client is receiving an error and the
		 * open_owner needs to be confirmed, there is no way
		 * to notify the client of this fact ignoring the fact
		 * that the server has no method of returning a
		 * stateid to confirm.  Therefore, the server needs to
		 * mark this open_owner in a way as to avoid the
		 * sequence id checking the next time the client uses
		 * this open_owner.
		 */
		if (resp->status != NFS4_OK && oo->ro_need_confirm)
			oo->ro_postpone_confirm = TRUE;
		/*
		 * If OK response then clear the postpone flag and
		 * reset the sequence id to keep in sync with the
		 * client.
		 */
		if (resp->status == NFS4_OK && oo->ro_postpone_confirm) {
			oo->ro_postpone_confirm = FALSE;
			oo->ro_open_seqid = args->seqid;
		}
		break;
	}

finish:
	*cs->statusp = resp->status;

	rfs4_sw_exit(&oo->ro_sw);
	rfs4_openowner_rele(oo);

end:
	DTRACE_NFSV4_2(op__open__done, struct compound_state *, cs,
	    OPEN4res *, resp);
}
7849
7850 /*ARGSUSED*/
7851 void
7852 rfs4_op_open_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
7853 struct svc_req *req, struct compound_state *cs)
7854 {
7855 OPEN_CONFIRM4args *args = &argop->nfs_argop4_u.opopen_confirm;
7856 OPEN_CONFIRM4res *resp = &resop->nfs_resop4_u.opopen_confirm;
7857 rfs4_state_t *sp;
7858 nfsstat4 status;
7859
7860 DTRACE_NFSV4_2(op__open__confirm__start, struct compound_state *, cs,
7861 OPEN_CONFIRM4args *, args);
7862
7863 if (cs->vp == NULL) {
7864 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7865 goto out;
7866 }
7867
7868 if (cs->vp->v_type != VREG) {
7869 *cs->statusp = resp->status =
7870 cs->vp->v_type == VDIR ? NFS4ERR_ISDIR : NFS4ERR_INVAL;
7871 return;
7872 }
7873
7874 status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7875 if (status != NFS4_OK) {
7876 *cs->statusp = resp->status = status;
7877 goto out;
7878 }
7879
7880 /* Ensure specified filehandle matches */
7881 if (cs->vp != sp->rs_finfo->rf_vp) {
7882 rfs4_state_rele(sp);
7883 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7884 goto out;
7885 }
7886
7887 /* hold off other access to open_owner while we tinker */
7888 rfs4_sw_enter(&sp->rs_owner->ro_sw);
7889
7890 switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7891 case NFS4_CHECK_STATEID_OKAY:
7892 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7893 resop) != 0) {
7894 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7895 break;
7896 }
7897 /*
7898 * If it is the appropriate stateid and determined to
7899 * be "OKAY" then this means that the stateid does not
7900 * need to be confirmed and the client is in error for
7901 * sending an OPEN_CONFIRM.
7902 */
7903 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7904 break;
7905 case NFS4_CHECK_STATEID_OLD:
7906 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7907 break;
7908 case NFS4_CHECK_STATEID_BAD:
7909 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7910 break;
7911 case NFS4_CHECK_STATEID_EXPIRED:
7912 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7913 break;
7914 case NFS4_CHECK_STATEID_CLOSED:
7915 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7916 break;
7917 case NFS4_CHECK_STATEID_REPLAY:
7918 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7919 resop)) {
7920 case NFS4_CHKSEQ_OKAY:
7921 /*
7922 * This is replayed stateid; if seqid matches
7923 * next expected, then client is using wrong seqid.
7924 */
7925 /* fall through */
7926 case NFS4_CHKSEQ_BAD:
7927 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7928 break;
7929 case NFS4_CHKSEQ_REPLAY:
7930 /*
7931 * Note this case is the duplicate case so
7932 * resp->status is already set.
7933 */
7934 *cs->statusp = resp->status;
7935 rfs4_update_lease(sp->rs_owner->ro_client);
7936 break;
7937 }
7938 break;
7939 case NFS4_CHECK_STATEID_UNCONFIRMED:
7940 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7941 resop) != NFS4_CHKSEQ_OKAY) {
7942 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7943 break;
7944 }
7945 *cs->statusp = resp->status = NFS4_OK;
7946
7947 next_stateid(&sp->rs_stateid);
7948 resp->open_stateid = sp->rs_stateid.stateid;
7949 sp->rs_owner->ro_need_confirm = FALSE;
7950 rfs4_update_lease(sp->rs_owner->ro_client);
7951 rfs4_update_open_sequence(sp->rs_owner);
7952 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7953 break;
7954 default:
7955 ASSERT(FALSE);
7956 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7957 break;
7958 }
7959 rfs4_sw_exit(&sp->rs_owner->ro_sw);
7960 rfs4_state_rele(sp);
7961
7962 out:
7963 DTRACE_NFSV4_2(op__open__confirm__done, struct compound_state *, cs,
7964 OPEN_CONFIRM4res *, resp);
7965 }
7966
7967 /*ARGSUSED*/
7968 void
7969 rfs4_op_open_downgrade(nfs_argop4 *argop, nfs_resop4 *resop,
7970 struct svc_req *req, struct compound_state *cs)
7971 {
7972 OPEN_DOWNGRADE4args *args = &argop->nfs_argop4_u.opopen_downgrade;
7973 OPEN_DOWNGRADE4res *resp = &resop->nfs_resop4_u.opopen_downgrade;
7974 uint32_t access = args->share_access;
7975 uint32_t deny = args->share_deny;
7976 nfsstat4 status;
7977 rfs4_state_t *sp;
7978 rfs4_file_t *fp;
7979 int fflags = 0;
7980
7981 DTRACE_NFSV4_2(op__open__downgrade__start, struct compound_state *, cs,
7982 OPEN_DOWNGRADE4args *, args);
7983
7984 if (cs->vp == NULL) {
7985 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7986 goto out;
7987 }
7988
7989 if (cs->vp->v_type != VREG) {
7990 *cs->statusp = resp->status = NFS4ERR_INVAL;
7991 return;
7992 }
7993
7994 status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7995 if (status != NFS4_OK) {
7996 *cs->statusp = resp->status = status;
7997 goto out;
7998 }
7999
8000 /* Ensure specified filehandle matches */
8001 if (cs->vp != sp->rs_finfo->rf_vp) {
8002 rfs4_state_rele(sp);
8003 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8004 goto out;
8005 }
8006
8007 /* hold off other access to open_owner while we tinker */
8008 rfs4_sw_enter(&sp->rs_owner->ro_sw);
8009
8010 switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
8011 case NFS4_CHECK_STATEID_OKAY:
8012 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8013 resop) != NFS4_CHKSEQ_OKAY) {
8014 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8015 goto end;
8016 }
8017 break;
8018 case NFS4_CHECK_STATEID_OLD:
8019 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8020 goto end;
8021 case NFS4_CHECK_STATEID_BAD:
8022 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8023 goto end;
8024 case NFS4_CHECK_STATEID_EXPIRED:
8025 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8026 goto end;
8027 case NFS4_CHECK_STATEID_CLOSED:
8028 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8029 goto end;
8030 case NFS4_CHECK_STATEID_UNCONFIRMED:
8031 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8032 goto end;
8033 case NFS4_CHECK_STATEID_REPLAY:
8034 /* Check the sequence id for the open owner */
8035 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8036 resop)) {
8037 case NFS4_CHKSEQ_OKAY:
8038 /*
8039 * This is replayed stateid; if seqid matches
8040 * next expected, then client is using wrong seqid.
8041 */
8042 /* fall through */
8043 case NFS4_CHKSEQ_BAD:
8044 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8045 goto end;
8046 case NFS4_CHKSEQ_REPLAY:
8047 /*
8048 * Note this case is the duplicate case so
8049 * resp->status is already set.
8050 */
8051 *cs->statusp = resp->status;
8052 rfs4_update_lease(sp->rs_owner->ro_client);
8053 goto end;
8054 }
8055 break;
8056 default:
8057 ASSERT(FALSE);
8058 break;
8059 }
8060
8061 rfs4_dbe_lock(sp->rs_dbe);
8062 /*
8063 * Check that the new access modes and deny modes are valid.
8064 * Check that no invalid bits are set.
8065 */
8066 if ((access & ~(OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) ||
8067 (deny & ~(OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE))) {
8068 *cs->statusp = resp->status = NFS4ERR_INVAL;
8069 rfs4_update_open_sequence(sp->rs_owner);
8070 rfs4_dbe_unlock(sp->rs_dbe);
8071 goto end;
8072 }
8073
8074 /*
8075 * The new modes must be a subset of the current modes and
8076 * the access must specify at least one mode. To test that
8077 * the new mode is a subset of the current modes we bitwise
8078 * AND them together and check that the result equals the new
8079 * mode. For example:
8080 * New mode, access == R and current mode, sp->rs_open_access == RW
8081 * access & sp->rs_open_access == R == access, so the new access mode
8082 * is valid. Consider access == RW, sp->rs_open_access = R
8083 * access & sp->rs_open_access == R != access, so the new access mode
8084 * is invalid.
8085 */
8086 if ((access & sp->rs_open_access) != access ||
8087 (deny & sp->rs_open_deny) != deny ||
8088 (access &
8089 (OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) == 0) {
8090 *cs->statusp = resp->status = NFS4ERR_INVAL;
8091 rfs4_update_open_sequence(sp->rs_owner);
8092 rfs4_dbe_unlock(sp->rs_dbe);
8093 goto end;
8094 }
8095
8096 /*
8097 * Release any share locks associated with this stateID.
8098 * Strictly speaking, this violates the spec because the
8099 * spec effectively requires that open downgrade be atomic.
8100 * At present, fs_shrlock does not have this capability.
8101 */
8102 (void) rfs4_unshare(sp);
8103
8104 status = rfs4_share(sp, access, deny);
8105 if (status != NFS4_OK) {
8106 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
8107 rfs4_update_open_sequence(sp->rs_owner);
8108 rfs4_dbe_unlock(sp->rs_dbe);
8109 goto end;
8110 }
8111
8112 fp = sp->rs_finfo;
8113 rfs4_dbe_lock(fp->rf_dbe);
8114
8115 /*
8116 * If the current mode has deny read and the new mode
8117 * does not, decrement the number of deny read mode bits
8118 * and if it goes to zero turn off the deny read bit
8119 * on the file.
8120 */
8121 if ((sp->rs_open_deny & OPEN4_SHARE_DENY_READ) &&
8122 (deny & OPEN4_SHARE_DENY_READ) == 0) {
8123 fp->rf_deny_read--;
8124 if (fp->rf_deny_read == 0)
8125 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
8126 }
8127
8128 /*
8129 * If the current mode has deny write and the new mode
8130 * does not, decrement the number of deny write mode bits
8131 * and if it goes to zero turn off the deny write bit
8132 * on the file.
8133 */
8134 if ((sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) &&
8135 (deny & OPEN4_SHARE_DENY_WRITE) == 0) {
8136 fp->rf_deny_write--;
8137 if (fp->rf_deny_write == 0)
8138 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
8139 }
8140
8141 /*
8142 * If the current mode has access read and the new mode
8143 * does not, decrement the number of access read mode bits
8144 * and if it goes to zero turn off the access read bit
8145 * on the file. set fflags to FREAD for the call to
8146 * vn_open_downgrade().
8147 */
8148 if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) &&
8149 (access & OPEN4_SHARE_ACCESS_READ) == 0) {
8150 fp->rf_access_read--;
8151 if (fp->rf_access_read == 0)
8152 fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
8153 fflags |= FREAD;
8154 }
8155
8156 /*
8157 * If the current mode has access write and the new mode
8158 * does not, decrement the number of access write mode bits
8159 * and if it goes to zero turn off the access write bit
8160 * on the file. set fflags to FWRITE for the call to
8161 * vn_open_downgrade().
8162 */
8163 if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) &&
8164 (access & OPEN4_SHARE_ACCESS_WRITE) == 0) {
8165 fp->rf_access_write--;
8166 if (fp->rf_access_write == 0)
8167 fp->rf_share_deny &= ~OPEN4_SHARE_ACCESS_WRITE;
8168 fflags |= FWRITE;
8169 }
8170
8171 /* Check that the file is still accessible */
8172 ASSERT(fp->rf_share_access);
8173
8174 rfs4_dbe_unlock(fp->rf_dbe);
8175
8176 /* now set the new open access and deny modes */
8177 sp->rs_open_access = access;
8178 sp->rs_open_deny = deny;
8179
8180 /*
8181 * we successfully downgraded the share lock, now we need to downgrade
8182 * the open. it is possible that the downgrade was only for a deny
8183 * mode and we have nothing else to do.
8184 */
8185 if ((fflags & (FREAD|FWRITE)) != 0)
8186 vn_open_downgrade(cs->vp, fflags);
8187
8188 /* Update the stateid */
8189 next_stateid(&sp->rs_stateid);
8190 resp->open_stateid = sp->rs_stateid.stateid;
8191
8192 rfs4_dbe_unlock(sp->rs_dbe);
8193
8194 *cs->statusp = resp->status = NFS4_OK;
8195 /* Update the lease */
8196 rfs4_update_lease(sp->rs_owner->ro_client);
8197 /* And the sequence */
8198 rfs4_update_open_sequence(sp->rs_owner);
8199 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8200
8201 end:
8202 rfs4_sw_exit(&sp->rs_owner->ro_sw);
8203 rfs4_state_rele(sp);
8204 out:
8205 DTRACE_NFSV4_2(op__open__downgrade__done, struct compound_state *, cs,
8206 OPEN_DOWNGRADE4res *, resp);
8207 }
8208
/*
 * Locate the NUL-terminated pattern s2 inside the first n bytes of the
 * buffer s1.  The buffer is treated as raw bytes and need not be
 * NUL-terminated.  Returns a pointer to the first occurrence, or NULL when
 * the pattern does not fit or does not occur.  An empty pattern matches at
 * the start of the buffer.
 */
static void *
memstr(const void *s1, const char *s2, size_t n)
{
	const size_t patlen = strlen(s2);
	char *cursor = (char *)s1;
	size_t remaining;

	/* Slide the window forward while the pattern still fits */
	for (remaining = n; remaining >= patlen; remaining--, cursor++) {
		if (bcmp(cursor, s2, patlen) == 0)
			return (cursor);
	}

	return (NULL);
}
8224
8225 /*
8226 * The logic behind this function is detailed in the NFSv4 RFC in the
8227 * SETCLIENTID operation description under IMPLEMENTATION. Refer to
8228 * that section for explicit guidance to server behavior for
8229 * SETCLIENTID.
8230 */
8231 void
8232 rfs4_op_setclientid(nfs_argop4 *argop, nfs_resop4 *resop,
8233 struct svc_req *req, struct compound_state *cs)
8234 {
8235 SETCLIENTID4args *args = &argop->nfs_argop4_u.opsetclientid;
8236 SETCLIENTID4res *res = &resop->nfs_resop4_u.opsetclientid;
8237 rfs4_client_t *cp, *newcp, *cp_confirmed, *cp_unconfirmed;
8238 rfs4_clntip_t *ci;
8239 bool_t create;
8240 char *addr, *netid;
8241 int len;
8242
8243 DTRACE_NFSV4_2(op__setclientid__start, struct compound_state *, cs,
8244 SETCLIENTID4args *, args);
8245 retry:
8246 newcp = cp_confirmed = cp_unconfirmed = NULL;
8247
8248 /*
8249 * Save the caller's IP address
8250 */
8251 args->client.cl_addr =
8252 (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
8253
8254 /*
8255 * Record if it is a Solaris client that cannot handle referrals.
8256 */
8257 if (memstr(args->client.id_val, "Solaris", args->client.id_len) &&
8258 !memstr(args->client.id_val, "+referrals", args->client.id_len)) {
8259 /* Add a "yes, it's downrev" record */
8260 create = TRUE;
8261 ci = rfs4_find_clntip(args->client.cl_addr, &create);
8262 ASSERT(ci != NULL);
8263 rfs4_dbe_rele(ci->ri_dbe);
8264 } else {
8265 /* Remove any previous record */
8266 rfs4_invalidate_clntip(args->client.cl_addr);
8267 }
8268
8269 /*
8270 * In search of an EXISTING client matching the incoming
8271 * request to establish a new client identifier at the server
8272 */
8273 create = TRUE;
8274 cp = rfs4_findclient(&args->client, &create, NULL);
8275
8276 /* Should never happen */
8277 ASSERT(cp != NULL);
8278
8279 if (cp == NULL) {
8280 *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8281 goto out;
8282 }
8283
8284 /*
8285 * Easiest case. Client identifier is newly created and is
8286 * unconfirmed. Also note that for this case, no other
8287 * entries exist for the client identifier. Nothing else to
8288 * check. Just setup the response and respond.
8289 */
8290 if (create) {
8291 *cs->statusp = res->status = NFS4_OK;
8292 res->SETCLIENTID4res_u.resok4.clientid = cp->rc_clientid;
8293 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8294 cp->rc_confirm_verf;
8295 /* Setup callback information; CB_NULL confirmation later */
8296 rfs4_client_setcb(cp, &args->callback, args->callback_ident);
8297
8298 rfs4_client_rele(cp);
8299 goto out;
8300 }
8301
8302 /*
8303 * An existing, confirmed client may exist but it may not have
8304 * been active for at least one lease period. If so, then
8305 * "close" the client and create a new client identifier
8306 */
8307 if (rfs4_lease_expired(cp)) {
8308 rfs4_client_close(cp);
8309 goto retry;
8310 }
8311
8312 if (cp->rc_need_confirm == TRUE)
8313 cp_unconfirmed = cp;
8314 else
8315 cp_confirmed = cp;
8316
8317 cp = NULL;
8318
8319 /*
8320 * We have a confirmed client, now check for an
8321 * unconfimred entry
8322 */
8323 if (cp_confirmed) {
8324 /* If creds don't match then client identifier is inuse */
8325 if (!creds_ok(cp_confirmed->rc_cr_set, req, cs)) {
8326 rfs4_cbinfo_t *cbp;
8327 /*
8328 * Some one else has established this client
8329 * id. Try and say * who they are. We will use
8330 * the call back address supplied by * the
8331 * first client.
8332 */
8333 *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8334
8335 addr = netid = NULL;
8336
8337 cbp = &cp_confirmed->rc_cbinfo;
8338 if (cbp->cb_callback.cb_location.r_addr &&
8339 cbp->cb_callback.cb_location.r_netid) {
8340 cb_client4 *cbcp = &cbp->cb_callback;
8341
8342 len = strlen(cbcp->cb_location.r_addr)+1;
8343 addr = kmem_alloc(len, KM_SLEEP);
8344 bcopy(cbcp->cb_location.r_addr, addr, len);
8345 len = strlen(cbcp->cb_location.r_netid)+1;
8346 netid = kmem_alloc(len, KM_SLEEP);
8347 bcopy(cbcp->cb_location.r_netid, netid, len);
8348 }
8349
8350 res->SETCLIENTID4res_u.client_using.r_addr = addr;
8351 res->SETCLIENTID4res_u.client_using.r_netid = netid;
8352
8353 rfs4_client_rele(cp_confirmed);
8354 }
8355
8356 /*
8357 * Confirmed, creds match, and verifier matches; must
8358 * be an update of the callback info
8359 */
8360 if (cp_confirmed->rc_nfs_client.verifier ==
8361 args->client.verifier) {
8362 /* Setup callback information */
8363 rfs4_client_setcb(cp_confirmed, &args->callback,
8364 args->callback_ident);
8365
8366 /* everything okay -- move ahead */
8367 *cs->statusp = res->status = NFS4_OK;
8368 res->SETCLIENTID4res_u.resok4.clientid =
8369 cp_confirmed->rc_clientid;
8370
8371 /* update the confirm_verifier and return it */
8372 rfs4_client_scv_next(cp_confirmed);
8373 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8374 cp_confirmed->rc_confirm_verf;
8375
8376 rfs4_client_rele(cp_confirmed);
8377 goto out;
8378 }
8379
8380 /*
8381 * Creds match but the verifier doesn't. Must search
8382 * for an unconfirmed client that would be replaced by
8383 * this request.
8384 */
8385 create = FALSE;
8386 cp_unconfirmed = rfs4_findclient(&args->client, &create,
8387 cp_confirmed);
8388 }
8389
8390 /*
8391 * At this point, we have taken care of the brand new client
8392 * struct, INUSE case, update of an existing, and confirmed
8393 * client struct.
8394 */
8395
8396 /*
8397 * check to see if things have changed while we originally
8398 * picked up the client struct. If they have, then return and
8399 * retry the processing of this SETCLIENTID request.
8400 */
8401 if (cp_unconfirmed) {
8402 rfs4_dbe_lock(cp_unconfirmed->rc_dbe);
8403 if (!cp_unconfirmed->rc_need_confirm) {
8404 rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8405 rfs4_client_rele(cp_unconfirmed);
8406 if (cp_confirmed)
8407 rfs4_client_rele(cp_confirmed);
8408 goto retry;
8409 }
8410 /* do away with the old unconfirmed one */
8411 rfs4_dbe_invalidate(cp_unconfirmed->rc_dbe);
8412 rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8413 rfs4_client_rele(cp_unconfirmed);
8414 cp_unconfirmed = NULL;
8415 }
8416
8417 /*
8418 * This search will temporarily hide the confirmed client
8419 * struct while a new client struct is created as the
8420 * unconfirmed one.
8421 */
8422 create = TRUE;
8423 newcp = rfs4_findclient(&args->client, &create, cp_confirmed);
8424
8425 ASSERT(newcp != NULL);
8426
8427 if (newcp == NULL) {
8428 *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8429 rfs4_client_rele(cp_confirmed);
8430 goto out;
8431 }
8432
8433 /*
8434 * If one was not created, then a similar request must be in
8435 * process so release and start over with this one
8436 */
8437 if (create != TRUE) {
8438 rfs4_client_rele(newcp);
8439 if (cp_confirmed)
8440 rfs4_client_rele(cp_confirmed);
8441 goto retry;
8442 }
8443
8444 *cs->statusp = res->status = NFS4_OK;
8445 res->SETCLIENTID4res_u.resok4.clientid = newcp->rc_clientid;
8446 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8447 newcp->rc_confirm_verf;
8448 /* Setup callback information; CB_NULL confirmation later */
8449 rfs4_client_setcb(newcp, &args->callback, args->callback_ident);
8450
8451 newcp->rc_cp_confirmed = cp_confirmed;
8452
8453 rfs4_client_rele(newcp);
8454
8455 out:
8456 DTRACE_NFSV4_2(op__setclientid__done, struct compound_state *, cs,
8457 SETCLIENTID4res *, res);
8458 }
8459
8460 /*ARGSUSED*/
8461 void
8462 rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
8463 struct svc_req *req, struct compound_state *cs)
8464 {
8465 SETCLIENTID_CONFIRM4args *args =
8466 &argop->nfs_argop4_u.opsetclientid_confirm;
8467 SETCLIENTID_CONFIRM4res *res =
8468 &resop->nfs_resop4_u.opsetclientid_confirm;
8469 rfs4_client_t *cp, *cptoclose = NULL;
8470 nfs4_srv_t *nsrv4;
8471
8472 DTRACE_NFSV4_2(op__setclientid__confirm__start,
8473 struct compound_state *, cs,
8474 SETCLIENTID_CONFIRM4args *, args);
8475
8476 nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
8477 *cs->statusp = res->status = NFS4_OK;
8478
8479 cp = rfs4_findclient_by_id(args->clientid, TRUE);
8480
8481 if (cp == NULL) {
8482 *cs->statusp = res->status =
8483 rfs4_check_clientid(&args->clientid, 1);
8484 goto out;
8485 }
8486
8487 if (!creds_ok(cp, req, cs)) {
8488 *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8489 rfs4_client_rele(cp);
8490 goto out;
8491 }
8492
8493 /* If the verifier doesn't match, the record doesn't match */
8494 if (cp->rc_confirm_verf != args->setclientid_confirm) {
8495 *cs->statusp = res->status = NFS4ERR_STALE_CLIENTID;
8496 rfs4_client_rele(cp);
8497 goto out;
8498 }
8499
8500 rfs4_dbe_lock(cp->rc_dbe);
8501 cp->rc_need_confirm = FALSE;
8502 if (cp->rc_cp_confirmed) {
8503 cptoclose = cp->rc_cp_confirmed;
8504 cptoclose->rc_ss_remove = 1;
8505 cp->rc_cp_confirmed = NULL;
8506 }
8507
8508 /*
8509 * Update the client's associated server instance, if it's changed
8510 * since the client was created.
8511 */
8512 if (rfs4_servinst(cp) != nsrv4->nfs4_cur_servinst)
8513 rfs4_servinst_assign(nsrv4, cp, nsrv4->nfs4_cur_servinst);
8514
8515 /*
8516 * Record clientid in stable storage.
8517 * Must be done after server instance has been assigned.
8518 */
8519 rfs4_ss_clid(nsrv4, cp);
8520
8521 rfs4_dbe_unlock(cp->rc_dbe);
8522
8523 if (cptoclose)
8524 /* don't need to rele, client_close does it */
8525 rfs4_client_close(cptoclose);
8526
8527 /* If needed, initiate CB_NULL call for callback path */
8528 rfs4_deleg_cb_check(cp);
8529 rfs4_update_lease(cp);
8530
8531 /*
8532 * Check to see if client can perform reclaims
8533 */
8534 rfs4_ss_chkclid(nsrv4, cp);
8535
8536 rfs4_client_rele(cp);
8537
8538 out:
8539 DTRACE_NFSV4_2(op__setclientid__confirm__done,
8540 struct compound_state *, cs,
8541 SETCLIENTID_CONFIRM4 *, res);
8542 }
8543
8544
8545 /*ARGSUSED*/
8546 void
8547 rfs4_op_close(nfs_argop4 *argop, nfs_resop4 *resop,
8548 struct svc_req *req, struct compound_state *cs)
8549 {
8550 CLOSE4args *args = &argop->nfs_argop4_u.opclose;
8551 CLOSE4res *resp = &resop->nfs_resop4_u.opclose;
8552 rfs4_state_t *sp;
8553 nfsstat4 status;
8554
8555 DTRACE_NFSV4_2(op__close__start, struct compound_state *, cs,
8556 CLOSE4args *, args);
8557
8558 if (cs->vp == NULL) {
8559 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8560 goto out;
8561 }
8562
8563 status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_INVALID);
8564 if (status != NFS4_OK) {
8565 *cs->statusp = resp->status = status;
8566 goto out;
8567 }
8568
8569 /* Ensure specified filehandle matches */
8570 if (cs->vp != sp->rs_finfo->rf_vp) {
8571 rfs4_state_rele(sp);
8572 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8573 goto out;
8574 }
8575
8576 /* hold off other access to open_owner while we tinker */
8577 rfs4_sw_enter(&sp->rs_owner->ro_sw);
8578
8579 switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
8580 case NFS4_CHECK_STATEID_OKAY:
8581 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8582 resop) != NFS4_CHKSEQ_OKAY) {
8583 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8584 goto end;
8585 }
8586 break;
8587 case NFS4_CHECK_STATEID_OLD:
8588 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8589 goto end;
8590 case NFS4_CHECK_STATEID_BAD:
8591 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8592 goto end;
8593 case NFS4_CHECK_STATEID_EXPIRED:
8594 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8595 goto end;
8596 case NFS4_CHECK_STATEID_CLOSED:
8597 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8598 goto end;
8599 case NFS4_CHECK_STATEID_UNCONFIRMED:
8600 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8601 goto end;
8602 case NFS4_CHECK_STATEID_REPLAY:
8603 /* Check the sequence id for the open owner */
8604 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8605 resop)) {
8606 case NFS4_CHKSEQ_OKAY:
8607 /*
8608 * This is replayed stateid; if seqid matches
8609 * next expected, then client is using wrong seqid.
8610 */
8611 /* FALL THROUGH */
8612 case NFS4_CHKSEQ_BAD:
8613 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8614 goto end;
8615 case NFS4_CHKSEQ_REPLAY:
8616 /*
8617 * Note this case is the duplicate case so
8618 * resp->status is already set.
8619 */
8620 *cs->statusp = resp->status;
8621 rfs4_update_lease(sp->rs_owner->ro_client);
8622 goto end;
8623 }
8624 break;
8625 default:
8626 ASSERT(FALSE);
8627 break;
8628 }
8629
8630 rfs4_dbe_lock(sp->rs_dbe);
8631
8632 /* Update the stateid. */
8633 next_stateid(&sp->rs_stateid);
8634 resp->open_stateid = sp->rs_stateid.stateid;
8635
8636 rfs4_dbe_unlock(sp->rs_dbe);
8637
8638 rfs4_update_lease(sp->rs_owner->ro_client);
8639 rfs4_update_open_sequence(sp->rs_owner);
8640 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8641
8642 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
8643
8644 *cs->statusp = resp->status = status;
8645
8646 end:
8647 rfs4_sw_exit(&sp->rs_owner->ro_sw);
8648 rfs4_state_rele(sp);
8649 out:
8650 DTRACE_NFSV4_2(op__close__done, struct compound_state *, cs,
8651 CLOSE4res *, resp);
8652 }
8653
8654 /*
8655 * Manage the counts on the file struct and close all file locks
8656 */
8657 /*ARGSUSED*/
8658 void
8659 rfs4_release_share_lock_state(rfs4_state_t *sp, cred_t *cr,
8660 bool_t close_of_client)
8661 {
8662 rfs4_file_t *fp = sp->rs_finfo;
8663 rfs4_lo_state_t *lsp;
8664 int fflags = 0;
8665
8666 /*
8667 * If this call is part of the larger closing down of client
8668 * state then it is just easier to release all locks
8669 * associated with this client instead of going through each
8670 * individual file and cleaning locks there.
8671 */
8672 if (close_of_client) {
8673 if (sp->rs_owner->ro_client->rc_unlksys_completed == FALSE &&
8674 !list_is_empty(&sp->rs_lostatelist) &&
8675 sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID) {
8676 /* Is the PxFS kernel module loaded? */
8677 if (lm_remove_file_locks != NULL) {
8678 int new_sysid;
8679
8680 /* Encode the cluster nodeid in new sysid */
8681 new_sysid = sp->rs_owner->ro_client->rc_sysidt;
8682 lm_set_nlmid_flk(&new_sysid);
8683
8684 /*
8685 * This PxFS routine removes file locks for a
8686 * client over all nodes of a cluster.
8687 */
8688 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
8689 "lm_remove_file_locks(sysid=0x%x)\n",
8690 new_sysid));
8691 (*lm_remove_file_locks)(new_sysid);
8692 } else {
8693 struct flock64 flk;
8694
8695 /* Release all locks for this client */
8696 flk.l_type = F_UNLKSYS;
8697 flk.l_whence = 0;
8698 flk.l_start = 0;
8699 flk.l_len = 0;
8700 flk.l_sysid =
8701 sp->rs_owner->ro_client->rc_sysidt;
8702 flk.l_pid = 0;
8703 (void) VOP_FRLOCK(sp->rs_finfo->rf_vp, F_SETLK,
8704 &flk, F_REMOTELOCK | FREAD | FWRITE,
8705 (u_offset_t)0, NULL, CRED(), NULL);
8706 }
8707
8708 sp->rs_owner->ro_client->rc_unlksys_completed = TRUE;
8709 }
8710 }
8711
8712 /*
8713 * Release all locks on this file by this lock owner or at
8714 * least mark the locks as having been released
8715 */
8716 for (lsp = list_head(&sp->rs_lostatelist); lsp != NULL;
8717 lsp = list_next(&sp->rs_lostatelist, lsp)) {
8718 lsp->rls_locks_cleaned = TRUE;
8719
8720 /* Was this already taken care of above? */
8721 if (!close_of_client &&
8722 sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8723 (void) cleanlocks(sp->rs_finfo->rf_vp,
8724 lsp->rls_locker->rl_pid,
8725 lsp->rls_locker->rl_client->rc_sysidt);
8726 }
8727
8728 /*
8729 * Release any shrlocks associated with this open state ID.
8730 * This must be done before the rfs4_state gets marked closed.
8731 */
8732 if (sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8733 (void) rfs4_unshare(sp);
8734
8735 if (sp->rs_open_access) {
8736 rfs4_dbe_lock(fp->rf_dbe);
8737
8738 /*
8739 * Decrement the count for each access and deny bit that this
8740 * state has contributed to the file.
8741 * If the file counts go to zero
8742 * clear the appropriate bit in the appropriate mask.
8743 */
8744 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) {
8745 fp->rf_access_read--;
8746 fflags |= FREAD;
8747 if (fp->rf_access_read == 0)
8748 fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
8749 }
8750 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) {
8751 fp->rf_access_write--;
8752 fflags |= FWRITE;
8753 if (fp->rf_access_write == 0)
8754 fp->rf_share_access &=
8755 ~OPEN4_SHARE_ACCESS_WRITE;
8756 }
8757 if (sp->rs_open_deny & OPEN4_SHARE_DENY_READ) {
8758 fp->rf_deny_read--;
8759 if (fp->rf_deny_read == 0)
8760 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
8761 }
8762 if (sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) {
8763 fp->rf_deny_write--;
8764 if (fp->rf_deny_write == 0)
8765 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
8766 }
8767
8768 (void) VOP_CLOSE(fp->rf_vp, fflags, 1, (offset_t)0, cr, NULL);
8769
8770 rfs4_dbe_unlock(fp->rf_dbe);
8771
8772 sp->rs_open_access = 0;
8773 sp->rs_open_deny = 0;
8774 }
8775 }
8776
8777 /*
8778 * lock_denied: Fill in a LOCK4deneid structure given an flock64 structure.
8779 */
8780 static nfsstat4
8781 lock_denied(LOCK4denied *dp, struct flock64 *flk)
8782 {
8783 rfs4_lockowner_t *lo;
8784 rfs4_client_t *cp;
8785 uint32_t len;
8786
8787 lo = rfs4_findlockowner_by_pid(flk->l_pid);
8788 if (lo != NULL) {
8789 cp = lo->rl_client;
8790 if (rfs4_lease_expired(cp)) {
8791 rfs4_lockowner_rele(lo);
8792 rfs4_dbe_hold(cp->rc_dbe);
8793 rfs4_client_close(cp);
8794 return (NFS4ERR_EXPIRED);
8795 }
8796 dp->owner.clientid = lo->rl_owner.clientid;
8797 len = lo->rl_owner.owner_len;
8798 dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8799 bcopy(lo->rl_owner.owner_val, dp->owner.owner_val, len);
8800 dp->owner.owner_len = len;
8801 rfs4_lockowner_rele(lo);
8802 goto finish;
8803 }
8804
8805 /*
8806 * Its not a NFS4 lock. We take advantage that the upper 32 bits
8807 * of the client id contain the boot time for a NFS4 lock. So we
8808 * fabricate and identity by setting clientid to the sysid, and
8809 * the lock owner to the pid.
8810 */
8811 dp->owner.clientid = flk->l_sysid;
8812 len = sizeof (pid_t);
8813 dp->owner.owner_len = len;
8814 dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8815 bcopy(&flk->l_pid, dp->owner.owner_val, len);
8816 finish:
8817 dp->offset = flk->l_start;
8818 dp->length = flk->l_len;
8819
8820 if (flk->l_type == F_RDLCK)
8821 dp->locktype = READ_LT;
8822 else if (flk->l_type == F_WRLCK)
8823 dp->locktype = WRITE_LT;
8824 else
8825 return (NFS4ERR_INVAL); /* no mapping from POSIX ltype to v4 */
8826
8827 return (NFS4_OK);
8828 }
8829
8830 /*
8831 * The NFSv4.0 LOCK operation does not support the blocking lock (at the
8832 * NFSv4.0 protocol level) so the client needs to resend the LOCK request in a
8833 * case the lock is denied by the NFSv4.0 server. NFSv4.0 clients are prepared
8834 * for that (obviously); they are sending the LOCK requests with some delays
8835 * between the attempts. See nfs4frlock() and nfs4_block_and_wait() for the
8836 * locking and delay implementation at the client side.
8837 *
8838 * To make the life of the clients easier, the NFSv4.0 server tries to do some
8839 * fast retries on its own (the for loop below) in a hope the lock will be
8840 * available soon. And if not, the client won't need to resend the LOCK
8841 * requests so fast to check the lock availability. This basically saves some
8842 * network traffic and tries to make sure the client gets the lock ASAP.
8843 */
8844 static int
8845 setlock(vnode_t *vp, struct flock64 *flock, int flag, cred_t *cred)
8846 {
8847 int error;
8848 struct flock64 flk;
8849 int i;
8850 clock_t delaytime;
8851 int cmd;
8852 int spin_cnt = 0;
8853
8854 cmd = nbl_need_check(vp) ? F_SETLK_NBMAND : F_SETLK;
8855 retry:
8856 delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
8857
8858 for (i = 0; i < rfs4_maxlock_tries; i++) {
8859 LOCK_PRINT(rfs4_debug, "setlock", cmd, flock);
8860 error = VOP_FRLOCK(vp, cmd,
8861 flock, flag, (u_offset_t)0, NULL, cred, NULL);
8862
8863 if (error != EAGAIN && error != EACCES)
8864 break;
8865
8866 if (i < rfs4_maxlock_tries - 1) {
8867 delay(delaytime);
8868 delaytime *= 2;
8869 }
8870 }
8871
8872 if (error == EAGAIN || error == EACCES) {
8873 /* Get the owner of the lock */
8874 flk = *flock;
8875 LOCK_PRINT(rfs4_debug, "setlock", F_GETLK, &flk);
8876 if (VOP_FRLOCK(vp, F_GETLK, &flk, flag, 0, NULL, cred,
8877 NULL) == 0) {
8878 /*
8879 * There's a race inherent in the current VOP_FRLOCK
8880 * design where:
8881 * a: "other guy" takes a lock that conflicts with a
8882 * lock we want
8883 * b: we attempt to take our lock (non-blocking) and
8884 * the attempt fails.
8885 * c: "other guy" releases the conflicting lock
8886 * d: we ask what lock conflicts with the lock we want,
8887 * getting F_UNLCK (no lock blocks us)
8888 *
8889 * If we retry the non-blocking lock attempt in this
8890 * case (restart at step 'b') there's some possibility
8891 * that many such attempts might fail. However a test
8892 * designed to actually provoke this race shows that
8893 * the vast majority of cases require no retry, and
8894 * only a few took as many as three retries. Here's
8895 * the test outcome:
8896 *
8897 * number of retries how many times we needed
8898 * that many retries
8899 * 0 79461
8900 * 1 862
8901 * 2 49
8902 * 3 5
8903 *
8904 * Given those empirical results, we arbitrarily limit
8905 * the retry count to ten.
8906 *
8907 * If we actually make to ten retries and give up,
8908 * nothing catastrophic happens, but we're unable to
8909 * return the information about the conflicting lock to
8910 * the NFS client. That's an acceptable trade off vs.
8911 * letting this retry loop run forever.
8912 */
8913 if (flk.l_type == F_UNLCK) {
8914 if (spin_cnt++ < 10) {
8915 /* No longer locked, retry */
8916 goto retry;
8917 }
8918 } else {
8919 *flock = flk;
8920 LOCK_PRINT(rfs4_debug, "setlock(blocking lock)",
8921 F_GETLK, &flk);
8922 }
8923 }
8924 }
8925
8926 return (error);
8927 }
8928
8929 /*ARGSUSED*/
8930 static nfsstat4
8931 rfs4_do_lock(rfs4_lo_state_t *lsp, nfs_lock_type4 locktype,
8932 offset4 offset, length4 length, cred_t *cred, nfs_resop4 *resop)
8933 {
8934 nfsstat4 status;
8935 rfs4_lockowner_t *lo = lsp->rls_locker;
8936 rfs4_state_t *sp = lsp->rls_state;
8937 struct flock64 flock;
8938 int16_t ltype;
8939 int flag;
8940 int error;
8941 sysid_t sysid;
8942 LOCK4res *lres;
8943 vnode_t *vp;
8944
8945 if (rfs4_lease_expired(lo->rl_client)) {
8946 return (NFS4ERR_EXPIRED);
8947 }
8948
8949 if ((status = rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
8950 return (status);
8951
8952 /* Check for zero length. To lock to end of file use all ones for V4 */
8953 if (length == 0)
8954 return (NFS4ERR_INVAL);
8955 else if (length == (length4)(~0))
8956 length = 0; /* Posix to end of file */
8957
8958 retry:
8959 rfs4_dbe_lock(sp->rs_dbe);
8960 if (sp->rs_closed == TRUE) {
8961 rfs4_dbe_unlock(sp->rs_dbe);
8962 return (NFS4ERR_OLD_STATEID);
8963 }
8964
8965 if (resop->resop != OP_LOCKU) {
8966 switch (locktype) {
8967 case READ_LT:
8968 case READW_LT:
8969 if ((sp->rs_share_access
8970 & OPEN4_SHARE_ACCESS_READ) == 0) {
8971 rfs4_dbe_unlock(sp->rs_dbe);
8972
8973 return (NFS4ERR_OPENMODE);
8974 }
8975 ltype = F_RDLCK;
8976 break;
8977 case WRITE_LT:
8978 case WRITEW_LT:
8979 if ((sp->rs_share_access
8980 & OPEN4_SHARE_ACCESS_WRITE) == 0) {
8981 rfs4_dbe_unlock(sp->rs_dbe);
8982
8983 return (NFS4ERR_OPENMODE);
8984 }
8985 ltype = F_WRLCK;
8986 break;
8987 }
8988 } else
8989 ltype = F_UNLCK;
8990
8991 flock.l_type = ltype;
8992 flock.l_whence = 0; /* SEEK_SET */
8993 flock.l_start = offset;
8994 flock.l_len = length;
8995 flock.l_sysid = sysid;
8996 flock.l_pid = lsp->rls_locker->rl_pid;
8997
8998 /* Note that length4 is uint64_t but l_len and l_start are off64_t */
8999 if (flock.l_len < 0 || flock.l_start < 0) {
9000 rfs4_dbe_unlock(sp->rs_dbe);
9001 return (NFS4ERR_INVAL);
9002 }
9003
9004 /*
9005 * N.B. FREAD has the same value as OPEN4_SHARE_ACCESS_READ and
9006 * FWRITE has the same value as OPEN4_SHARE_ACCESS_WRITE.
9007 */
9008 flag = (int)sp->rs_share_access | F_REMOTELOCK;
9009
9010 vp = sp->rs_finfo->rf_vp;
9011 VN_HOLD(vp);
9012
9013 /*
9014 * We need to unlock sp before we call the underlying filesystem to
9015 * acquire the file lock.
9016 */
9017 rfs4_dbe_unlock(sp->rs_dbe);
9018
9019 error = setlock(vp, &flock, flag, cred);
9020
9021 /*
9022 * Make sure the file is still open. In a case the file was closed in
9023 * the meantime, clean the lock we acquired using the setlock() call
9024 * above, and return the appropriate error.
9025 */
9026 rfs4_dbe_lock(sp->rs_dbe);
9027 if (sp->rs_closed == TRUE) {
9028 cleanlocks(vp, lsp->rls_locker->rl_pid, sysid);
9029 rfs4_dbe_unlock(sp->rs_dbe);
9030
9031 VN_RELE(vp);
9032
9033 return (NFS4ERR_OLD_STATEID);
9034 }
9035 rfs4_dbe_unlock(sp->rs_dbe);
9036
9037 VN_RELE(vp);
9038
9039 if (error == 0) {
9040 rfs4_dbe_lock(lsp->rls_dbe);
9041 next_stateid(&lsp->rls_lockid);
9042 rfs4_dbe_unlock(lsp->rls_dbe);
9043 }
9044
9045 /*
9046 * N.B. We map error values to nfsv4 errors. This is differrent
9047 * than puterrno4 routine.
9048 */
9049 switch (error) {
9050 case 0:
9051 status = NFS4_OK;
9052 break;
9053 case EAGAIN:
9054 case EACCES: /* Old value */
9055 /* Can only get here if op is OP_LOCK */
9056 ASSERT(resop->resop == OP_LOCK);
9057 lres = &resop->nfs_resop4_u.oplock;
9058 status = NFS4ERR_DENIED;
9059 if (lock_denied(&lres->LOCK4res_u.denied, &flock)
9060 == NFS4ERR_EXPIRED)
9061 goto retry;
9062 break;
9063 case ENOLCK:
9064 status = NFS4ERR_DELAY;
9065 break;
9066 case EOVERFLOW:
9067 status = NFS4ERR_INVAL;
9068 break;
9069 case EINVAL:
9070 status = NFS4ERR_NOTSUPP;
9071 break;
9072 default:
9073 status = NFS4ERR_SERVERFAULT;
9074 break;
9075 }
9076
9077 return (status);
9078 }
9079
9080 /*ARGSUSED*/
9081 void
9082 rfs4_op_lock(nfs_argop4 *argop, nfs_resop4 *resop,
9083 struct svc_req *req, struct compound_state *cs)
9084 {
9085 LOCK4args *args = &argop->nfs_argop4_u.oplock;
9086 LOCK4res *resp = &resop->nfs_resop4_u.oplock;
9087 nfsstat4 status;
9088 stateid4 *stateid;
9089 rfs4_lockowner_t *lo;
9090 rfs4_client_t *cp;
9091 rfs4_state_t *sp = NULL;
9092 rfs4_lo_state_t *lsp = NULL;
9093 bool_t ls_sw_held = FALSE;
9094 bool_t create = TRUE;
9095 bool_t lcreate = TRUE;
9096 bool_t dup_lock = FALSE;
9097 int rc;
9098
9099 DTRACE_NFSV4_2(op__lock__start, struct compound_state *, cs,
9100 LOCK4args *, args);
9101
9102 if (cs->vp == NULL) {
9103 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9104 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9105 cs, LOCK4res *, resp);
9106 return;
9107 }
9108
9109 if (args->locker.new_lock_owner) {
9110 /* Create a new lockowner for this instance */
9111 open_to_lock_owner4 *olo = &args->locker.locker4_u.open_owner;
9112
9113 NFS4_DEBUG(rfs4_debug, (CE_NOTE, "Creating new lock owner"));
9114
9115 stateid = &olo->open_stateid;
9116 status = rfs4_get_state(stateid, &sp, RFS4_DBS_VALID);
9117 if (status != NFS4_OK) {
9118 NFS4_DEBUG(rfs4_debug,
9119 (CE_NOTE, "Get state failed in lock %d", status));
9120 *cs->statusp = resp->status = status;
9121 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9122 cs, LOCK4res *, resp);
9123 return;
9124 }
9125
9126 /* Ensure specified filehandle matches */
9127 if (cs->vp != sp->rs_finfo->rf_vp) {
9128 rfs4_state_rele(sp);
9129 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9130 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9131 cs, LOCK4res *, resp);
9132 return;
9133 }
9134
9135 /* hold off other access to open_owner while we tinker */
9136 rfs4_sw_enter(&sp->rs_owner->ro_sw);
9137
9138 switch (rc = rfs4_check_stateid_seqid(sp, stateid)) {
9139 case NFS4_CHECK_STATEID_OLD:
9140 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9141 goto end;
9142 case NFS4_CHECK_STATEID_BAD:
9143 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9144 goto end;
9145 case NFS4_CHECK_STATEID_EXPIRED:
9146 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9147 goto end;
9148 case NFS4_CHECK_STATEID_UNCONFIRMED:
9149 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9150 goto end;
9151 case NFS4_CHECK_STATEID_CLOSED:
9152 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9153 goto end;
9154 case NFS4_CHECK_STATEID_OKAY:
9155 case NFS4_CHECK_STATEID_REPLAY:
9156 switch (rfs4_check_olo_seqid(olo->open_seqid,
9157 sp->rs_owner, resop)) {
9158 case NFS4_CHKSEQ_OKAY:
9159 if (rc == NFS4_CHECK_STATEID_OKAY)
9160 break;
9161 /*
9162 * This is replayed stateid; if seqid
9163 * matches next expected, then client
9164 * is using wrong seqid.
9165 */
9166 /* FALLTHROUGH */
9167 case NFS4_CHKSEQ_BAD:
9168 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9169 goto end;
9170 case NFS4_CHKSEQ_REPLAY:
9171 /* This is a duplicate LOCK request */
9172 dup_lock = TRUE;
9173
9174 /*
9175 * For a duplicate we do not want to
9176 * create a new lockowner as it should
9177 * already exist.
9178 * Turn off the lockowner create flag.
9179 */
9180 lcreate = FALSE;
9181 }
9182 break;
9183 }
9184
9185 lo = rfs4_findlockowner(&olo->lock_owner, &lcreate);
9186 if (lo == NULL) {
9187 NFS4_DEBUG(rfs4_debug,
9188 (CE_NOTE, "rfs4_op_lock: no lock owner"));
9189 *cs->statusp = resp->status = NFS4ERR_RESOURCE;
9190 goto end;
9191 }
9192
9193 lsp = rfs4_findlo_state_by_owner(lo, sp, &create);
9194 if (lsp == NULL) {
9195 rfs4_update_lease(sp->rs_owner->ro_client);
9196 /*
9197 * Only update theh open_seqid if this is not
9198 * a duplicate request
9199 */
9200 if (dup_lock == FALSE) {
9201 rfs4_update_open_sequence(sp->rs_owner);
9202 }
9203
9204 NFS4_DEBUG(rfs4_debug,
9205 (CE_NOTE, "rfs4_op_lock: no state"));
9206 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
9207 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
9208 rfs4_lockowner_rele(lo);
9209 goto end;
9210 }
9211
9212 /*
9213 * This is the new_lock_owner branch and the client is
9214 * supposed to be associating a new lock_owner with
9215 * the open file at this point. If we find that a
9216 * lock_owner/state association already exists and a
9217 * successful LOCK request was returned to the client,
9218 * an error is returned to the client since this is
9219 * not appropriate. The client should be using the
9220 * existing lock_owner branch.
9221 */
9222 if (dup_lock == FALSE && create == FALSE) {
9223 if (lsp->rls_lock_completed == TRUE) {
9224 *cs->statusp =
9225 resp->status = NFS4ERR_BAD_SEQID;
9226 rfs4_lockowner_rele(lo);
9227 goto end;
9228 }
9229 }
9230
9231 rfs4_update_lease(sp->rs_owner->ro_client);
9232
9233 /*
9234 * Only update theh open_seqid if this is not
9235 * a duplicate request
9236 */
9237 if (dup_lock == FALSE) {
9238 rfs4_update_open_sequence(sp->rs_owner);
9239 }
9240
9241 /*
9242 * If this is a duplicate lock request, just copy the
9243 * previously saved reply and return.
9244 */
9245 if (dup_lock == TRUE) {
9246 /* verify that lock_seqid's match */
9247 if (lsp->rls_seqid != olo->lock_seqid) {
9248 NFS4_DEBUG(rfs4_debug,
9249 (CE_NOTE, "rfs4_op_lock: Dup-Lock seqid bad"
9250 "lsp->seqid=%d old->seqid=%d",
9251 lsp->rls_seqid, olo->lock_seqid));
9252 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9253 } else {
9254 rfs4_copy_reply(resop, &lsp->rls_reply);
9255 /*
9256 * Make sure to copy the just
9257 * retrieved reply status into the
9258 * overall compound status
9259 */
9260 *cs->statusp = resp->status;
9261 }
9262 rfs4_lockowner_rele(lo);
9263 goto end;
9264 }
9265
9266 rfs4_dbe_lock(lsp->rls_dbe);
9267
9268 /* Make sure to update the lock sequence id */
9269 lsp->rls_seqid = olo->lock_seqid;
9270
9271 NFS4_DEBUG(rfs4_debug,
9272 (CE_NOTE, "Lock seqid established as %d", lsp->rls_seqid));
9273
9274 /*
9275 * This is used to signify the newly created lockowner
9276 * stateid and its sequence number. The checks for
9277 * sequence number and increment don't occur on the
9278 * very first lock request for a lockowner.
9279 */
9280 lsp->rls_skip_seqid_check = TRUE;
9281
9282 /* hold off other access to lsp while we tinker */
9283 rfs4_sw_enter(&lsp->rls_sw);
9284 ls_sw_held = TRUE;
9285
9286 rfs4_dbe_unlock(lsp->rls_dbe);
9287
9288 rfs4_lockowner_rele(lo);
9289 } else {
9290 stateid = &args->locker.locker4_u.lock_owner.lock_stateid;
9291 /* get lsp and hold the lock on the underlying file struct */
9292 if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE))
9293 != NFS4_OK) {
9294 *cs->statusp = resp->status = status;
9295 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9296 cs, LOCK4res *, resp);
9297 return;
9298 }
9299 create = FALSE; /* We didn't create lsp */
9300
9301 /* Ensure specified filehandle matches */
9302 if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
9303 rfs4_lo_state_rele(lsp, TRUE);
9304 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9305 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9306 cs, LOCK4res *, resp);
9307 return;
9308 }
9309
9310 /* hold off other access to lsp while we tinker */
9311 rfs4_sw_enter(&lsp->rls_sw);
9312 ls_sw_held = TRUE;
9313
9314 switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
9315 /*
9316 * The stateid looks like it was okay (expected to be
9317 * the next one)
9318 */
9319 case NFS4_CHECK_STATEID_OKAY:
9320 /*
9321 * The sequence id is now checked. Determine
9322 * if this is a replay or if it is in the
9323 * expected (next) sequence. In the case of a
9324 * replay, there are two replay conditions
9325 * that may occur. The first is the normal
9326 * condition where a LOCK is done with a
9327 * NFS4_OK response and the stateid is
9328 * updated. That case is handled below when
9329 * the stateid is identified as a REPLAY. The
9330 * second is the case where an error is
9331 * returned, like NFS4ERR_DENIED, and the
9332 * sequence number is updated but the stateid
9333 * is not updated. This second case is dealt
9334 * with here. So it may seem odd that the
9335 * stateid is okay but the sequence id is a
9336 * replay but it is okay.
9337 */
9338 switch (rfs4_check_lock_seqid(
9339 args->locker.locker4_u.lock_owner.lock_seqid,
9340 lsp, resop)) {
9341 case NFS4_CHKSEQ_REPLAY:
9342 if (resp->status != NFS4_OK) {
9343 /*
9344 * Here is our replay and need
9345 * to verify that the last
9346 * response was an error.
9347 */
9348 *cs->statusp = resp->status;
9349 goto end;
9350 }
9351 /*
9352 * This is done since the sequence id
9353 * looked like a replay but it didn't
9354 * pass our check so a BAD_SEQID is
9355 * returned as a result.
9356 */
9357 /*FALLTHROUGH*/
9358 case NFS4_CHKSEQ_BAD:
9359 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9360 goto end;
9361 case NFS4_CHKSEQ_OKAY:
9362 /* Everything looks okay move ahead */
9363 break;
9364 }
9365 break;
9366 case NFS4_CHECK_STATEID_OLD:
9367 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9368 goto end;
9369 case NFS4_CHECK_STATEID_BAD:
9370 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9371 goto end;
9372 case NFS4_CHECK_STATEID_EXPIRED:
9373 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9374 goto end;
9375 case NFS4_CHECK_STATEID_CLOSED:
9376 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9377 goto end;
9378 case NFS4_CHECK_STATEID_REPLAY:
9379 switch (rfs4_check_lock_seqid(
9380 args->locker.locker4_u.lock_owner.lock_seqid,
9381 lsp, resop)) {
9382 case NFS4_CHKSEQ_OKAY:
9383 /*
9384 * This is a replayed stateid; if
9385 * seqid matches the next expected,
9386 * then client is using wrong seqid.
9387 */
9388 case NFS4_CHKSEQ_BAD:
9389 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9390 goto end;
9391 case NFS4_CHKSEQ_REPLAY:
9392 rfs4_update_lease(lsp->rls_locker->rl_client);
9393 *cs->statusp = status = resp->status;
9394 goto end;
9395 }
9396 break;
9397 default:
9398 ASSERT(FALSE);
9399 break;
9400 }
9401
9402 rfs4_update_lock_sequence(lsp);
9403 rfs4_update_lease(lsp->rls_locker->rl_client);
9404 }
9405
9406 /*
9407 * NFS4 only allows locking on regular files, so
9408 * verify type of object.
9409 */
9410 if (cs->vp->v_type != VREG) {
9411 if (cs->vp->v_type == VDIR)
9412 status = NFS4ERR_ISDIR;
9413 else
9414 status = NFS4ERR_INVAL;
9415 goto out;
9416 }
9417
9418 cp = lsp->rls_state->rs_owner->ro_client;
9419
9420 if (rfs4_clnt_in_grace(cp) && !args->reclaim) {
9421 status = NFS4ERR_GRACE;
9422 goto out;
9423 }
9424
9425 if (rfs4_clnt_in_grace(cp) && args->reclaim && !cp->rc_can_reclaim) {
9426 status = NFS4ERR_NO_GRACE;
9427 goto out;
9428 }
9429
9430 if (!rfs4_clnt_in_grace(cp) && args->reclaim) {
9431 status = NFS4ERR_NO_GRACE;
9432 goto out;
9433 }
9434
9435 if (lsp->rls_state->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE)
9436 cs->deleg = TRUE;
9437
9438 status = rfs4_do_lock(lsp, args->locktype,
9439 args->offset, args->length, cs->cr, resop);
9440
9441 out:
9442 lsp->rls_skip_seqid_check = FALSE;
9443
9444 *cs->statusp = resp->status = status;
9445
9446 if (status == NFS4_OK) {
9447 resp->LOCK4res_u.lock_stateid = lsp->rls_lockid.stateid;
9448 lsp->rls_lock_completed = TRUE;
9449 }
9450 /*
9451 * Only update the "OPEN" response here if this was a new
9452 * lock_owner
9453 */
9454 if (sp)
9455 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
9456
9457 rfs4_update_lock_resp(lsp, resop);
9458
9459 end:
9460 if (lsp) {
9461 if (ls_sw_held)
9462 rfs4_sw_exit(&lsp->rls_sw);
9463 /*
9464 * If an sp obtained, then the lsp does not represent
9465 * a lock on the file struct.
9466 */
9467 if (sp != NULL)
9468 rfs4_lo_state_rele(lsp, FALSE);
9469 else
9470 rfs4_lo_state_rele(lsp, TRUE);
9471 }
9472 if (sp) {
9473 rfs4_sw_exit(&sp->rs_owner->ro_sw);
9474 rfs4_state_rele(sp);
9475 }
9476
9477 DTRACE_NFSV4_2(op__lock__done, struct compound_state *, cs,
9478 LOCK4res *, resp);
9479 }
9480
9481 /* free function for LOCK/LOCKT */
9482 static void
9483 lock_denied_free(nfs_resop4 *resop)
9484 {
9485 LOCK4denied *dp = NULL;
9486
9487 switch (resop->resop) {
9488 case OP_LOCK:
9489 if (resop->nfs_resop4_u.oplock.status == NFS4ERR_DENIED)
9490 dp = &resop->nfs_resop4_u.oplock.LOCK4res_u.denied;
9491 break;
9492 case OP_LOCKT:
9493 if (resop->nfs_resop4_u.oplockt.status == NFS4ERR_DENIED)
9494 dp = &resop->nfs_resop4_u.oplockt.denied;
9495 break;
9496 default:
9497 break;
9498 }
9499
9500 if (dp)
9501 kmem_free(dp->owner.owner_val, dp->owner.owner_len);
9502 }
9503
9504 /*ARGSUSED*/
9505 void
9506 rfs4_op_locku(nfs_argop4 *argop, nfs_resop4 *resop,
9507 struct svc_req *req, struct compound_state *cs)
9508 {
9509 LOCKU4args *args = &argop->nfs_argop4_u.oplocku;
9510 LOCKU4res *resp = &resop->nfs_resop4_u.oplocku;
9511 nfsstat4 status;
9512 stateid4 *stateid = &args->lock_stateid;
9513 rfs4_lo_state_t *lsp;
9514
9515 DTRACE_NFSV4_2(op__locku__start, struct compound_state *, cs,
9516 LOCKU4args *, args);
9517
9518 if (cs->vp == NULL) {
9519 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9520 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9521 LOCKU4res *, resp);
9522 return;
9523 }
9524
9525 if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE)) != NFS4_OK) {
9526 *cs->statusp = resp->status = status;
9527 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9528 LOCKU4res *, resp);
9529 return;
9530 }
9531
9532 /* Ensure specified filehandle matches */
9533 if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
9534 rfs4_lo_state_rele(lsp, TRUE);
9535 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9536 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9537 LOCKU4res *, resp);
9538 return;
9539 }
9540
9541 /* hold off other access to lsp while we tinker */
9542 rfs4_sw_enter(&lsp->rls_sw);
9543
9544 switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
9545 case NFS4_CHECK_STATEID_OKAY:
9546 if (rfs4_check_lock_seqid(args->seqid, lsp, resop)
9547 != NFS4_CHKSEQ_OKAY) {
9548 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9549 goto end;
9550 }
9551 break;
9552 case NFS4_CHECK_STATEID_OLD:
9553 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9554 goto end;
9555 case NFS4_CHECK_STATEID_BAD:
9556 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9557 goto end;
9558 case NFS4_CHECK_STATEID_EXPIRED:
9559 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9560 goto end;
9561 case NFS4_CHECK_STATEID_CLOSED:
9562 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9563 goto end;
9564 case NFS4_CHECK_STATEID_REPLAY:
9565 switch (rfs4_check_lock_seqid(args->seqid, lsp, resop)) {
9566 case NFS4_CHKSEQ_OKAY:
9567 /*
9568 * This is a replayed stateid; if
9569 * seqid matches the next expected,
9570 * then client is using wrong seqid.
9571 */
9572 case NFS4_CHKSEQ_BAD:
9573 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9574 goto end;
9575 case NFS4_CHKSEQ_REPLAY:
9576 rfs4_update_lease(lsp->rls_locker->rl_client);
9577 *cs->statusp = status = resp->status;
9578 goto end;
9579 }
9580 break;
9581 default:
9582 ASSERT(FALSE);
9583 break;
9584 }
9585
9586 rfs4_update_lock_sequence(lsp);
9587 rfs4_update_lease(lsp->rls_locker->rl_client);
9588
9589 /*
9590 * NFS4 only allows locking on regular files, so
9591 * verify type of object.
9592 */
9593 if (cs->vp->v_type != VREG) {
9594 if (cs->vp->v_type == VDIR)
9595 status = NFS4ERR_ISDIR;
9596 else
9597 status = NFS4ERR_INVAL;
9598 goto out;
9599 }
9600
9601 if (rfs4_clnt_in_grace(lsp->rls_state->rs_owner->ro_client)) {
9602 status = NFS4ERR_GRACE;
9603 goto out;
9604 }
9605
9606 status = rfs4_do_lock(lsp, args->locktype,
9607 args->offset, args->length, cs->cr, resop);
9608
9609 out:
9610 *cs->statusp = resp->status = status;
9611
9612 if (status == NFS4_OK)
9613 resp->lock_stateid = lsp->rls_lockid.stateid;
9614
9615 rfs4_update_lock_resp(lsp, resop);
9616
9617 end:
9618 rfs4_sw_exit(&lsp->rls_sw);
9619 rfs4_lo_state_rele(lsp, TRUE);
9620
9621 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9622 LOCKU4res *, resp);
9623 }
9624
9625 /*
9626 * LOCKT is a best effort routine, the client can not be guaranteed that
9627 * the status return is still in effect by the time the reply is received.
9628 * They are numerous race conditions in this routine, but we are not required
9629 * and can not be accurate.
9630 */
9631 /*ARGSUSED*/
9632 void
9633 rfs4_op_lockt(nfs_argop4 *argop, nfs_resop4 *resop,
9634 struct svc_req *req, struct compound_state *cs)
9635 {
9636 LOCKT4args *args = &argop->nfs_argop4_u.oplockt;
9637 LOCKT4res *resp = &resop->nfs_resop4_u.oplockt;
9638 rfs4_lockowner_t *lo;
9639 rfs4_client_t *cp;
9640 bool_t create = FALSE;
9641 struct flock64 flk;
9642 int error;
9643 int flag = FREAD | FWRITE;
9644 int ltype;
9645 length4 posix_length;
9646 sysid_t sysid;
9647 pid_t pid;
9648
9649 DTRACE_NFSV4_2(op__lockt__start, struct compound_state *, cs,
9650 LOCKT4args *, args);
9651
9652 if (cs->vp == NULL) {
9653 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9654 goto out;
9655 }
9656
9657 /*
9658 * NFS4 only allows locking on regular files, so
9659 * verify type of object.
9660 */
9661 if (cs->vp->v_type != VREG) {
9662 if (cs->vp->v_type == VDIR)
9663 *cs->statusp = resp->status = NFS4ERR_ISDIR;
9664 else
9665 *cs->statusp = resp->status = NFS4ERR_INVAL;
9666 goto out;
9667 }
9668
9669 /*
9670 * Check out the clientid to ensure the server knows about it
9671 * so that we correctly inform the client of a server reboot.
9672 */
9673 if ((cp = rfs4_findclient_by_id(args->owner.clientid, FALSE))
9674 == NULL) {
9675 *cs->statusp = resp->status =
9676 rfs4_check_clientid(&args->owner.clientid, 0);
9677 goto out;
9678 }
9679 if (rfs4_lease_expired(cp)) {
9680 rfs4_client_close(cp);
9681 /*
9682 * Protocol doesn't allow returning NFS4ERR_STALE as
9683 * other operations do on this check so STALE_CLIENTID
9684 * is returned instead
9685 */
9686 *cs->statusp = resp->status = NFS4ERR_STALE_CLIENTID;
9687 goto out;
9688 }
9689
9690 if (rfs4_clnt_in_grace(cp) && !(cp->rc_can_reclaim)) {
9691 *cs->statusp = resp->status = NFS4ERR_GRACE;
9692 rfs4_client_rele(cp);
9693 goto out;
9694 }
9695 rfs4_client_rele(cp);
9696
9697 resp->status = NFS4_OK;
9698
9699 switch (args->locktype) {
9700 case READ_LT:
9701 case READW_LT:
9702 ltype = F_RDLCK;
9703 break;
9704 case WRITE_LT:
9705 case WRITEW_LT:
9706 ltype = F_WRLCK;
9707 break;
9708 }
9709
9710 posix_length = args->length;
9711 /* Check for zero length. To lock to end of file use all ones for V4 */
9712 if (posix_length == 0) {
9713 *cs->statusp = resp->status = NFS4ERR_INVAL;
9714 goto out;
9715 } else if (posix_length == (length4)(~0)) {
9716 posix_length = 0; /* Posix to end of file */
9717 }
9718
9719 /* Find or create a lockowner */
9720 lo = rfs4_findlockowner(&args->owner, &create);
9721
9722 if (lo) {
9723 pid = lo->rl_pid;
9724 if ((resp->status =
9725 rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
9726 goto err;
9727 } else {
9728 pid = 0;
9729 sysid = lockt_sysid;
9730 }
9731 retry:
9732 flk.l_type = ltype;
9733 flk.l_whence = 0; /* SEEK_SET */
9734 flk.l_start = args->offset;
9735 flk.l_len = posix_length;
9736 flk.l_sysid = sysid;
9737 flk.l_pid = pid;
9738 flag |= F_REMOTELOCK;
9739
9740 LOCK_PRINT(rfs4_debug, "rfs4_op_lockt", F_GETLK, &flk);
9741
9742 /* Note that length4 is uint64_t but l_len and l_start are off64_t */
9743 if (flk.l_len < 0 || flk.l_start < 0) {
9744 resp->status = NFS4ERR_INVAL;
9745 goto err;
9746 }
9747 error = VOP_FRLOCK(cs->vp, F_GETLK, &flk, flag, (u_offset_t)0,
9748 NULL, cs->cr, NULL);
9749
9750 /*
9751 * N.B. We map error values to nfsv4 errors. This is differrent
9752 * than puterrno4 routine.
9753 */
9754 switch (error) {
9755 case 0:
9756 if (flk.l_type == F_UNLCK)
9757 resp->status = NFS4_OK;
9758 else {
9759 if (lock_denied(&resp->denied, &flk) == NFS4ERR_EXPIRED)
9760 goto retry;
9761 resp->status = NFS4ERR_DENIED;
9762 }
9763 break;
9764 case EOVERFLOW:
9765 resp->status = NFS4ERR_INVAL;
9766 break;
9767 case EINVAL:
9768 resp->status = NFS4ERR_NOTSUPP;
9769 break;
9770 default:
9771 cmn_err(CE_WARN, "rfs4_op_lockt: unexpected errno (%d)",
9772 error);
9773 resp->status = NFS4ERR_SERVERFAULT;
9774 break;
9775 }
9776
9777 err:
9778 if (lo)
9779 rfs4_lockowner_rele(lo);
9780 *cs->statusp = resp->status;
9781 out:
9782 DTRACE_NFSV4_2(op__lockt__done, struct compound_state *, cs,
9783 LOCKT4res *, resp);
9784 }
9785
9786 int
9787 rfs4_share(rfs4_state_t *sp, uint32_t access, uint32_t deny)
9788 {
9789 int err;
9790 int cmd;
9791 vnode_t *vp;
9792 struct shrlock shr;
9793 struct shr_locowner shr_loco;
9794 int fflags = 0;
9795
9796 ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9797 ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9798
9799 if (sp->rs_closed)
9800 return (NFS4ERR_OLD_STATEID);
9801
9802 vp = sp->rs_finfo->rf_vp;
9803 ASSERT(vp);
9804
9805 shr.s_access = shr.s_deny = 0;
9806
9807 if (access & OPEN4_SHARE_ACCESS_READ) {
9808 fflags |= FREAD;
9809 shr.s_access |= F_RDACC;
9810 }
9811 if (access & OPEN4_SHARE_ACCESS_WRITE) {
9812 fflags |= FWRITE;
9813 shr.s_access |= F_WRACC;
9814 }
9815 ASSERT(shr.s_access);
9816
9817 if (deny & OPEN4_SHARE_DENY_READ)
9818 shr.s_deny |= F_RDDNY;
9819 if (deny & OPEN4_SHARE_DENY_WRITE)
9820 shr.s_deny |= F_WRDNY;
9821
9822 shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9823 shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9824 shr_loco.sl_pid = shr.s_pid;
9825 shr_loco.sl_id = shr.s_sysid;
9826 shr.s_owner = (caddr_t)&shr_loco;
9827 shr.s_own_len = sizeof (shr_loco);
9828
9829 cmd = nbl_need_check(vp) ? F_SHARE_NBMAND : F_SHARE;
9830
9831 err = VOP_SHRLOCK(vp, cmd, &shr, fflags, CRED(), NULL);
9832 if (err != 0) {
9833 if (err == EAGAIN)
9834 err = NFS4ERR_SHARE_DENIED;
9835 else
9836 err = puterrno4(err);
9837 return (err);
9838 }
9839
9840 sp->rs_share_access |= access;
9841 sp->rs_share_deny |= deny;
9842
9843 return (0);
9844 }
9845
9846 int
9847 rfs4_unshare(rfs4_state_t *sp)
9848 {
9849 int err;
9850 struct shrlock shr;
9851 struct shr_locowner shr_loco;
9852
9853 ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9854
9855 if (sp->rs_closed || sp->rs_share_access == 0)
9856 return (0);
9857
9858 ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9859 ASSERT(sp->rs_finfo->rf_vp);
9860
9861 shr.s_access = shr.s_deny = 0;
9862 shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9863 shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9864 shr_loco.sl_pid = shr.s_pid;
9865 shr_loco.sl_id = shr.s_sysid;
9866 shr.s_owner = (caddr_t)&shr_loco;
9867 shr.s_own_len = sizeof (shr_loco);
9868
9869 err = VOP_SHRLOCK(sp->rs_finfo->rf_vp, F_UNSHARE, &shr, 0, CRED(),
9870 NULL);
9871 if (err != 0) {
9872 err = puterrno4(err);
9873 return (err);
9874 }
9875
9876 sp->rs_share_access = 0;
9877 sp->rs_share_deny = 0;
9878
9879 return (0);
9880
9881 }
9882
9883 static int
9884 rdma_setup_read_data4(READ4args *args, READ4res *rok)
9885 {
9886 struct clist *wcl;
9887 count4 count = rok->data_len;
9888 int wlist_len;
9889
9890 wcl = args->wlist;
9891 if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
9892 return (FALSE);
9893 }
9894 wcl = args->wlist;
9895 rok->wlist_len = wlist_len;
9896 rok->wlist = wcl;
9897 return (TRUE);
9898 }
9899
/*
 * Tunable to disable server referrals.  When set to a non-zero value,
 * vn_is_nfs_reparse() reports B_FALSE for every vnode.
 */
int rfs4_no_referrals = 0;
9902
9903 /*
9904 * Find an NFS record in reparse point data.
9905 * Returns 0 for success and <0 or an errno value on failure.
9906 */
9907 int
9908 vn_find_nfs_record(vnode_t *vp, nvlist_t **nvlp, char **svcp, char **datap)
9909 {
9910 int err;
9911 char *stype, *val;
9912 nvlist_t *nvl;
9913 nvpair_t *curr;
9914
9915 if ((nvl = reparse_init()) == NULL)
9916 return (-1);
9917
9918 if ((err = reparse_vnode_parse(vp, nvl)) != 0) {
9919 reparse_free(nvl);
9920 return (err);
9921 }
9922
9923 curr = NULL;
9924 while ((curr = nvlist_next_nvpair(nvl, curr)) != NULL) {
9925 if ((stype = nvpair_name(curr)) == NULL) {
9926 reparse_free(nvl);
9927 return (-2);
9928 }
9929 if (strncasecmp(stype, "NFS", 3) == 0)
9930 break;
9931 }
9932
9933 if ((curr == NULL) ||
9934 (nvpair_value_string(curr, &val))) {
9935 reparse_free(nvl);
9936 return (-3);
9937 }
9938 *nvlp = nvl;
9939 *svcp = stype;
9940 *datap = val;
9941 return (0);
9942 }
9943
9944 int
9945 vn_is_nfs_reparse(vnode_t *vp, cred_t *cr)
9946 {
9947 nvlist_t *nvl;
9948 char *s, *d;
9949
9950 if (rfs4_no_referrals != 0)
9951 return (B_FALSE);
9952
9953 if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9954 return (B_FALSE);
9955
9956 if (vn_find_nfs_record(vp, &nvl, &s, &d) != 0)
9957 return (B_FALSE);
9958
9959 reparse_free(nvl);
9960
9961 return (B_TRUE);
9962 }
9963
9964 /*
9965 * There is a user-level copy of this routine in ref_subr.c.
9966 * Changes should be kept in sync.
9967 */
9968 static int
9969 nfs4_create_components(char *path, component4 *comp4)
9970 {
9971 int slen, plen, ncomp;
9972 char *ori_path, *nxtc, buf[MAXNAMELEN];
9973
9974 if (path == NULL)
9975 return (0);
9976
9977 plen = strlen(path) + 1; /* include the terminator */
9978 ori_path = path;
9979 ncomp = 0;
9980
9981 /* count number of components in the path */
9982 for (nxtc = path; nxtc < ori_path + plen; nxtc++) {
9983 if (*nxtc == '/' || *nxtc == '\0' || *nxtc == '\n') {
9984 if ((slen = nxtc - path) == 0) {
9985 path = nxtc + 1;
9986 continue;
9987 }
9988
9989 if (comp4 != NULL) {
9990 bcopy(path, buf, slen);
9991 buf[slen] = '\0';
9992 (void) str_to_utf8(buf, &comp4[ncomp]);
9993 }
9994
9995 ncomp++; /* 1 valid component */
9996 path = nxtc + 1;
9997 }
9998 if (*nxtc == '\0' || *nxtc == '\n')
9999 break;
10000 }
10001
10002 return (ncomp);
10003 }
10004
10005 /*
10006 * There is a user-level copy of this routine in ref_subr.c.
10007 * Changes should be kept in sync.
10008 */
10009 static int
10010 make_pathname4(char *path, pathname4 *pathname)
10011 {
10012 int ncomp;
10013 component4 *comp4;
10014
10015 if (pathname == NULL)
10016 return (0);
10017
10018 if (path == NULL) {
10019 pathname->pathname4_val = NULL;
10020 pathname->pathname4_len = 0;
10021 return (0);
10022 }
10023
10024 /* count number of components to alloc buffer */
10025 if ((ncomp = nfs4_create_components(path, NULL)) == 0) {
10026 pathname->pathname4_val = NULL;
10027 pathname->pathname4_len = 0;
10028 return (0);
10029 }
10030 comp4 = kmem_zalloc(ncomp * sizeof (component4), KM_SLEEP);
10031
10032 /* copy components into allocated buffer */
10033 ncomp = nfs4_create_components(path, comp4);
10034
10035 pathname->pathname4_val = comp4;
10036 pathname->pathname4_len = ncomp;
10037
10038 return (ncomp);
10039 }
10040
10041 #define xdr_fs_locations4 xdr_fattr4_fs_locations
10042
10043 fs_locations4 *
10044 fetch_referral(vnode_t *vp, cred_t *cr)
10045 {
10046 nvlist_t *nvl;
10047 char *stype, *sdata;
10048 fs_locations4 *result;
10049 char buf[1024];
10050 size_t bufsize;
10051 XDR xdr;
10052 int err;
10053
10054 /*
10055 * Check attrs to ensure it's a reparse point
10056 */
10057 if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
10058 return (NULL);
10059
10060 /*
10061 * Look for an NFS record and get the type and data
10062 */
10063 if (vn_find_nfs_record(vp, &nvl, &stype, &sdata) != 0)
10064 return (NULL);
10065
10066 /*
10067 * With the type and data, upcall to get the referral
10068 */
10069 bufsize = sizeof (buf);
10070 bzero(buf, sizeof (buf));
10071 err = reparse_kderef((const char *)stype, (const char *)sdata,
10072 buf, &bufsize);
10073 reparse_free(nvl);
10074
10075 DTRACE_PROBE4(nfs4serv__func__referral__upcall,
10076 char *, stype, char *, sdata, char *, buf, int, err);
10077 if (err) {
10078 cmn_err(CE_NOTE,
10079 "reparsed daemon not running: unable to get referral (%d)",
10080 err);
10081 return (NULL);
10082 }
10083
10084 /*
10085 * We get an XDR'ed record back from the kderef call
10086 */
10087 xdrmem_create(&xdr, buf, bufsize, XDR_DECODE);
10088 result = kmem_alloc(sizeof (fs_locations4), KM_SLEEP);
10089 err = xdr_fs_locations4(&xdr, result);
10090 XDR_DESTROY(&xdr);
10091 if (err != TRUE) {
10092 DTRACE_PROBE1(nfs4serv__func__referral__upcall__xdrfail,
10093 int, err);
10094 return (NULL);
10095 }
10096
10097 /*
10098 * Look at path to recover fs_root, ignoring the leading '/'
10099 */
10100 (void) make_pathname4(vp->v_path, &result->fs_root);
10101
10102 return (result);
10103 }
10104
10105 char *
10106 build_symlink(vnode_t *vp, cred_t *cr, size_t *strsz)
10107 {
10108 fs_locations4 *fsl;
10109 fs_location4 *fs;
10110 char *server, *path, *symbuf;
10111 static char *prefix = "/net/";
10112 int i, size, npaths;
10113 uint_t len;
10114
10115 /* Get the referral */
10116 if ((fsl = fetch_referral(vp, cr)) == NULL)
10117 return (NULL);
10118
10119 /* Deal with only the first location and first server */
10120 fs = &fsl->locations_val[0];
10121 server = utf8_to_str(&fs->server_val[0], &len, NULL);
10122 if (server == NULL) {
10123 rfs4_free_fs_locations4(fsl);
10124 kmem_free(fsl, sizeof (fs_locations4));
10125 return (NULL);
10126 }
10127
10128 /* Figure out size for "/net/" + host + /path/path/path + NULL */
10129 size = strlen(prefix) + len;
10130 for (i = 0; i < fs->rootpath.pathname4_len; i++)
10131 size += fs->rootpath.pathname4_val[i].utf8string_len + 1;
10132
10133 /* Allocate the symlink buffer and fill it */
10134 symbuf = kmem_zalloc(size, KM_SLEEP);
10135 (void) strcat(symbuf, prefix);
10136 (void) strcat(symbuf, server);
10137 kmem_free(server, len);
10138
10139 npaths = 0;
10140 for (i = 0; i < fs->rootpath.pathname4_len; i++) {
10141 path = utf8_to_str(&fs->rootpath.pathname4_val[i], &len, NULL);
10142 if (path == NULL)
10143 continue;
10144 (void) strcat(symbuf, "/");
10145 (void) strcat(symbuf, path);
10146 npaths++;
10147 kmem_free(path, len);
10148 }
10149
10150 rfs4_free_fs_locations4(fsl);
10151 kmem_free(fsl, sizeof (fs_locations4));
10152
10153 if (strsz != NULL)
10154 *strsz = size;
10155 return (symbuf);
10156 }
10157
10158 /*
10159 * Check to see if we have a downrev Solaris client, so that we
10160 * can send it a symlink instead of a referral.
10161 */
10162 int
10163 client_is_downrev(struct svc_req *req)
10164 {
10165 struct sockaddr *ca;
10166 rfs4_clntip_t *ci;
10167 bool_t create = FALSE;
10168 int is_downrev;
10169
10170 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
10171 ASSERT(ca);
10172 ci = rfs4_find_clntip(ca, &create);
10173 if (ci == NULL)
10174 return (0);
10175 is_downrev = ci->ri_no_referrals;
10176 rfs4_dbe_rele(ci->ri_dbe);
10177 return (is_downrev);
10178 }
10179
10180 /*
10181 * Do the main work of handling HA-NFSv4 Resource Group failover on
10182 * Sun Cluster.
10183 * We need to detect whether any RG admin paths have been added or removed,
10184 * and adjust resources accordingly.
10185 * Currently we're using a very inefficient algorithm, ~ 2 * O(n**2). In
10186 * order to scale, the list and array of paths need to be held in more
10187 * suitable data structures.
10188 */
10189 static void
10190 hanfsv4_failover(nfs4_srv_t *nsrv4)
10191 {
10192 int i, start_grace, numadded_paths = 0;
10193 char **added_paths = NULL;
10194 rfs4_dss_path_t *dss_path;
10195
10196 /*
10197 * Note: currently, dss_pathlist cannot be NULL, since
10198 * it will always include an entry for NFS4_DSS_VAR_DIR. If we
10199 * make the latter dynamically specified too, the following will
10200 * need to be adjusted.
10201 */
10202
10203 /*
10204 * First, look for removed paths: RGs that have been failed-over
10205 * away from this node.
10206 * Walk the "currently-serving" dss_pathlist and, for each
10207 * path, check if it is on the "passed-in" rfs4_dss_newpaths array
10208 * from nfsd. If not, that RG path has been removed.
10209 *
10210 * Note that nfsd has sorted rfs4_dss_newpaths for us, and removed
10211 * any duplicates.
10212 */
10213 dss_path = nsrv4->dss_pathlist;
10214 do {
10215 int found = 0;
10216 char *path = dss_path->path;
10217
10218 /* used only for non-HA so may not be removed */
10219 if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
10220 dss_path = dss_path->next;
10221 continue;
10222 }
10223
10224 for (i = 0; i < rfs4_dss_numnewpaths; i++) {
10225 int cmpret;
10226 char *newpath = rfs4_dss_newpaths[i];
10227
10228 /*
10229 * Since nfsd has sorted rfs4_dss_newpaths for us,
10230 * once the return from strcmp is negative we know
10231 * we've passed the point where "path" should be,
10232 * and can stop searching: "path" has been removed.
10233 */
10234 cmpret = strcmp(path, newpath);
10235 if (cmpret < 0)
10236 break;
10237 if (cmpret == 0) {
10238 found = 1;
10239 break;
10240 }
10241 }
10242
10243 if (found == 0) {
10244 unsigned index = dss_path->index;
10245 rfs4_servinst_t *sip = dss_path->sip;
10246 rfs4_dss_path_t *path_next = dss_path->next;
10247
10248 /*
10249 * This path has been removed.
10250 * We must clear out the servinst reference to
10251 * it, since it's now owned by another
10252 * node: we should not attempt to touch it.
10253 */
10254 ASSERT(dss_path == sip->dss_paths[index]);
10255 sip->dss_paths[index] = NULL;
10256
10257 /* remove from "currently-serving" list, and destroy */
10258 remque(dss_path);
10259 /* allow for NUL */
10260 kmem_free(dss_path->path, strlen(dss_path->path) + 1);
10261 kmem_free(dss_path, sizeof (rfs4_dss_path_t));
10262
10263 dss_path = path_next;
10264 } else {
10265 /* path was found; not removed */
10266 dss_path = dss_path->next;
10267 }
10268 } while (dss_path != nsrv4->dss_pathlist);
10269
10270 /*
10271 * Now, look for added paths: RGs that have been failed-over
10272 * to this node.
10273 * Walk the "passed-in" rfs4_dss_newpaths array from nfsd and,
10274 * for each path, check if it is on the "currently-serving"
10275 * dss_pathlist. If not, that RG path has been added.
10276 *
10277 * Note: we don't do duplicate detection here; nfsd does that for us.
10278 *
10279 * Note: numadded_paths <= rfs4_dss_numnewpaths, which gives us
10280 * an upper bound for the size needed for added_paths[numadded_paths].
10281 */
10282
10283 /* probably more space than we need, but guaranteed to be enough */
10284 if (rfs4_dss_numnewpaths > 0) {
10285 size_t sz = rfs4_dss_numnewpaths * sizeof (char *);
10286 added_paths = kmem_zalloc(sz, KM_SLEEP);
10287 }
10288
10289 /* walk the "passed-in" rfs4_dss_newpaths array from nfsd */
10290 for (i = 0; i < rfs4_dss_numnewpaths; i++) {
10291 int found = 0;
10292 char *newpath = rfs4_dss_newpaths[i];
10293
10294 dss_path = nsrv4->dss_pathlist;
10295 do {
10296 char *path = dss_path->path;
10297
10298 /* used only for non-HA */
10299 if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
10300 dss_path = dss_path->next;
10301 continue;
10302 }
10303
10304 if (strncmp(path, newpath, strlen(path)) == 0) {
10305 found = 1;
10306 break;
10307 }
10308
10309 dss_path = dss_path->next;
10310 } while (dss_path != nsrv4->dss_pathlist);
10311
10312 if (found == 0) {
10313 added_paths[numadded_paths] = newpath;
10314 numadded_paths++;
10315 }
10316 }
10317
10318 /* did we find any added paths? */
10319 if (numadded_paths > 0) {
10320
10321 /* create a new server instance, and start its grace period */
10322 start_grace = 1;
10323 /* CSTYLED */
10324 rfs4_servinst_create(nsrv4, start_grace, numadded_paths, added_paths);
10325
10326 /* read in the stable storage state from these paths */
10327 rfs4_dss_readstate(nsrv4, numadded_paths, added_paths);
10328
10329 /*
10330 * Multiple failovers during a grace period will cause
10331 * clients of the same resource group to be partitioned
10332 * into different server instances, with different
10333 * grace periods. Since clients of the same resource
10334 * group must be subject to the same grace period,
10335 * we need to reset all currently active grace periods.
10336 */
10337 rfs4_grace_reset_all(nsrv4);
10338 }
10339
10340 if (rfs4_dss_numnewpaths > 0)
10341 kmem_free(added_paths, rfs4_dss_numnewpaths * sizeof (char *));
10342 }