1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26 /*
27 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
28 * All Rights Reserved
29 */
30
31 /*
32 * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
33 * Copyright 2019 Nexenta Systems, Inc.
34 * Copyright 2019 Nexenta by DDN, Inc.
35 */
36
37 #include <sys/param.h>
38 #include <sys/types.h>
39 #include <sys/systm.h>
40 #include <sys/cred.h>
41 #include <sys/buf.h>
42 #include <sys/vfs.h>
43 #include <sys/vfs_opreg.h>
44 #include <sys/vnode.h>
45 #include <sys/uio.h>
46 #include <sys/errno.h>
47 #include <sys/sysmacros.h>
48 #include <sys/statvfs.h>
49 #include <sys/kmem.h>
50 #include <sys/dirent.h>
51 #include <sys/cmn_err.h>
52 #include <sys/debug.h>
53 #include <sys/systeminfo.h>
54 #include <sys/flock.h>
55 #include <sys/pathname.h>
56 #include <sys/nbmlock.h>
57 #include <sys/share.h>
58 #include <sys/atomic.h>
59 #include <sys/policy.h>
60 #include <sys/fem.h>
61 #include <sys/sdt.h>
62 #include <sys/ddi.h>
63 #include <sys/zone.h>
64
65 #include <fs/fs_reparse.h>
66
67 #include <rpc/types.h>
68 #include <rpc/auth.h>
69 #include <rpc/rpcsec_gss.h>
70 #include <rpc/svc.h>
71
72 #include <nfs/nfs.h>
73 #include <nfs/nfssys.h>
74 #include <nfs/export.h>
75 #include <nfs/nfs_cmd.h>
76 #include <nfs/lm.h>
77 #include <nfs/nfs4.h>
78 #include <nfs/nfs4_drc.h>
79
80 #include <sys/strsubr.h>
81 #include <sys/strsun.h>
82
83 #include <inet/common.h>
84 #include <inet/ip.h>
85 #include <inet/ip6.h>
86
87 #include <sys/tsol/label.h>
88 #include <sys/tsol/tndb.h>
89
/*
 * Tunables.  rfs4_maxlock_tries and rfs4_lock_delay are file-local
 * variables seeded from the RFS4_* defaults below; rdma_svc_ops and
 * nfs_loaned_buffers are defined elsewhere and only referenced here.
 */
#define	RFS4_MAXLOCK_TRIES 4	/* Try to get the lock this many times */
static int rfs4_maxlock_tries = RFS4_MAXLOCK_TRIES;
#define	RFS4_LOCK_DELAY 10	/* Milliseconds */
static clock_t rfs4_lock_delay = RFS4_LOCK_DELAY;
extern struct svc_ops rdma_svc_ops;
extern int nfs_loaned_buffers;
/* End of Tunables */
97
98 static int rdma_setup_read_data4(READ4args *, READ4res *);
99
/*
 * Used to bump the stateid4.seqid value and show changes in the stateid
 *
 * Pre-increments (sp)->bits.chgseq and evaluates to the new value.
 * The argument is expanded exactly once.
 */
#define	next_stateid(sp) (++(sp)->bits.chgseq)
104
105 /*
106 * RFS4_MINLEN_ENTRY4: XDR-encoded size of smallest possible dirent.
107 * This is used to return NFS4ERR_TOOSMALL when clients specify
108 * maxcount that isn't large enough to hold the smallest possible
109 * XDR encoded dirent.
110 *
111 * sizeof cookie (8 bytes) +
112 * sizeof name_len (4 bytes) +
113 * sizeof smallest (padded) name (4 bytes) +
114 * sizeof bitmap4_len (12 bytes) + NOTE: we always encode len=2 bm4
115 * sizeof attrlist4_len (4 bytes) +
116 * sizeof next boolean (4 bytes)
117 *
118 * RFS4_MINLEN_RDDIR4: XDR-encoded size of READDIR op reply containing
119 * the smallest possible entry4 (assumes no attrs requested).
120 * sizeof nfsstat4 (4 bytes) +
121 * sizeof verifier4 (8 bytes) +
122 * sizeof entry4list bool (4 bytes) +
123 * sizeof entry4 (36 bytes) +
124 * sizeof eof bool (4 bytes)
125 *
126 * RFS4_MINLEN_RDDIR_BUF: minimum length of buffer server will provide to
127 * VOP_READDIR. Its value is the size of the maximum possible dirent
128 * for solaris. The DIRENT64_RECLEN macro returns the size of dirent
129 * required for a given name length. MAXNAMELEN is the maximum
130 * filename length allowed in Solaris. The first two DIRENT64_RECLEN()
131 * macros are to allow for . and .. entries -- just a minor tweak to try
132 * and guarantee that buffer we give to VOP_READDIR will be large enough
133 * to hold ., .., and the largest possible solaris dirent64.
134 */
/* 8 (cookie) + 4 (name_len) + 4 (name) + 12 (bitmap4) + 4 (attrlen) + 4 (next) */
#define	RFS4_MINLEN_ENTRY4 36
/* status + verifier + entry4list bool + smallest entry4 + eof bool */
#define	RFS4_MINLEN_RDDIR4 (4 + NFS4_VERIFIER_SIZE + 4 + RFS4_MINLEN_ENTRY4 + 4)
/* room for ".", ".." and one dirent64 carrying a MAXNAMELEN name */
#define	RFS4_MINLEN_RDDIR_BUF \
	(DIRENT64_RECLEN(1) + DIRENT64_RECLEN(2) + DIRENT64_RECLEN(MAXNAMELEN))
139
140 /*
141 * It would be better to pad to 4 bytes since that's what XDR would do,
142 * but the dirents UFS gives us are already padded to 8, so just take
143 * what we're given. Dircount is only a hint anyway. Currently the
144 * solaris kernel is ASCII only, so there's no point in calling the
145 * UTF8 functions.
146 *
147 * dirent64: named padded to provide 8 byte struct alignment
148 * d_ino(8) + d_off(8) + d_reclen(2) + d_name(namelen + null(1) + pad)
149 *
150 * cookie: uint64_t + utf8namelen: uint_t + utf8name padded to 8 bytes
151 *
152 */
/*
 * Three XDR units (8-byte cookie + 4-byte name length) plus the padded
 * name length derived from the dirent's d_reclen.
 */
#define	DIRENT64_TO_DIRCOUNT(dp) \
	(3 * BYTES_PER_XDR_UNIT + DIRENT64_NAMELEN((dp)->d_reclen))
155
156
/*
 * Allocated with lm_alloc_sysidt() in rfs4_srvrinit() and released in
 * rfs4_srvrfini().
 */
static sysid_t lockt_sysid;		/* dummy sysid for all LOCKT calls */

/* Set from fs_new_caller_id() in rfs4_srvrinit(). */
u_longlong_t nfs4_srv_caller_id;
/* Key filled in by vsd_create() in rfs4_srvrinit(). */
uint_t nfs4_srv_vkey = 0;
161
162 void rfs4_init_compound_state(struct compound_state *);
163
164 static void nullfree(caddr_t);
165 static void rfs4_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
166 struct compound_state *);
167 static void rfs4_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
168 struct compound_state *);
169 static void rfs4_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
170 struct compound_state *);
171 static void rfs4_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
172 struct compound_state *);
173 static void rfs4_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
174 struct compound_state *);
175 static void rfs4_op_create_free(nfs_resop4 *resop);
176 static void rfs4_op_delegreturn(nfs_argop4 *, nfs_resop4 *,
177 struct svc_req *, struct compound_state *);
178 static void rfs4_op_delegpurge(nfs_argop4 *, nfs_resop4 *,
179 struct svc_req *, struct compound_state *);
180 static void rfs4_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
181 struct compound_state *);
182 static void rfs4_op_getattr_free(nfs_resop4 *);
183 static void rfs4_op_getfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
184 struct compound_state *);
185 static void rfs4_op_getfh_free(nfs_resop4 *);
186 static void rfs4_op_illegal(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
187 struct compound_state *);
188 static void rfs4_op_link(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
189 struct compound_state *);
190 static void rfs4_op_lock(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
191 struct compound_state *);
192 static void lock_denied_free(nfs_resop4 *);
193 static void rfs4_op_locku(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
194 struct compound_state *);
195 static void rfs4_op_lockt(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
196 struct compound_state *);
197 static void rfs4_op_lookup(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
198 struct compound_state *);
199 static void rfs4_op_lookupp(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
200 struct compound_state *);
201 static void rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop,
202 struct svc_req *req, struct compound_state *cs);
203 static void rfs4_op_nverify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
204 struct compound_state *);
205 static void rfs4_op_open(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
206 struct compound_state *);
207 static void rfs4_op_open_confirm(nfs_argop4 *, nfs_resop4 *,
208 struct svc_req *, struct compound_state *);
209 static void rfs4_op_open_downgrade(nfs_argop4 *, nfs_resop4 *,
210 struct svc_req *, struct compound_state *);
211 static void rfs4_op_putfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
212 struct compound_state *);
213 static void rfs4_op_putpubfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
214 struct compound_state *);
215 static void rfs4_op_putrootfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
216 struct compound_state *);
217 static void rfs4_op_read(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
218 struct compound_state *);
219 static void rfs4_op_read_free(nfs_resop4 *);
220 static void rfs4_op_readdir_free(nfs_resop4 *resop);
221 static void rfs4_op_readlink(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
222 struct compound_state *);
223 static void rfs4_op_readlink_free(nfs_resop4 *);
224 static void rfs4_op_release_lockowner(nfs_argop4 *, nfs_resop4 *,
225 struct svc_req *, struct compound_state *);
226 static void rfs4_op_remove(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
227 struct compound_state *);
228 static void rfs4_op_rename(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
229 struct compound_state *);
230 static void rfs4_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
231 struct compound_state *);
232 static void rfs4_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
233 struct compound_state *);
234 static void rfs4_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
235 struct compound_state *);
236 static void rfs4_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
237 struct compound_state *);
238 static void rfs4_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
239 struct compound_state *);
240 static void rfs4_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
241 struct compound_state *);
242 static void rfs4_op_setclientid(nfs_argop4 *, nfs_resop4 *,
243 struct svc_req *, struct compound_state *);
244 static void rfs4_op_setclientid_confirm(nfs_argop4 *, nfs_resop4 *,
245 struct svc_req *req, struct compound_state *);
246 static void rfs4_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
247 struct compound_state *);
248 static void rfs4_op_secinfo_free(nfs_resop4 *);
249
250 static nfsstat4 check_open_access(uint32_t, struct compound_state *,
251 struct svc_req *);
252 nfsstat4 rfs4_client_sysid(rfs4_client_t *, sysid_t *);
253 void rfs4_ss_clid(nfs4_srv_t *, rfs4_client_t *);
254
255
/*
 * translation table for attrs
 *
 * NOTE(review): the field descriptions below are inferred from names and
 * types only; the code that fills and walks this table is not part of
 * this excerpt -- confirm against nfs4_ntov_table_init() and its users.
 */
struct nfs4_ntov_table {
	union nfs4_attr_u *na;		/* presumably the attribute values */
	uint8_t amap[NFS4_MAXNUM_ATTRS]; /* presumably slot -> attr index map */
	int attrcnt;			/* presumably number of slots in use */
	bool_t vfsstat;			/* presumably "VFS stats needed" flag */
};
265
266 static void nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp);
267 static void nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
268 struct nfs4_svgetit_arg *sargp);
269
270 static nfsstat4 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp,
271 struct compound_state *cs, struct nfs4_svgetit_arg *sargp,
272 struct nfs4_ntov_table *ntovp, nfs4_attr_cmd_t cmd);
273
274 static void hanfsv4_failover(nfs4_srv_t *);
275
/*
 * FEM operation sets for read and write delegations.  Built in
 * rfs4_srvrinit() by fem_create() from nfs4_rd_deleg_tmpl and
 * nfs4_wr_deleg_tmpl, and released with fem_free() in rfs4_srvrfini().
 */
fem_t *deleg_rdops;
fem_t *deleg_wrops;
278
/*
 * NFS4 op dispatch table
 *
 * Each rfsv4disp entry pairs an operation handler with the routine that
 * releases whatever the handler allocated for its result, plus a set of
 * RPC_* flag bits describing the operation.
 */

struct rfsv4disp {
	void (*dis_proc)();		/* proc to call */
	void (*dis_resfree)();		/* frees space allocated by proc */
	int dis_flags;			/* RPC_IDEMPOTENT, etc... */
};
288
/*
 * Dispatch entries, one per operation number; the comment in front of
 * each entry gives the operation that owns that slot.  Slots 0-2 are not
 * real operations and are routed to rfs4_op_illegal.
 */
static struct rfsv4disp rfsv4disptab[] = {
	/*
	 * NFS VERSION 4
	 */

	/* RFS_NULL = 0 */
	{rfs4_op_illegal, nullfree, 0},

	/* UNUSED = 1 */
	{rfs4_op_illegal, nullfree, 0},

	/* UNUSED = 2 */
	{rfs4_op_illegal, nullfree, 0},

	/* OP_ACCESS = 3 */
	{rfs4_op_access, nullfree, RPC_IDEMPOTENT},

	/* OP_CLOSE = 4 */
	{rfs4_op_close, nullfree, 0},

	/* OP_COMMIT = 5 */
	{rfs4_op_commit, nullfree, RPC_IDEMPOTENT},

	/* OP_CREATE = 6 */
	{rfs4_op_create, nullfree, 0},

	/* OP_DELEGPURGE = 7 */
	{rfs4_op_delegpurge, nullfree, 0},

	/* OP_DELEGRETURN = 8 */
	{rfs4_op_delegreturn, nullfree, 0},

	/* OP_GETATTR = 9 */
	{rfs4_op_getattr, rfs4_op_getattr_free, RPC_IDEMPOTENT},

	/* OP_GETFH = 10 */
	{rfs4_op_getfh, rfs4_op_getfh_free, RPC_ALL},

	/* OP_LINK = 11 */
	{rfs4_op_link, nullfree, 0},

	/* OP_LOCK = 12 */
	{rfs4_op_lock, lock_denied_free, 0},

	/* OP_LOCKT = 13 */
	{rfs4_op_lockt, lock_denied_free, 0},

	/* OP_LOCKU = 14 */
	{rfs4_op_locku, nullfree, 0},

	/* OP_LOOKUP = 15 */
	{rfs4_op_lookup, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},

	/* OP_LOOKUPP = 16 */
	{rfs4_op_lookupp, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},

	/* OP_NVERIFY = 17 */
	{rfs4_op_nverify, nullfree, RPC_IDEMPOTENT},

	/* OP_OPEN = 18 */
	{rfs4_op_open, rfs4_free_reply, 0},

	/* OP_OPENATTR = 19 */
	{rfs4_op_openattr, nullfree, 0},

	/* OP_OPEN_CONFIRM = 20 */
	{rfs4_op_open_confirm, nullfree, 0},

	/* OP_OPEN_DOWNGRADE = 21 */
	{rfs4_op_open_downgrade, nullfree, 0},

	/* OP_PUTFH = 22 */
	{rfs4_op_putfh, nullfree, RPC_ALL},

	/* OP_PUTPUBFH = 23 */
	{rfs4_op_putpubfh, nullfree, RPC_ALL},

	/* OP_PUTROOTFH = 24 */
	{rfs4_op_putrootfh, nullfree, RPC_ALL},

	/* OP_READ = 25 */
	{rfs4_op_read, rfs4_op_read_free, RPC_IDEMPOTENT},

	/* OP_READDIR = 26 */
	{rfs4_op_readdir, rfs4_op_readdir_free, RPC_IDEMPOTENT},

	/* OP_READLINK = 27 */
	{rfs4_op_readlink, rfs4_op_readlink_free, RPC_IDEMPOTENT},

	/* OP_REMOVE = 28 */
	{rfs4_op_remove, nullfree, 0},

	/* OP_RENAME = 29 */
	{rfs4_op_rename, nullfree, 0},

	/* OP_RENEW = 30 */
	{rfs4_op_renew, nullfree, 0},

	/* OP_RESTOREFH = 31 */
	{rfs4_op_restorefh, nullfree, RPC_ALL},

	/* OP_SAVEFH = 32 */
	{rfs4_op_savefh, nullfree, RPC_ALL},

	/* OP_SECINFO = 33 */
	{rfs4_op_secinfo, rfs4_op_secinfo_free, 0},

	/* OP_SETATTR = 34 */
	{rfs4_op_setattr, nullfree, 0},

	/* OP_SETCLIENTID = 35 */
	{rfs4_op_setclientid, nullfree, 0},

	/* OP_SETCLIENTID_CONFIRM = 36 */
	{rfs4_op_setclientid_confirm, nullfree, 0},

	/* OP_VERIFY = 37 */
	{rfs4_op_verify, nullfree, RPC_IDEMPOTENT},

	/* OP_WRITE = 38 */
	{rfs4_op_write, nullfree, 0},

	/* OP_RELEASE_LOCKOWNER = 39 */
	{rfs4_op_release_lockowner, nullfree, 0},
};
414
/* Number of entries in rfsv4disptab. */
static uint_t rfsv4disp_cnt = sizeof (rfsv4disptab) / sizeof (rfsv4disptab[0]);

/*
 * Index reserved for illegal operations.  It equals the entry count, so
 * it is one past the last rfsv4disptab slot and must not be used to
 * index rfsv4disptab itself.
 */
#define	OP_ILLEGAL_IDX (rfsv4disp_cnt)
418
#ifdef DEBUG

/* Debug knobs; their consumers are outside this part of the file. */
int rfs4_fillone_debug = 0;
int rfs4_no_stub_access = 1;
int rfs4_rddir_debug = 0;

/*
 * Printable operation names, in the same order as rfsv4disptab.  The
 * array carries one extra trailing entry ("rfs4_op_illegal"), which
 * lands at index OP_ILLEGAL_IDX.
 */
static char *rfs4_op_string[] = {
	"rfs4_op_null",
	"rfs4_op_1 unused",
	"rfs4_op_2 unused",
	"rfs4_op_access",
	"rfs4_op_close",
	"rfs4_op_commit",
	"rfs4_op_create",
	"rfs4_op_delegpurge",
	"rfs4_op_delegreturn",
	"rfs4_op_getattr",
	"rfs4_op_getfh",
	"rfs4_op_link",
	"rfs4_op_lock",
	"rfs4_op_lockt",
	"rfs4_op_locku",
	"rfs4_op_lookup",
	"rfs4_op_lookupp",
	"rfs4_op_nverify",
	"rfs4_op_open",
	"rfs4_op_openattr",
	"rfs4_op_open_confirm",
	"rfs4_op_open_downgrade",
	"rfs4_op_putfh",
	"rfs4_op_putpubfh",
	"rfs4_op_putrootfh",
	"rfs4_op_read",
	"rfs4_op_readdir",
	"rfs4_op_readlink",
	"rfs4_op_remove",
	"rfs4_op_rename",
	"rfs4_op_renew",
	"rfs4_op_restorefh",
	"rfs4_op_savefh",
	"rfs4_op_secinfo",
	"rfs4_op_setattr",
	"rfs4_op_setclientid",
	"rfs4_op_setclient_confirm",
	"rfs4_op_verify",
	"rfs4_op_write",
	"rfs4_op_release_lockowner",
	"rfs4_op_illegal"
};
#endif
469
470 void rfs4_ss_chkclid(nfs4_srv_t *, rfs4_client_t *);
471
472 extern size_t strlcpy(char *dst, const char *src, size_t dstsize);
473
474 extern void rfs4_free_fs_locations4(fs_locations4 *);
475
/*
 * Step to the dirent64 that follows dp in a packed buffer by advancing
 * d_reclen bytes.  Any earlier definition of nextdp is discarded first.
 */
#ifdef nextdp
#undef nextdp
#endif
#define	nextdp(dp) ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
480
/*
 * FEM template for read delegations: the vnode operations listed here
 * are routed to the deleg_rd_* monitors.  Passed to fem_create() in
 * rfs4_srvrinit() to build deleg_rdops.  The NULL pair ends the list.
 */
static const fs_operation_def_t nfs4_rd_deleg_tmpl[] = {
	VOPNAME_OPEN,		{ .femop_open = deleg_rd_open },
	VOPNAME_WRITE,		{ .femop_write = deleg_rd_write },
	VOPNAME_SETATTR,	{ .femop_setattr = deleg_rd_setattr },
	VOPNAME_RWLOCK,		{ .femop_rwlock = deleg_rd_rwlock },
	VOPNAME_SPACE,		{ .femop_space = deleg_rd_space },
	VOPNAME_SETSECATTR,	{ .femop_setsecattr = deleg_rd_setsecattr },
	VOPNAME_VNEVENT,	{ .femop_vnevent = deleg_rd_vnevent },
	NULL,			NULL
};
/*
 * FEM template for write delegations.  Same shape as the read template,
 * with VOPNAME_READ monitored as well.  Passed to fem_create() in
 * rfs4_srvrinit() to build deleg_wrops.
 */
static const fs_operation_def_t nfs4_wr_deleg_tmpl[] = {
	VOPNAME_OPEN,		{ .femop_open = deleg_wr_open },
	VOPNAME_READ,		{ .femop_read = deleg_wr_read },
	VOPNAME_WRITE,		{ .femop_write = deleg_wr_write },
	VOPNAME_SETATTR,	{ .femop_setattr = deleg_wr_setattr },
	VOPNAME_RWLOCK,		{ .femop_rwlock = deleg_wr_rwlock },
	VOPNAME_SPACE,		{ .femop_space = deleg_wr_space },
	VOPNAME_SETSECATTR,	{ .femop_setsecattr = deleg_wr_setsecattr },
	VOPNAME_VNEVENT,	{ .femop_vnevent = deleg_wr_vnevent },
	NULL,			NULL
};
502
503 nfs4_srv_t *
504 nfs4_get_srv(void)
505 {
506 nfs_globals_t *ng = zone_getspecific(nfssrv_zone_key, curzone);
507 nfs4_srv_t *srv = ng->nfs4_srv;
508 ASSERT(srv != NULL);
509 return (srv);
510 }
511
512 void
513 rfs4_srv_zone_init(nfs_globals_t *ng)
514 {
515 nfs4_srv_t *nsrv4;
516 timespec32_t verf;
517
518 nsrv4 = kmem_zalloc(sizeof (*nsrv4), KM_SLEEP);
519
520 /*
521 * The following algorithm attempts to find a unique verifier
522 * to be used as the write verifier returned from the server
523 * to the client. It is important that this verifier change
524 * whenever the server reboots. Of secondary importance, it
525 * is important for the verifier to be unique between two
526 * different servers.
527 *
528 * Thus, an attempt is made to use the system hostid and the
529 * current time in seconds when the nfssrv kernel module is
530 * loaded. It is assumed that an NFS server will not be able
531 * to boot and then to reboot in less than a second. If the
532 * hostid has not been set, then the current high resolution
533 * time is used. This will ensure different verifiers each
534 * time the server reboots and minimize the chances that two
535 * different servers will have the same verifier.
536 * XXX - this is broken on LP64 kernels.
537 */
538 verf.tv_sec = (time_t)zone_get_hostid(NULL);
539 if (verf.tv_sec != 0) {
540 verf.tv_nsec = gethrestime_sec();
541 } else {
542 timespec_t tverf;
543
544 gethrestime(&tverf);
545 verf.tv_sec = (time_t)tverf.tv_sec;
546 verf.tv_nsec = tverf.tv_nsec;
547 }
548 nsrv4->write4verf = *(uint64_t *)&verf;
549
550 /* Used to manage create/destroy of server state */
551 nsrv4->nfs4_server_state = NULL;
552 nsrv4->nfs4_cur_servinst = NULL;
553 nsrv4->nfs4_deleg_policy = SRV_NEVER_DELEGATE;
554 mutex_init(&nsrv4->deleg_lock, NULL, MUTEX_DEFAULT, NULL);
555 mutex_init(&nsrv4->state_lock, NULL, MUTEX_DEFAULT, NULL);
556 mutex_init(&nsrv4->servinst_lock, NULL, MUTEX_DEFAULT, NULL);
557 rw_init(&nsrv4->deleg_policy_lock, NULL, RW_DEFAULT, NULL);
558
559 ng->nfs4_srv = nsrv4;
560 }
561
562 void
563 rfs4_srv_zone_fini(nfs_globals_t *ng)
564 {
565 nfs4_srv_t *nsrv4 = ng->nfs4_srv;
566
567 ng->nfs4_srv = NULL;
568
569 mutex_destroy(&nsrv4->deleg_lock);
570 mutex_destroy(&nsrv4->state_lock);
571 mutex_destroy(&nsrv4->servinst_lock);
572 rw_destroy(&nsrv4->deleg_policy_lock);
573
574 kmem_free(nsrv4, sizeof (*nsrv4));
575 }
576
577 void
578 rfs4_srvrinit(void)
579 {
580 extern void rfs4_attr_init();
581
582 rfs4_attr_init();
583
584 if (fem_create("deleg_rdops", nfs4_rd_deleg_tmpl, &deleg_rdops) != 0) {
585 rfs4_disable_delegation();
586 } else if (fem_create("deleg_wrops", nfs4_wr_deleg_tmpl,
587 &deleg_wrops) != 0) {
588 rfs4_disable_delegation();
589 fem_free(deleg_rdops);
590 }
591
592 nfs4_srv_caller_id = fs_new_caller_id();
593 lockt_sysid = lm_alloc_sysidt();
594 vsd_create(&nfs4_srv_vkey, NULL);
595 rfs4_state_g_init();
596 }
597
598 void
599 rfs4_srvrfini(void)
600 {
601 if (lockt_sysid != LM_NOSYSID) {
602 lm_free_sysidt(lockt_sysid);
603 lockt_sysid = LM_NOSYSID;
604 }
605
606 rfs4_state_g_fini();
607
608 fem_free(deleg_rdops);
609 fem_free(deleg_wrops);
610 }
611
612 void
613 rfs4_do_server_start(int server_upordown,
614 int srv_delegation, int cluster_booted)
615 {
616 nfs4_srv_t *nsrv4 = nfs4_get_srv();
617
618 /* Is this a warm start? */
619 if (server_upordown == NFS_SERVER_QUIESCED) {
620 cmn_err(CE_NOTE, "nfs4_srv: "
621 "server was previously quiesced; "
622 "existing NFSv4 state will be re-used");
623
624 /*
625 * HA-NFSv4: this is also the signal
626 * that a Resource Group failover has
627 * occurred.
628 */
629 if (cluster_booted)
630 hanfsv4_failover(nsrv4);
631 } else {
632 /* Cold start */
633 nsrv4->rfs4_start_time = 0;
634 rfs4_state_zone_init(nsrv4);
635 nsrv4->nfs4_drc = rfs4_init_drc(nfs4_drc_max,
636 nfs4_drc_hash);
637
638 /*
639 * The nfsd service was started with the -s option
640 * we need to pull in any state from the paths indicated.
641 */
642 if (curzone == global_zone && rfs4_dss_numnewpaths > 0) {
643 /* read in the stable storage state from these paths */
644 rfs4_dss_readstate(nsrv4, rfs4_dss_numnewpaths,
645 rfs4_dss_newpaths);
646 }
647 }
648
649 /* Check if delegation is to be enabled */
650 if (srv_delegation != FALSE)
651 rfs4_set_deleg_policy(nsrv4, SRV_NORMAL_DELEGATE);
652 }
653
654 void
655 rfs4_init_compound_state(struct compound_state *cs)
656 {
657 bzero(cs, sizeof (*cs));
658 cs->cont = TRUE;
659 cs->access = CS_ACCESS_DENIED;
660 cs->deleg = FALSE;
661 cs->mandlock = FALSE;
662 cs->fh.nfs_fh4_val = cs->fhbuf;
663 }
664
665 void
666 rfs4_grace_start(rfs4_servinst_t *sip)
667 {
668 rw_enter(&sip->rwlock, RW_WRITER);
669 sip->start_time = (time_t)TICK_TO_SEC(ddi_get_lbolt());
670 sip->grace_period = rfs4_grace_period;
671 rw_exit(&sip->rwlock);
672 }
673
674 /*
675 * returns true if the instance's grace period has never been started
676 */
677 int
678 rfs4_servinst_grace_new(rfs4_servinst_t *sip)
679 {
680 time_t start_time;
681
682 rw_enter(&sip->rwlock, RW_READER);
683 start_time = sip->start_time;
684 rw_exit(&sip->rwlock);
685
686 return (start_time == 0);
687 }
688
689 /*
690 * Indicates if server instance is within the
691 * grace period.
692 */
693 int
694 rfs4_servinst_in_grace(rfs4_servinst_t *sip)
695 {
696 time_t grace_expiry;
697
698 rw_enter(&sip->rwlock, RW_READER);
699 grace_expiry = sip->start_time + sip->grace_period;
700 rw_exit(&sip->rwlock);
701
702 return (((time_t)TICK_TO_SEC(ddi_get_lbolt())) < grace_expiry);
703 }
704
705 int
706 rfs4_clnt_in_grace(rfs4_client_t *cp)
707 {
708 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
709
710 return (rfs4_servinst_in_grace(cp->rc_server_instance));
711 }
712
713 /*
714 * reset all currently active grace periods
715 */
716 void
717 rfs4_grace_reset_all(nfs4_srv_t *nsrv4)
718 {
719 rfs4_servinst_t *sip;
720
721 mutex_enter(&nsrv4->servinst_lock);
722 for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev)
723 if (rfs4_servinst_in_grace(sip))
724 rfs4_grace_start(sip);
725 mutex_exit(&nsrv4->servinst_lock);
726 }
727
728 /*
729 * start any new instances' grace periods
730 */
731 void
732 rfs4_grace_start_new(nfs4_srv_t *nsrv4)
733 {
734 rfs4_servinst_t *sip;
735
736 mutex_enter(&nsrv4->servinst_lock);
737 for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev)
738 if (rfs4_servinst_grace_new(sip))
739 rfs4_grace_start(sip);
740 mutex_exit(&nsrv4->servinst_lock);
741 }
742
743 static rfs4_dss_path_t *
744 rfs4_dss_newpath(nfs4_srv_t *nsrv4, rfs4_servinst_t *sip,
745 char *path, unsigned index)
746 {
747 size_t len;
748 rfs4_dss_path_t *dss_path;
749
750 dss_path = kmem_alloc(sizeof (rfs4_dss_path_t), KM_SLEEP);
751
752 /*
753 * Take a copy of the string, since the original may be overwritten.
754 * Sadly, no strdup() in the kernel.
755 */
756 /* allow for NUL */
757 len = strlen(path) + 1;
758 dss_path->path = kmem_alloc(len, KM_SLEEP);
759 (void) strlcpy(dss_path->path, path, len);
760
761 /* associate with servinst */
762 dss_path->sip = sip;
763 dss_path->index = index;
764
765 /*
766 * Add to list of served paths.
767 * No locking required, as we're only ever called at startup.
768 */
769 if (nsrv4->dss_pathlist == NULL) {
770 /* this is the first dss_path_t */
771
772 /* needed for insque/remque */
773 dss_path->next = dss_path->prev = dss_path;
774
775 nsrv4->dss_pathlist = dss_path;
776 } else {
777 insque(dss_path, nsrv4->dss_pathlist);
778 }
779
780 return (dss_path);
781 }
782
783 /*
784 * Create a new server instance, and make it the currently active instance.
785 * Note that starting the grace period too early will reduce the clients'
786 * recovery window.
787 */
788 void
789 rfs4_servinst_create(nfs4_srv_t *nsrv4, int start_grace,
790 int dss_npaths, char **dss_paths)
791 {
792 unsigned i;
793 rfs4_servinst_t *sip;
794 rfs4_oldstate_t *oldstate;
795
796 sip = kmem_alloc(sizeof (rfs4_servinst_t), KM_SLEEP);
797 rw_init(&sip->rwlock, NULL, RW_DEFAULT, NULL);
798
799 sip->start_time = (time_t)0;
800 sip->grace_period = (time_t)0;
801 sip->next = NULL;
802 sip->prev = NULL;
803
804 rw_init(&sip->oldstate_lock, NULL, RW_DEFAULT, NULL);
805 /*
806 * This initial dummy entry is required to setup for insque/remque.
807 * It must be skipped over whenever the list is traversed.
808 */
809 oldstate = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP);
810 /* insque/remque require initial list entry to be self-terminated */
811 oldstate->next = oldstate;
812 oldstate->prev = oldstate;
813 sip->oldstate = oldstate;
814
815
816 sip->dss_npaths = dss_npaths;
817 sip->dss_paths = kmem_alloc(dss_npaths *
818 sizeof (rfs4_dss_path_t *), KM_SLEEP);
819
820 for (i = 0; i < dss_npaths; i++) {
821 sip->dss_paths[i] =
822 rfs4_dss_newpath(nsrv4, sip, dss_paths[i], i);
823 }
824
825 mutex_enter(&nsrv4->servinst_lock);
826 if (nsrv4->nfs4_cur_servinst != NULL) {
827 /* add to linked list */
828 sip->prev = nsrv4->nfs4_cur_servinst;
829 nsrv4->nfs4_cur_servinst->next = sip;
830 }
831 if (start_grace)
832 rfs4_grace_start(sip);
833 /* make the new instance "current" */
834 nsrv4->nfs4_cur_servinst = sip;
835
836 mutex_exit(&nsrv4->servinst_lock);
837 }
838
839 /*
840 * In future, we might add a rfs4_servinst_destroy(sip) but, for now, destroy
841 * all instances directly.
842 */
843 void
844 rfs4_servinst_destroy_all(nfs4_srv_t *nsrv4)
845 {
846 rfs4_servinst_t *sip, *prev, *current;
847 #ifdef DEBUG
848 int n = 0;
849 #endif
850
851 mutex_enter(&nsrv4->servinst_lock);
852 ASSERT(nsrv4->nfs4_cur_servinst != NULL);
853 current = nsrv4->nfs4_cur_servinst;
854 nsrv4->nfs4_cur_servinst = NULL;
855 for (sip = current; sip != NULL; sip = prev) {
856 prev = sip->prev;
857 rw_destroy(&sip->rwlock);
858 if (sip->oldstate)
859 kmem_free(sip->oldstate, sizeof (rfs4_oldstate_t));
860 if (sip->dss_paths) {
861 int i = sip->dss_npaths;
862
863 while (i > 0) {
864 i--;
865 if (sip->dss_paths[i] != NULL) {
866 char *path = sip->dss_paths[i]->path;
867
868 if (path != NULL) {
869 kmem_free(path,
870 strlen(path) + 1);
871 }
872 kmem_free(sip->dss_paths[i],
873 sizeof (rfs4_dss_path_t));
874 }
875 }
876 kmem_free(sip->dss_paths,
877 sip->dss_npaths * sizeof (rfs4_dss_path_t *));
878 }
879 kmem_free(sip, sizeof (rfs4_servinst_t));
880 #ifdef DEBUG
881 n++;
882 #endif
883 }
884 mutex_exit(&nsrv4->servinst_lock);
885 }
886
/*
 * Assign the current server instance to a client_t.
 * Should be called with cp->rc_dbe held.
 *
 * NOTE(review): the function stores whichever instance the caller passes
 * in `sip`; it does not itself look up nsrv4->nfs4_cur_servinst.  Callers
 * are presumably expected to pass the current instance -- confirm.
 */
void
rfs4_servinst_assign(nfs4_srv_t *nsrv4, rfs4_client_t *cp,
    rfs4_servinst_t *sip)
{
	ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);

	/*
	 * The lock ensures that if the current instance is in the process
	 * of changing, we will see the new one.
	 */
	mutex_enter(&nsrv4->servinst_lock);
	cp->rc_server_instance = sip;
	mutex_exit(&nsrv4->servinst_lock);
}
905
906 rfs4_servinst_t *
907 rfs4_servinst(rfs4_client_t *cp)
908 {
909 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
910
911 return (cp->rc_server_instance);
912 }
913
/*
 * Result-free routine that does nothing.  Used as the dis_resfree entry
 * in rfsv4disptab for operations that have no separate free routine.
 */
/* ARGSUSED */
static void
nullfree(caddr_t resop)
{
}
919
920 /*
921 * This is a fall-through for invalid or not implemented (yet) ops
922 */
923 /* ARGSUSED */
924 static void
925 rfs4_op_inval(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
926 struct compound_state *cs)
927 {
928 *cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_INVAL;
929 }
930
931 /*
932 * Check if the security flavor, nfsnum, is in the flavor_list.
933 */
934 bool_t
935 in_flavor_list(int nfsnum, int *flavor_list, int count)
936 {
937 int i;
938
939 for (i = 0; i < count; i++) {
940 if (nfsnum == flavor_list[i])
941 return (TRUE);
942 }
943 return (FALSE);
944 }
945
946 /*
947 * Used by rfs4_op_secinfo to get the security information from the
948 * export structure associated with the component.
949 */
950 /* ARGSUSED */
951 static nfsstat4
952 do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
953 {
954 int error, different_export = 0;
955 vnode_t *dvp, *vp;
956 struct exportinfo *exi;
957 fid_t fid;
958 uint_t count, i;
959 secinfo4 *resok_val;
960 struct secinfo *secp;
961 seconfig_t *si;
962 bool_t did_traverse = FALSE;
963 int dotdot, walk;
964 nfs_export_t *ne = nfs_get_export();
965
966 dvp = cs->vp;
967 exi = cs->exi;
968 ASSERT(exi != NULL);
969 dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
970
971 /*
972 * If dotdotting, then need to check whether it's above the
973 * root of a filesystem, or above an export point.
974 */
975 if (dotdot) {
976 ASSERT3U(exi->exi_zoneid, ==, curzone->zone_id);
977 /*
978 * If dotdotting at the root of a filesystem, then
979 * need to traverse back to the mounted-on filesystem
980 * and do the dotdot lookup there.
981 */
982 if ((dvp->v_flag & VROOT) || VN_IS_CURZONEROOT(dvp)) {
983
984 /*
985 * If at the system root, then can
986 * go up no further.
987 */
988 if (VN_CMP(dvp, ZONE_ROOTVP()))
989 return (puterrno4(ENOENT));
990
991 /*
992 * Traverse back to the mounted-on filesystem
993 */
994 dvp = untraverse(dvp);
995
996 /*
997 * Set the different_export flag so we remember
998 * to pick up a new exportinfo entry for
999 * this new filesystem.
1000 */
1001 different_export = 1;
1002 } else {
1003
1004 /*
1005 * If dotdotting above an export point then set
1006 * the different_export to get new export info.
1007 */
1008 different_export = nfs_exported(exi, dvp);
1009 }
1010 }
1011
1012 /*
1013 * Get the vnode for the component "nm".
1014 */
1015 error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cs->cr,
1016 NULL, NULL, NULL);
1017 if (error)
1018 return (puterrno4(error));
1019
1020 /*
1021 * If the vnode is in a pseudo filesystem, or if the security flavor
1022 * used in the request is valid but not an explicitly shared flavor,
1023 * or the access bit indicates that this is a limited access,
1024 * check whether this vnode is visible.
1025 */
1026 if (!different_export &&
1027 (PSEUDO(exi) || !is_exported_sec(cs->nfsflavor, exi) ||
1028 cs->access & CS_ACCESS_LIMITED)) {
1029 if (! nfs_visible(exi, vp, &different_export)) {
1030 VN_RELE(vp);
1031 return (puterrno4(ENOENT));
1032 }
1033 }
1034
1035 /*
1036 * If it's a mountpoint, then traverse it.
1037 */
1038 if (vn_ismntpt(vp)) {
1039 if ((error = traverse(&vp)) != 0) {
1040 VN_RELE(vp);
1041 return (puterrno4(error));
1042 }
1043 /* remember that we had to traverse mountpoint */
1044 did_traverse = TRUE;
1045 different_export = 1;
1046 } else if (vp->v_vfsp != dvp->v_vfsp) {
1047 /*
1048 * If vp isn't a mountpoint and the vfs ptrs aren't the same,
1049 * then vp is probably an LOFS object. We don't need the
1050 * realvp, we just need to know that we might have crossed
1051 * a server fs boundary and need to call checkexport4.
1052 * (LOFS lookup hides server fs mountpoints, and actually calls
1053 * traverse)
1054 */
1055 different_export = 1;
1056 }
1057
1058 /*
1059 * Get the export information for it.
1060 */
1061 if (different_export) {
1062
1063 bzero(&fid, sizeof (fid));
1064 fid.fid_len = MAXFIDSZ;
1065 error = vop_fid_pseudo(vp, &fid);
1066 if (error) {
1067 VN_RELE(vp);
1068 return (puterrno4(error));
1069 }
1070
1071 /* We'll need to reassign "exi". */
1072 if (dotdot)
1073 exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
1074 else
1075 exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
1076
1077 if (exi == NULL) {
1078 if (did_traverse == TRUE) {
1079 /*
1080 * If this vnode is a mounted-on vnode,
1081 * but the mounted-on file system is not
1082 * exported, send back the secinfo for
1083 * the exported node that the mounted-on
1084 * vnode lives in.
1085 */
1086 exi = cs->exi;
1087 } else {
1088 VN_RELE(vp);
1089 return (puterrno4(EACCES));
1090 }
1091 }
1092 }
1093 ASSERT(exi != NULL);
1094
1095
1096 /*
1097 * Create the secinfo result based on the security information
1098 * from the exportinfo structure (exi).
1099 *
1100 * Return all flavors for a pseudo node.
1101 * For a real export node, return the flavor that the client
1102 * has access with.
1103 */
1104 ASSERT(RW_LOCK_HELD(&ne->exported_lock));
1105 if (PSEUDO(exi)) {
1106 count = exi->exi_export.ex_seccnt; /* total sec count */
1107 resok_val = kmem_alloc(count * sizeof (secinfo4), KM_SLEEP);
1108 secp = exi->exi_export.ex_secinfo;
1109
1110 for (i = 0; i < count; i++) {
1111 si = &secp[i].s_secinfo;
1112 resok_val[i].flavor = si->sc_rpcnum;
1113 if (resok_val[i].flavor == RPCSEC_GSS) {
1114 rpcsec_gss_info *info;
1115
1116 info = &resok_val[i].flavor_info;
1117 info->qop = si->sc_qop;
1118 info->service = (rpc_gss_svc_t)si->sc_service;
1119
1120 /* get oid opaque data */
1121 info->oid.sec_oid4_len =
1122 si->sc_gss_mech_type->length;
1123 info->oid.sec_oid4_val = kmem_alloc(
1124 si->sc_gss_mech_type->length, KM_SLEEP);
1125 bcopy(
1126 si->sc_gss_mech_type->elements,
1127 info->oid.sec_oid4_val,
1128 info->oid.sec_oid4_len);
1129 }
1130 }
1131 resp->SECINFO4resok_len = count;
1132 resp->SECINFO4resok_val = resok_val;
1133 } else {
1134 int ret_cnt = 0, k = 0;
1135 int *flavor_list;
1136
1137 count = exi->exi_export.ex_seccnt; /* total sec count */
1138 secp = exi->exi_export.ex_secinfo;
1139
1140 flavor_list = kmem_alloc(count * sizeof (int), KM_SLEEP);
1141 /* find out which flavors to return */
1142 for (i = 0; i < count; i ++) {
1143 int access, flavor, perm;
1144
1145 flavor = secp[i].s_secinfo.sc_nfsnum;
1146 perm = secp[i].s_flags;
1147
1148 access = nfsauth4_secinfo_access(exi, cs->req,
1149 flavor, perm, cs->basecr);
1150
1151 if (! (access & NFSAUTH_DENIED) &&
1152 ! (access & NFSAUTH_WRONGSEC)) {
1153 flavor_list[ret_cnt] = flavor;
1154 ret_cnt++;
1155 }
1156 }
1157
1158 /* Create the returning SECINFO value */
1159 resok_val = kmem_alloc(ret_cnt * sizeof (secinfo4), KM_SLEEP);
1160
1161 for (i = 0; i < count; i++) {
1162 /*
1163 * If the flavor is in the flavor list,
1164 * fill in resok_val.
1165 */
1166 si = &secp[i].s_secinfo;
1167 if (in_flavor_list(si->sc_nfsnum,
1168 flavor_list, ret_cnt)) {
1169 resok_val[k].flavor = si->sc_rpcnum;
1170 if (resok_val[k].flavor == RPCSEC_GSS) {
1171 rpcsec_gss_info *info;
1172
1173 info = &resok_val[k].flavor_info;
1174 info->qop = si->sc_qop;
1175 info->service = (rpc_gss_svc_t)
1176 si->sc_service;
1177
1178 /* get oid opaque data */
1179 info->oid.sec_oid4_len =
1180 si->sc_gss_mech_type->length;
1181 info->oid.sec_oid4_val = kmem_alloc(
1182 si->sc_gss_mech_type->length,
1183 KM_SLEEP);
1184 bcopy(si->sc_gss_mech_type->elements,
1185 info->oid.sec_oid4_val,
1186 info->oid.sec_oid4_len);
1187 }
1188 k++;
1189 }
1190 if (k >= ret_cnt)
1191 break;
1192 }
1193 resp->SECINFO4resok_len = ret_cnt;
1194 resp->SECINFO4resok_val = resok_val;
1195 kmem_free(flavor_list, count * sizeof (int));
1196 }
1197
1198 VN_RELE(vp);
1199 return (NFS4_OK);
1200 }
1201
1202 /*
1203 * SECINFO (Operation 33): Obtain required security information on
1204 * the component name in the format of (security-mechanism-oid, qop, service)
1205 * triplets.
1206 */
1207 /* ARGSUSED */
1208 static void
1209 rfs4_op_secinfo(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1210 struct compound_state *cs)
1211 {
1212 SECINFO4args *args = &argop->nfs_argop4_u.opsecinfo;
1213 SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1214 utf8string *utfnm = &args->name;
1215 uint_t len;
1216 char *nm;
1217 struct sockaddr *ca;
1218 char *name = NULL;
1219 nfsstat4 status = NFS4_OK;
1220
1221 DTRACE_NFSV4_2(op__secinfo__start, struct compound_state *, cs,
1222 SECINFO4args *, args);
1223
1224 /*
1225 * Current file handle (cfh) should have been set before getting
1226 * into this function. If not, return error.
1227 */
1228 if (cs->vp == NULL) {
1229 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1230 goto out;
1231 }
1232
1233 if (cs->vp->v_type != VDIR) {
1234 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1235 goto out;
1236 }
1237
1238 /*
1239 * Verify the component name. If failed, error out, but
1240 * do not error out if the component name is a "..".
1241 * SECINFO will return its parents secinfo data for SECINFO "..".
1242 */
1243 status = utf8_dir_verify(utfnm);
1244 if (status != NFS4_OK) {
1245 if (utfnm->utf8string_len != 2 ||
1246 utfnm->utf8string_val[0] != '.' ||
1247 utfnm->utf8string_val[1] != '.') {
1248 *cs->statusp = resp->status = status;
1249 goto out;
1250 }
1251 }
1252
1253 nm = utf8_to_str(utfnm, &len, NULL);
1254 if (nm == NULL) {
1255 *cs->statusp = resp->status = NFS4ERR_INVAL;
1256 goto out;
1257 }
1258
1259 if (len > MAXNAMELEN) {
1260 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1261 kmem_free(nm, len);
1262 goto out;
1263 }
1264
1265 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1266 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1267 MAXPATHLEN + 1);
1268
1269 if (name == NULL) {
1270 *cs->statusp = resp->status = NFS4ERR_INVAL;
1271 kmem_free(nm, len);
1272 goto out;
1273 }
1274
1275
1276 *cs->statusp = resp->status = do_rfs4_op_secinfo(cs, name, resp);
1277
1278 if (name != nm)
1279 kmem_free(name, MAXPATHLEN + 1);
1280 kmem_free(nm, len);
1281
1282 out:
1283 DTRACE_NFSV4_2(op__secinfo__done, struct compound_state *, cs,
1284 SECINFO4res *, resp);
1285 }
1286
1287 /*
1288 * Free SECINFO result.
1289 */
1290 /* ARGSUSED */
1291 static void
1292 rfs4_op_secinfo_free(nfs_resop4 *resop)
1293 {
1294 SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1295 int count, i;
1296 secinfo4 *resok_val;
1297
1298 /* If this is not an Ok result, nothing to free. */
1299 if (resp->status != NFS4_OK) {
1300 return;
1301 }
1302
1303 count = resp->SECINFO4resok_len;
1304 resok_val = resp->SECINFO4resok_val;
1305
1306 for (i = 0; i < count; i++) {
1307 if (resok_val[i].flavor == RPCSEC_GSS) {
1308 rpcsec_gss_info *info;
1309
1310 info = &resok_val[i].flavor_info;
1311 kmem_free(info->oid.sec_oid4_val,
1312 info->oid.sec_oid4_len);
1313 }
1314 }
1315 kmem_free(resok_val, count * sizeof (secinfo4));
1316 resp->SECINFO4resok_len = 0;
1317 resp->SECINFO4resok_val = NULL;
1318 }
1319
1320 /* ARGSUSED */
1321 static void
1322 rfs4_op_access(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1323 struct compound_state *cs)
1324 {
1325 ACCESS4args *args = &argop->nfs_argop4_u.opaccess;
1326 ACCESS4res *resp = &resop->nfs_resop4_u.opaccess;
1327 int error;
1328 vnode_t *vp;
1329 struct vattr va;
1330 int checkwriteperm;
1331 cred_t *cr = cs->cr;
1332 bslabel_t *clabel, *slabel;
1333 ts_label_t *tslabel;
1334 boolean_t admin_low_client;
1335
1336 DTRACE_NFSV4_2(op__access__start, struct compound_state *, cs,
1337 ACCESS4args *, args);
1338
1339 #if 0 /* XXX allow access even if !cs->access. Eventually only pseudo fs */
1340 if (cs->access == CS_ACCESS_DENIED) {
1341 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1342 goto out;
1343 }
1344 #endif
1345 if (cs->vp == NULL) {
1346 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1347 goto out;
1348 }
1349
1350 ASSERT(cr != NULL);
1351
1352 vp = cs->vp;
1353
1354 /*
1355 * If the file system is exported read only, it is not appropriate
1356 * to check write permissions for regular files and directories.
1357 * Special files are interpreted by the client, so the underlying
1358 * permissions are sent back to the client for interpretation.
1359 */
1360 if (rdonly4(req, cs) &&
1361 (vp->v_type == VREG || vp->v_type == VDIR))
1362 checkwriteperm = 0;
1363 else
1364 checkwriteperm = 1;
1365
1366 /*
1367 * XXX
1368 * We need the mode so that we can correctly determine access
1369 * permissions relative to a mandatory lock file. Access to
1370 * mandatory lock files is denied on the server, so it might
1371 * as well be reflected to the server during the open.
1372 */
1373 va.va_mask = AT_MODE;
1374 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1375 if (error) {
1376 *cs->statusp = resp->status = puterrno4(error);
1377 goto out;
1378 }
1379 resp->access = 0;
1380 resp->supported = 0;
1381
1382 if (is_system_labeled()) {
1383 ASSERT(req->rq_label != NULL);
1384 clabel = req->rq_label;
1385 DTRACE_PROBE2(tx__rfs4__log__info__opaccess__clabel, char *,
1386 "got client label from request(1)",
1387 struct svc_req *, req);
1388 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1389 if ((tslabel = nfs_getflabel(vp, cs->exi)) == NULL) {
1390 *cs->statusp = resp->status = puterrno4(EACCES);
1391 goto out;
1392 }
1393 slabel = label2bslabel(tslabel);
1394 DTRACE_PROBE3(tx__rfs4__log__info__opaccess__slabel,
1395 char *, "got server label(1) for vp(2)",
1396 bslabel_t *, slabel, vnode_t *, vp);
1397
1398 admin_low_client = B_FALSE;
1399 } else
1400 admin_low_client = B_TRUE;
1401 }
1402
1403 if (args->access & ACCESS4_READ) {
1404 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
1405 if (!error && !MANDLOCK(vp, va.va_mode) &&
1406 (!is_system_labeled() || admin_low_client ||
1407 bldominates(clabel, slabel)))
1408 resp->access |= ACCESS4_READ;
1409 resp->supported |= ACCESS4_READ;
1410 }
1411 if ((args->access & ACCESS4_LOOKUP) && vp->v_type == VDIR) {
1412 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1413 if (!error && (!is_system_labeled() || admin_low_client ||
1414 bldominates(clabel, slabel)))
1415 resp->access |= ACCESS4_LOOKUP;
1416 resp->supported |= ACCESS4_LOOKUP;
1417 }
1418 if (checkwriteperm &&
1419 (args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND))) {
1420 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1421 if (!error && !MANDLOCK(vp, va.va_mode) &&
1422 (!is_system_labeled() || admin_low_client ||
1423 blequal(clabel, slabel)))
1424 resp->access |=
1425 (args->access & (ACCESS4_MODIFY | ACCESS4_EXTEND));
1426 resp->supported |=
1427 resp->access & (ACCESS4_MODIFY | ACCESS4_EXTEND);
1428 }
1429
1430 if (checkwriteperm &&
1431 (args->access & ACCESS4_DELETE) && vp->v_type == VDIR) {
1432 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1433 if (!error && (!is_system_labeled() || admin_low_client ||
1434 blequal(clabel, slabel)))
1435 resp->access |= ACCESS4_DELETE;
1436 resp->supported |= ACCESS4_DELETE;
1437 }
1438 if (args->access & ACCESS4_EXECUTE && vp->v_type != VDIR) {
1439 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1440 if (!error && !MANDLOCK(vp, va.va_mode) &&
1441 (!is_system_labeled() || admin_low_client ||
1442 bldominates(clabel, slabel)))
1443 resp->access |= ACCESS4_EXECUTE;
1444 resp->supported |= ACCESS4_EXECUTE;
1445 }
1446
1447 if (is_system_labeled() && !admin_low_client)
1448 label_rele(tslabel);
1449
1450 *cs->statusp = resp->status = NFS4_OK;
1451 out:
1452 DTRACE_NFSV4_2(op__access__done, struct compound_state *, cs,
1453 ACCESS4res *, resp);
1454 }
1455
1456 /* ARGSUSED */
1457 static void
1458 rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1459 struct compound_state *cs)
1460 {
1461 COMMIT4args *args = &argop->nfs_argop4_u.opcommit;
1462 COMMIT4res *resp = &resop->nfs_resop4_u.opcommit;
1463 int error;
1464 vnode_t *vp = cs->vp;
1465 cred_t *cr = cs->cr;
1466 vattr_t va;
1467 nfs4_srv_t *nsrv4;
1468
1469 DTRACE_NFSV4_2(op__commit__start, struct compound_state *, cs,
1470 COMMIT4args *, args);
1471
1472 if (vp == NULL) {
1473 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1474 goto out;
1475 }
1476 if (cs->access == CS_ACCESS_DENIED) {
1477 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1478 goto out;
1479 }
1480
1481 if (args->offset + args->count < args->offset) {
1482 *cs->statusp = resp->status = NFS4ERR_INVAL;
1483 goto out;
1484 }
1485
1486 va.va_mask = AT_UID;
1487 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1488
1489 /*
1490 * If we can't get the attributes, then we can't do the
1491 * right access checking. So, we'll fail the request.
1492 */
1493 if (error) {
1494 *cs->statusp = resp->status = puterrno4(error);
1495 goto out;
1496 }
1497 if (rdonly4(req, cs)) {
1498 *cs->statusp = resp->status = NFS4ERR_ROFS;
1499 goto out;
1500 }
1501
1502 if (vp->v_type != VREG) {
1503 if (vp->v_type == VDIR)
1504 resp->status = NFS4ERR_ISDIR;
1505 else
1506 resp->status = NFS4ERR_INVAL;
1507 *cs->statusp = resp->status;
1508 goto out;
1509 }
1510
1511 if (crgetuid(cr) != va.va_uid &&
1512 (error = VOP_ACCESS(vp, VWRITE, 0, cs->cr, NULL))) {
1513 *cs->statusp = resp->status = puterrno4(error);
1514 goto out;
1515 }
1516
1517 error = VOP_FSYNC(vp, FSYNC, cr, NULL);
1518
1519 if (error) {
1520 *cs->statusp = resp->status = puterrno4(error);
1521 goto out;
1522 }
1523
1524 nsrv4 = nfs4_get_srv();
1525 *cs->statusp = resp->status = NFS4_OK;
1526 resp->writeverf = nsrv4->write4verf;
1527 out:
1528 DTRACE_NFSV4_2(op__commit__done, struct compound_state *, cs,
1529 COMMIT4res *, resp);
1530 }
1531
1532 /*
1533 * rfs4_op_mknod is called from rfs4_op_create after all initial verification
1534 * was completed. It does the nfsv4 create for special files.
1535 */
1536 /* ARGSUSED */
1537 static vnode_t *
1538 do_rfs4_op_mknod(CREATE4args *args, CREATE4res *resp, struct svc_req *req,
1539 struct compound_state *cs, vattr_t *vap, char *nm)
1540 {
1541 int error;
1542 cred_t *cr = cs->cr;
1543 vnode_t *dvp = cs->vp;
1544 vnode_t *vp = NULL;
1545 int mode;
1546 enum vcexcl excl;
1547
1548 switch (args->type) {
1549 case NF4CHR:
1550 case NF4BLK:
1551 if (secpolicy_sys_devices(cr) != 0) {
1552 *cs->statusp = resp->status = NFS4ERR_PERM;
1553 return (NULL);
1554 }
1555 if (args->type == NF4CHR)
1556 vap->va_type = VCHR;
1557 else
1558 vap->va_type = VBLK;
1559 vap->va_rdev = makedevice(args->ftype4_u.devdata.specdata1,
1560 args->ftype4_u.devdata.specdata2);
1561 vap->va_mask |= AT_RDEV;
1562 break;
1563 case NF4SOCK:
1564 vap->va_type = VSOCK;
1565 break;
1566 case NF4FIFO:
1567 vap->va_type = VFIFO;
1568 break;
1569 default:
1570 *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1571 return (NULL);
1572 }
1573
1574 /*
1575 * Must specify the mode.
1576 */
1577 if (!(vap->va_mask & AT_MODE)) {
1578 *cs->statusp = resp->status = NFS4ERR_INVAL;
1579 return (NULL);
1580 }
1581
1582 excl = EXCL;
1583
1584 mode = 0;
1585
1586 error = VOP_CREATE(dvp, nm, vap, excl, mode, &vp, cr, 0, NULL, NULL);
1587 if (error) {
1588 *cs->statusp = resp->status = puterrno4(error);
1589 return (NULL);
1590 }
1591 return (vp);
1592 }
1593
/*
 * nfsv4 create is used to create non-regular files. For regular files,
 * use nfsv4 open.
 *
 * Accepted types are NF4LNK, NF4BLK, NF4CHR, NF4SOCK, NF4FIFO and NF4DIR;
 * the object is created under the current filehandle (cs->vp), which must
 * be a directory that is not a mount point.
 *
 * On success the new object's vnode replaces cs->vp (the previous vnode is
 * released), cs->fh is filled in by makefh4(), resp->cinfo carries the
 * directory's before/after change values and resp->attrset the subset of
 * createattrs bits that were applied.  On any failure resp->attrset is left
 * at 0 once attribute processing has started, and the status is stored in
 * both resp->status and *cs->statusp.
 */
/* ARGSUSED */
static void
rfs4_op_create(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
    struct compound_state *cs)
{
	CREATE4args *args = &argop->nfs_argop4_u.opcreate;
	CREATE4res *resp = &resop->nfs_resop4_u.opcreate;
	int error;
	struct vattr bva, iva, iva2, ava, *vap;
	cred_t *cr = cs->cr;
	vnode_t *dvp = cs->vp;
	vnode_t *vp = NULL;
	vnode_t *realvp;
	char *nm, *lnm;
	uint_t len, llen;
	int syncval = 0;
	struct nfs4_svgetit_arg sarg;
	struct nfs4_ntov_table ntov;
	struct statvfs64 sb;
	nfsstat4 status;
	struct sockaddr *ca;
	char *name = NULL;
	char *lname = NULL;

	DTRACE_NFSV4_2(op__create__start, struct compound_state *, cs,
	    CREATE4args *, args);

	resp->attrset = 0;

	if (dvp == NULL) {
		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
		goto out;
	}

	/*
	 * If there is an unshared filesystem mounted on this vnode,
	 * do not allow to create an object in this directory.
	 */
	if (vn_ismntpt(dvp)) {
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		goto out;
	}

	/* Verify that type is correct */
	switch (args->type) {
	case NF4LNK:
	case NF4BLK:
	case NF4CHR:
	case NF4SOCK:
	case NF4FIFO:
	case NF4DIR:
		break;
	default:
		*cs->statusp = resp->status = NFS4ERR_BADTYPE;
		goto out;
	};

	if (cs->access == CS_ACCESS_DENIED) {
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		goto out;
	}
	if (dvp->v_type != VDIR) {
		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
		goto out;
	}
	status = utf8_dir_verify(&args->objname);
	if (status != NFS4_OK) {
		*cs->statusp = resp->status = status;
		goto out;
	}

	if (rdonly4(req, cs)) {
		*cs->statusp = resp->status = NFS4ERR_ROFS;
		goto out;
	}

	/*
	 * Name of newly created object
	 */
	nm = utf8_to_fn(&args->objname, &len, NULL);
	if (nm == NULL) {
		*cs->statusp = resp->status = NFS4ERR_INVAL;
		goto out;
	}

	if (len > MAXNAMELEN) {
		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
		kmem_free(nm, len);
		goto out;
	}

	/*
	 * From here on two name buffers may be live: "nm" (always) and
	 * "name", which is either nm itself or a separate buffer of
	 * MAXPATHLEN + 1 bytes.  Every exit path below frees both.
	 */
	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
	name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
	    MAXPATHLEN + 1);

	if (name == NULL) {
		*cs->statusp = resp->status = NFS4ERR_INVAL;
		kmem_free(nm, len);
		goto out;
	}

	resp->attrset = 0;

	/*
	 * Decode createattrs into sarg.vap.  Once the ntov table has been
	 * initialized, every exit path must call nfs4_ntov_table_free().
	 */
	sarg.sbp = &sb;
	sarg.is_referral = B_FALSE;
	nfs4_ntov_table_init(&ntov);

	status = do_rfs4_set_attrs(&resp->attrset,
	    &args->createattrs, cs, &sarg, &ntov, NFS4ATTR_SETIT);

	/* createattrs that produce no vattr bits at all are rejected */
	if (sarg.vap->va_mask == 0 && status == NFS4_OK)
		status = NFS4ERR_INVAL;

	if (status != NFS4_OK) {
		*cs->statusp = resp->status = status;
		if (name != nm)
			kmem_free(name, MAXPATHLEN + 1);
		kmem_free(nm, len);
		nfs4_ntov_table_free(&ntov, &sarg);
		resp->attrset = 0;
		goto out;
	}

	/* Get "before" change value */
	bva.va_mask = AT_CTIME|AT_SEQ|AT_MODE;
	error = VOP_GETATTR(dvp, &bva, 0, cr, NULL);
	if (error) {
		*cs->statusp = resp->status = puterrno4(error);
		if (name != nm)
			kmem_free(name, MAXPATHLEN + 1);
		kmem_free(nm, len);
		nfs4_ntov_table_free(&ntov, &sarg);
		resp->attrset = 0;
		goto out;
	}
	NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bva.va_ctime)

	vap = sarg.vap;

	/*
	 * Set the default initial values for attributes when the parent
	 * directory does not have the VSUID/VSGID bit set and they have
	 * not been specified in createattrs.
	 */
	if (!(bva.va_mode & VSUID) && (vap->va_mask & AT_UID) == 0) {
		vap->va_uid = crgetuid(cr);
		vap->va_mask |= AT_UID;
	}
	if (!(bva.va_mode & VSGID) && (vap->va_mask & AT_GID) == 0) {
		vap->va_gid = crgetgid(cr);
		vap->va_mask |= AT_GID;
	}

	/*
	 * Note: "error" is 0 on entry to the switch (the "before"
	 * VOP_GETATTR above succeeded).  The default case never assigns
	 * it; failures there are reported through resp->status by
	 * do_rfs4_op_mknod().
	 */
	vap->va_mask |= AT_TYPE;
	switch (args->type) {
	case NF4DIR:
		vap->va_type = VDIR;
		if ((vap->va_mask & AT_MODE) == 0) {
			vap->va_mode = 0700;	/* default: owner rwx only */
			vap->va_mask |= AT_MODE;
		}
		error = VOP_MKDIR(dvp, name, vap, &vp, cr, NULL, 0, NULL);
		if (error)
			break;

		/*
		 * Get the initial "after" sequence number, if it fails,
		 * set to zero
		 */
		iva.va_mask = AT_SEQ;
		if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
			iva.va_seq = 0;
		break;
	case NF4LNK:
		vap->va_type = VLNK;
		if ((vap->va_mask & AT_MODE) == 0) {
			vap->va_mode = 0700;	/* default: owner rwx only */
			vap->va_mask |= AT_MODE;
		}

		/*
		 * symlink names must be treated as data
		 */
		lnm = utf8_to_str((utf8string *)&args->ftype4_u.linkdata,
		    &llen, NULL);

		if (lnm == NULL) {
			*cs->statusp = resp->status = NFS4ERR_INVAL;
			if (name != nm)
				kmem_free(name, MAXPATHLEN + 1);
			kmem_free(nm, len);
			nfs4_ntov_table_free(&ntov, &sarg);
			resp->attrset = 0;
			goto out;
		}

		if (llen > MAXPATHLEN) {
			*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
			if (name != nm)
				kmem_free(name, MAXPATHLEN + 1);
			kmem_free(nm, len);
			kmem_free(lnm, llen);
			nfs4_ntov_table_free(&ntov, &sarg);
			resp->attrset = 0;
			goto out;
		}

		/*
		 * Same ownership rule as name/nm: lname is either lnm
		 * itself or a separate MAXPATHLEN + 1 byte buffer.
		 */
		lname = nfscmd_convname(ca, cs->exi, lnm,
		    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);

		if (lname == NULL) {
			*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
			if (name != nm)
				kmem_free(name, MAXPATHLEN + 1);
			kmem_free(nm, len);
			kmem_free(lnm, llen);
			nfs4_ntov_table_free(&ntov, &sarg);
			resp->attrset = 0;
			goto out;
		}

		error = VOP_SYMLINK(dvp, name, vap, lname, cr, NULL, 0);
		if (lname != lnm)
			kmem_free(lname, MAXPATHLEN + 1);
		kmem_free(lnm, llen);
		if (error)
			break;

		/*
		 * Get the initial "after" sequence number, if it fails,
		 * set to zero
		 */
		iva.va_mask = AT_SEQ;
		if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
			iva.va_seq = 0;

		/*
		 * VOP_SYMLINK is not handed a vnode pointer to fill in
		 * here, so look the new link up to obtain vp.
		 */
		error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
		    NULL, NULL, NULL);
		if (error)
			break;

		/*
		 * va_seq is not safe over VOP calls, check it again
		 * if it has changed zero out iva to force atomic = FALSE.
		 */
		iva2.va_mask = AT_SEQ;
		if (VOP_GETATTR(dvp, &iva2, 0, cs->cr, NULL) ||
		    iva2.va_seq != iva.va_seq)
			iva.va_seq = 0;
		break;
	default:
		/*
		 * probably a special file.
		 */
		if ((vap->va_mask & AT_MODE) == 0) {
			vap->va_mode = 0600;	/* default: owner rw only */
			vap->va_mask |= AT_MODE;
		}
		syncval = FNODSYNC;
		/*
		 * We know this will only generate one VOP call
		 */
		vp = do_rfs4_op_mknod(args, resp, req, cs, vap, name);

		/* do_rfs4_op_mknod() has already set the failure status */
		if (vp == NULL) {
			if (name != nm)
				kmem_free(name, MAXPATHLEN + 1);
			kmem_free(nm, len);
			nfs4_ntov_table_free(&ntov, &sarg);
			resp->attrset = 0;
			goto out;
		}

		/*
		 * Get the initial "after" sequence number, if it fails,
		 * set to zero
		 */
		iva.va_mask = AT_SEQ;
		if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
			iva.va_seq = 0;

		break;
	}
	/* The name buffers are no longer needed past this point. */
	if (name != nm)
		kmem_free(name, MAXPATHLEN + 1);
	kmem_free(nm, len);

	if (error) {
		*cs->statusp = resp->status = puterrno4(error);
	}

	/*
	 * Force modified data and metadata out to stable storage.
	 * This is done whether or not the create succeeded.
	 */
	(void) VOP_FSYNC(dvp, 0, cr, NULL);

	/*
	 * NOTE(review): on the success path nothing above assigns
	 * resp->status, so this test relies on its value on entry being
	 * NFS4_OK -- confirm against the caller.
	 */
	if (resp->status != NFS4_OK) {
		if (vp != NULL)
			VN_RELE(vp);
		nfs4_ntov_table_free(&ntov, &sarg);
		resp->attrset = 0;
		goto out;
	}

	/*
	 * Finish setup of cinfo response, "before" value already set.
	 * Get "after" change value, if it fails, simply return the
	 * before value.
	 */
	ava.va_mask = AT_CTIME|AT_SEQ;
	if (VOP_GETATTR(dvp, &ava, 0, cr, NULL)) {
		ava.va_ctime = bva.va_ctime;
		ava.va_seq = 0;
	}
	NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, ava.va_ctime);

	/*
	 * True verification that object was created with correct
	 * attrs is impossible.  The attrs could have been changed
	 * immediately after object creation.  If attributes did
	 * not verify, the only recourse for the server is to
	 * destroy the object.  Maybe if some attrs (like gid)
	 * are set incorrectly, the object should be destroyed;
	 * however, seems bad as a default policy.  Do we really
	 * want to destroy an object over one of the times not
	 * verifying correctly?  For these reasons, the server
	 * currently sets bits in attrset for createattrs
	 * that were set; however, no verification is done.
	 *
	 * vmask_to_nmask accounts for vattr bits set on create
	 *	[do_rfs4_set_attrs() only sets resp bits for
	 *	 non-vattr/vfs bits.]
	 * Mask off any bits set by default so as not to return
	 * more attrset bits than were requested in createattrs
	 */
	nfs4_vmask_to_nmask(sarg.vap->va_mask, &resp->attrset);
	resp->attrset &= args->createattrs.attrmask;
	nfs4_ntov_table_free(&ntov, &sarg);

	/*
	 * Build the new current filehandle.  A failure is recorded in the
	 * status but the flow continues so that the new vnode is still
	 * synced below before it is released.
	 */
	error = makefh4(&cs->fh, vp, cs->exi);
	if (error) {
		*cs->statusp = resp->status = puterrno4(error);
	}

	/*
	 * The cinfo.atomic = TRUE only if we got no errors, we have
	 * non-zero va_seq's, and it has incremented by exactly one
	 * during the creation and it didn't change during the VOP_LOOKUP
	 * or VOP_FSYNC.
	 */
	if (!error && bva.va_seq && iva.va_seq && ava.va_seq &&
	    iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
		resp->cinfo.atomic = TRUE;
	else
		resp->cinfo.atomic = FALSE;

	/*
	 * Force modified metadata out to stable storage.
	 *
	 * if a underlying vp exists, pass it to VOP_FSYNC
	 */
	if (VOP_REALVP(vp, &realvp, NULL) == 0)
		(void) VOP_FSYNC(realvp, syncval, cr, NULL);
	else
		(void) VOP_FSYNC(vp, syncval, cr, NULL);

	/* Only the makefh4() failure above can make this non-OK here. */
	if (resp->status != NFS4_OK) {
		VN_RELE(vp);
		goto out;
	}

	/* The new object becomes the current filehandle's vnode. */
	if (cs->vp)
		VN_RELE(cs->vp);

	cs->vp = vp;
	*cs->statusp = resp->status = NFS4_OK;
out:
	DTRACE_NFSV4_2(op__create__done, struct compound_state *, cs,
	    CREATE4res *, resp);
}
1977
1978 /*ARGSUSED*/
1979 static void
1980 rfs4_op_delegpurge(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1981 struct compound_state *cs)
1982 {
1983 DTRACE_NFSV4_2(op__delegpurge__start, struct compound_state *, cs,
1984 DELEGPURGE4args *, &argop->nfs_argop4_u.opdelegpurge);
1985
1986 rfs4_op_inval(argop, resop, req, cs);
1987
1988 DTRACE_NFSV4_2(op__delegpurge__done, struct compound_state *, cs,
1989 DELEGPURGE4res *, &resop->nfs_resop4_u.opdelegpurge);
1990 }
1991
1992 /*ARGSUSED*/
1993 static void
1994 rfs4_op_delegreturn(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1995 struct compound_state *cs)
1996 {
1997 DELEGRETURN4args *args = &argop->nfs_argop4_u.opdelegreturn;
1998 DELEGRETURN4res *resp = &resop->nfs_resop4_u.opdelegreturn;
1999 rfs4_deleg_state_t *dsp;
2000 nfsstat4 status;
2001
2002 DTRACE_NFSV4_2(op__delegreturn__start, struct compound_state *, cs,
2003 DELEGRETURN4args *, args);
2004
2005 status = rfs4_get_deleg_state(&args->deleg_stateid, &dsp);
2006 resp->status = *cs->statusp = status;
2007 if (status != NFS4_OK)
2008 goto out;
2009
2010 /* Ensure specified filehandle matches */
2011 if (cs->vp != dsp->rds_finfo->rf_vp) {
2012 resp->status = *cs->statusp = NFS4ERR_BAD_STATEID;
2013 } else
2014 rfs4_return_deleg(dsp, FALSE);
2015
2016 rfs4_update_lease(dsp->rds_client);
2017
2018 rfs4_deleg_state_rele(dsp);
2019 out:
2020 DTRACE_NFSV4_2(op__delegreturn__done, struct compound_state *, cs,
2021 DELEGRETURN4res *, resp);
2022 }
2023
2024 /*
2025 * Check to see if a given "flavor" is an explicitly shared flavor.
2026 * The assumption of this routine is the "flavor" is already a valid
2027 * flavor in the secinfo list of "exi".
2028 *
2029 * e.g.
2030 * # share -o sec=flavor1 /export
2031 * # share -o sec=flavor2 /export/home
2032 *
2033 * flavor2 is not an explicitly shared flavor for /export,
2034 * however it is in the secinfo list for /export thru the
2035 * server namespace setup.
2036 */
2037 int
2038 is_exported_sec(int flavor, struct exportinfo *exi)
2039 {
2040 int i;
2041 struct secinfo *sp;
2042
2043 sp = exi->exi_export.ex_secinfo;
2044 for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
2045 if (flavor == sp[i].s_secinfo.sc_nfsnum ||
2046 sp[i].s_secinfo.sc_nfsnum == AUTH_NONE) {
2047 return (SEC_REF_EXPORTED(&sp[i]));
2048 }
2049 }
2050
2051 /* Should not reach this point based on the assumption */
2052 return (0);
2053 }
2054
2055 /*
2056 * Check if the security flavor used in the request matches what is
2057 * required at the export point or at the root pseudo node (exi_root).
2058 *
2059 * returns 1 if there's a match or if exported with AUTH_NONE; 0 otherwise.
2060 *
2061 */
2062 static int
2063 secinfo_match_or_authnone(struct compound_state *cs)
2064 {
2065 int i;
2066 struct secinfo *sp;
2067
2068 /*
2069 * Check cs->nfsflavor (from the request) against
2070 * the current export data in cs->exi.
2071 */
2072 sp = cs->exi->exi_export.ex_secinfo;
2073 for (i = 0; i < cs->exi->exi_export.ex_seccnt; i++) {
2074 if (cs->nfsflavor == sp[i].s_secinfo.sc_nfsnum ||
2075 sp[i].s_secinfo.sc_nfsnum == AUTH_NONE)
2076 return (1);
2077 }
2078
2079 return (0);
2080 }
2081
2082 /*
2083 * Check the access authority for the client and return the correct error.
2084 */
2085 nfsstat4
2086 call_checkauth4(struct compound_state *cs, struct svc_req *req)
2087 {
2088 int authres;
2089
2090 /*
2091 * First, check if the security flavor used in the request
2092 * are among the flavors set in the server namespace.
2093 */
2094 if (!secinfo_match_or_authnone(cs)) {
2095 *cs->statusp = NFS4ERR_WRONGSEC;
2096 return (*cs->statusp);
2097 }
2098
2099 authres = checkauth4(cs, req);
2100
2101 if (authres > 0) {
2102 *cs->statusp = NFS4_OK;
2103 if (! (cs->access & CS_ACCESS_LIMITED))
2104 cs->access = CS_ACCESS_OK;
2105 } else if (authres == 0) {
2106 *cs->statusp = NFS4ERR_ACCESS;
2107 } else if (authres == -2) {
2108 *cs->statusp = NFS4ERR_WRONGSEC;
2109 } else {
2110 *cs->statusp = NFS4ERR_DELAY;
2111 }
2112 return (*cs->statusp);
2113 }
2114
2115 /*
2116 * bitmap4_to_attrmask is called by getattr and readdir.
2117 * It sets up the vattr mask and determines whether vfsstat call is needed
2118 * based on the input bitmap.
2119 * Returns nfsv4 status.
2120 */
2121 static nfsstat4
2122 bitmap4_to_attrmask(bitmap4 breq, struct nfs4_svgetit_arg *sargp)
2123 {
2124 int i;
2125 uint_t va_mask;
2126 struct statvfs64 *sbp = sargp->sbp;
2127
2128 sargp->sbp = NULL;
2129 sargp->flag = 0;
2130 sargp->rdattr_error = NFS4_OK;
2131 sargp->mntdfid_set = FALSE;
2132 if (sargp->cs->vp)
2133 sargp->xattr = get_fh4_flag(&sargp->cs->fh,
2134 FH4_ATTRDIR | FH4_NAMEDATTR);
2135 else
2136 sargp->xattr = 0;
2137
2138 /*
2139 * Set rdattr_error_req to true if return error per
2140 * failed entry rather than fail the readdir.
2141 */
2142 if (breq & FATTR4_RDATTR_ERROR_MASK)
2143 sargp->rdattr_error_req = 1;
2144 else
2145 sargp->rdattr_error_req = 0;
2146
2147 /*
2148 * generate the va_mask
2149 * Handle the easy cases first
2150 */
2151 switch (breq) {
2152 case NFS4_NTOV_ATTR_MASK:
2153 sargp->vap->va_mask = NFS4_NTOV_ATTR_AT_MASK;
2154 return (NFS4_OK);
2155
2156 case NFS4_FS_ATTR_MASK:
2157 sargp->vap->va_mask = NFS4_FS_ATTR_AT_MASK;
2158 sargp->sbp = sbp;
2159 return (NFS4_OK);
2160
2161 case NFS4_NTOV_ATTR_CACHE_MASK:
2162 sargp->vap->va_mask = NFS4_NTOV_ATTR_CACHE_AT_MASK;
2163 return (NFS4_OK);
2164
2165 case FATTR4_LEASE_TIME_MASK:
2166 sargp->vap->va_mask = 0;
2167 return (NFS4_OK);
2168
2169 default:
2170 va_mask = 0;
2171 for (i = 0; i < nfs4_ntov_map_size; i++) {
2172 if ((breq & nfs4_ntov_map[i].fbit) &&
2173 nfs4_ntov_map[i].vbit)
2174 va_mask |= nfs4_ntov_map[i].vbit;
2175 }
2176
2177 /*
2178 * Check is vfsstat is needed
2179 */
2180 if (breq & NFS4_FS_ATTR_MASK)
2181 sargp->sbp = sbp;
2182
2183 sargp->vap->va_mask = va_mask;
2184 return (NFS4_OK);
2185 }
2186 /* NOTREACHED */
2187 }
2188
2189 /*
2190 * bitmap4_get_sysattrs is called by getattr and readdir.
2191 * It calls both VOP_GETATTR and VFS_STATVFS calls to get the attrs.
2192 * Returns nfsv4 status.
2193 */
2194 static nfsstat4
2195 bitmap4_get_sysattrs(struct nfs4_svgetit_arg *sargp)
2196 {
2197 int error;
2198 struct compound_state *cs = sargp->cs;
2199 vnode_t *vp = cs->vp;
2200
2201 if (sargp->sbp != NULL) {
2202 if (error = VFS_STATVFS(vp->v_vfsp, sargp->sbp)) {
2203 sargp->sbp = NULL; /* to identify error */
2204 return (puterrno4(error));
2205 }
2206 }
2207
2208 return (rfs4_vop_getattr(vp, sargp->vap, 0, cs->cr));
2209 }
2210
2211 static void
2212 nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp)
2213 {
2214 ntovp->na = kmem_zalloc(sizeof (union nfs4_attr_u) * nfs4_ntov_map_size,
2215 KM_SLEEP);
2216 ntovp->attrcnt = 0;
2217 ntovp->vfsstat = FALSE;
2218 }
2219
2220 static void
2221 nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
2222 struct nfs4_svgetit_arg *sargp)
2223 {
2224 int i;
2225 union nfs4_attr_u *na;
2226 uint8_t *amap;
2227
2228 /*
2229 * XXX Should do the same checks for whether the bit is set
2230 */
2231 for (i = 0, na = ntovp->na, amap = ntovp->amap;
2232 i < ntovp->attrcnt; i++, na++, amap++) {
2233 (void) (*nfs4_ntov_map[*amap].sv_getit)(
2234 NFS4ATTR_FREEIT, sargp, na);
2235 }
2236 if ((sargp->op == NFS4ATTR_SETIT) || (sargp->op == NFS4ATTR_VERIT)) {
2237 /*
2238 * xdr_free for getattr will be done later
2239 */
2240 for (i = 0, na = ntovp->na, amap = ntovp->amap;
2241 i < ntovp->attrcnt; i++, na++, amap++) {
2242 xdr_free(nfs4_ntov_map[*amap].xfunc, (caddr_t)na);
2243 }
2244 }
2245 kmem_free(ntovp->na, sizeof (union nfs4_attr_u) * nfs4_ntov_map_size);
2246 }
2247
2248 /*
2249 * do_rfs4_op_getattr gets the system attrs and converts into fattr4.
2250 */
2251 static nfsstat4
2252 do_rfs4_op_getattr(bitmap4 breq, fattr4 *fattrp,
2253 struct nfs4_svgetit_arg *sargp)
2254 {
2255 int error = 0;
2256 int i, k;
2257 struct nfs4_ntov_table ntov;
2258 XDR xdr;
2259 ulong_t xdr_size;
2260 char *xdr_attrs;
2261 nfsstat4 status = NFS4_OK;
2262 nfsstat4 prev_rdattr_error = sargp->rdattr_error;
2263 union nfs4_attr_u *na;
2264 uint8_t *amap;
2265
2266 sargp->op = NFS4ATTR_GETIT;
2267 sargp->flag = 0;
2268
2269 fattrp->attrmask = 0;
2270 /* if no bits requested, then return empty fattr4 */
2271 if (breq == 0) {
2272 fattrp->attrlist4_len = 0;
2273 fattrp->attrlist4 = NULL;
2274 return (NFS4_OK);
2275 }
2276
2277 /*
2278 * return NFS4ERR_INVAL when client requests write-only attrs
2279 */
2280 if (breq & (FATTR4_TIME_ACCESS_SET_MASK | FATTR4_TIME_MODIFY_SET_MASK))
2281 return (NFS4ERR_INVAL);
2282
2283 nfs4_ntov_table_init(&ntov);
2284 na = ntov.na;
2285 amap = ntov.amap;
2286
2287 /*
2288 * Now loop to get or verify the attrs
2289 */
2290 for (i = 0; i < nfs4_ntov_map_size; i++) {
2291 if (breq & nfs4_ntov_map[i].fbit) {
2292 if ((*nfs4_ntov_map[i].sv_getit)(
2293 NFS4ATTR_SUPPORTED, sargp, NULL) == 0) {
2294
2295 error = (*nfs4_ntov_map[i].sv_getit)(
2296 NFS4ATTR_GETIT, sargp, na);
2297
2298 /*
2299 * Possible error values:
2300 * >0 if sv_getit failed to
2301 * get the attr; 0 if succeeded;
2302 * <0 if rdattr_error and the
2303 * attribute cannot be returned.
2304 */
2305 if (error && !(sargp->rdattr_error_req))
2306 goto done;
2307 /*
2308 * If error then just for entry
2309 */
2310 if (error == 0) {
2311 fattrp->attrmask |=
2312 nfs4_ntov_map[i].fbit;
2313 *amap++ =
2314 (uint8_t)nfs4_ntov_map[i].nval;
2315 na++;
2316 (ntov.attrcnt)++;
2317 } else if ((error > 0) &&
2318 (sargp->rdattr_error == NFS4_OK)) {
2319 sargp->rdattr_error = puterrno4(error);
2320 }
2321 error = 0;
2322 }
2323 }
2324 }
2325
2326 /*
2327 * If rdattr_error was set after the return value for it was assigned,
2328 * update it.
2329 */
2330 if (prev_rdattr_error != sargp->rdattr_error) {
2331 na = ntov.na;
2332 amap = ntov.amap;
2333 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2334 k = *amap;
2335 if (k < FATTR4_RDATTR_ERROR) {
2336 continue;
2337 }
2338 if ((k == FATTR4_RDATTR_ERROR) &&
2339 ((*nfs4_ntov_map[k].sv_getit)(
2340 NFS4ATTR_SUPPORTED, sargp, NULL) == 0)) {
2341
2342 (void) (*nfs4_ntov_map[k].sv_getit)(
2343 NFS4ATTR_GETIT, sargp, na);
2344 }
2345 break;
2346 }
2347 }
2348
2349 xdr_size = 0;
2350 na = ntov.na;
2351 amap = ntov.amap;
2352 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2353 xdr_size += xdr_sizeof(nfs4_ntov_map[*amap].xfunc, na);
2354 }
2355
2356 fattrp->attrlist4_len = xdr_size;
2357 if (xdr_size) {
2358 /* freed by rfs4_op_getattr_free() */
2359 fattrp->attrlist4 = xdr_attrs = kmem_zalloc(xdr_size, KM_SLEEP);
2360
2361 xdrmem_create(&xdr, xdr_attrs, xdr_size, XDR_ENCODE);
2362
2363 na = ntov.na;
2364 amap = ntov.amap;
2365 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2366 if (!(*nfs4_ntov_map[*amap].xfunc)(&xdr, na)) {
2367 DTRACE_PROBE1(nfss__e__getattr4_encfail,
2368 int, *amap);
2369 status = NFS4ERR_SERVERFAULT;
2370 break;
2371 }
2372 }
2373 /* xdrmem_destroy(&xdrs); */ /* NO-OP */
2374 } else {
2375 fattrp->attrlist4 = NULL;
2376 }
2377 done:
2378
2379 nfs4_ntov_table_free(&ntov, sargp);
2380
2381 if (error != 0)
2382 status = puterrno4(error);
2383
2384 return (status);
2385 }
2386
2387 /* ARGSUSED */
2388 static void
2389 rfs4_op_getattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2390 struct compound_state *cs)
2391 {
2392 GETATTR4args *args = &argop->nfs_argop4_u.opgetattr;
2393 GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2394 struct nfs4_svgetit_arg sarg;
2395 struct statvfs64 sb;
2396 nfsstat4 status;
2397
2398 DTRACE_NFSV4_2(op__getattr__start, struct compound_state *, cs,
2399 GETATTR4args *, args);
2400
2401 if (cs->vp == NULL) {
2402 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2403 goto out;
2404 }
2405
2406 if (cs->access == CS_ACCESS_DENIED) {
2407 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2408 goto out;
2409 }
2410
2411 sarg.sbp = &sb;
2412 sarg.cs = cs;
2413 sarg.is_referral = B_FALSE;
2414
2415 status = bitmap4_to_attrmask(args->attr_request, &sarg);
2416 if (status == NFS4_OK) {
2417
2418 status = bitmap4_get_sysattrs(&sarg);
2419 if (status == NFS4_OK) {
2420
2421 /* Is this a referral? */
2422 if (vn_is_nfs_reparse(cs->vp, cs->cr)) {
2423 /* Older V4 Solaris client sees a link */
2424 if (client_is_downrev(req))
2425 sarg.vap->va_type = VLNK;
2426 else
2427 sarg.is_referral = B_TRUE;
2428 }
2429
2430 status = do_rfs4_op_getattr(args->attr_request,
2431 &resp->obj_attributes, &sarg);
2432 }
2433 }
2434 *cs->statusp = resp->status = status;
2435 out:
2436 DTRACE_NFSV4_2(op__getattr__done, struct compound_state *, cs,
2437 GETATTR4res *, resp);
2438 }
2439
2440 static void
2441 rfs4_op_getattr_free(nfs_resop4 *resop)
2442 {
2443 GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2444
2445 nfs4_fattr4_free(&resp->obj_attributes);
2446 }
2447
2448 /* ARGSUSED */
2449 static void
2450 rfs4_op_getfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2451 struct compound_state *cs)
2452 {
2453 GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2454
2455 DTRACE_NFSV4_1(op__getfh__start, struct compound_state *, cs);
2456
2457 if (cs->vp == NULL) {
2458 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2459 goto out;
2460 }
2461 if (cs->access == CS_ACCESS_DENIED) {
2462 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2463 goto out;
2464 }
2465
2466 /* check for reparse point at the share point */
2467 if (cs->exi->exi_moved || vn_is_nfs_reparse(cs->exi->exi_vp, cs->cr)) {
2468 /* it's all bad */
2469 cs->exi->exi_moved = 1;
2470 *cs->statusp = resp->status = NFS4ERR_MOVED;
2471 DTRACE_PROBE2(nfs4serv__func__referral__shared__moved,
2472 vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2473 return;
2474 }
2475
2476 /* check for reparse point at vp */
2477 if (vn_is_nfs_reparse(cs->vp, cs->cr) && !client_is_downrev(req)) {
2478 /* it's not all bad */
2479 *cs->statusp = resp->status = NFS4ERR_MOVED;
2480 DTRACE_PROBE2(nfs4serv__func__referral__moved,
2481 vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2482 return;
2483 }
2484
2485 resp->object.nfs_fh4_val =
2486 kmem_alloc(cs->fh.nfs_fh4_len, KM_SLEEP);
2487 nfs_fh4_copy(&cs->fh, &resp->object);
2488 *cs->statusp = resp->status = NFS4_OK;
2489 out:
2490 DTRACE_NFSV4_2(op__getfh__done, struct compound_state *, cs,
2491 GETFH4res *, resp);
2492 }
2493
2494 static void
2495 rfs4_op_getfh_free(nfs_resop4 *resop)
2496 {
2497 GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2498
2499 if (resp->status == NFS4_OK &&
2500 resp->object.nfs_fh4_val != NULL) {
2501 kmem_free(resp->object.nfs_fh4_val, resp->object.nfs_fh4_len);
2502 resp->object.nfs_fh4_val = NULL;
2503 resp->object.nfs_fh4_len = 0;
2504 }
2505 }
2506
2507 /*
2508 * illegal: args: void
2509 * res : status (NFS4ERR_OP_ILLEGAL)
2510 */
2511 /* ARGSUSED */
2512 static void
2513 rfs4_op_illegal(nfs_argop4 *argop, nfs_resop4 *resop,
2514 struct svc_req *req, struct compound_state *cs)
2515 {
2516 ILLEGAL4res *resp = &resop->nfs_resop4_u.opillegal;
2517
2518 resop->resop = OP_ILLEGAL;
2519 *cs->statusp = resp->status = NFS4ERR_OP_ILLEGAL;
2520 }
2521
2522 /*
2523 * link: args: SAVED_FH: file, CURRENT_FH: target directory
2524 * res: status. If success - CURRENT_FH unchanged, return change_info
2525 */
2526 /* ARGSUSED */
2527 static void
2528 rfs4_op_link(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2529 struct compound_state *cs)
2530 {
2531 LINK4args *args = &argop->nfs_argop4_u.oplink;
2532 LINK4res *resp = &resop->nfs_resop4_u.oplink;
2533 int error;
2534 vnode_t *vp;
2535 vnode_t *dvp;
2536 struct vattr bdva, idva, adva;
2537 char *nm;
2538 uint_t len;
2539 struct sockaddr *ca;
2540 char *name = NULL;
2541 nfsstat4 status;
2542
2543 DTRACE_NFSV4_2(op__link__start, struct compound_state *, cs,
2544 LINK4args *, args);
2545
2546 /* SAVED_FH: source object */
2547 vp = cs->saved_vp;
2548 if (vp == NULL) {
2549 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2550 goto out;
2551 }
2552
2553 /* CURRENT_FH: target directory */
2554 dvp = cs->vp;
2555 if (dvp == NULL) {
2556 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2557 goto out;
2558 }
2559
2560 /*
2561 * If there is a non-shared filesystem mounted on this vnode,
2562 * do not allow to link any file in this directory.
2563 */
2564 if (vn_ismntpt(dvp)) {
2565 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2566 goto out;
2567 }
2568
2569 if (cs->access == CS_ACCESS_DENIED) {
2570 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2571 goto out;
2572 }
2573
2574 /* Check source object's type validity */
2575 if (vp->v_type == VDIR) {
2576 *cs->statusp = resp->status = NFS4ERR_ISDIR;
2577 goto out;
2578 }
2579
2580 /* Check target directory's type */
2581 if (dvp->v_type != VDIR) {
2582 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2583 goto out;
2584 }
2585
2586 if (cs->saved_exi != cs->exi) {
2587 *cs->statusp = resp->status = NFS4ERR_XDEV;
2588 goto out;
2589 }
2590
2591 status = utf8_dir_verify(&args->newname);
2592 if (status != NFS4_OK) {
2593 *cs->statusp = resp->status = status;
2594 goto out;
2595 }
2596
2597 nm = utf8_to_fn(&args->newname, &len, NULL);
2598 if (nm == NULL) {
2599 *cs->statusp = resp->status = NFS4ERR_INVAL;
2600 goto out;
2601 }
2602
2603 if (len > MAXNAMELEN) {
2604 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2605 kmem_free(nm, len);
2606 goto out;
2607 }
2608
2609 if (rdonly4(req, cs)) {
2610 *cs->statusp = resp->status = NFS4ERR_ROFS;
2611 kmem_free(nm, len);
2612 goto out;
2613 }
2614
2615 /* Get "before" change value */
2616 bdva.va_mask = AT_CTIME|AT_SEQ;
2617 error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
2618 if (error) {
2619 *cs->statusp = resp->status = puterrno4(error);
2620 kmem_free(nm, len);
2621 goto out;
2622 }
2623
2624 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2625 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
2626 MAXPATHLEN + 1);
2627
2628 if (name == NULL) {
2629 *cs->statusp = resp->status = NFS4ERR_INVAL;
2630 kmem_free(nm, len);
2631 goto out;
2632 }
2633
2634 NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
2635
2636 error = VOP_LINK(dvp, vp, name, cs->cr, NULL, 0);
2637
2638 if (nm != name)
2639 kmem_free(name, MAXPATHLEN + 1);
2640 kmem_free(nm, len);
2641
2642 /*
2643 * Get the initial "after" sequence number, if it fails, set to zero
2644 */
2645 idva.va_mask = AT_SEQ;
2646 if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
2647 idva.va_seq = 0;
2648
2649 /*
2650 * Force modified data and metadata out to stable storage.
2651 */
2652 (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
2653 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
2654
2655 if (error) {
2656 *cs->statusp = resp->status = puterrno4(error);
2657 goto out;
2658 }
2659
2660 /*
2661 * Get "after" change value, if it fails, simply return the
2662 * before value.
2663 */
2664 adva.va_mask = AT_CTIME|AT_SEQ;
2665 if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
2666 adva.va_ctime = bdva.va_ctime;
2667 adva.va_seq = 0;
2668 }
2669
2670 NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
2671
2672 /*
2673 * The cinfo.atomic = TRUE only if we have
2674 * non-zero va_seq's, and it has incremented by exactly one
2675 * during the VOP_LINK and it didn't change during the VOP_FSYNC.
2676 */
2677 if (bdva.va_seq && idva.va_seq && adva.va_seq &&
2678 idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
2679 resp->cinfo.atomic = TRUE;
2680 else
2681 resp->cinfo.atomic = FALSE;
2682
2683 *cs->statusp = resp->status = NFS4_OK;
2684 out:
2685 DTRACE_NFSV4_2(op__link__done, struct compound_state *, cs,
2686 LINK4res *, resp);
2687 }
2688
2689 /*
2690 * Used by rfs4_op_lookup and rfs4_op_lookupp to do the actual work.
2691 */
2692
2693 /* ARGSUSED */
2694 static nfsstat4
2695 do_rfs4_op_lookup(char *nm, struct svc_req *req, struct compound_state *cs)
2696 {
2697 int error;
2698 int different_export = 0;
2699 vnode_t *vp, *pre_tvp = NULL, *oldvp = NULL;
2700 struct exportinfo *exi = NULL, *pre_exi = NULL;
2701 nfsstat4 stat;
2702 fid_t fid;
2703 int attrdir, dotdot, walk;
2704 bool_t is_newvp = FALSE;
2705
2706 if (cs->vp->v_flag & V_XATTRDIR) {
2707 attrdir = 1;
2708 ASSERT(get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2709 } else {
2710 attrdir = 0;
2711 ASSERT(! get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2712 }
2713
2714 dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
2715
2716 /*
2717 * If dotdotting, then need to check whether it's
2718 * above the root of a filesystem, or above an
2719 * export point.
2720 */
2721 if (dotdot) {
2722 ASSERT(cs->exi != NULL);
2723 ASSERT3U(cs->exi->exi_zoneid, ==, curzone->zone_id);
2724 /*
2725 * If dotdotting at the root of a filesystem, then
2726 * need to traverse back to the mounted-on filesystem
2727 * and do the dotdot lookup there.
2728 */
2729 if ((cs->vp->v_flag & VROOT) || VN_IS_CURZONEROOT(cs->vp)) {
2730
2731 /*
2732 * If at the system root, then can
2733 * go up no further.
2734 */
2735 if (VN_CMP(cs->vp, ZONE_ROOTVP()))
2736 return (puterrno4(ENOENT));
2737
2738 /*
2739 * Traverse back to the mounted-on filesystem
2740 */
2741 cs->vp = untraverse(cs->vp);
2742
2743 /*
2744 * Set the different_export flag so we remember
2745 * to pick up a new exportinfo entry for
2746 * this new filesystem.
2747 */
2748 different_export = 1;
2749 } else {
2750
2751 /*
2752 * If dotdotting above an export point then set
2753 * the different_export to get new export info.
2754 */
2755 different_export = nfs_exported(cs->exi, cs->vp);
2756 }
2757 }
2758
2759 error = VOP_LOOKUP(cs->vp, nm, &vp, NULL, 0, NULL, cs->cr,
2760 NULL, NULL, NULL);
2761 if (error)
2762 return (puterrno4(error));
2763
2764 /*
2765 * If the vnode is in a pseudo filesystem, check whether it is visible.
2766 *
2767 * XXX if the vnode is a symlink and it is not visible in
2768 * a pseudo filesystem, return ENOENT (not following symlink).
2769 * V4 client can not mount such symlink. This is a regression
2770 * from V2/V3.
2771 *
2772 * In the same exported filesystem, if the security flavor used
2773 * is not an explicitly shared flavor, limit the view to the visible
2774 * list entries only. This is not a WRONGSEC case because it's already
2775 * checked via PUTROOTFH/PUTPUBFH or PUTFH.
2776 */
2777 if (!different_export &&
2778 (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
2779 cs->access & CS_ACCESS_LIMITED)) {
2780 if (! nfs_visible(cs->exi, vp, &different_export)) {
2781 VN_RELE(vp);
2782 return (puterrno4(ENOENT));
2783 }
2784 }
2785
2786 /*
2787 * If it's a mountpoint, then traverse it.
2788 */
2789 if (vn_ismntpt(vp)) {
2790 pre_exi = cs->exi; /* save pre-traversed exportinfo */
2791 pre_tvp = vp; /* save pre-traversed vnode */
2792
2793 /*
2794 * hold pre_tvp to counteract rele by traverse. We will
2795 * need pre_tvp below if checkexport4 fails
2796 */
2797 VN_HOLD(pre_tvp);
2798 if ((error = traverse(&vp)) != 0) {
2799 VN_RELE(vp);
2800 VN_RELE(pre_tvp);
2801 return (puterrno4(error));
2802 }
2803 different_export = 1;
2804 } else if (vp->v_vfsp != cs->vp->v_vfsp) {
2805 /*
2806 * The vfsp comparison is to handle the case where
2807 * a LOFS mount is shared. lo_lookup traverses mount points,
2808 * and NFS is unaware of local fs transistions because
2809 * v_vfsmountedhere isn't set. For this special LOFS case,
2810 * the dir and the obj returned by lookup will have different
2811 * vfs ptrs.
2812 */
2813 different_export = 1;
2814 }
2815
2816 if (different_export) {
2817
2818 bzero(&fid, sizeof (fid));
2819 fid.fid_len = MAXFIDSZ;
2820 error = vop_fid_pseudo(vp, &fid);
2821 if (error) {
2822 VN_RELE(vp);
2823 if (pre_tvp)
2824 VN_RELE(pre_tvp);
2825 return (puterrno4(error));
2826 }
2827
2828 if (dotdot)
2829 exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
2830 else
2831 exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
2832
2833 if (exi == NULL) {
2834 if (pre_tvp) {
2835 /*
2836 * If this vnode is a mounted-on vnode,
2837 * but the mounted-on file system is not
2838 * exported, send back the filehandle for
2839 * the mounted-on vnode, not the root of
2840 * the mounted-on file system.
2841 */
2842 VN_RELE(vp);
2843 vp = pre_tvp;
2844 exi = pre_exi;
2845 } else {
2846 VN_RELE(vp);
2847 return (puterrno4(EACCES));
2848 }
2849 } else if (pre_tvp) {
2850 /* we're done with pre_tvp now. release extra hold */
2851 VN_RELE(pre_tvp);
2852 }
2853
2854 cs->exi = exi;
2855
2856 /*
2857 * Now we do a checkauth4. The reason is that
2858 * this client/user may not have access to the new
2859 * exported file system, and if they do,
2860 * the client/user may be mapped to a different uid.
2861 *
2862 * We start with a new cr, because the checkauth4 done
2863 * in the PUT*FH operation over wrote the cred's uid,
2864 * gid, etc, and we want the real thing before calling
2865 * checkauth4()
2866 */
2867 crfree(cs->cr);
2868 cs->cr = crdup(cs->basecr);
2869
2870 oldvp = cs->vp;
2871 cs->vp = vp;
2872 is_newvp = TRUE;
2873
2874 stat = call_checkauth4(cs, req);
2875 if (stat != NFS4_OK) {
2876 VN_RELE(cs->vp);
2877 cs->vp = oldvp;
2878 return (stat);
2879 }
2880 }
2881
2882 /*
2883 * After various NFS checks, do a label check on the path
2884 * component. The label on this path should either be the
2885 * global zone's label or a zone's label. We are only
2886 * interested in the zone's label because exported files
2887 * in global zone is accessible (though read-only) to
2888 * clients. The exportability/visibility check is already
2889 * done before reaching this code.
2890 */
2891 if (is_system_labeled()) {
2892 bslabel_t *clabel;
2893
2894 ASSERT(req->rq_label != NULL);
2895 clabel = req->rq_label;
2896 DTRACE_PROBE2(tx__rfs4__log__info__oplookup__clabel, char *,
2897 "got client label from request(1)", struct svc_req *, req);
2898
2899 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2900 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
2901 cs->exi)) {
2902 error = EACCES;
2903 goto err_out;
2904 }
2905 } else {
2906 /*
2907 * We grant access to admin_low label clients
2908 * only if the client is trusted, i.e. also
2909 * running Solaris Trusted Extension.
2910 */
2911 struct sockaddr *ca;
2912 int addr_type;
2913 void *ipaddr;
2914 tsol_tpc_t *tp;
2915
2916 ca = (struct sockaddr *)svc_getrpccaller(
2917 req->rq_xprt)->buf;
2918 if (ca->sa_family == AF_INET) {
2919 addr_type = IPV4_VERSION;
2920 ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
2921 } else if (ca->sa_family == AF_INET6) {
2922 addr_type = IPV6_VERSION;
2923 ipaddr = &((struct sockaddr_in6 *)
2924 ca)->sin6_addr;
2925 }
2926 tp = find_tpc(ipaddr, addr_type, B_FALSE);
2927 if (tp == NULL || tp->tpc_tp.tp_doi !=
2928 l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
2929 SUN_CIPSO) {
2930 if (tp != NULL)
2931 TPC_RELE(tp);
2932 error = EACCES;
2933 goto err_out;
2934 }
2935 TPC_RELE(tp);
2936 }
2937 }
2938
2939 error = makefh4(&cs->fh, vp, cs->exi);
2940
2941 err_out:
2942 if (error) {
2943 if (is_newvp) {
2944 VN_RELE(cs->vp);
2945 cs->vp = oldvp;
2946 } else
2947 VN_RELE(vp);
2948 return (puterrno4(error));
2949 }
2950
2951 if (!is_newvp) {
2952 if (cs->vp)
2953 VN_RELE(cs->vp);
2954 cs->vp = vp;
2955 } else if (oldvp)
2956 VN_RELE(oldvp);
2957
2958 /*
2959 * if did lookup on attrdir and didn't lookup .., set named
2960 * attr fh flag
2961 */
2962 if (attrdir && ! dotdot)
2963 set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
2964
2965 /* Assume false for now, open proc will set this */
2966 cs->mandlock = FALSE;
2967
2968 return (NFS4_OK);
2969 }
2970
2971 /* ARGSUSED */
2972 static void
2973 rfs4_op_lookup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2974 struct compound_state *cs)
2975 {
2976 LOOKUP4args *args = &argop->nfs_argop4_u.oplookup;
2977 LOOKUP4res *resp = &resop->nfs_resop4_u.oplookup;
2978 char *nm;
2979 uint_t len;
2980 struct sockaddr *ca;
2981 char *name = NULL;
2982 nfsstat4 status;
2983
2984 DTRACE_NFSV4_2(op__lookup__start, struct compound_state *, cs,
2985 LOOKUP4args *, args);
2986
2987 if (cs->vp == NULL) {
2988 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2989 goto out;
2990 }
2991
2992 if (cs->vp->v_type == VLNK) {
2993 *cs->statusp = resp->status = NFS4ERR_SYMLINK;
2994 goto out;
2995 }
2996
2997 if (cs->vp->v_type != VDIR) {
2998 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2999 goto out;
3000 }
3001
3002 status = utf8_dir_verify(&args->objname);
3003 if (status != NFS4_OK) {
3004 *cs->statusp = resp->status = status;
3005 goto out;
3006 }
3007
3008 nm = utf8_to_str(&args->objname, &len, NULL);
3009 if (nm == NULL) {
3010 *cs->statusp = resp->status = NFS4ERR_INVAL;
3011 goto out;
3012 }
3013
3014 if (len > MAXNAMELEN) {
3015 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
3016 kmem_free(nm, len);
3017 goto out;
3018 }
3019
3020 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3021 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
3022 MAXPATHLEN + 1);
3023
3024 if (name == NULL) {
3025 *cs->statusp = resp->status = NFS4ERR_INVAL;
3026 kmem_free(nm, len);
3027 goto out;
3028 }
3029
3030 *cs->statusp = resp->status = do_rfs4_op_lookup(name, req, cs);
3031
3032 if (name != nm)
3033 kmem_free(name, MAXPATHLEN + 1);
3034 kmem_free(nm, len);
3035
3036 out:
3037 DTRACE_NFSV4_2(op__lookup__done, struct compound_state *, cs,
3038 LOOKUP4res *, resp);
3039 }
3040
3041 /* ARGSUSED */
3042 static void
3043 rfs4_op_lookupp(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3044 struct compound_state *cs)
3045 {
3046 LOOKUPP4res *resp = &resop->nfs_resop4_u.oplookupp;
3047
3048 DTRACE_NFSV4_1(op__lookupp__start, struct compound_state *, cs);
3049
3050 if (cs->vp == NULL) {
3051 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3052 goto out;
3053 }
3054
3055 if (cs->vp->v_type != VDIR) {
3056 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
3057 goto out;
3058 }
3059
3060 *cs->statusp = resp->status = do_rfs4_op_lookup("..", req, cs);
3061
3062 /*
3063 * From NFSV4 Specification, LOOKUPP should not check for
3064 * NFS4ERR_WRONGSEC. Retrun NFS4_OK instead.
3065 */
3066 if (resp->status == NFS4ERR_WRONGSEC) {
3067 *cs->statusp = resp->status = NFS4_OK;
3068 }
3069
3070 out:
3071 DTRACE_NFSV4_2(op__lookupp__done, struct compound_state *, cs,
3072 LOOKUPP4res *, resp);
3073 }
3074
3075
3076 /*ARGSUSED2*/
3077 static void
3078 rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3079 struct compound_state *cs)
3080 {
3081 OPENATTR4args *args = &argop->nfs_argop4_u.opopenattr;
3082 OPENATTR4res *resp = &resop->nfs_resop4_u.opopenattr;
3083 vnode_t *avp = NULL;
3084 int lookup_flags = LOOKUP_XATTR, error;
3085 int exp_ro = 0;
3086
3087 DTRACE_NFSV4_2(op__openattr__start, struct compound_state *, cs,
3088 OPENATTR4args *, args);
3089
3090 if (cs->vp == NULL) {
3091 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3092 goto out;
3093 }
3094
3095 if ((cs->vp->v_vfsp->vfs_flag & VFS_XATTR) == 0 &&
3096 !vfs_has_feature(cs->vp->v_vfsp, VFSFT_SYSATTR_VIEWS)) {
3097 *cs->statusp = resp->status = puterrno4(ENOTSUP);
3098 goto out;
3099 }
3100
3101 /*
3102 * If file system supports passing ACE mask to VOP_ACCESS then
3103 * check for ACE_READ_NAMED_ATTRS, otherwise do legacy checks
3104 */
3105
3106 if (vfs_has_feature(cs->vp->v_vfsp, VFSFT_ACEMASKONACCESS))
3107 error = VOP_ACCESS(cs->vp, ACE_READ_NAMED_ATTRS,
3108 V_ACE_MASK, cs->cr, NULL);
3109 else
3110 error = ((VOP_ACCESS(cs->vp, VREAD, 0, cs->cr, NULL) != 0) &&
3111 (VOP_ACCESS(cs->vp, VWRITE, 0, cs->cr, NULL) != 0) &&
3112 (VOP_ACCESS(cs->vp, VEXEC, 0, cs->cr, NULL) != 0));
3113
3114 if (error) {
3115 *cs->statusp = resp->status = puterrno4(EACCES);
3116 goto out;
3117 }
3118
3119 /*
3120 * The CREATE_XATTR_DIR VOP flag cannot be specified if
3121 * the file system is exported read-only -- regardless of
3122 * createdir flag. Otherwise the attrdir would be created
3123 * (assuming server fs isn't mounted readonly locally). If
3124 * VOP_LOOKUP returns ENOENT in this case, the error will
3125 * be translated into EROFS. ENOSYS is mapped to ENOTSUP
3126 * because specfs has no VOP_LOOKUP op, so the macro would
3127 * return ENOSYS. EINVAL is returned by all (current)
3128 * Solaris file system implementations when any of their
3129 * restrictions are violated (xattr(dir) can't have xattrdir).
3130 * Returning NOTSUPP is more appropriate in this case
3131 * because the object will never be able to have an attrdir.
3132 */
3133 if (args->createdir && ! (exp_ro = rdonly4(req, cs)))
3134 lookup_flags |= CREATE_XATTR_DIR;
3135
3136 error = VOP_LOOKUP(cs->vp, "", &avp, NULL, lookup_flags, NULL, cs->cr,
3137 NULL, NULL, NULL);
3138
3139 if (error) {
3140 if (error == ENOENT && args->createdir && exp_ro)
3141 *cs->statusp = resp->status = puterrno4(EROFS);
3142 else if (error == EINVAL || error == ENOSYS)
3143 *cs->statusp = resp->status = puterrno4(ENOTSUP);
3144 else
3145 *cs->statusp = resp->status = puterrno4(error);
3146 goto out;
3147 }
3148
3149 ASSERT(avp->v_flag & V_XATTRDIR);
3150
3151 error = makefh4(&cs->fh, avp, cs->exi);
3152
3153 if (error) {
3154 VN_RELE(avp);
3155 *cs->statusp = resp->status = puterrno4(error);
3156 goto out;
3157 }
3158
3159 VN_RELE(cs->vp);
3160 cs->vp = avp;
3161
3162 /*
3163 * There is no requirement for an attrdir fh flag
3164 * because the attrdir has a vnode flag to distinguish
3165 * it from regular (non-xattr) directories. The
3166 * FH4_ATTRDIR flag is set for future sanity checks.
3167 */
3168 set_fh4_flag(&cs->fh, FH4_ATTRDIR);
3169 *cs->statusp = resp->status = NFS4_OK;
3170
3171 out:
3172 DTRACE_NFSV4_2(op__openattr__done, struct compound_state *, cs,
3173 OPENATTR4res *, resp);
3174 }
3175
3176 static int
3177 do_io(int direction, vnode_t *vp, struct uio *uio, int ioflag, cred_t *cred,
3178 caller_context_t *ct)
3179 {
3180 int error;
3181 int i;
3182 clock_t delaytime;
3183
3184 delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
3185
3186 /*
3187 * Don't block on mandatory locks. If this routine returns
3188 * EAGAIN, the caller should return NFS4ERR_LOCKED.
3189 */
3190 uio->uio_fmode = FNONBLOCK;
3191
3192 for (i = 0; i < rfs4_maxlock_tries; i++) {
3193
3194
3195 if (direction == FREAD) {
3196 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, ct);
3197 error = VOP_READ(vp, uio, ioflag, cred, ct);
3198 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, ct);
3199 } else {
3200 (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, ct);
3201 error = VOP_WRITE(vp, uio, ioflag, cred, ct);
3202 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, ct);
3203 }
3204
3205 if (error != EAGAIN)
3206 break;
3207
3208 if (i < rfs4_maxlock_tries - 1) {
3209 delay(delaytime);
3210 delaytime *= 2;
3211 }
3212 }
3213
3214 return (error);
3215 }
3216
3217 /* ARGSUSED */
3218 static void
3219 rfs4_op_read(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3220 struct compound_state *cs)
3221 {
3222 READ4args *args = &argop->nfs_argop4_u.opread;
3223 READ4res *resp = &resop->nfs_resop4_u.opread;
3224 int error;
3225 int verror;
3226 vnode_t *vp;
3227 struct vattr va;
3228 struct iovec iov, *iovp = NULL;
3229 int iovcnt;
3230 struct uio uio;
3231 u_offset_t offset;
3232 bool_t *deleg = &cs->deleg;
3233 nfsstat4 stat;
3234 int in_crit = 0;
3235 mblk_t *mp = NULL;
3236 int alloc_err = 0;
3237 int rdma_used = 0;
3238 int loaned_buffers;
3239 caller_context_t ct;
3240 struct uio *uiop;
3241
3242 DTRACE_NFSV4_2(op__read__start, struct compound_state *, cs,
3243 READ4args, args);
3244
3245 vp = cs->vp;
3246 if (vp == NULL) {
3247 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3248 goto out;
3249 }
3250 if (cs->access == CS_ACCESS_DENIED) {
3251 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3252 goto out;
3253 }
3254
3255 if ((stat = rfs4_check_stateid(FREAD, vp, &args->stateid, FALSE,
3256 deleg, TRUE, &ct)) != NFS4_OK) {
3257 *cs->statusp = resp->status = stat;
3258 goto out;
3259 }
3260
3261 /*
3262 * Enter the critical region before calling VOP_RWLOCK
3263 * to avoid a deadlock with write requests.
3264 */
3265 if (nbl_need_check(vp)) {
3266 nbl_start_crit(vp, RW_READER);
3267 in_crit = 1;
3268 if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
3269 &ct)) {
3270 *cs->statusp = resp->status = NFS4ERR_LOCKED;
3271 goto out;
3272 }
3273 }
3274
3275 if (args->wlist) {
3276 if (args->count > clist_len(args->wlist)) {
3277 *cs->statusp = resp->status = NFS4ERR_INVAL;
3278 goto out;
3279 }
3280 rdma_used = 1;
3281 }
3282
3283 /* use loaned buffers for TCP */
3284 loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;
3285
3286 va.va_mask = AT_MODE|AT_SIZE|AT_UID;
3287 verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3288
3289 /*
3290 * If we can't get the attributes, then we can't do the
3291 * right access checking. So, we'll fail the request.
3292 */
3293 if (verror) {
3294 *cs->statusp = resp->status = puterrno4(verror);
3295 goto out;
3296 }
3297
3298 if (vp->v_type != VREG) {
3299 *cs->statusp = resp->status =
3300 ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
3301 goto out;
3302 }
3303
3304 if (crgetuid(cs->cr) != va.va_uid &&
3305 (error = VOP_ACCESS(vp, VREAD, 0, cs->cr, &ct)) &&
3306 (error = VOP_ACCESS(vp, VEXEC, 0, cs->cr, &ct))) {
3307 *cs->statusp = resp->status = puterrno4(error);
3308 goto out;
3309 }
3310
3311 if (MANDLOCK(vp, va.va_mode)) { /* XXX - V4 supports mand locking */
3312 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3313 goto out;
3314 }
3315
3316 offset = args->offset;
3317 if (offset >= va.va_size) {
3318 *cs->statusp = resp->status = NFS4_OK;
3319 resp->eof = TRUE;
3320 resp->data_len = 0;
3321 resp->data_val = NULL;
3322 resp->mblk = NULL;
3323 /* RDMA */
3324 resp->wlist = args->wlist;
3325 resp->wlist_len = resp->data_len;
3326 *cs->statusp = resp->status = NFS4_OK;
3327 if (resp->wlist)
3328 clist_zero_len(resp->wlist);
3329 goto out;
3330 }
3331
3332 if (args->count == 0) {
3333 *cs->statusp = resp->status = NFS4_OK;
3334 resp->eof = FALSE;
3335 resp->data_len = 0;
3336 resp->data_val = NULL;
3337 resp->mblk = NULL;
3338 /* RDMA */
3339 resp->wlist = args->wlist;
3340 resp->wlist_len = resp->data_len;
3341 if (resp->wlist)
3342 clist_zero_len(resp->wlist);
3343 goto out;
3344 }
3345
3346 /*
3347 * Do not allocate memory more than maximum allowed
3348 * transfer size
3349 */
3350 if (args->count > rfs4_tsize(req))
3351 args->count = rfs4_tsize(req);
3352
3353 if (loaned_buffers) {
3354 uiop = (uio_t *)rfs_setup_xuio(vp);
3355 ASSERT(uiop != NULL);
3356 uiop->uio_segflg = UIO_SYSSPACE;
3357 uiop->uio_loffset = args->offset;
3358 uiop->uio_resid = args->count;
3359
3360 /* Jump to do the read if successful */
3361 if (!VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cs->cr, &ct)) {
3362 /*
3363 * Need to hold the vnode until after VOP_RETZCBUF()
3364 * is called.
3365 */
3366 VN_HOLD(vp);
3367 goto doio_read;
3368 }
3369
3370 DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
3371 uiop->uio_loffset, int, uiop->uio_resid);
3372
3373 uiop->uio_extflg = 0;
3374
3375 /* failure to setup for zero copy */
3376 rfs_free_xuio((void *)uiop);
3377 loaned_buffers = 0;
3378 }
3379
3380 /*
3381 * If returning data via RDMA Write, then grab the chunk list. If we
3382 * aren't returning READ data w/RDMA_WRITE, then grab a mblk.
3383 */
3384 if (rdma_used) {
3385 mp = NULL;
3386 (void) rdma_get_wchunk(req, &iov, args->wlist);
3387 uio.uio_iov = &iov;
3388 uio.uio_iovcnt = 1;
3389 } else {
3390 /*
3391 * mp will contain the data to be sent out in the read reply.
3392 * It will be freed after the reply has been sent.
3393 */
3394 mp = rfs_read_alloc(args->count, &iovp, &iovcnt);
3395 ASSERT(mp != NULL);
3396 ASSERT(alloc_err == 0);
3397 uio.uio_iov = iovp;
3398 uio.uio_iovcnt = iovcnt;
3399 }
3400
3401 uio.uio_segflg = UIO_SYSSPACE;
3402 uio.uio_extflg = UIO_COPY_CACHED;
3403 uio.uio_loffset = args->offset;
3404 uio.uio_resid = args->count;
3405 uiop = &uio;
3406
3407 doio_read:
3408 error = do_io(FREAD, vp, uiop, 0, cs->cr, &ct);
3409
3410 va.va_mask = AT_SIZE;
3411 verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3412
3413 if (error) {
3414 if (mp)
3415 freemsg(mp);
3416 *cs->statusp = resp->status = puterrno4(error);
3417 goto out;
3418 }
3419
3420 /* make mblk using zc buffers */
3421 if (loaned_buffers) {
3422 mp = uio_to_mblk(uiop);
3423 ASSERT(mp != NULL);
3424 }
3425
3426 *cs->statusp = resp->status = NFS4_OK;
3427
3428 ASSERT(uiop->uio_resid >= 0);
3429 resp->data_len = args->count - uiop->uio_resid;
3430 if (mp) {
3431 resp->data_val = (char *)mp->b_datap->db_base;
3432 rfs_rndup_mblks(mp, resp->data_len, loaned_buffers);
3433 } else {
3434 resp->data_val = (caddr_t)iov.iov_base;
3435 }
3436
3437 resp->mblk = mp;
3438
3439 if (!verror && offset + resp->data_len == va.va_size)
3440 resp->eof = TRUE;
3441 else
3442 resp->eof = FALSE;
3443
3444 if (rdma_used) {
3445 if (!rdma_setup_read_data4(args, resp)) {
3446 *cs->statusp = resp->status = NFS4ERR_INVAL;
3447 }
3448 } else {
3449 resp->wlist = NULL;
3450 }
3451
3452 out:
3453 if (in_crit)
3454 nbl_end_crit(vp);
3455
3456 if (iovp != NULL)
3457 kmem_free(iovp, iovcnt * sizeof (struct iovec));
3458
3459 DTRACE_NFSV4_2(op__read__done, struct compound_state *, cs,
3460 READ4res *, resp);
3461 }
3462
3463 static void
3464 rfs4_op_read_free(nfs_resop4 *resop)
3465 {
3466 READ4res *resp = &resop->nfs_resop4_u.opread;
3467
3468 if (resp->status == NFS4_OK && resp->mblk != NULL) {
3469 freemsg(resp->mblk);
3470 resp->mblk = NULL;
3471 resp->data_val = NULL;
3472 resp->data_len = 0;
3473 }
3474 }
3475
3476 static void
3477 rfs4_op_readdir_free(nfs_resop4 * resop)
3478 {
3479 READDIR4res *resp = &resop->nfs_resop4_u.opreaddir;
3480
3481 if (resp->status == NFS4_OK && resp->mblk != NULL) {
3482 freeb(resp->mblk);
3483 resp->mblk = NULL;
3484 resp->data_len = 0;
3485 }
3486 }
3487
3488
3489 /* ARGSUSED */
3490 static void
3491 rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3492 struct compound_state *cs)
3493 {
3494 PUTPUBFH4res *resp = &resop->nfs_resop4_u.opputpubfh;
3495 int error;
3496 vnode_t *vp;
3497 struct exportinfo *exi, *sav_exi;
3498 nfs_fh4_fmt_t *fh_fmtp;
3499 nfs_export_t *ne = nfs_get_export();
3500
3501 DTRACE_NFSV4_1(op__putpubfh__start, struct compound_state *, cs);
3502
3503 if (cs->vp) {
3504 VN_RELE(cs->vp);
3505 cs->vp = NULL;
3506 }
3507
3508 if (cs->cr)
3509 crfree(cs->cr);
3510
3511 cs->cr = crdup(cs->basecr);
3512
3513 vp = ne->exi_public->exi_vp;
3514 if (vp == NULL) {
3515 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3516 goto out;
3517 }
3518
3519 error = makefh4(&cs->fh, vp, ne->exi_public);
3520 if (error != 0) {
3521 *cs->statusp = resp->status = puterrno4(error);
3522 goto out;
3523 }
3524 sav_exi = cs->exi;
3525 if (ne->exi_public == ne->exi_root) {
3526 /*
3527 * No filesystem is actually shared public, so we default
3528 * to exi_root. In this case, we must check whether root
3529 * is exported.
3530 */
3531 fh_fmtp = (nfs_fh4_fmt_t *)cs->fh.nfs_fh4_val;
3532
3533 /*
3534 * if root filesystem is exported, the exportinfo struct that we
3535 * should use is what checkexport4 returns, because root_exi is
3536 * actually a mostly empty struct.
3537 */
3538 exi = checkexport4(&fh_fmtp->fh4_fsid,
3539 (fid_t *)&fh_fmtp->fh4_xlen, NULL);
3540 cs->exi = ((exi != NULL) ? exi : ne->exi_public);
3541 } else {
3542 /*
3543 * it's a properly shared filesystem
3544 */
3545 cs->exi = ne->exi_public;
3546 }
3547
3548 if (is_system_labeled()) {
3549 bslabel_t *clabel;
3550
3551 ASSERT(req->rq_label != NULL);
3552 clabel = req->rq_label;
3553 DTRACE_PROBE2(tx__rfs4__log__info__opputpubfh__clabel, char *,
3554 "got client label from request(1)",
3555 struct svc_req *, req);
3556 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3557 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3558 cs->exi)) {
3559 *cs->statusp = resp->status =
3560 NFS4ERR_SERVERFAULT;
3561 goto out;
3562 }
3563 }
3564 }
3565
3566 VN_HOLD(vp);
3567 cs->vp = vp;
3568
3569 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3570 VN_RELE(cs->vp);
3571 cs->vp = NULL;
3572 cs->exi = sav_exi;
3573 goto out;
3574 }
3575
3576 *cs->statusp = resp->status = NFS4_OK;
3577 out:
3578 DTRACE_NFSV4_2(op__putpubfh__done, struct compound_state *, cs,
3579 PUTPUBFH4res *, resp);
3580 }
3581
3582 /*
3583 * XXX - issue with put*fh operations. Suppose /export/home is exported.
3584 * Suppose an NFS client goes to mount /export/home/joe. If /export, home,
3585 * or joe have restrictive search permissions, then we shouldn't let
3586 * the client get a file handle. This is easy to enforce. However, we
3587 * don't know what security flavor should be used until we resolve the
3588 * path name. Another complication is uid mapping. If root is
3589 * the user, then it will be mapped to the anonymous user by default,
3590 * but we won't know that till we've resolved the path name. And we won't
3591 * know what the anonymous user is.
3592 * Luckily, SECINFO is specified to take a full filename.
 * So what we will have to do in rfs4_op_lookup is check that the flavor of
3594 * the target object matches that of the request, and if root was the
3595 * caller, check for the root= and anon= options, and if necessary,
3596 * repeat the lookup using the right cred_t. But that's not done yet.
3597 */
3598 /* ARGSUSED */
3599 static void
3600 rfs4_op_putfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3601 struct compound_state *cs)
3602 {
3603 PUTFH4args *args = &argop->nfs_argop4_u.opputfh;
3604 PUTFH4res *resp = &resop->nfs_resop4_u.opputfh;
3605 nfs_fh4_fmt_t *fh_fmtp;
3606
3607 DTRACE_NFSV4_2(op__putfh__start, struct compound_state *, cs,
3608 PUTFH4args *, args);
3609
3610 if (cs->vp) {
3611 VN_RELE(cs->vp);
3612 cs->vp = NULL;
3613 }
3614
3615 if (cs->cr) {
3616 crfree(cs->cr);
3617 cs->cr = NULL;
3618 }
3619
3620
3621 if (args->object.nfs_fh4_len < NFS_FH4_LEN) {
3622 *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
3623 goto out;
3624 }
3625
3626 fh_fmtp = (nfs_fh4_fmt_t *)args->object.nfs_fh4_val;
3627 cs->exi = checkexport4(&fh_fmtp->fh4_fsid, (fid_t *)&fh_fmtp->fh4_xlen,
3628 NULL);
3629
3630 if (cs->exi == NULL) {
3631 *cs->statusp = resp->status = NFS4ERR_STALE;
3632 goto out;
3633 }
3634
3635 cs->cr = crdup(cs->basecr);
3636
3637 ASSERT(cs->cr != NULL);
3638
3639 if (! (cs->vp = nfs4_fhtovp(&args->object, cs->exi, &resp->status))) {
3640 *cs->statusp = resp->status;
3641 goto out;
3642 }
3643
3644 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3645 VN_RELE(cs->vp);
3646 cs->vp = NULL;
3647 goto out;
3648 }
3649
3650 nfs_fh4_copy(&args->object, &cs->fh);
3651 *cs->statusp = resp->status = NFS4_OK;
3652 cs->deleg = FALSE;
3653
3654 out:
3655 DTRACE_NFSV4_2(op__putfh__done, struct compound_state *, cs,
3656 PUTFH4res *, resp);
3657 }
3658
3659 /* ARGSUSED */
3660 static void
3661 rfs4_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3662 struct compound_state *cs)
3663 {
3664 PUTROOTFH4res *resp = &resop->nfs_resop4_u.opputrootfh;
3665 int error;
3666 fid_t fid;
3667 struct exportinfo *exi, *sav_exi;
3668
3669 DTRACE_NFSV4_1(op__putrootfh__start, struct compound_state *, cs);
3670
3671 if (cs->vp) {
3672 VN_RELE(cs->vp);
3673 cs->vp = NULL;
3674 }
3675
3676 if (cs->cr)
3677 crfree(cs->cr);
3678
3679 cs->cr = crdup(cs->basecr);
3680
3681 /*
3682 * Using rootdir, the system root vnode,
3683 * get its fid.
3684 */
3685 bzero(&fid, sizeof (fid));
3686 fid.fid_len = MAXFIDSZ;
3687 error = vop_fid_pseudo(ZONE_ROOTVP(), &fid);
3688 if (error != 0) {
3689 *cs->statusp = resp->status = puterrno4(error);
3690 goto out;
3691 }
3692
3693 /*
3694 * Then use the root fsid & fid it to find out if it's exported
3695 *
3696 * If the server root isn't exported directly, then
3697 * it should at least be a pseudo export based on
3698 * one or more exports further down in the server's
3699 * file tree.
3700 */
3701 exi = checkexport4(&ZONE_ROOTVP()->v_vfsp->vfs_fsid, &fid, NULL);
3702 if (exi == NULL || exi->exi_export.ex_flags & EX_PUBLIC) {
3703 NFS4_DEBUG(rfs4_debug,
3704 (CE_WARN, "rfs4_op_putrootfh: export check failure"));
3705 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3706 goto out;
3707 }
3708
3709 /*
3710 * Now make a filehandle based on the root
3711 * export and root vnode.
3712 */
3713 error = makefh4(&cs->fh, ZONE_ROOTVP(), exi);
3714 if (error != 0) {
3715 *cs->statusp = resp->status = puterrno4(error);
3716 goto out;
3717 }
3718
3719 sav_exi = cs->exi;
3720 cs->exi = exi;
3721
3722 VN_HOLD(ZONE_ROOTVP());
3723 cs->vp = ZONE_ROOTVP();
3724
3725 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3726 VN_RELE(cs->vp);
3727 cs->vp = NULL;
3728 cs->exi = sav_exi;
3729 goto out;
3730 }
3731
3732 *cs->statusp = resp->status = NFS4_OK;
3733 cs->deleg = FALSE;
3734 out:
3735 DTRACE_NFSV4_2(op__putrootfh__done, struct compound_state *, cs,
3736 PUTROOTFH4res *, resp);
3737 }
3738
3739 /*
3740 * readlink: args: CURRENT_FH.
3741 * res: status. If success - CURRENT_FH unchanged, return linktext.
3742 */
3743
3744 /* ARGSUSED */
3745 static void
3746 rfs4_op_readlink(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3747 struct compound_state *cs)
3748 {
3749 READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3750 int error;
3751 vnode_t *vp;
3752 struct iovec iov;
3753 struct vattr va;
3754 struct uio uio;
3755 char *data;
3756 struct sockaddr *ca;
3757 char *name = NULL;
3758 int is_referral;
3759
3760 DTRACE_NFSV4_1(op__readlink__start, struct compound_state *, cs);
3761
3762 /* CURRENT_FH: directory */
3763 vp = cs->vp;
3764 if (vp == NULL) {
3765 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3766 goto out;
3767 }
3768
3769 if (cs->access == CS_ACCESS_DENIED) {
3770 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3771 goto out;
3772 }
3773
3774 /* Is it a referral? */
3775 if (vn_is_nfs_reparse(vp, cs->cr) && client_is_downrev(req)) {
3776
3777 is_referral = 1;
3778
3779 } else {
3780
3781 is_referral = 0;
3782
3783 if (vp->v_type == VDIR) {
3784 *cs->statusp = resp->status = NFS4ERR_ISDIR;
3785 goto out;
3786 }
3787
3788 if (vp->v_type != VLNK) {
3789 *cs->statusp = resp->status = NFS4ERR_INVAL;
3790 goto out;
3791 }
3792
3793 }
3794
3795 va.va_mask = AT_MODE;
3796 error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
3797 if (error) {
3798 *cs->statusp = resp->status = puterrno4(error);
3799 goto out;
3800 }
3801
3802 if (MANDLOCK(vp, va.va_mode)) {
3803 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3804 goto out;
3805 }
3806
3807 data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
3808
3809 if (is_referral) {
3810 char *s;
3811 size_t strsz;
3812
3813 /* Get an artificial symlink based on a referral */
3814 s = build_symlink(vp, cs->cr, &strsz);
3815 global_svstat_ptr[4][NFS_REFERLINKS].value.ui64++;
3816 DTRACE_PROBE2(nfs4serv__func__referral__reflink,
3817 vnode_t *, vp, char *, s);
3818 if (s == NULL)
3819 error = EINVAL;
3820 else {
3821 error = 0;
3822 (void) strlcpy(data, s, MAXPATHLEN + 1);
3823 kmem_free(s, strsz);
3824 }
3825
3826 } else {
3827
3828 iov.iov_base = data;
3829 iov.iov_len = MAXPATHLEN;
3830 uio.uio_iov = &iov;
3831 uio.uio_iovcnt = 1;
3832 uio.uio_segflg = UIO_SYSSPACE;
3833 uio.uio_extflg = UIO_COPY_CACHED;
3834 uio.uio_loffset = 0;
3835 uio.uio_resid = MAXPATHLEN;
3836
3837 error = VOP_READLINK(vp, &uio, cs->cr, NULL);
3838
3839 if (!error)
3840 *(data + MAXPATHLEN - uio.uio_resid) = '\0';
3841 }
3842
3843 if (error) {
3844 kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3845 *cs->statusp = resp->status = puterrno4(error);
3846 goto out;
3847 }
3848
3849 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3850 name = nfscmd_convname(ca, cs->exi, data, NFSCMD_CONV_OUTBOUND,
3851 MAXPATHLEN + 1);
3852
3853 if (name == NULL) {
3854 /*
3855 * Even though the conversion failed, we return
3856 * something. We just don't translate it.
3857 */
3858 name = data;
3859 }
3860
3861 /*
3862 * treat link name as data
3863 */
3864 (void) str_to_utf8(name, (utf8string *)&resp->link);
3865
3866 if (name != data)
3867 kmem_free(name, MAXPATHLEN + 1);
3868 kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3869 *cs->statusp = resp->status = NFS4_OK;
3870
3871 out:
3872 DTRACE_NFSV4_2(op__readlink__done, struct compound_state *, cs,
3873 READLINK4res *, resp);
3874 }
3875
3876 static void
3877 rfs4_op_readlink_free(nfs_resop4 *resop)
3878 {
3879 READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3880 utf8string *symlink = (utf8string *)&resp->link;
3881
3882 if (symlink->utf8string_val) {
3883 UTF8STRING_FREE(*symlink)
3884 }
3885 }
3886
3887 /*
3888 * release_lockowner:
3889 * Release any state associated with the supplied
3890 * lockowner. Note if any lo_state is holding locks we will not
3891 * rele that lo_state and thus the lockowner will not be destroyed.
3892 * A client using lock after the lock owner stateid has been released
3893 * will suffer the consequence of NFS4ERR_BAD_STATEID and would have
3894 * to reissue the lock with new_lock_owner set to TRUE.
3895 * args: lock_owner
3896 * res: status
3897 */
3898 /* ARGSUSED */
3899 static void
3900 rfs4_op_release_lockowner(nfs_argop4 *argop, nfs_resop4 *resop,
3901 struct svc_req *req, struct compound_state *cs)
3902 {
3903 RELEASE_LOCKOWNER4args *ap = &argop->nfs_argop4_u.oprelease_lockowner;
3904 RELEASE_LOCKOWNER4res *resp = &resop->nfs_resop4_u.oprelease_lockowner;
3905 rfs4_lockowner_t *lo;
3906 rfs4_openowner_t *oo;
3907 rfs4_state_t *sp;
3908 rfs4_lo_state_t *lsp;
3909 rfs4_client_t *cp;
3910 bool_t create = FALSE;
3911 locklist_t *llist;
3912 sysid_t sysid;
3913
3914 DTRACE_NFSV4_2(op__release__lockowner__start, struct compound_state *,
3915 cs, RELEASE_LOCKOWNER4args *, ap);
3916
3917 /* Make sure there is a clientid around for this request */
3918 cp = rfs4_findclient_by_id(ap->lock_owner.clientid, FALSE);
3919
3920 if (cp == NULL) {
3921 *cs->statusp = resp->status =
3922 rfs4_check_clientid(&ap->lock_owner.clientid, 0);
3923 goto out;
3924 }
3925 rfs4_client_rele(cp);
3926
3927 lo = rfs4_findlockowner(&ap->lock_owner, &create);
3928 if (lo == NULL) {
3929 *cs->statusp = resp->status = NFS4_OK;
3930 goto out;
3931 }
3932 ASSERT(lo->rl_client != NULL);
3933
3934 /*
3935 * Check for EXPIRED client. If so will reap state with in a lease
3936 * period or on next set_clientid_confirm step
3937 */
3938 if (rfs4_lease_expired(lo->rl_client)) {
3939 rfs4_lockowner_rele(lo);
3940 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
3941 goto out;
3942 }
3943
3944 /*
3945 * If no sysid has been assigned, then no locks exist; just return.
3946 */
3947 rfs4_dbe_lock(lo->rl_client->rc_dbe);
3948 if (lo->rl_client->rc_sysidt == LM_NOSYSID) {
3949 rfs4_lockowner_rele(lo);
3950 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3951 goto out;
3952 }
3953
3954 sysid = lo->rl_client->rc_sysidt;
3955 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3956
3957 /*
3958 * Mark the lockowner invalid.
3959 */
3960 rfs4_dbe_hide(lo->rl_dbe);
3961
3962 /*
3963 * sysid-pid pair should now not be used since the lockowner is
3964 * invalid. If the client were to instantiate the lockowner again
3965 * it would be assigned a new pid. Thus we can get the list of
3966 * current locks.
3967 */
3968
3969 llist = flk_get_active_locks(sysid, lo->rl_pid);
3970 /* If we are still holding locks fail */
3971 if (llist != NULL) {
3972
3973 *cs->statusp = resp->status = NFS4ERR_LOCKS_HELD;
3974
3975 flk_free_locklist(llist);
3976 /*
3977 * We need to unhide the lockowner so the client can
3978 * try it again. The bad thing here is if the client
3979 * has a logic error that took it here in the first place
3980 * they probably have lost accounting of the locks that it
3981 * is holding. So we may have dangling state until the
3982 * open owner state is reaped via close. One scenario
3983 * that could possibly occur is that the client has
3984 * sent the unlock request(s) in separate threads
3985 * and has not waited for the replies before sending the
3986 * RELEASE_LOCKOWNER request. Presumably, it would expect
3987 * and deal appropriately with NFS4ERR_LOCKS_HELD, by
3988 * reissuing the request.
3989 */
3990 rfs4_dbe_unhide(lo->rl_dbe);
3991 rfs4_lockowner_rele(lo);
3992 goto out;
3993 }
3994
3995 /*
3996 * For the corresponding client we need to check each open
3997 * owner for any opens that have lockowner state associated
3998 * with this lockowner.
3999 */
4000
4001 rfs4_dbe_lock(lo->rl_client->rc_dbe);
4002 for (oo = list_head(&lo->rl_client->rc_openownerlist); oo != NULL;
4003 oo = list_next(&lo->rl_client->rc_openownerlist, oo)) {
4004
4005 rfs4_dbe_lock(oo->ro_dbe);
4006 for (sp = list_head(&oo->ro_statelist); sp != NULL;
4007 sp = list_next(&oo->ro_statelist, sp)) {
4008
4009 rfs4_dbe_lock(sp->rs_dbe);
4010 for (lsp = list_head(&sp->rs_lostatelist);
4011 lsp != NULL;
4012 lsp = list_next(&sp->rs_lostatelist, lsp)) {
4013 if (lsp->rls_locker == lo) {
4014 rfs4_dbe_lock(lsp->rls_dbe);
4015 rfs4_dbe_invalidate(lsp->rls_dbe);
4016 rfs4_dbe_unlock(lsp->rls_dbe);
4017 }
4018 }
4019 rfs4_dbe_unlock(sp->rs_dbe);
4020 }
4021 rfs4_dbe_unlock(oo->ro_dbe);
4022 }
4023 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
4024
4025 rfs4_lockowner_rele(lo);
4026
4027 *cs->statusp = resp->status = NFS4_OK;
4028
4029 out:
4030 DTRACE_NFSV4_2(op__release__lockowner__done, struct compound_state *,
4031 cs, RELEASE_LOCKOWNER4res *, resp);
4032 }
4033
4034 /*
4035 * short utility function to lookup a file and recall the delegation
4036 */
4037 static rfs4_file_t *
4038 rfs4_lookup_and_findfile(vnode_t *dvp, char *nm, vnode_t **vpp,
4039 int *lkup_error, cred_t *cr)
4040 {
4041 vnode_t *vp;
4042 rfs4_file_t *fp = NULL;
4043 bool_t fcreate = FALSE;
4044 int error;
4045
4046 if (vpp)
4047 *vpp = NULL;
4048
4049 if ((error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cr, NULL, NULL,
4050 NULL)) == 0) {
4051 if (vp->v_type == VREG)
4052 fp = rfs4_findfile(vp, NULL, &fcreate);
4053 if (vpp)
4054 *vpp = vp;
4055 else
4056 VN_RELE(vp);
4057 }
4058
4059 if (lkup_error)
4060 *lkup_error = error;
4061
4062 return (fp);
4063 }
4064
4065 /*
4066 * remove: args: CURRENT_FH: directory; name.
4067 * res: status. If success - CURRENT_FH unchanged, return change_info
4068 * for directory.
4069 */
4070 /* ARGSUSED */
4071 static void
4072 rfs4_op_remove(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4073 struct compound_state *cs)
4074 {
4075 REMOVE4args *args = &argop->nfs_argop4_u.opremove;
4076 REMOVE4res *resp = &resop->nfs_resop4_u.opremove;
4077 int error;
4078 vnode_t *dvp, *vp;
4079 struct vattr bdva, idva, adva;
4080 char *nm;
4081 uint_t len;
4082 rfs4_file_t *fp;
4083 int in_crit = 0;
4084 bslabel_t *clabel;
4085 struct sockaddr *ca;
4086 char *name = NULL;
4087 nfsstat4 status;
4088
4089 DTRACE_NFSV4_2(op__remove__start, struct compound_state *, cs,
4090 REMOVE4args *, args);
4091
4092 /* CURRENT_FH: directory */
4093 dvp = cs->vp;
4094 if (dvp == NULL) {
4095 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4096 goto out;
4097 }
4098
4099 if (cs->access == CS_ACCESS_DENIED) {
4100 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4101 goto out;
4102 }
4103
4104 /*
4105 * If there is an unshared filesystem mounted on this vnode,
4106 * Do not allow to remove anything in this directory.
4107 */
4108 if (vn_ismntpt(dvp)) {
4109 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4110 goto out;
4111 }
4112
4113 if (dvp->v_type != VDIR) {
4114 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4115 goto out;
4116 }
4117
4118 status = utf8_dir_verify(&args->target);
4119 if (status != NFS4_OK) {
4120 *cs->statusp = resp->status = status;
4121 goto out;
4122 }
4123
4124 /*
4125 * Lookup the file so that we can check if it's a directory
4126 */
4127 nm = utf8_to_fn(&args->target, &len, NULL);
4128 if (nm == NULL) {
4129 *cs->statusp = resp->status = NFS4ERR_INVAL;
4130 goto out;
4131 }
4132
4133 if (len > MAXNAMELEN) {
4134 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4135 kmem_free(nm, len);
4136 goto out;
4137 }
4138
4139 if (rdonly4(req, cs)) {
4140 *cs->statusp = resp->status = NFS4ERR_ROFS;
4141 kmem_free(nm, len);
4142 goto out;
4143 }
4144
4145 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4146 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
4147 MAXPATHLEN + 1);
4148
4149 if (name == NULL) {
4150 *cs->statusp = resp->status = NFS4ERR_INVAL;
4151 kmem_free(nm, len);
4152 goto out;
4153 }
4154
4155 /*
4156 * Lookup the file to determine type and while we are see if
4157 * there is a file struct around and check for delegation.
4158 * We don't need to acquire va_seq before this lookup, if
4159 * it causes an update, cinfo.before will not match, which will
4160 * trigger a cache flush even if atomic is TRUE.
4161 */
4162 if (fp = rfs4_lookup_and_findfile(dvp, name, &vp, &error, cs->cr)) {
4163 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4164 NULL)) {
4165 VN_RELE(vp);
4166 rfs4_file_rele(fp);
4167 *cs->statusp = resp->status = NFS4ERR_DELAY;
4168 if (nm != name)
4169 kmem_free(name, MAXPATHLEN + 1);
4170 kmem_free(nm, len);
4171 goto out;
4172 }
4173 }
4174
4175 /* Didn't find anything to remove */
4176 if (vp == NULL) {
4177 *cs->statusp = resp->status = error;
4178 if (nm != name)
4179 kmem_free(name, MAXPATHLEN + 1);
4180 kmem_free(nm, len);
4181 goto out;
4182 }
4183
4184 if (nbl_need_check(vp)) {
4185 nbl_start_crit(vp, RW_READER);
4186 in_crit = 1;
4187 if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0, NULL)) {
4188 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4189 if (nm != name)
4190 kmem_free(name, MAXPATHLEN + 1);
4191 kmem_free(nm, len);
4192 nbl_end_crit(vp);
4193 VN_RELE(vp);
4194 if (fp) {
4195 rfs4_clear_dont_grant(fp);
4196 rfs4_file_rele(fp);
4197 }
4198 goto out;
4199 }
4200 }
4201
4202 /* check label before allowing removal */
4203 if (is_system_labeled()) {
4204 ASSERT(req->rq_label != NULL);
4205 clabel = req->rq_label;
4206 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
4207 "got client label from request(1)",
4208 struct svc_req *, req);
4209 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4210 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4211 cs->exi)) {
4212 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4213 if (name != nm)
4214 kmem_free(name, MAXPATHLEN + 1);
4215 kmem_free(nm, len);
4216 if (in_crit)
4217 nbl_end_crit(vp);
4218 VN_RELE(vp);
4219 if (fp) {
4220 rfs4_clear_dont_grant(fp);
4221 rfs4_file_rele(fp);
4222 }
4223 goto out;
4224 }
4225 }
4226 }
4227
4228 /* Get dir "before" change value */
4229 bdva.va_mask = AT_CTIME|AT_SEQ;
4230 error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
4231 if (error) {
4232 *cs->statusp = resp->status = puterrno4(error);
4233 if (nm != name)
4234 kmem_free(name, MAXPATHLEN + 1);
4235 kmem_free(nm, len);
4236 if (in_crit)
4237 nbl_end_crit(vp);
4238 VN_RELE(vp);
4239 if (fp) {
4240 rfs4_clear_dont_grant(fp);
4241 rfs4_file_rele(fp);
4242 }
4243 goto out;
4244 }
4245 NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
4246
4247 /* Actually do the REMOVE operation */
4248 if (vp->v_type == VDIR) {
4249 /*
4250 * Can't remove a directory that has a mounted-on filesystem.
4251 */
4252 if (vn_ismntpt(vp)) {
4253 error = EACCES;
4254 } else {
4255 /*
4256 * System V defines rmdir to return EEXIST,
4257 * not ENOTEMPTY, if the directory is not
4258 * empty. A System V NFS server needs to map
4259 * NFS4ERR_EXIST to NFS4ERR_NOTEMPTY to
4260 * transmit over the wire.
4261 */
4262 if ((error = VOP_RMDIR(dvp, name, ZONE_ROOTVP(), cs->cr,
4263 NULL, 0)) == EEXIST)
4264 error = ENOTEMPTY;
4265 }
4266 } else {
4267 if ((error = VOP_REMOVE(dvp, name, cs->cr, NULL, 0)) == 0 &&
4268 fp != NULL) {
4269 struct vattr va;
4270 vnode_t *tvp;
4271
4272 rfs4_dbe_lock(fp->rf_dbe);
4273 tvp = fp->rf_vp;
4274 if (tvp)
4275 VN_HOLD(tvp);
4276 rfs4_dbe_unlock(fp->rf_dbe);
4277
4278 if (tvp) {
4279 /*
4280 * This is va_seq safe because we are not
4281 * manipulating dvp.
4282 */
4283 va.va_mask = AT_NLINK;
4284 if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4285 va.va_nlink == 0) {
4286 /* Remove state on file remove */
4287 if (in_crit) {
4288 nbl_end_crit(vp);
4289 in_crit = 0;
4290 }
4291 rfs4_close_all_state(fp);
4292 }
4293 VN_RELE(tvp);
4294 }
4295 }
4296 }
4297
4298 if (in_crit)
4299 nbl_end_crit(vp);
4300 VN_RELE(vp);
4301
4302 if (fp) {
4303 rfs4_clear_dont_grant(fp);
4304 rfs4_file_rele(fp);
4305 }
4306 if (nm != name)
4307 kmem_free(name, MAXPATHLEN + 1);
4308 kmem_free(nm, len);
4309
4310 if (error) {
4311 *cs->statusp = resp->status = puterrno4(error);
4312 goto out;
4313 }
4314
4315 /*
4316 * Get the initial "after" sequence number, if it fails, set to zero
4317 */
4318 idva.va_mask = AT_SEQ;
4319 if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
4320 idva.va_seq = 0;
4321
4322 /*
4323 * Force modified data and metadata out to stable storage.
4324 */
4325 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
4326
4327 /*
4328 * Get "after" change value, if it fails, simply return the
4329 * before value.
4330 */
4331 adva.va_mask = AT_CTIME|AT_SEQ;
4332 if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
4333 adva.va_ctime = bdva.va_ctime;
4334 adva.va_seq = 0;
4335 }
4336
4337 NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
4338
4339 /*
4340 * The cinfo.atomic = TRUE only if we have
4341 * non-zero va_seq's, and it has incremented by exactly one
4342 * during the VOP_REMOVE/RMDIR and it didn't change during
4343 * the VOP_FSYNC.
4344 */
4345 if (bdva.va_seq && idva.va_seq && adva.va_seq &&
4346 idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
4347 resp->cinfo.atomic = TRUE;
4348 else
4349 resp->cinfo.atomic = FALSE;
4350
4351 *cs->statusp = resp->status = NFS4_OK;
4352
4353 out:
4354 DTRACE_NFSV4_2(op__remove__done, struct compound_state *, cs,
4355 REMOVE4res *, resp);
4356 }
4357
4358 /*
4359 * rename: args: SAVED_FH: from directory, CURRENT_FH: target directory,
4360 * oldname and newname.
4361 * res: status. If success - CURRENT_FH unchanged, return change_info
4362 * for both from and target directories.
4363 */
4364 /* ARGSUSED */
4365 static void
4366 rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4367 struct compound_state *cs)
4368 {
4369 RENAME4args *args = &argop->nfs_argop4_u.oprename;
4370 RENAME4res *resp = &resop->nfs_resop4_u.oprename;
4371 int error;
4372 vnode_t *odvp;
4373 vnode_t *ndvp;
4374 vnode_t *srcvp, *targvp, *tvp;
4375 struct vattr obdva, oidva, oadva;
4376 struct vattr nbdva, nidva, nadva;
4377 char *onm, *nnm;
4378 uint_t olen, nlen;
4379 rfs4_file_t *fp, *sfp;
4380 int in_crit_src, in_crit_targ;
4381 int fp_rele_grant_hold, sfp_rele_grant_hold;
4382 int unlinked;
4383 bslabel_t *clabel;
4384 struct sockaddr *ca;
4385 char *converted_onm = NULL;
4386 char *converted_nnm = NULL;
4387 nfsstat4 status;
4388
4389 DTRACE_NFSV4_2(op__rename__start, struct compound_state *, cs,
4390 RENAME4args *, args);
4391
4392 fp = sfp = NULL;
4393 srcvp = targvp = tvp = NULL;
4394 in_crit_src = in_crit_targ = 0;
4395 fp_rele_grant_hold = sfp_rele_grant_hold = 0;
4396 unlinked = 0;
4397
4398 /* CURRENT_FH: target directory */
4399 ndvp = cs->vp;
4400 if (ndvp == NULL) {
4401 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4402 goto out;
4403 }
4404
4405 /* SAVED_FH: from directory */
4406 odvp = cs->saved_vp;
4407 if (odvp == NULL) {
4408 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4409 goto out;
4410 }
4411
4412 if (cs->access == CS_ACCESS_DENIED) {
4413 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4414 goto out;
4415 }
4416
4417 /*
4418 * If there is an unshared filesystem mounted on this vnode,
4419 * do not allow to rename objects in this directory.
4420 */
4421 if (vn_ismntpt(odvp)) {
4422 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4423 goto out;
4424 }
4425
4426 /*
4427 * If there is an unshared filesystem mounted on this vnode,
4428 * do not allow to rename to this directory.
4429 */
4430 if (vn_ismntpt(ndvp)) {
4431 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4432 goto out;
4433 }
4434
4435 if (odvp->v_type != VDIR || ndvp->v_type != VDIR) {
4436 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4437 goto out;
4438 }
4439
4440 if (cs->saved_exi != cs->exi) {
4441 *cs->statusp = resp->status = NFS4ERR_XDEV;
4442 goto out;
4443 }
4444
4445 status = utf8_dir_verify(&args->oldname);
4446 if (status != NFS4_OK) {
4447 *cs->statusp = resp->status = status;
4448 goto out;
4449 }
4450
4451 status = utf8_dir_verify(&args->newname);
4452 if (status != NFS4_OK) {
4453 *cs->statusp = resp->status = status;
4454 goto out;
4455 }
4456
4457 onm = utf8_to_fn(&args->oldname, &olen, NULL);
4458 if (onm == NULL) {
4459 *cs->statusp = resp->status = NFS4ERR_INVAL;
4460 goto out;
4461 }
4462 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4463 nlen = MAXPATHLEN + 1;
4464 converted_onm = nfscmd_convname(ca, cs->exi, onm, NFSCMD_CONV_INBOUND,
4465 nlen);
4466
4467 if (converted_onm == NULL) {
4468 *cs->statusp = resp->status = NFS4ERR_INVAL;
4469 kmem_free(onm, olen);
4470 goto out;
4471 }
4472
4473 nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4474 if (nnm == NULL) {
4475 *cs->statusp = resp->status = NFS4ERR_INVAL;
4476 if (onm != converted_onm)
4477 kmem_free(converted_onm, MAXPATHLEN + 1);
4478 kmem_free(onm, olen);
4479 goto out;
4480 }
4481 converted_nnm = nfscmd_convname(ca, cs->exi, nnm, NFSCMD_CONV_INBOUND,
4482 MAXPATHLEN + 1);
4483
4484 if (converted_nnm == NULL) {
4485 *cs->statusp = resp->status = NFS4ERR_INVAL;
4486 kmem_free(nnm, nlen);
4487 nnm = NULL;
4488 if (onm != converted_onm)
4489 kmem_free(converted_onm, MAXPATHLEN + 1);
4490 kmem_free(onm, olen);
4491 goto out;
4492 }
4493
4494
4495 if (olen > MAXNAMELEN || nlen > MAXNAMELEN) {
4496 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4497 kmem_free(onm, olen);
4498 kmem_free(nnm, nlen);
4499 goto out;
4500 }
4501
4502
4503 if (rdonly4(req, cs)) {
4504 *cs->statusp = resp->status = NFS4ERR_ROFS;
4505 if (onm != converted_onm)
4506 kmem_free(converted_onm, MAXPATHLEN + 1);
4507 kmem_free(onm, olen);
4508 if (nnm != converted_nnm)
4509 kmem_free(converted_nnm, MAXPATHLEN + 1);
4510 kmem_free(nnm, nlen);
4511 goto out;
4512 }
4513
4514 /* check label of the target dir */
4515 if (is_system_labeled()) {
4516 ASSERT(req->rq_label != NULL);
4517 clabel = req->rq_label;
4518 DTRACE_PROBE2(tx__rfs4__log__info__oprename__clabel, char *,
4519 "got client label from request(1)",
4520 struct svc_req *, req);
4521 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4522 if (!do_rfs_label_check(clabel, ndvp,
4523 EQUALITY_CHECK, cs->exi)) {
4524 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4525 goto err_out;
4526 }
4527 }
4528 }
4529
4530 /*
4531 * Is the source a file and have a delegation?
4532 * We don't need to acquire va_seq before these lookups, if
4533 * it causes an update, cinfo.before will not match, which will
4534 * trigger a cache flush even if atomic is TRUE.
4535 */
4536 if (sfp = rfs4_lookup_and_findfile(odvp, converted_onm, &srcvp,
4537 &error, cs->cr)) {
4538 if (rfs4_check_delegated_byfp(FWRITE, sfp, TRUE, TRUE, TRUE,
4539 NULL)) {
4540 *cs->statusp = resp->status = NFS4ERR_DELAY;
4541 goto err_out;
4542 }
4543 }
4544
4545 if (srcvp == NULL) {
4546 *cs->statusp = resp->status = puterrno4(error);
4547 if (onm != converted_onm)
4548 kmem_free(converted_onm, MAXPATHLEN + 1);
4549 kmem_free(onm, olen);
4550 if (nnm != converted_nnm)
4551 kmem_free(converted_nnm, MAXPATHLEN + 1);
4552 kmem_free(nnm, nlen);
4553 goto out;
4554 }
4555
4556 sfp_rele_grant_hold = 1;
4557
4558 /* Does the destination exist and a file and have a delegation? */
4559 if (fp = rfs4_lookup_and_findfile(ndvp, converted_nnm, &targvp,
4560 NULL, cs->cr)) {
4561 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4562 NULL)) {
4563 *cs->statusp = resp->status = NFS4ERR_DELAY;
4564 goto err_out;
4565 }
4566 }
4567 fp_rele_grant_hold = 1;
4568
4569 /* Check for NBMAND lock on both source and target */
4570 if (nbl_need_check(srcvp)) {
4571 nbl_start_crit(srcvp, RW_READER);
4572 in_crit_src = 1;
4573 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
4574 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4575 goto err_out;
4576 }
4577 }
4578
4579 if (targvp && nbl_need_check(targvp)) {
4580 nbl_start_crit(targvp, RW_READER);
4581 in_crit_targ = 1;
4582 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
4583 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4584 goto err_out;
4585 }
4586 }
4587
4588 /* Get source "before" change value */
4589 obdva.va_mask = AT_CTIME|AT_SEQ;
4590 error = VOP_GETATTR(odvp, &obdva, 0, cs->cr, NULL);
4591 if (!error) {
4592 nbdva.va_mask = AT_CTIME|AT_SEQ;
4593 error = VOP_GETATTR(ndvp, &nbdva, 0, cs->cr, NULL);
4594 }
4595 if (error) {
4596 *cs->statusp = resp->status = puterrno4(error);
4597 goto err_out;
4598 }
4599
4600 NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.before, obdva.va_ctime)
4601 NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.before, nbdva.va_ctime)
4602
4603 error = VOP_RENAME(odvp, converted_onm, ndvp, converted_nnm, cs->cr,
4604 NULL, 0);
4605
4606 /*
4607 * If target existed and was unlinked by VOP_RENAME, state will need
4608 * closed. To avoid deadlock, rfs4_close_all_state will be done after
4609 * any necessary nbl_end_crit on srcvp and tgtvp.
4610 */
4611 if (error == 0 && fp != NULL) {
4612 rfs4_dbe_lock(fp->rf_dbe);
4613 tvp = fp->rf_vp;
4614 if (tvp)
4615 VN_HOLD(tvp);
4616 rfs4_dbe_unlock(fp->rf_dbe);
4617
4618 if (tvp) {
4619 struct vattr va;
4620 va.va_mask = AT_NLINK;
4621
4622 if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4623 va.va_nlink == 0) {
4624 unlinked = 1;
4625
4626 /* DEBUG data */
4627 if ((srcvp == targvp) || (tvp != targvp)) {
4628 cmn_err(CE_WARN, "rfs4_op_rename: "
4629 "srcvp %p, targvp: %p, tvp: %p",
4630 (void *)srcvp, (void *)targvp,
4631 (void *)tvp);
4632 }
4633 } else {
4634 VN_RELE(tvp);
4635 }
4636 }
4637 }
4638 if (error == 0)
4639 vn_renamepath(ndvp, srcvp, nnm, nlen - 1);
4640
4641 if (in_crit_src)
4642 nbl_end_crit(srcvp);
4643 if (srcvp)
4644 VN_RELE(srcvp);
4645 if (in_crit_targ)
4646 nbl_end_crit(targvp);
4647 if (targvp)
4648 VN_RELE(targvp);
4649
4650 if (unlinked) {
4651 ASSERT(fp != NULL);
4652 ASSERT(tvp != NULL);
4653
4654 /* DEBUG data */
4655 if (RW_READ_HELD(&tvp->v_nbllock)) {
4656 cmn_err(CE_WARN, "rfs4_op_rename: "
4657 "RW_READ_HELD(%p)", (void *)tvp);
4658 }
4659
4660 /* The file is gone and so should the state */
4661 rfs4_close_all_state(fp);
4662 VN_RELE(tvp);
4663 }
4664
4665 if (sfp) {
4666 rfs4_clear_dont_grant(sfp);
4667 rfs4_file_rele(sfp);
4668 }
4669 if (fp) {
4670 rfs4_clear_dont_grant(fp);
4671 rfs4_file_rele(fp);
4672 }
4673
4674 if (converted_onm != onm)
4675 kmem_free(converted_onm, MAXPATHLEN + 1);
4676 kmem_free(onm, olen);
4677 if (converted_nnm != nnm)
4678 kmem_free(converted_nnm, MAXPATHLEN + 1);
4679 kmem_free(nnm, nlen);
4680
4681 /*
4682 * Get the initial "after" sequence number, if it fails, set to zero
4683 */
4684 oidva.va_mask = AT_SEQ;
4685 if (VOP_GETATTR(odvp, &oidva, 0, cs->cr, NULL))
4686 oidva.va_seq = 0;
4687
4688 nidva.va_mask = AT_SEQ;
4689 if (VOP_GETATTR(ndvp, &nidva, 0, cs->cr, NULL))
4690 nidva.va_seq = 0;
4691
4692 /*
4693 * Force modified data and metadata out to stable storage.
4694 */
4695 (void) VOP_FSYNC(odvp, 0, cs->cr, NULL);
4696 (void) VOP_FSYNC(ndvp, 0, cs->cr, NULL);
4697
4698 if (error) {
4699 *cs->statusp = resp->status = puterrno4(error);
4700 goto out;
4701 }
4702
4703 /*
4704 * Get "after" change values, if it fails, simply return the
4705 * before value.
4706 */
4707 oadva.va_mask = AT_CTIME|AT_SEQ;
4708 if (VOP_GETATTR(odvp, &oadva, 0, cs->cr, NULL)) {
4709 oadva.va_ctime = obdva.va_ctime;
4710 oadva.va_seq = 0;
4711 }
4712
4713 nadva.va_mask = AT_CTIME|AT_SEQ;
4714 if (VOP_GETATTR(odvp, &nadva, 0, cs->cr, NULL)) {
4715 nadva.va_ctime = nbdva.va_ctime;
4716 nadva.va_seq = 0;
4717 }
4718
4719 NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.after, oadva.va_ctime)
4720 NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.after, nadva.va_ctime)
4721
4722 /*
4723 * The cinfo.atomic = TRUE only if we have
4724 * non-zero va_seq's, and it has incremented by exactly one
4725 * during the VOP_RENAME and it didn't change during the VOP_FSYNC.
4726 */
4727 if (obdva.va_seq && oidva.va_seq && oadva.va_seq &&
4728 oidva.va_seq == (obdva.va_seq + 1) && oidva.va_seq == oadva.va_seq)
4729 resp->source_cinfo.atomic = TRUE;
4730 else
4731 resp->source_cinfo.atomic = FALSE;
4732
4733 if (nbdva.va_seq && nidva.va_seq && nadva.va_seq &&
4734 nidva.va_seq == (nbdva.va_seq + 1) && nidva.va_seq == nadva.va_seq)
4735 resp->target_cinfo.atomic = TRUE;
4736 else
4737 resp->target_cinfo.atomic = FALSE;
4738
4739 #ifdef VOLATILE_FH_TEST
4740 {
4741 extern void add_volrnm_fh(struct exportinfo *, vnode_t *);
4742
4743 /*
4744 * Add the renamed file handle to the volatile rename list
4745 */
4746 if (cs->exi->exi_export.ex_flags & EX_VOLRNM) {
4747 /* file handles may expire on rename */
4748 vnode_t *vp;
4749
4750 nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4751 /*
4752 * Already know that nnm will be a valid string
4753 */
4754 error = VOP_LOOKUP(ndvp, nnm, &vp, NULL, 0, NULL, cs->cr,
4755 NULL, NULL, NULL);
4756 kmem_free(nnm, nlen);
4757 if (!error) {
4758 add_volrnm_fh(cs->exi, vp);
4759 VN_RELE(vp);
4760 }
4761 }
4762 }
4763 #endif /* VOLATILE_FH_TEST */
4764
4765 *cs->statusp = resp->status = NFS4_OK;
4766 out:
4767 DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4768 RENAME4res *, resp);
4769 return;
4770
4771 err_out:
4772 if (onm != converted_onm)
4773 kmem_free(converted_onm, MAXPATHLEN + 1);
4774 if (onm != NULL)
4775 kmem_free(onm, olen);
4776 if (nnm != converted_nnm)
4777 kmem_free(converted_nnm, MAXPATHLEN + 1);
4778 if (nnm != NULL)
4779 kmem_free(nnm, nlen);
4780
4781 if (in_crit_src) nbl_end_crit(srcvp);
4782 if (in_crit_targ) nbl_end_crit(targvp);
4783 if (targvp) VN_RELE(targvp);
4784 if (srcvp) VN_RELE(srcvp);
4785 if (sfp) {
4786 if (sfp_rele_grant_hold) rfs4_clear_dont_grant(sfp);
4787 rfs4_file_rele(sfp);
4788 }
4789 if (fp) {
4790 if (fp_rele_grant_hold) rfs4_clear_dont_grant(fp);
4791 rfs4_file_rele(fp);
4792 }
4793
4794 DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4795 RENAME4res *, resp);
4796 }
4797
4798 /* ARGSUSED */
4799 static void
4800 rfs4_op_renew(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4801 struct compound_state *cs)
4802 {
4803 RENEW4args *args = &argop->nfs_argop4_u.oprenew;
4804 RENEW4res *resp = &resop->nfs_resop4_u.oprenew;
4805 rfs4_client_t *cp;
4806
4807 DTRACE_NFSV4_2(op__renew__start, struct compound_state *, cs,
4808 RENEW4args *, args);
4809
4810 if ((cp = rfs4_findclient_by_id(args->clientid, FALSE)) == NULL) {
4811 *cs->statusp = resp->status =
4812 rfs4_check_clientid(&args->clientid, 0);
4813 goto out;
4814 }
4815
4816 if (rfs4_lease_expired(cp)) {
4817 rfs4_client_rele(cp);
4818 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
4819 goto out;
4820 }
4821
4822 rfs4_update_lease(cp);
4823
4824 mutex_enter(cp->rc_cbinfo.cb_lock);
4825 if (cp->rc_cbinfo.cb_notified_of_cb_path_down == FALSE) {
4826 cp->rc_cbinfo.cb_notified_of_cb_path_down = TRUE;
4827 *cs->statusp = resp->status = NFS4ERR_CB_PATH_DOWN;
4828 } else {
4829 *cs->statusp = resp->status = NFS4_OK;
4830 }
4831 mutex_exit(cp->rc_cbinfo.cb_lock);
4832
4833 rfs4_client_rele(cp);
4834
4835 out:
4836 DTRACE_NFSV4_2(op__renew__done, struct compound_state *, cs,
4837 RENEW4res *, resp);
4838 }
4839
4840 /* ARGSUSED */
4841 static void
4842 rfs4_op_restorefh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
4843 struct compound_state *cs)
4844 {
4845 RESTOREFH4res *resp = &resop->nfs_resop4_u.oprestorefh;
4846
4847 DTRACE_NFSV4_1(op__restorefh__start, struct compound_state *, cs);
4848
4849 /* No need to check cs->access - we are not accessing any object */
4850 if ((cs->saved_vp == NULL) || (cs->saved_fh.nfs_fh4_val == NULL)) {
4851 *cs->statusp = resp->status = NFS4ERR_RESTOREFH;
4852 goto out;
4853 }
4854 if (cs->vp != NULL) {
4855 VN_RELE(cs->vp);
4856 }
4857 cs->vp = cs->saved_vp;
4858 cs->saved_vp = NULL;
4859 cs->exi = cs->saved_exi;
4860 nfs_fh4_copy(&cs->saved_fh, &cs->fh);
4861 *cs->statusp = resp->status = NFS4_OK;
4862 cs->deleg = FALSE;
4863
4864 out:
4865 DTRACE_NFSV4_2(op__restorefh__done, struct compound_state *, cs,
4866 RESTOREFH4res *, resp);
4867 }
4868
4869 /* ARGSUSED */
4870 static void
4871 rfs4_op_savefh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4872 struct compound_state *cs)
4873 {
4874 SAVEFH4res *resp = &resop->nfs_resop4_u.opsavefh;
4875
4876 DTRACE_NFSV4_1(op__savefh__start, struct compound_state *, cs);
4877
4878 /* No need to check cs->access - we are not accessing any object */
4879 if (cs->vp == NULL) {
4880 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4881 goto out;
4882 }
4883 if (cs->saved_vp != NULL) {
4884 VN_RELE(cs->saved_vp);
4885 }
4886 cs->saved_vp = cs->vp;
4887 VN_HOLD(cs->saved_vp);
4888 cs->saved_exi = cs->exi;
4889 /*
4890 * since SAVEFH is fairly rare, don't alloc space for its fh
4891 * unless necessary.
4892 */
4893 if (cs->saved_fh.nfs_fh4_val == NULL) {
4894 cs->saved_fh.nfs_fh4_val = kmem_alloc(NFS4_FHSIZE, KM_SLEEP);
4895 }
4896 nfs_fh4_copy(&cs->fh, &cs->saved_fh);
4897 *cs->statusp = resp->status = NFS4_OK;
4898
4899 out:
4900 DTRACE_NFSV4_2(op__savefh__done, struct compound_state *, cs,
4901 SAVEFH4res *, resp);
4902 }
4903
4904 /*
4905 * rfs4_verify_attr is called when nfsv4 Setattr failed, but we wish to
4906 * return the bitmap of attrs that were set successfully. It is also
4907 * called by Verify/Nverify to test the vattr/vfsstat attrs. It should
4908 * always be called only after rfs4_do_set_attrs().
4909 *
4910 * Verify that the attributes are same as the expected ones. sargp->vap
4911 * and sargp->sbp contain the input attributes as translated from fattr4.
4912 *
4913 * This function verifies only the attrs that correspond to a vattr or
4914 * vfsstat struct. That is because of the extra step needed to get the
4915 * corresponding system structs. Other attributes have already been set or
4916 * verified by do_rfs4_set_attrs.
4917 *
4918 * Return 0 if all attrs match, -1 if some don't, error if error processing.
4919 */
4920 static int
4921 rfs4_verify_attr(struct nfs4_svgetit_arg *sargp,
4922 bitmap4 *resp, struct nfs4_ntov_table *ntovp)
4923 {
4924 int error, ret_error = 0;
4925 int i, k;
4926 uint_t sva_mask = sargp->vap->va_mask;
4927 uint_t vbit;
4928 union nfs4_attr_u *na;
4929 uint8_t *amap;
4930 bool_t getsb = ntovp->vfsstat;
4931
4932 if (sva_mask != 0) {
4933 /*
4934 * Okay to overwrite sargp->vap because we verify based
4935 * on the incoming values.
4936 */
4937 ret_error = VOP_GETATTR(sargp->cs->vp, sargp->vap, 0,
4938 sargp->cs->cr, NULL);
4939 if (ret_error) {
4940 if (resp == NULL)
4941 return (ret_error);
4942 /*
4943 * Must return bitmap of successful attrs
4944 */
4945 sva_mask = 0; /* to prevent checking vap later */
4946 } else {
4947 /*
4948 * Some file systems clobber va_mask. it is probably
4949 * wrong of them to do so, nonethless we practice
4950 * defensive coding.
4951 * See bug id 4276830.
4952 */
4953 sargp->vap->va_mask = sva_mask;
4954 }
4955 }
4956
4957 if (getsb) {
4958 /*
4959 * Now get the superblock and loop on the bitmap, as there is
4960 * no simple way of translating from superblock to bitmap4.
4961 */
4962 ret_error = VFS_STATVFS(sargp->cs->vp->v_vfsp, sargp->sbp);
4963 if (ret_error) {
4964 if (resp == NULL)
4965 goto errout;
4966 getsb = FALSE;
4967 }
4968 }
4969
4970 /*
4971 * Now loop and verify each attribute which getattr returned
4972 * whether it's the same as the input.
4973 */
4974 if (resp == NULL && !getsb && (sva_mask == 0))
4975 goto errout;
4976
4977 na = ntovp->na;
4978 amap = ntovp->amap;
4979 k = 0;
4980 for (i = 0; i < ntovp->attrcnt; i++, na++, amap++) {
4981 k = *amap;
4982 ASSERT(nfs4_ntov_map[k].nval == k);
4983 vbit = nfs4_ntov_map[k].vbit;
4984
4985 /*
4986 * If vattr attribute but VOP_GETATTR failed, or it's
4987 * superblock attribute but VFS_STATVFS failed, skip
4988 */
4989 if (vbit) {
4990 if ((vbit & sva_mask) == 0)
4991 continue;
4992 } else if (!(getsb && nfs4_ntov_map[k].vfsstat)) {
4993 continue;
4994 }
4995 error = (*nfs4_ntov_map[k].sv_getit)(NFS4ATTR_VERIT, sargp, na);
4996 if (resp != NULL) {
4997 if (error)
4998 ret_error = -1; /* not all match */
4999 else /* update response bitmap */
5000 *resp |= nfs4_ntov_map[k].fbit;
5001 continue;
5002 }
5003 if (error) {
5004 ret_error = -1; /* not all match */
5005 break;
5006 }
5007 }
5008 errout:
5009 return (ret_error);
5010 }
5011
5012 /*
5013 * Decode the attribute to be set/verified. If the attr requires a sys op
5014 * (VOP_GETATTR, VFS_VFSSTAT), and the request is to verify, then don't
5015 * call the sv_getit function for it, because the sys op hasn't yet been done.
5016 * Return 0 for success, error code if failed.
5017 *
5018 * Note: the decoded arg is not freed here but in nfs4_ntov_table_free.
5019 */
5020 static int
5021 decode_fattr4_attr(nfs4_attr_cmd_t cmd, struct nfs4_svgetit_arg *sargp,
5022 int k, XDR *xdrp, bitmap4 *resp_bval, union nfs4_attr_u *nap)
5023 {
5024 int error = 0;
5025 bool_t set_later;
5026
5027 sargp->vap->va_mask |= nfs4_ntov_map[k].vbit;
5028
5029 if ((*nfs4_ntov_map[k].xfunc)(xdrp, nap)) {
5030 set_later = nfs4_ntov_map[k].vbit || nfs4_ntov_map[k].vfsstat;
5031 /*
5032 * don't verify yet if a vattr or sb dependent attr,
5033 * because we don't have their sys values yet.
5034 * Will be done later.
5035 */
5036 if (! (set_later && (cmd == NFS4ATTR_VERIT))) {
5037 /*
5038 * ACLs are a special case, since setting the MODE
5039 * conflicts with setting the ACL. We delay setting
5040 * the ACL until all other attributes have been set.
5041 * The ACL gets set in do_rfs4_op_setattr().
5042 */
5043 if (nfs4_ntov_map[k].fbit != FATTR4_ACL_MASK) {
5044 error = (*nfs4_ntov_map[k].sv_getit)(cmd,
5045 sargp, nap);
5046 if (error) {
5047 xdr_free(nfs4_ntov_map[k].xfunc,
5048 (caddr_t)nap);
5049 }
5050 }
5051 }
5052 } else {
5053 #ifdef DEBUG
5054 cmn_err(CE_NOTE, "decode_fattr4_attr: error "
5055 "decoding attribute %d\n", k);
5056 #endif
5057 error = EINVAL;
5058 }
5059 if (!error && resp_bval && !set_later) {
5060 *resp_bval |= nfs4_ntov_map[k].fbit;
5061 }
5062
5063 return (error);
5064 }
5065
5066 /*
5067 * Set vattr based on incoming fattr4 attrs - used by setattr.
5068 * Set response mask. Ignore any values that are not writable vattr attrs.
5069 */
5070 static nfsstat4
5071 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5072 struct nfs4_svgetit_arg *sargp, struct nfs4_ntov_table *ntovp,
5073 nfs4_attr_cmd_t cmd)
5074 {
5075 int error = 0;
5076 int i;
5077 char *attrs = fattrp->attrlist4;
5078 uint32_t attrslen = fattrp->attrlist4_len;
5079 XDR xdr;
5080 nfsstat4 status = NFS4_OK;
5081 vnode_t *vp = cs->vp;
5082 union nfs4_attr_u *na;
5083 uint8_t *amap;
5084
5085 #ifndef lint
5086 /*
5087 * Make sure that maximum attribute number can be expressed as an
5088 * 8 bit quantity.
5089 */
5090 ASSERT(NFS4_MAXNUM_ATTRS <= (UINT8_MAX + 1));
5091 #endif
5092
5093 if (vp == NULL) {
5094 if (resp)
5095 *resp = 0;
5096 return (NFS4ERR_NOFILEHANDLE);
5097 }
5098 if (cs->access == CS_ACCESS_DENIED) {
5099 if (resp)
5100 *resp = 0;
5101 return (NFS4ERR_ACCESS);
5102 }
5103
5104 sargp->op = cmd;
5105 sargp->cs = cs;
5106 sargp->flag = 0; /* may be set later */
5107 sargp->vap->va_mask = 0;
5108 sargp->rdattr_error = NFS4_OK;
5109 sargp->rdattr_error_req = FALSE;
5110 /* sargp->sbp is set by the caller */
5111
5112 xdrmem_create(&xdr, attrs, attrslen, XDR_DECODE);
5113
5114 na = ntovp->na;
5115 amap = ntovp->amap;
5116
5117 /*
5118 * The following loop iterates on the nfs4_ntov_map checking
5119 * if the fbit is set in the requested bitmap.
5120 * If set then we process the arguments using the
5121 * rfs4_fattr4 conversion functions to populate the setattr
5122 * vattr and va_mask. Any settable attrs that are not using vattr
5123 * will be set in this loop.
5124 */
5125 for (i = 0; i < nfs4_ntov_map_size; i++) {
5126 if (!(fattrp->attrmask & nfs4_ntov_map[i].fbit)) {
5127 continue;
5128 }
5129 /*
5130 * If setattr, must be a writable attr.
5131 * If verify/nverify, must be a readable attr.
5132 */
5133 if ((error = (*nfs4_ntov_map[i].sv_getit)(
5134 NFS4ATTR_SUPPORTED, sargp, NULL)) != 0) {
5135 /*
5136 * Client tries to set/verify an
5137 * unsupported attribute, tries to set
5138 * a read only attr or verify a write
5139 * only one - error!
5140 */
5141 break;
5142 }
5143 /*
5144 * Decode the attribute to set/verify
5145 */
5146 error = decode_fattr4_attr(cmd, sargp, nfs4_ntov_map[i].nval,
5147 &xdr, resp ? resp : NULL, na);
5148 if (error)
5149 break;
5150 *amap++ = (uint8_t)nfs4_ntov_map[i].nval;
5151 na++;
5152 (ntovp->attrcnt)++;
5153 if (nfs4_ntov_map[i].vfsstat)
5154 ntovp->vfsstat = TRUE;
5155 }
5156
5157 if (error != 0)
5158 status = (error == ENOTSUP ? NFS4ERR_ATTRNOTSUPP :
5159 puterrno4(error));
5160 /* xdrmem_destroy(&xdrs); */ /* NO-OP */
5161 return (status);
5162 }
5163
/*
 * Worker for SETATTR (called from rfs4_op_setattr()): apply the attributes
 * described by fattrp to cs->vp and report through *resp which of the
 * requested attributes ended up set.
 *
 *   resp    - out: bitmap of attributes set.  Cleared on entry and masked
 *             with fattrp->attrmask before returning.
 *   fattrp  - requested attribute bitmap plus the encoded attribute values.
 *   cs      - compound state; supplies the target vnode (cs->vp) and the
 *             credentials (cs->cr).
 *   stateid - passed to rfs4_check_stateid(), which is only called when the
 *             size attribute is being set.
 *
 * Order of operations:
 *   1. do_rfs4_set_attrs() decodes the request into sarg/ntov.
 *   2. An ACL+MODE request may only change VSUID/VSGID/VSVTX in the mode.
 *   3. A size change on a regular file is checked against NBMAND locks and,
 *      when the caller owns the file, carried out with VOP_SPACE.
 *   4. Remaining vattr attributes are applied with VOP_SETATTR.
 *   5. The ACL, if requested, is set last.
 *   6. On failure rfs4_verify_attr() fills *resp with what did get set;
 *      on success the metadata is synced and *resp is derived from va_mask.
 *
 * Returns NFS4_OK or an NFSv4 error status.  Every exit goes through the
 * done: label, which leaves the NBMAND critical region if it was entered
 * and frees the ntov table.
 */
static nfsstat4
do_rfs4_op_setattr(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
    stateid4 *stateid)
{
	int error = 0;
	struct nfs4_svgetit_arg sarg;
	bool_t trunc;

	nfsstat4 status = NFS4_OK;
	cred_t *cr = cs->cr;
	vnode_t *vp = cs->vp;
	struct nfs4_ntov_table ntov;
	struct statvfs64 sb;
	struct vattr bva;
	struct flock64 bf;
	int in_crit = 0;
	uint_t saved_mask = 0;
	caller_context_t ct;

	*resp = 0;
	sarg.sbp = &sb;
	sarg.is_referral = B_FALSE;
	/* ntov is released by nfs4_ntov_table_free() at done: */
	nfs4_ntov_table_init(&ntov);
	status = do_rfs4_set_attrs(resp, fattrp, cs, &sarg, &ntov,
	    NFS4ATTR_SETIT);
	if (status != NFS4_OK) {
		/*
		 * failed set attrs
		 */
		goto done;
	}
	if ((sarg.vap->va_mask == 0) &&
	    (! (fattrp->attrmask & FATTR4_ACL_MASK))) {
		/*
		 * no further work to be done
		 */
		goto done;
	}

	/*
	 * If we got a request to set the ACL and the MODE, only
	 * allow changing VSUID, VSGID, and VSVTX. Attempting
	 * to change any other bits, along with setting an ACL,
	 * gives NFS4ERR_INVAL.
	 */
	if ((fattrp->attrmask & FATTR4_ACL_MASK) &&
	    (fattrp->attrmask & FATTR4_MODE_MASK)) {
		vattr_t va;

		va.va_mask = AT_MODE;
		error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
		if (error) {
			status = puterrno4(error);
			goto done;
		}
		/* XOR yields the mode bits the request would change */
		if ((sarg.vap->va_mode ^ va.va_mode) &
		    ~(VSUID | VSGID | VSVTX)) {
			status = NFS4ERR_INVAL;
			goto done;
		}
	}

	/* Check stateid only if size has been set */
	if (sarg.vap->va_mask & AT_SIZE) {
		trunc = (sarg.vap->va_size == 0);
		/*
		 * ct is handed to rfs4_check_stateid() here and used by the
		 * VOP calls below; presumably it is filled in by that call
		 * - TODO confirm against rfs4_check_stateid().
		 */
		status = rfs4_check_stateid(FWRITE, cs->vp, stateid,
		    trunc, &cs->deleg, sarg.vap->va_mask & AT_SIZE, &ct);
		if (status != NFS4_OK)
			goto done;
	} else {
		/* no stateid check was made, so set up ct by hand */
		ct.cc_sysid = 0;
		ct.cc_pid = 0;
		ct.cc_caller_id = nfs4_srv_caller_id;
		ct.cc_flags = CC_DONTBLOCK;
	}

	/* XXX start of possible race with delegations */

	/*
	 * We need to specially handle size changes because it is
	 * possible for the client to create a file with read-only
	 * modes, but with the file opened for writing. If the client
	 * then tries to set the file size, e.g. ftruncate(3C),
	 * fcntl(F_FREESP), the normal access checking done in
	 * VOP_SETATTR would prevent the client from doing it even though
	 * it should be allowed to do so. To get around this, we do the
	 * access checking for ourselves and use VOP_SPACE which doesn't
	 * do the access checking.
	 * Also the client should not be allowed to change the file
	 * size if there is a conflicting non-blocking mandatory lock in
	 * the region of the change.
	 */
	if (vp->v_type == VREG && (sarg.vap->va_mask & AT_SIZE)) {
		u_offset_t offset;
		ssize_t length;

		/*
		 * ufs_setattr clears AT_SIZE from vap->va_mask, but
		 * before returning, sarg.vap->va_mask is used to
		 * generate the setattr reply bitmap. We also clear
		 * AT_SIZE below before calling VOP_SPACE. For both
		 * of these cases, the va_mask needs to be saved here
		 * and restored after calling VOP_SETATTR.
		 */
		saved_mask = sarg.vap->va_mask;

		/*
		 * Check any possible conflict due to NBMAND locks.
		 * Get into critical region before VOP_GETATTR, so the
		 * size attribute is valid when checking conflicts.
		 */
		if (nbl_need_check(vp)) {
			nbl_start_crit(vp, RW_READER);
			in_crit = 1;
		}

		bva.va_mask = AT_UID|AT_SIZE;
		if (error = VOP_GETATTR(vp, &bva, 0, cr, &ct)) {
			status = puterrno4(error);
			/* the critical region, if entered, is left at done: */
			goto done;
		}

		if (in_crit) {
			/*
			 * The region to check runs between the current
			 * size and the requested size, whichever is lower
			 * being the start.
			 */
			if (sarg.vap->va_size < bva.va_size) {
				offset = sarg.vap->va_size;
				length = bva.va_size - sarg.vap->va_size;
			} else {
				offset = bva.va_size;
				length = sarg.vap->va_size - bva.va_size;
			}
			if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
			    &ct)) {
				status = NFS4ERR_LOCKED;
				goto done;
			}
		}

		/*
		 * The caller owns the file: change the size with VOP_SPACE
		 * and take AT_SIZE out of the mask so that the VOP_SETATTR
		 * below does not handle the size as well.
		 */
		if (crgetuid(cr) == bva.va_uid) {
			sarg.vap->va_mask &= ~AT_SIZE;
			bf.l_type = F_WRLCK;
			bf.l_whence = 0;
			bf.l_start = (off64_t)sarg.vap->va_size;
			bf.l_len = 0;
			bf.l_sysid = 0;
			bf.l_pid = 0;
			error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
			    (offset_t)sarg.vap->va_size, cr, &ct);
		}
	}

	/* skipped when VOP_SPACE failed or nothing is left in the mask */
	if (!error && sarg.vap->va_mask != 0)
		error = VOP_SETATTR(vp, sarg.vap, sarg.flag, cr, &ct);

	/* restore va_mask -- ufs_setattr clears AT_SIZE */
	if (saved_mask & AT_SIZE)
		sarg.vap->va_mask |= AT_SIZE;

	/*
	 * If an ACL was being set, it has been delayed until now,
	 * in order to set the mode (via the VOP_SETATTR() above) first.
	 */
	if ((! error) && (fattrp->attrmask & FATTR4_ACL_MASK)) {
		int i;

		/*
		 * Find the slot holding the decoded ACL.
		 * NOTE(review): the scan covers all NFS4_MAXNUM_ATTRS slots
		 * rather than just the first ntov.attrcnt - confirm that
		 * amap slots past attrcnt cannot spuriously hold FATTR4_ACL.
		 */
		for (i = 0; i < NFS4_MAXNUM_ATTRS; i++)
			if (ntov.amap[i] == FATTR4_ACL)
				break;
		if (i < NFS4_MAXNUM_ATTRS) {
			error = (*nfs4_ntov_map[FATTR4_ACL].sv_getit)(
			    NFS4ATTR_SETIT, &sarg, &ntov.na[i]);
			if (error == 0) {
				*resp |= FATTR4_ACL_MASK;
			} else if (error == ENOTSUP) {
				/* report what was set before the ACL failed */
				(void) rfs4_verify_attr(&sarg, resp, &ntov);
				status = NFS4ERR_ATTRNOTSUPP;
				goto done;
			}
			/* any other error is handled by the block below */
		} else {
			NFS4_DEBUG(rfs4_debug,
			    (CE_NOTE, "do_rfs4_op_setattr: "
			    "unable to find ACL in fattr4"));
			error = EINVAL;
		}
	}

	if (error) {
		/* check if a monitor detected a delegation conflict */
		if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
			status = NFS4ERR_DELAY;
		else
			status = puterrno4(error);

		/*
		 * Set the response bitmap when setattr failed.
		 * If VOP_SETATTR partially succeeded, test by doing a
		 * VOP_GETATTR on the object and comparing the data
		 * to the setattr arguments.
		 */
		(void) rfs4_verify_attr(&sarg, resp, &ntov);
	} else {
		/*
		 * Force modified metadata out to stable storage.
		 */
		(void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
		/*
		 * Set response bitmap
		 */
		nfs4_vmask_to_nmask_set(sarg.vap->va_mask, resp);
	}

	/* Return early and already have a NFSv4 error */
done:
	/*
	 * Except for nfs4_vmask_to_nmask_set(), vattr --> fattr
	 * conversion sets both readable and writeable NFS4 attrs
	 * for AT_MTIME and AT_ATIME. The line below masks out
	 * unrequested attrs from the setattr result bitmap. This
	 * is placed after the done: label to catch the ATTRNOTSUP
	 * case.
	 */
	*resp &= fattrp->attrmask;

	if (in_crit)
		nbl_end_crit(vp);

	nfs4_ntov_table_free(&ntov, &sarg);

	return (status);
}
5393
5394 /* ARGSUSED */
5395 static void
5396 rfs4_op_setattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5397 struct compound_state *cs)
5398 {
5399 SETATTR4args *args = &argop->nfs_argop4_u.opsetattr;
5400 SETATTR4res *resp = &resop->nfs_resop4_u.opsetattr;
5401 bslabel_t *clabel;
5402
5403 DTRACE_NFSV4_2(op__setattr__start, struct compound_state *, cs,
5404 SETATTR4args *, args);
5405
5406 if (cs->vp == NULL) {
5407 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5408 goto out;
5409 }
5410
5411 /*
5412 * If there is an unshared filesystem mounted on this vnode,
5413 * do not allow to setattr on this vnode.
5414 */
5415 if (vn_ismntpt(cs->vp)) {
5416 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5417 goto out;
5418 }
5419
5420 resp->attrsset = 0;
5421
5422 if (rdonly4(req, cs)) {
5423 *cs->statusp = resp->status = NFS4ERR_ROFS;
5424 goto out;
5425 }
5426
5427 /* check label before setting attributes */
5428 if (is_system_labeled()) {
5429 ASSERT(req->rq_label != NULL);
5430 clabel = req->rq_label;
5431 DTRACE_PROBE2(tx__rfs4__log__info__opsetattr__clabel, char *,
5432 "got client label from request(1)",
5433 struct svc_req *, req);
5434 if (!blequal(&l_admin_low->tsl_label, clabel)) {
5435 if (!do_rfs_label_check(clabel, cs->vp,
5436 EQUALITY_CHECK, cs->exi)) {
5437 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5438 goto out;
5439 }
5440 }
5441 }
5442
5443 *cs->statusp = resp->status =
5444 do_rfs4_op_setattr(&resp->attrsset, &args->obj_attributes, cs,
5445 &args->stateid);
5446
5447 out:
5448 DTRACE_NFSV4_2(op__setattr__done, struct compound_state *, cs,
5449 SETATTR4res *, resp);
5450 }
5451
5452 /* ARGSUSED */
5453 static void
5454 rfs4_op_verify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5455 struct compound_state *cs)
5456 {
5457 /*
5458 * verify and nverify are exactly the same, except that nverify
5459 * succeeds when some argument changed, and verify succeeds when
5460 * when none changed.
5461 */
5462
5463 VERIFY4args *args = &argop->nfs_argop4_u.opverify;
5464 VERIFY4res *resp = &resop->nfs_resop4_u.opverify;
5465
5466 int error;
5467 struct nfs4_svgetit_arg sarg;
5468 struct statvfs64 sb;
5469 struct nfs4_ntov_table ntov;
5470
5471 DTRACE_NFSV4_2(op__verify__start, struct compound_state *, cs,
5472 VERIFY4args *, args);
5473
5474 if (cs->vp == NULL) {
5475 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5476 goto out;
5477 }
5478
5479 sarg.sbp = &sb;
5480 sarg.is_referral = B_FALSE;
5481 nfs4_ntov_table_init(&ntov);
5482 resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5483 &sarg, &ntov, NFS4ATTR_VERIT);
5484 if (resp->status != NFS4_OK) {
5485 /*
5486 * do_rfs4_set_attrs will try to verify systemwide attrs,
5487 * so could return -1 for "no match".
5488 */
5489 if (resp->status == -1)
5490 resp->status = NFS4ERR_NOT_SAME;
5491 goto done;
5492 }
5493 error = rfs4_verify_attr(&sarg, NULL, &ntov);
5494 switch (error) {
5495 case 0:
5496 resp->status = NFS4_OK;
5497 break;
5498 case -1:
5499 resp->status = NFS4ERR_NOT_SAME;
5500 break;
5501 default:
5502 resp->status = puterrno4(error);
5503 break;
5504 }
5505 done:
5506 *cs->statusp = resp->status;
5507 nfs4_ntov_table_free(&ntov, &sarg);
5508 out:
5509 DTRACE_NFSV4_2(op__verify__done, struct compound_state *, cs,
5510 VERIFY4res *, resp);
5511 }
5512
5513 /* ARGSUSED */
5514 static void
5515 rfs4_op_nverify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5516 struct compound_state *cs)
5517 {
5518 /*
5519 * verify and nverify are exactly the same, except that nverify
5520 * succeeds when some argument changed, and verify succeeds when
5521 * when none changed.
5522 */
5523
5524 NVERIFY4args *args = &argop->nfs_argop4_u.opnverify;
5525 NVERIFY4res *resp = &resop->nfs_resop4_u.opnverify;
5526
5527 int error;
5528 struct nfs4_svgetit_arg sarg;
5529 struct statvfs64 sb;
5530 struct nfs4_ntov_table ntov;
5531
5532 DTRACE_NFSV4_2(op__nverify__start, struct compound_state *, cs,
5533 NVERIFY4args *, args);
5534
5535 if (cs->vp == NULL) {
5536 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5537 DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5538 NVERIFY4res *, resp);
5539 return;
5540 }
5541 sarg.sbp = &sb;
5542 sarg.is_referral = B_FALSE;
5543 nfs4_ntov_table_init(&ntov);
5544 resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5545 &sarg, &ntov, NFS4ATTR_VERIT);
5546 if (resp->status != NFS4_OK) {
5547 /*
5548 * do_rfs4_set_attrs will try to verify systemwide attrs,
5549 * so could return -1 for "no match".
5550 */
5551 if (resp->status == -1)
5552 resp->status = NFS4_OK;
5553 goto done;
5554 }
5555 error = rfs4_verify_attr(&sarg, NULL, &ntov);
5556 switch (error) {
5557 case 0:
5558 resp->status = NFS4ERR_SAME;
5559 break;
5560 case -1:
5561 resp->status = NFS4_OK;
5562 break;
5563 default:
5564 resp->status = puterrno4(error);
5565 break;
5566 }
5567 done:
5568 *cs->statusp = resp->status;
5569 nfs4_ntov_table_free(&ntov, &sarg);
5570
5571 DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5572 NVERIFY4res *, resp);
5573 }
5574
/*
 * XXX - This should live in an NFS header file.
 *
 * Number of entries in the on-stack iovec array used by rfs4_op_write().
 * A write whose mblk chain needs more iovecs than this gets its array
 * from kmem_alloc() instead.
 */
#define MAX_IOVECS 12
5579
5580 /* ARGSUSED */
5581 static void
5582 rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5583 struct compound_state *cs)
5584 {
5585 WRITE4args *args = &argop->nfs_argop4_u.opwrite;
5586 WRITE4res *resp = &resop->nfs_resop4_u.opwrite;
5587 int error;
5588 vnode_t *vp;
5589 struct vattr bva;
5590 u_offset_t rlimit;
5591 struct uio uio;
5592 struct iovec iov[MAX_IOVECS];
5593 struct iovec *iovp;
5594 int iovcnt;
5595 int ioflag;
5596 cred_t *savecred, *cr;
5597 bool_t *deleg = &cs->deleg;
5598 nfsstat4 stat;
5599 int in_crit = 0;
5600 caller_context_t ct;
5601 nfs4_srv_t *nsrv4;
5602
5603 DTRACE_NFSV4_2(op__write__start, struct compound_state *, cs,
5604 WRITE4args *, args);
5605
5606 vp = cs->vp;
5607 if (vp == NULL) {
5608 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5609 goto out;
5610 }
5611 if (cs->access == CS_ACCESS_DENIED) {
5612 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5613 goto out;
5614 }
5615
5616 cr = cs->cr;
5617
5618 if ((stat = rfs4_check_stateid(FWRITE, vp, &args->stateid, FALSE,
5619 deleg, TRUE, &ct)) != NFS4_OK) {
5620 *cs->statusp = resp->status = stat;
5621 goto out;
5622 }
5623
5624 /*
5625 * We have to enter the critical region before calling VOP_RWLOCK
5626 * to avoid a deadlock with ufs.
5627 */
5628 if (nbl_need_check(vp)) {
5629 nbl_start_crit(vp, RW_READER);
5630 in_crit = 1;
5631 if (nbl_conflict(vp, NBL_WRITE,
5632 args->offset, args->data_len, 0, &ct)) {
5633 *cs->statusp = resp->status = NFS4ERR_LOCKED;
5634 goto out;
5635 }
5636 }
5637
5638 bva.va_mask = AT_MODE | AT_UID;
5639 error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
5640
5641 /*
5642 * If we can't get the attributes, then we can't do the
5643 * right access checking. So, we'll fail the request.
5644 */
5645 if (error) {
5646 *cs->statusp = resp->status = puterrno4(error);
5647 goto out;
5648 }
5649
5650 if (rdonly4(req, cs)) {
5651 *cs->statusp = resp->status = NFS4ERR_ROFS;
5652 goto out;
5653 }
5654
5655 if (vp->v_type != VREG) {
5656 *cs->statusp = resp->status =
5657 ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
5658 goto out;
5659 }
5660
5661 if (crgetuid(cr) != bva.va_uid &&
5662 (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct))) {
5663 *cs->statusp = resp->status = puterrno4(error);
5664 goto out;
5665 }
5666
5667 if (MANDLOCK(vp, bva.va_mode)) {
5668 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5669 goto out;
5670 }
5671
5672 nsrv4 = nfs4_get_srv();
5673 if (args->data_len == 0) {
5674 *cs->statusp = resp->status = NFS4_OK;
5675 resp->count = 0;
5676 resp->committed = args->stable;
5677 resp->writeverf = nsrv4->write4verf;
5678 goto out;
5679 }
5680
5681 if (args->mblk != NULL) {
5682 mblk_t *m;
5683 uint_t bytes, round_len;
5684
5685 iovcnt = 0;
5686 bytes = 0;
5687 round_len = roundup(args->data_len, BYTES_PER_XDR_UNIT);
5688 for (m = args->mblk;
5689 m != NULL && bytes < round_len;
5690 m = m->b_cont) {
5691 iovcnt++;
5692 bytes += MBLKL(m);
5693 }
5694 #ifdef DEBUG
5695 /* should have ended on an mblk boundary */
5696 if (bytes != round_len) {
5697 printf("bytes=0x%x, round_len=0x%x, req len=0x%x\n",
5698 bytes, round_len, args->data_len);
5699 printf("args=%p, args->mblk=%p, m=%p", (void *)args,
5700 (void *)args->mblk, (void *)m);
5701 ASSERT(bytes == round_len);
5702 }
5703 #endif
5704 if (iovcnt <= MAX_IOVECS) {
5705 iovp = iov;
5706 } else {
5707 iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
5708 }
5709 mblk_to_iov(args->mblk, iovcnt, iovp);
5710 } else if (args->rlist != NULL) {
5711 iovcnt = 1;
5712 iovp = iov;
5713 iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
5714 iovp->iov_len = args->data_len;
5715 } else {
5716 iovcnt = 1;
5717 iovp = iov;
5718 iovp->iov_base = args->data_val;
5719 iovp->iov_len = args->data_len;
5720 }
5721
5722 uio.uio_iov = iovp;
5723 uio.uio_iovcnt = iovcnt;
5724
5725 uio.uio_segflg = UIO_SYSSPACE;
5726 uio.uio_extflg = UIO_COPY_DEFAULT;
5727 uio.uio_loffset = args->offset;
5728 uio.uio_resid = args->data_len;
5729 uio.uio_llimit = curproc->p_fsz_ctl;
5730 rlimit = uio.uio_llimit - args->offset;
5731 if (rlimit < (u_offset_t)uio.uio_resid)
5732 uio.uio_resid = (int)rlimit;
5733
5734 if (args->stable == UNSTABLE4)
5735 ioflag = 0;
5736 else if (args->stable == FILE_SYNC4)
5737 ioflag = FSYNC;
5738 else if (args->stable == DATA_SYNC4)
5739 ioflag = FDSYNC;
5740 else {
5741 if (iovp != iov)
5742 kmem_free(iovp, sizeof (*iovp) * iovcnt);
5743 *cs->statusp = resp->status = NFS4ERR_INVAL;
5744 goto out;
5745 }
5746
5747 /*
5748 * We're changing creds because VM may fault and we need
5749 * the cred of the current thread to be used if quota
5750 * checking is enabled.
5751 */
5752 savecred = curthread->t_cred;
5753 curthread->t_cred = cr;
5754 error = do_io(FWRITE, vp, &uio, ioflag, cr, &ct);
5755 curthread->t_cred = savecred;
5756
5757 if (iovp != iov)
5758 kmem_free(iovp, sizeof (*iovp) * iovcnt);
5759
5760 if (error) {
5761 *cs->statusp = resp->status = puterrno4(error);
5762 goto out;
5763 }
5764
5765 *cs->statusp = resp->status = NFS4_OK;
5766 resp->count = args->data_len - uio.uio_resid;
5767
5768 if (ioflag == 0)
5769 resp->committed = UNSTABLE4;
5770 else
5771 resp->committed = FILE_SYNC4;
5772
5773 resp->writeverf = nsrv4->write4verf;
5774
5775 out:
5776 if (in_crit)
5777 nbl_end_crit(vp);
5778
5779 DTRACE_NFSV4_2(op__write__done, struct compound_state *, cs,
5780 WRITE4res *, resp);
5781 }
5782
5783
/* XXX put in a header file */
extern int sec_svc_getcred(struct svc_req *, cred_t *, caddr_t *, int *);

/*
 * rfs4_compound: execute an NFSv4 COMPOUND request.
 *
 * args	- the decoded COMPOUND arguments (tag, minor version, op array)
 * resp	- the reply to fill in; resp->tag and resp->array are allocated
 *	  here and are not freed by this function
 * exi	- asserted to be NULL
 * req	- the RPC request
 * cr	- asserted to be NULL; a fresh cred is obtained with crget()
 * rv	- optional; set to 0 on entry and to 1 if the caller's credentials
 *	  could not be obtained (in which case svcerr_badcred() has been
 *	  called)
 *
 * Each operation is dispatched through rfsv4disptab[] in order until one
 * of them leaves a status other than NFS4_OK in resp->status.  If processing
 * stops before the last operation, the result array is shrunk to the number
 * of operations actually executed.
 */
void
rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi,
    struct svc_req *req, cred_t *cr, int *rv)
{
	uint_t i;
	struct compound_state cs;
	nfs4_srv_t *nsrv4;
	nfs_export_t *ne = nfs_get_export();

	if (rv != NULL)
		*rv = 0;
	rfs4_init_compound_state(&cs);
	/*
	 * Form a reply tag by copying over the request tag.
	 */
	resp->tag.utf8string_val =
	    kmem_alloc(args->tag.utf8string_len, KM_SLEEP);
	resp->tag.utf8string_len = args->tag.utf8string_len;
	bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
	    resp->tag.utf8string_len);

	/* Operation handlers report their status through cs.statusp. */
	cs.statusp = &resp->status;
	cs.req = req;
	resp->array = NULL;
	resp->array_len = 0;

	/*
	 * XXX for now, minorversion should be zero
	 */
	if (args->minorversion != NFS4_MINORVERSION) {
		DTRACE_NFSV4_2(compound__start, struct compound_state *,
		    &cs, COMPOUND4args *, args);
		resp->status = NFS4ERR_MINOR_VERS_MISMATCH;
		DTRACE_NFSV4_2(compound__done, struct compound_state *,
		    &cs, COMPOUND4res *, resp);
		return;
	}

	/* An empty compound succeeds trivially; no probes fire for it. */
	if (args->array_len == 0) {
		resp->status = NFS4_OK;
		return;
	}

	ASSERT(exi == NULL);
	ASSERT(cr == NULL);

	cr = crget();
	ASSERT(cr != NULL);

	/*
	 * Fill in the cred, principal and flavor from the request.  On
	 * failure the cred is released, the RPC layer is told the
	 * credentials were bad, and *rv is set so the caller can tell.
	 */
	if (sec_svc_getcred(req, cr, &cs.principal, &cs.nfsflavor) == 0) {
		DTRACE_NFSV4_2(compound__start, struct compound_state *,
		    &cs, COMPOUND4args *, args);
		crfree(cr);
		DTRACE_NFSV4_2(compound__done, struct compound_state *,
		    &cs, COMPOUND4res *, resp);
		svcerr_badcred(req->rq_xprt);
		if (rv != NULL)
			*rv = 1;
		return;
	}
	/* One zeroed result slot per requested operation. */
	resp->array_len = args->array_len;
	resp->array = kmem_zalloc(args->array_len * sizeof (nfs_resop4),
	    KM_SLEEP);

	cs.basecr = cr;
	nsrv4 = nfs4_get_srv();

	DTRACE_NFSV4_2(compound__start, struct compound_state *, &cs,
	    COMPOUND4args *, args);

	/*
	 * For now, NFS4 compound processing must be protected by
	 * exported_lock because it can access more than one exportinfo
	 * per compound and share/unshare can now change multiple
	 * exinfo structs.  The NFS2/3 code only refs 1 exportinfo
	 * per proc (excluding public exinfo), and exi_count design
	 * is sufficient to protect concurrent execution of NFS2/3
	 * ops along with unexport.  This lock will be removed as
	 * part of the NFSv4 phase 2 namespace redesign work.
	 */
	rw_enter(&ne->exported_lock, RW_READER);

	/*
	 * If this is the first compound we've seen, we need to start all
	 * new instances' grace periods.
	 */
	if (nsrv4->seen_first_compound == 0) {
		rfs4_grace_start_new(nsrv4);
		/*
		 * This must be set after rfs4_grace_start_new(), otherwise
		 * another thread could proceed past here before the former
		 * is finished.
		 */
		nsrv4->seen_first_compound = 1;
	}

	/* Run the operations in order until one fails (cs.cont cleared). */
	for (i = 0; i < args->array_len && cs.cont; i++) {
		nfs_argop4 *argop;
		nfs_resop4 *resop;
		uint_t op;

		argop = &args->array[i];
		resop = &resp->array[i];
		resop->resop = argop->argop;
		op = (uint_t)resop->resop;

		if (op < rfsv4disp_cnt) {
			/*
			 * Count the individual ops here; NULL and COMPOUND
			 * are counted in common_dispatch()
			 */
			rfsproccnt_v4_ptr[op].value.ui64++;

			NFS4_DEBUG(rfs4_debug > 1,
			    (CE_NOTE, "Executing %s", rfs4_op_string[op]));
			(*rfsv4disptab[op].dis_proc)(argop, resop, req, &cs);
			NFS4_DEBUG(rfs4_debug > 1, (CE_NOTE, "%s returned %d",
			    rfs4_op_string[op], *cs.statusp));
			if (*cs.statusp != NFS4_OK)
				cs.cont = FALSE;
		} else {
			/*
			 * This is effectively dead code since XDR code
			 * will have already returned BADXDR if op doesn't
			 * decode to legal value.  This only done for a
			 * day when XDR code doesn't verify v4 opcodes.
			 */
			op = OP_ILLEGAL;
			rfsproccnt_v4_ptr[OP_ILLEGAL_IDX].value.ui64++;

			rfs4_op_illegal(argop, resop, req, &cs);
			cs.cont = FALSE;
		}

		/*
		 * If not at last op, and if we are to stop, then
		 * compact the results array.
		 */
		if ((i + 1) < args->array_len && !cs.cont) {
			nfs_resop4 *new_res = kmem_alloc(
			    (i+1) * sizeof (nfs_resop4), KM_SLEEP);
			bcopy(resp->array,
			    new_res, (i+1) * sizeof (nfs_resop4));
			kmem_free(resp->array,
			    args->array_len * sizeof (nfs_resop4));

			resp->array_len = i + 1;
			resp->array = new_res;
		}
	}

	rw_exit(&ne->exported_lock);

	/*
	 * clear exportinfo and vnode fields from compound_state before dtrace
	 * probe, to avoid tracing residual values for path and share path.
	 */
	if (cs.vp)
		VN_RELE(cs.vp);
	if (cs.saved_vp)
		VN_RELE(cs.saved_vp);
	cs.exi = cs.saved_exi = NULL;
	cs.vp = cs.saved_vp = NULL;

	DTRACE_NFSV4_2(compound__done, struct compound_state *, &cs,
	    COMPOUND4res *, resp);

	/* Release whatever the compound state still owns. */
	if (cs.saved_fh.nfs_fh4_val)
		kmem_free(cs.saved_fh.nfs_fh4_val, NFS4_FHSIZE);

	if (cs.basecr)
		crfree(cs.basecr);
	if (cs.cr)
		crfree(cs.cr);
	/*
	 * done with this compound request, free the label
	 */

	if (req->rq_label != NULL) {
		kmem_free(req->rq_label, sizeof (bslabel_t));
		req->rq_label = NULL;
	}
}
5970
5971 /*
5972 * XXX because of what appears to be duplicate calls to rfs4_compound_free
5973 * XXX zero out the tag and array values. Need to investigate why the
5974 * XXX calls occur, but at least prevent the panic for now.
5975 */
5976 void
5977 rfs4_compound_free(COMPOUND4res *resp)
5978 {
5979 uint_t i;
5980
5981 if (resp->tag.utf8string_val) {
5982 UTF8STRING_FREE(resp->tag)
5983 }
5984
5985 for (i = 0; i < resp->array_len; i++) {
5986 nfs_resop4 *resop;
5987 uint_t op;
5988
5989 resop = &resp->array[i];
5990 op = (uint_t)resop->resop;
5991 if (op < rfsv4disp_cnt) {
5992 (*rfsv4disptab[op].dis_resfree)(resop);
5993 }
5994 }
5995 if (resp->array != NULL) {
5996 kmem_free(resp->array, resp->array_len * sizeof (nfs_resop4));
5997 }
5998 }
5999
6000 /*
6001 * Process the value of the compound request rpc flags, as a bit-AND
6002 * of the individual per-op flags (idempotent, allowork, publicfh_ok)
6003 */
6004 void
6005 rfs4_compound_flagproc(COMPOUND4args *args, int *flagp)
6006 {
6007 int i;
6008 int flag = RPC_ALL;
6009
6010 for (i = 0; flag && i < args->array_len; i++) {
6011 uint_t op;
6012
6013 op = (uint_t)args->array[i].argop;
6014
6015 if (op < rfsv4disp_cnt)
6016 flag &= rfsv4disptab[op].dis_flags;
6017 else
6018 flag = 0;
6019 }
6020 *flagp = flag;
6021 }
6022
6023 nfsstat4
6024 rfs4_client_sysid(rfs4_client_t *cp, sysid_t *sp)
6025 {
6026 nfsstat4 e;
6027
6028 rfs4_dbe_lock(cp->rc_dbe);
6029
6030 if (cp->rc_sysidt != LM_NOSYSID) {
6031 *sp = cp->rc_sysidt;
6032 e = NFS4_OK;
6033
6034 } else if ((cp->rc_sysidt = lm_alloc_sysidt()) != LM_NOSYSID) {
6035 *sp = cp->rc_sysidt;
6036 e = NFS4_OK;
6037
6038 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
6039 "rfs4_client_sysid: allocated 0x%x\n", *sp));
6040 } else
6041 e = NFS4ERR_DELAY;
6042
6043 rfs4_dbe_unlock(cp->rc_dbe);
6044 return (e);
6045 }
6046
#if defined(DEBUG) && ! defined(lint)
/*
 * lock_print: debug helper that logs a one-line description of a lock
 * request via cmn_err(CE_NOTE).
 *
 * str       - prefix for the message
 * operation - the fcntl-style command (F_GETLK, F_SETLK, F_SETLK_NBMAND)
 * flk       - the lock being described; l_whence is asserted to be 0
 *
 * A zero l_len is printed as all ones.
 */
static void
lock_print(char *str, int operation, struct flock64 *flk)
{
	char *opname;
	char *tname;
	longlong_t length;

	if (operation == F_GETLK)
		opname = "F_GETLK";
	else if (operation == F_SETLK)
		opname = "F_SETLK";
	else if (operation == F_SETLK_NBMAND)
		opname = "F_SETLK_NBMAND";
	else
		opname = "F_UNKNOWN";

	if (flk->l_type == F_UNLCK)
		tname = "F_UNLCK";
	else if (flk->l_type == F_RDLCK)
		tname = "F_RDLCK";
	else if (flk->l_type == F_WRLCK)
		tname = "F_WRLCK";
	else
		tname = "F_UNKNOWN";

	ASSERT(flk->l_whence == 0);

	length = flk->l_len ? (longlong_t)flk->l_len : ~0LL;
	cmn_err(CE_NOTE, "%s: %s, type = %s, off = %llx len = %llx pid = %d",
	    str, opname, tname, (longlong_t)flk->l_start, length,
	    flk->l_pid);
}

/* Print the lock only when the debug condition "d" is true. */
#define	LOCK_PRINT(d, s, t, f) if (d) lock_print(s, t, f)
#else
/* Compiles away entirely in non-DEBUG (or lint) builds. */
#define	LOCK_PRINT(d, s, t, f)
#endif
6083
/*
 * creds_ok: credential check hook.
 *
 * Currently a stub: none of the arguments are examined and TRUE is always
 * returned.
 */
/*ARGSUSED*/
static bool_t
creds_ok(cred_set_t cr_set, struct svc_req *req, struct compound_state *cs)
{
	return (TRUE);
}
6090
6091 /*
6092 * Look up the pathname using the vp in cs as the directory vnode.
6093 * cs->vp will be the vnode for the file on success
6094 */
6095
6096 static nfsstat4
6097 rfs4_lookup(component4 *component, struct svc_req *req,
6098 struct compound_state *cs)
6099 {
6100 char *nm;
6101 uint32_t len;
6102 nfsstat4 status;
6103 struct sockaddr *ca;
6104 char *name;
6105
6106 if (cs->vp == NULL) {
6107 return (NFS4ERR_NOFILEHANDLE);
6108 }
6109 if (cs->vp->v_type != VDIR) {
6110 return (NFS4ERR_NOTDIR);
6111 }
6112
6113 status = utf8_dir_verify(component);
6114 if (status != NFS4_OK)
6115 return (status);
6116
6117 nm = utf8_to_fn(component, &len, NULL);
6118 if (nm == NULL) {
6119 return (NFS4ERR_INVAL);
6120 }
6121
6122 if (len > MAXNAMELEN) {
6123 kmem_free(nm, len);
6124 return (NFS4ERR_NAMETOOLONG);
6125 }
6126
6127 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6128 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6129 MAXPATHLEN + 1);
6130
6131 if (name == NULL) {
6132 kmem_free(nm, len);
6133 return (NFS4ERR_INVAL);
6134 }
6135
6136 status = do_rfs4_op_lookup(name, req, cs);
6137
6138 if (name != nm)
6139 kmem_free(name, MAXPATHLEN + 1);
6140
6141 kmem_free(nm, len);
6142
6143 return (status);
6144 }
6145
6146 static nfsstat4
6147 rfs4_lookupfile(component4 *component, struct svc_req *req,
6148 struct compound_state *cs, uint32_t access, change_info4 *cinfo)
6149 {
6150 nfsstat4 status;
6151 vnode_t *dvp = cs->vp;
6152 vattr_t bva, ava, fva;
6153 int error;
6154
6155 /* Get "before" change value */
6156 bva.va_mask = AT_CTIME|AT_SEQ;
6157 error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6158 if (error)
6159 return (puterrno4(error));
6160
6161 /* rfs4_lookup may VN_RELE directory */
6162 VN_HOLD(dvp);
6163
6164 status = rfs4_lookup(component, req, cs);
6165 if (status != NFS4_OK) {
6166 VN_RELE(dvp);
6167 return (status);
6168 }
6169
6170 /*
6171 * Get "after" change value, if it fails, simply return the
6172 * before value.
6173 */
6174 ava.va_mask = AT_CTIME|AT_SEQ;
6175 if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6176 ava.va_ctime = bva.va_ctime;
6177 ava.va_seq = 0;
6178 }
6179 VN_RELE(dvp);
6180
6181 /*
6182 * Validate the file is a file
6183 */
6184 fva.va_mask = AT_TYPE|AT_MODE;
6185 error = VOP_GETATTR(cs->vp, &fva, 0, cs->cr, NULL);
6186 if (error)
6187 return (puterrno4(error));
6188
6189 if (fva.va_type != VREG) {
6190 if (fva.va_type == VDIR)
6191 return (NFS4ERR_ISDIR);
6192 if (fva.va_type == VLNK)
6193 return (NFS4ERR_SYMLINK);
6194 return (NFS4ERR_INVAL);
6195 }
6196
6197 NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime);
6198 NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6199
6200 /*
6201 * It is undefined if VOP_LOOKUP will change va_seq, so
6202 * cinfo.atomic = TRUE only if we have
6203 * non-zero va_seq's, and they have not changed.
6204 */
6205 if (bva.va_seq && ava.va_seq && ava.va_seq == bva.va_seq)
6206 cinfo->atomic = TRUE;
6207 else
6208 cinfo->atomic = FALSE;
6209
6210 /* Check for mandatory locking */
6211 cs->mandlock = MANDLOCK(cs->vp, fva.va_mode);
6212 return (check_open_access(access, cs, req));
6213 }
6214
6215 static nfsstat4
6216 create_vnode(vnode_t *dvp, char *nm, vattr_t *vap, createmode4 mode,
6217 cred_t *cr, vnode_t **vpp, bool_t *created)
6218 {
6219 int error;
6220 nfsstat4 status = NFS4_OK;
6221 vattr_t va;
6222
6223 tryagain:
6224
6225 /*
6226 * The file open mode used is VWRITE. If the client needs
6227 * some other semantic, then it should do the access checking
6228 * itself. It would have been nice to have the file open mode
6229 * passed as part of the arguments.
6230 */
6231
6232 *created = TRUE;
6233 error = VOP_CREATE(dvp, nm, vap, EXCL, VWRITE, vpp, cr, 0, NULL, NULL);
6234
6235 if (error) {
6236 *created = FALSE;
6237
6238 /*
6239 * If we got something other than file already exists
6240 * then just return this error. Otherwise, we got
6241 * EEXIST. If we were doing a GUARDED create, then
6242 * just return this error. Otherwise, we need to
6243 * make sure that this wasn't a duplicate of an
6244 * exclusive create request.
6245 *
6246 * The assumption is made that a non-exclusive create
6247 * request will never return EEXIST.
6248 */
6249
6250 if (error != EEXIST || mode == GUARDED4) {
6251 status = puterrno4(error);
6252 return (status);
6253 }
6254 error = VOP_LOOKUP(dvp, nm, vpp, NULL, 0, NULL, cr,
6255 NULL, NULL, NULL);
6256
6257 if (error) {
6258 /*
6259 * We couldn't find the file that we thought that
6260 * we just created. So, we'll just try creating
6261 * it again.
6262 */
6263 if (error == ENOENT)
6264 goto tryagain;
6265
6266 status = puterrno4(error);
6267 return (status);
6268 }
6269
6270 if (mode == UNCHECKED4) {
6271 /* existing object must be regular file */
6272 if ((*vpp)->v_type != VREG) {
6273 if ((*vpp)->v_type == VDIR)
6274 status = NFS4ERR_ISDIR;
6275 else if ((*vpp)->v_type == VLNK)
6276 status = NFS4ERR_SYMLINK;
6277 else
6278 status = NFS4ERR_INVAL;
6279 VN_RELE(*vpp);
6280 return (status);
6281 }
6282
6283 return (NFS4_OK);
6284 }
6285
6286 /* Check for duplicate request */
6287 va.va_mask = AT_MTIME;
6288 error = VOP_GETATTR(*vpp, &va, 0, cr, NULL);
6289 if (!error) {
6290 /* We found the file */
6291 const timestruc_t *mtime = &vap->va_mtime;
6292
6293 if (va.va_mtime.tv_sec != mtime->tv_sec ||
6294 va.va_mtime.tv_nsec != mtime->tv_nsec) {
6295 /* but its not our creation */
6296 VN_RELE(*vpp);
6297 return (NFS4ERR_EXIST);
6298 }
6299 *created = TRUE; /* retrans of create == created */
6300 return (NFS4_OK);
6301 }
6302 VN_RELE(*vpp);
6303 return (NFS4ERR_EXIST);
6304 }
6305
6306 return (NFS4_OK);
6307 }
6308
6309 static nfsstat4
6310 check_open_access(uint32_t access, struct compound_state *cs,
6311 struct svc_req *req)
6312 {
6313 int error;
6314 vnode_t *vp;
6315 bool_t readonly;
6316 cred_t *cr = cs->cr;
6317
6318 /* For now we don't allow mandatory locking as per V2/V3 */
6319 if (cs->access == CS_ACCESS_DENIED || cs->mandlock) {
6320 return (NFS4ERR_ACCESS);
6321 }
6322
6323 vp = cs->vp;
6324 ASSERT(cr != NULL && vp->v_type == VREG);
6325
6326 /*
6327 * If the file system is exported read only and we are trying
6328 * to open for write, then return NFS4ERR_ROFS
6329 */
6330
6331 readonly = rdonly4(req, cs);
6332
6333 if ((access & OPEN4_SHARE_ACCESS_WRITE) && readonly)
6334 return (NFS4ERR_ROFS);
6335
6336 if (access & OPEN4_SHARE_ACCESS_READ) {
6337 if ((VOP_ACCESS(vp, VREAD, 0, cr, NULL) != 0) &&
6338 (VOP_ACCESS(vp, VEXEC, 0, cr, NULL) != 0)) {
6339 return (NFS4ERR_ACCESS);
6340 }
6341 }
6342
6343 if (access & OPEN4_SHARE_ACCESS_WRITE) {
6344 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
6345 if (error)
6346 return (NFS4ERR_ACCESS);
6347 }
6348
6349 return (NFS4_OK);
6350 }
6351
6352 static nfsstat4
6353 rfs4_createfile(OPEN4args *args, struct svc_req *req, struct compound_state *cs,
6354 change_info4 *cinfo, bitmap4 *attrset, clientid4 clientid)
6355 {
6356 struct nfs4_svgetit_arg sarg;
6357 struct nfs4_ntov_table ntov;
6358
6359 bool_t ntov_table_init = FALSE;
6360 struct statvfs64 sb;
6361 nfsstat4 status;
6362 vnode_t *vp;
6363 vattr_t bva, ava, iva, cva, *vap;
6364 vnode_t *dvp;
6365 timespec32_t *mtime;
6366 char *nm = NULL;
6367 uint_t buflen;
6368 bool_t created;
6369 bool_t setsize = FALSE;
6370 len_t reqsize;
6371 int error;
6372 bool_t trunc;
6373 caller_context_t ct;
6374 component4 *component;
6375 bslabel_t *clabel;
6376 struct sockaddr *ca;
6377 char *name = NULL;
6378
6379 sarg.sbp = &sb;
6380 sarg.is_referral = B_FALSE;
6381
6382 dvp = cs->vp;
6383
6384 /* Check if the file system is read only */
6385 if (rdonly4(req, cs))
6386 return (NFS4ERR_ROFS);
6387
6388 /* check the label of including directory */
6389 if (is_system_labeled()) {
6390 ASSERT(req->rq_label != NULL);
6391 clabel = req->rq_label;
6392 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
6393 "got client label from request(1)",
6394 struct svc_req *, req);
6395 if (!blequal(&l_admin_low->tsl_label, clabel)) {
6396 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
6397 cs->exi)) {
6398 return (NFS4ERR_ACCESS);
6399 }
6400 }
6401 }
6402
6403 /*
6404 * Get the last component of path name in nm. cs will reference
6405 * the including directory on success.
6406 */
6407 component = &args->open_claim4_u.file;
6408 status = utf8_dir_verify(component);
6409 if (status != NFS4_OK)
6410 return (status);
6411
6412 nm = utf8_to_fn(component, &buflen, NULL);
6413
6414 if (nm == NULL)
6415 return (NFS4ERR_RESOURCE);
6416
6417 if (buflen > MAXNAMELEN) {
6418 kmem_free(nm, buflen);
6419 return (NFS4ERR_NAMETOOLONG);
6420 }
6421
6422 bva.va_mask = AT_TYPE|AT_CTIME|AT_SEQ;
6423 error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6424 if (error) {
6425 kmem_free(nm, buflen);
6426 return (puterrno4(error));
6427 }
6428
6429 if (bva.va_type != VDIR) {
6430 kmem_free(nm, buflen);
6431 return (NFS4ERR_NOTDIR);
6432 }
6433
6434 NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime)
6435
6436 switch (args->mode) {
6437 case GUARDED4:
6438 /*FALLTHROUGH*/
6439 case UNCHECKED4:
6440 nfs4_ntov_table_init(&ntov);
6441 ntov_table_init = TRUE;
6442
6443 *attrset = 0;
6444 status = do_rfs4_set_attrs(attrset,
6445 &args->createhow4_u.createattrs,
6446 cs, &sarg, &ntov, NFS4ATTR_SETIT);
6447
6448 if (status == NFS4_OK && (sarg.vap->va_mask & AT_TYPE) &&
6449 sarg.vap->va_type != VREG) {
6450 if (sarg.vap->va_type == VDIR)
6451 status = NFS4ERR_ISDIR;
6452 else if (sarg.vap->va_type == VLNK)
6453 status = NFS4ERR_SYMLINK;
6454 else
6455 status = NFS4ERR_INVAL;
6456 }
6457
6458 if (status != NFS4_OK) {
6459 kmem_free(nm, buflen);
6460 nfs4_ntov_table_free(&ntov, &sarg);
6461 *attrset = 0;
6462 return (status);
6463 }
6464
6465 vap = sarg.vap;
6466 vap->va_type = VREG;
6467 vap->va_mask |= AT_TYPE;
6468
6469 if ((vap->va_mask & AT_MODE) == 0) {
6470 vap->va_mask |= AT_MODE;
6471 vap->va_mode = (mode_t)0600;
6472 }
6473
6474 if (vap->va_mask & AT_SIZE) {
6475
6476 /* Disallow create with a non-zero size */
6477
6478 if ((reqsize = sarg.vap->va_size) != 0) {
6479 kmem_free(nm, buflen);
6480 nfs4_ntov_table_free(&ntov, &sarg);
6481 *attrset = 0;
6482 return (NFS4ERR_INVAL);
6483 }
6484 setsize = TRUE;
6485 }
6486 break;
6487
6488 case EXCLUSIVE4:
6489 /* prohibit EXCL create of named attributes */
6490 if (dvp->v_flag & V_XATTRDIR) {
6491 kmem_free(nm, buflen);
6492 *attrset = 0;
6493 return (NFS4ERR_INVAL);
6494 }
6495
6496 cva.va_mask = AT_TYPE | AT_MTIME | AT_MODE;
6497 cva.va_type = VREG;
6498 /*
6499 * Ensure no time overflows. Assumes underlying
6500 * filesystem supports at least 32 bits.
6501 * Truncate nsec to usec resolution to allow valid
6502 * compares even if the underlying filesystem truncates.
6503 */
6504 mtime = (timespec32_t *)&args->createhow4_u.createverf;
6505 cva.va_mtime.tv_sec = mtime->tv_sec % TIME32_MAX;
6506 cva.va_mtime.tv_nsec = (mtime->tv_nsec / 1000) * 1000;
6507 cva.va_mode = (mode_t)0;
6508 vap = &cva;
6509
6510 /*
6511 * For EXCL create, attrset is set to the server attr
6512 * used to cache the client's verifier.
6513 */
6514 *attrset = FATTR4_TIME_MODIFY_MASK;
6515 break;
6516 }
6517
6518 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6519 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6520 MAXPATHLEN + 1);
6521
6522 if (name == NULL) {
6523 kmem_free(nm, buflen);
6524 return (NFS4ERR_SERVERFAULT);
6525 }
6526
6527 status = create_vnode(dvp, name, vap, args->mode,
6528 cs->cr, &vp, &created);
6529 if (nm != name)
6530 kmem_free(name, MAXPATHLEN + 1);
6531 kmem_free(nm, buflen);
6532
6533 if (status != NFS4_OK) {
6534 if (ntov_table_init)
6535 nfs4_ntov_table_free(&ntov, &sarg);
6536 *attrset = 0;
6537 return (status);
6538 }
6539
6540 trunc = (setsize && !created);
6541
6542 if (args->mode != EXCLUSIVE4) {
6543 bitmap4 createmask = args->createhow4_u.createattrs.attrmask;
6544
6545 /*
6546 * True verification that object was created with correct
6547 * attrs is impossible. The attrs could have been changed
6548 * immediately after object creation. If attributes did
6549 * not verify, the only recourse for the server is to
6550 * destroy the object. Maybe if some attrs (like gid)
6551 * are set incorrectly, the object should be destroyed;
6552 * however, seems bad as a default policy. Do we really
6553 * want to destroy an object over one of the times not
6554 * verifying correctly? For these reasons, the server
6555 * currently sets bits in attrset for createattrs
6556 * that were set; however, no verification is done.
6557 *
6558 * vmask_to_nmask accounts for vattr bits set on create
6559 * [do_rfs4_set_attrs() only sets resp bits for
6560 * non-vattr/vfs bits.]
6561 * Mask off any bits we set by default so as not to return
6562 * more attrset bits than were requested in createattrs
6563 */
6564 if (created) {
6565 nfs4_vmask_to_nmask(sarg.vap->va_mask, attrset);
6566 *attrset &= createmask;
6567 } else {
6568 /*
6569 * We did not create the vnode (we tried but it
6570 * already existed). In this case, the only createattr
6571 * that the spec allows the server to set is size,
6572 * and even then, it can only be set if it is 0.
6573 */
6574 *attrset = 0;
6575 if (trunc)
6576 *attrset = FATTR4_SIZE_MASK;
6577 }
6578 }
6579 if (ntov_table_init)
6580 nfs4_ntov_table_free(&ntov, &sarg);
6581
6582 /*
6583 * Get the initial "after" sequence number, if it fails,
6584 * set to zero, time to before.
6585 */
6586 iva.va_mask = AT_CTIME|AT_SEQ;
6587 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL)) {
6588 iva.va_seq = 0;
6589 iva.va_ctime = bva.va_ctime;
6590 }
6591
6592 /*
6593 * create_vnode attempts to create the file exclusive,
6594 * if it already exists the VOP_CREATE will fail and
6595 * may not increase va_seq. It is atomic if
6596 * we haven't changed the directory, but if it has changed
6597 * we don't know what changed it.
6598 */
6599 if (!created) {
6600 if (bva.va_seq && iva.va_seq &&
6601 bva.va_seq == iva.va_seq)
6602 cinfo->atomic = TRUE;
6603 else
6604 cinfo->atomic = FALSE;
6605 NFS4_SET_FATTR4_CHANGE(cinfo->after, iva.va_ctime);
6606 } else {
6607 /*
6608 * The entry was created, we need to sync the
6609 * directory metadata.
6610 */
6611 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
6612
6613 /*
6614 * Get "after" change value, if it fails, simply return the
6615 * before value.
6616 */
6617 ava.va_mask = AT_CTIME|AT_SEQ;
6618 if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6619 ava.va_ctime = bva.va_ctime;
6620 ava.va_seq = 0;
6621 }
6622
6623 NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6624
6625 /*
6626 * The cinfo->atomic = TRUE only if we have
6627 * non-zero va_seq's, and it has incremented by exactly one
6628 * during the create_vnode and it didn't
6629 * change during the VOP_FSYNC.
6630 */
6631 if (bva.va_seq && iva.va_seq && ava.va_seq &&
6632 iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
6633 cinfo->atomic = TRUE;
6634 else
6635 cinfo->atomic = FALSE;
6636 }
6637
6638 /* Check for mandatory locking and that the size gets set. */
6639 cva.va_mask = AT_MODE;
6640 if (setsize)
6641 cva.va_mask |= AT_SIZE;
6642
6643 /* Assume the worst */
6644 cs->mandlock = TRUE;
6645
6646 if (VOP_GETATTR(vp, &cva, 0, cs->cr, NULL) == 0) {
6647 cs->mandlock = MANDLOCK(cs->vp, cva.va_mode);
6648
6649 /*
6650 * Truncate the file if necessary; this would be
6651 * the case for create over an existing file.
6652 */
6653
6654 if (trunc) {
6655 int in_crit = 0;
6656 rfs4_file_t *fp;
6657 nfs4_srv_t *nsrv4;
6658 bool_t create = FALSE;
6659
6660 /*
6661 * We are writing over an existing file.
6662 * Check to see if we need to recall a delegation.
6663 */
6664 nsrv4 = nfs4_get_srv();
6665 rfs4_hold_deleg_policy(nsrv4);
6666 if ((fp = rfs4_findfile(vp, NULL, &create)) != NULL) {
6667 if (rfs4_check_delegated_byfp(FWRITE, fp,
6668 (reqsize == 0), FALSE, FALSE, &clientid)) {
6669 rfs4_file_rele(fp);
6670 rfs4_rele_deleg_policy(nsrv4);
6671 VN_RELE(vp);
6672 *attrset = 0;
6673 return (NFS4ERR_DELAY);
6674 }
6675 rfs4_file_rele(fp);
6676 }
6677 rfs4_rele_deleg_policy(nsrv4);
6678
6679 if (nbl_need_check(vp)) {
6680 in_crit = 1;
6681
6682 ASSERT(reqsize == 0);
6683
6684 nbl_start_crit(vp, RW_READER);
6685 if (nbl_conflict(vp, NBL_WRITE, 0,
6686 cva.va_size, 0, NULL)) {
6687 in_crit = 0;
6688 nbl_end_crit(vp);
6689 VN_RELE(vp);
6690 *attrset = 0;
6691 return (NFS4ERR_ACCESS);
6692 }
6693 }
6694 ct.cc_sysid = 0;
6695 ct.cc_pid = 0;
6696 ct.cc_caller_id = nfs4_srv_caller_id;
6697 ct.cc_flags = CC_DONTBLOCK;
6698
6699 cva.va_mask = AT_SIZE;
6700 cva.va_size = reqsize;
6701 (void) VOP_SETATTR(vp, &cva, 0, cs->cr, &ct);
6702 if (in_crit)
6703 nbl_end_crit(vp);
6704 }
6705 }
6706
6707 error = makefh4(&cs->fh, vp, cs->exi);
6708
6709 /*
6710 * Force modified data and metadata out to stable storage.
6711 */
6712 (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
6713
6714 if (error) {
6715 VN_RELE(vp);
6716 *attrset = 0;
6717 return (puterrno4(error));
6718 }
6719
6720 /* if parent dir is attrdir, set namedattr fh flag */
6721 if (dvp->v_flag & V_XATTRDIR)
6722 set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
6723
6724 if (cs->vp)
6725 VN_RELE(cs->vp);
6726
6727 cs->vp = vp;
6728
6729 /*
6730 * if we did not create the file, we will need to check
6731 * the access bits on the file
6732 */
6733
6734 if (!created) {
6735 if (setsize)
6736 args->share_access |= OPEN4_SHARE_ACCESS_WRITE;
6737 status = check_open_access(args->share_access, cs, req);
6738 if (status != NFS4_OK)
6739 *attrset = 0;
6740 }
6741 return (status);
6742 }
6743
6744 /*ARGSUSED*/
6745 static void
6746 rfs4_do_open(struct compound_state *cs, struct svc_req *req,
6747 rfs4_openowner_t *oo, delegreq_t deleg,
6748 uint32_t access, uint32_t deny,
6749 OPEN4res *resp, int deleg_cur)
6750 {
6751 /* XXX Currently not using req */
6752 rfs4_state_t *sp;
6753 rfs4_file_t *fp;
6754 bool_t screate = TRUE;
6755 bool_t fcreate = TRUE;
6756 uint32_t open_a, share_a;
6757 uint32_t open_d, share_d;
6758 rfs4_deleg_state_t *dsp;
6759 sysid_t sysid;
6760 nfsstat4 status;
6761 caller_context_t ct;
6762 int fflags = 0;
6763 int recall = 0;
6764 int err;
6765 int first_open;
6766
6767 /* get the file struct and hold a lock on it during initial open */
6768 fp = rfs4_findfile_withlock(cs->vp, &cs->fh, &fcreate);
6769 if (fp == NULL) {
6770 resp->status = NFS4ERR_RESOURCE;
6771 DTRACE_PROBE1(nfss__e__do__open1, nfsstat4, resp->status);
6772 return;
6773 }
6774
6775 sp = rfs4_findstate_by_owner_file(oo, fp, &screate);
6776 if (sp == NULL) {
6777 resp->status = NFS4ERR_RESOURCE;
6778 DTRACE_PROBE1(nfss__e__do__open2, nfsstat4, resp->status);
6779 /* No need to keep any reference */
6780 rw_exit(&fp->rf_file_rwlock);
6781 rfs4_file_rele(fp);
6782 return;
6783 }
6784
6785 /* try to get the sysid before continuing */
6786 if ((status = rfs4_client_sysid(oo->ro_client, &sysid)) != NFS4_OK) {
6787 resp->status = status;
6788 rfs4_file_rele(fp);
6789 /* Not a fully formed open; "close" it */
6790 if (screate == TRUE)
6791 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6792 rfs4_state_rele(sp);
6793 return;
6794 }
6795
6796 /* Calculate the fflags for this OPEN. */
6797 if (access & OPEN4_SHARE_ACCESS_READ)
6798 fflags |= FREAD;
6799 if (access & OPEN4_SHARE_ACCESS_WRITE)
6800 fflags |= FWRITE;
6801
6802 rfs4_dbe_lock(sp->rs_dbe);
6803
6804 /*
6805 * Calculate the new deny and access mode that this open is adding to
6806 * the file for this open owner;
6807 */
6808 open_d = (deny & ~sp->rs_open_deny);
6809 open_a = (access & ~sp->rs_open_access);
6810
6811 /*
6812 * Calculate the new share access and share deny modes that this open
6813 * is adding to the file for this open owner;
6814 */
6815 share_a = (access & ~sp->rs_share_access);
6816 share_d = (deny & ~sp->rs_share_deny);
6817
6818 first_open = (sp->rs_open_access & OPEN4_SHARE_ACCESS_BOTH) == 0;
6819
6820 /*
6821 * Check to see the client has already sent an open for this
6822 * open owner on this file with the same share/deny modes.
6823 * If so, we don't need to check for a conflict and we don't
6824 * need to add another shrlock. If not, then we need to
6825 * check for conflicts in deny and access before checking for
6826 * conflicts in delegation. We don't want to recall a
6827 * delegation based on an open that will eventually fail based
6828 * on shares modes.
6829 */
6830
6831 if (share_a || share_d) {
6832 if ((err = rfs4_share(sp, access, deny)) != 0) {
6833 rfs4_dbe_unlock(sp->rs_dbe);
6834 resp->status = err;
6835
6836 rfs4_file_rele(fp);
6837 /* Not a fully formed open; "close" it */
6838 if (screate == TRUE)
6839 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6840 rfs4_state_rele(sp);
6841 return;
6842 }
6843 }
6844
6845 rfs4_dbe_lock(fp->rf_dbe);
6846
6847 /*
6848 * Check to see if this file is delegated and if so, if a
6849 * recall needs to be done.
6850 */
6851 if (rfs4_check_recall(sp, access)) {
6852 rfs4_dbe_unlock(fp->rf_dbe);
6853 rfs4_dbe_unlock(sp->rs_dbe);
6854 rfs4_recall_deleg(fp, FALSE, sp->rs_owner->ro_client);
6855 delay(NFS4_DELEGATION_CONFLICT_DELAY);
6856 rfs4_dbe_lock(sp->rs_dbe);
6857
6858 /* if state closed while lock was dropped */
6859 if (sp->rs_closed) {
6860 if (share_a || share_d)
6861 (void) rfs4_unshare(sp);
6862 rfs4_dbe_unlock(sp->rs_dbe);
6863 rfs4_file_rele(fp);
6864 /* Not a fully formed open; "close" it */
6865 if (screate == TRUE)
6866 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6867 rfs4_state_rele(sp);
6868 resp->status = NFS4ERR_OLD_STATEID;
6869 return;
6870 }
6871
6872 rfs4_dbe_lock(fp->rf_dbe);
6873 /* Let's see if the delegation was returned */
6874 if (rfs4_check_recall(sp, access)) {
6875 rfs4_dbe_unlock(fp->rf_dbe);
6876 if (share_a || share_d)
6877 (void) rfs4_unshare(sp);
6878 rfs4_dbe_unlock(sp->rs_dbe);
6879 rfs4_file_rele(fp);
6880 rfs4_update_lease(sp->rs_owner->ro_client);
6881
6882 /* Not a fully formed open; "close" it */
6883 if (screate == TRUE)
6884 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6885 rfs4_state_rele(sp);
6886 resp->status = NFS4ERR_DELAY;
6887 return;
6888 }
6889 }
6890 /*
6891 * the share check passed and any delegation conflict has been
6892 * taken care of, now call vop_open.
6893 * if this is the first open then call vop_open with fflags.
6894 * if not, call vn_open_upgrade with just the upgrade flags.
6895 *
6896 * if the file has been opened already, it will have the current
6897 * access mode in the state struct. if it has no share access, then
6898 * this is a new open.
6899 *
6900 * However, if this is open with CLAIM_DLEGATE_CUR, then don't
6901 * call VOP_OPEN(), just do the open upgrade.
6902 */
6903 if (first_open && !deleg_cur) {
6904 ct.cc_sysid = sysid;
6905 ct.cc_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
6906 ct.cc_caller_id = nfs4_srv_caller_id;
6907 ct.cc_flags = CC_DONTBLOCK;
6908 err = VOP_OPEN(&cs->vp, fflags, cs->cr, &ct);
6909 if (err) {
6910 rfs4_dbe_unlock(fp->rf_dbe);
6911 if (share_a || share_d)
6912 (void) rfs4_unshare(sp);
6913 rfs4_dbe_unlock(sp->rs_dbe);
6914 rfs4_file_rele(fp);
6915
6916 /* Not a fully formed open; "close" it */
6917 if (screate == TRUE)
6918 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6919 rfs4_state_rele(sp);
6920 /* check if a monitor detected a delegation conflict */
6921 if (err == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
6922 resp->status = NFS4ERR_DELAY;
6923 else
6924 resp->status = NFS4ERR_SERVERFAULT;
6925 return;
6926 }
6927 } else { /* open upgrade */
6928 /*
6929 * calculate the fflags for the new mode that is being added
6930 * by this upgrade.
6931 */
6932 fflags = 0;
6933 if (open_a & OPEN4_SHARE_ACCESS_READ)
6934 fflags |= FREAD;
6935 if (open_a & OPEN4_SHARE_ACCESS_WRITE)
6936 fflags |= FWRITE;
6937 vn_open_upgrade(cs->vp, fflags);
6938 }
6939 sp->rs_open_access |= access;
6940 sp->rs_open_deny |= deny;
6941
6942 if (open_d & OPEN4_SHARE_DENY_READ)
6943 fp->rf_deny_read++;
6944 if (open_d & OPEN4_SHARE_DENY_WRITE)
6945 fp->rf_deny_write++;
6946 fp->rf_share_deny |= deny;
6947
6948 if (open_a & OPEN4_SHARE_ACCESS_READ)
6949 fp->rf_access_read++;
6950 if (open_a & OPEN4_SHARE_ACCESS_WRITE)
6951 fp->rf_access_write++;
6952 fp->rf_share_access |= access;
6953
6954 /*
6955 * Check for delegation here. if the deleg argument is not
6956 * DELEG_ANY, then this is a reclaim from a client and
6957 * we must honor the delegation requested. If necessary we can
6958 * set the recall flag.
6959 */
6960
6961 dsp = rfs4_grant_delegation(deleg, sp, &recall);
6962
6963 cs->deleg = (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE);
6964
6965 next_stateid(&sp->rs_stateid);
6966
6967 resp->stateid = sp->rs_stateid.stateid;
6968
6969 rfs4_dbe_unlock(fp->rf_dbe);
6970 rfs4_dbe_unlock(sp->rs_dbe);
6971
6972 if (dsp) {
6973 rfs4_set_deleg_response(dsp, &resp->delegation, NULL, recall);
6974 rfs4_deleg_state_rele(dsp);
6975 }
6976
6977 rfs4_file_rele(fp);
6978 rfs4_state_rele(sp);
6979
6980 resp->status = NFS4_OK;
6981 }
6982
6983 /*ARGSUSED*/
6984 static void
6985 rfs4_do_opennull(struct compound_state *cs, struct svc_req *req,
6986 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6987 {
6988 change_info4 *cinfo = &resp->cinfo;
6989 bitmap4 *attrset = &resp->attrset;
6990
6991 if (args->opentype == OPEN4_NOCREATE)
6992 resp->status = rfs4_lookupfile(&args->open_claim4_u.file,
6993 req, cs, args->share_access, cinfo);
6994 else {
6995 /* inhibit delegation grants during exclusive create */
6996
6997 if (args->mode == EXCLUSIVE4)
6998 rfs4_disable_delegation();
6999
7000 resp->status = rfs4_createfile(args, req, cs, cinfo, attrset,
7001 oo->ro_client->rc_clientid);
7002 }
7003
7004 if (resp->status == NFS4_OK) {
7005
7006 /* cs->vp cs->fh now reference the desired file */
7007
7008 rfs4_do_open(cs, req, oo,
7009 oo->ro_need_confirm ? DELEG_NONE : DELEG_ANY,
7010 args->share_access, args->share_deny, resp, 0);
7011
7012 /*
7013 * If rfs4_createfile set attrset, we must
7014 * clear this attrset before the response is copied.
7015 */
7016 if (resp->status != NFS4_OK && resp->attrset) {
7017 resp->attrset = 0;
7018 }
7019 }
7020 else
7021 *cs->statusp = resp->status;
7022
7023 if (args->mode == EXCLUSIVE4)
7024 rfs4_enable_delegation();
7025 }
7026
7027 /*ARGSUSED*/
7028 static void
7029 rfs4_do_openprev(struct compound_state *cs, struct svc_req *req,
7030 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7031 {
7032 change_info4 *cinfo = &resp->cinfo;
7033 vattr_t va;
7034 vtype_t v_type = cs->vp->v_type;
7035 int error = 0;
7036
7037 /* Verify that we have a regular file */
7038 if (v_type != VREG) {
7039 if (v_type == VDIR)
7040 resp->status = NFS4ERR_ISDIR;
7041 else if (v_type == VLNK)
7042 resp->status = NFS4ERR_SYMLINK;
7043 else
7044 resp->status = NFS4ERR_INVAL;
7045 return;
7046 }
7047
7048 va.va_mask = AT_MODE|AT_UID;
7049 error = VOP_GETATTR(cs->vp, &va, 0, cs->cr, NULL);
7050 if (error) {
7051 resp->status = puterrno4(error);
7052 return;
7053 }
7054
7055 cs->mandlock = MANDLOCK(cs->vp, va.va_mode);
7056
7057 /*
7058 * Check if we have access to the file, Note the the file
7059 * could have originally been open UNCHECKED or GUARDED
7060 * with mode bits that will now fail, but there is nothing
7061 * we can really do about that except in the case that the
7062 * owner of the file is the one requesting the open.
7063 */
7064 if (crgetuid(cs->cr) != va.va_uid) {
7065 resp->status = check_open_access(args->share_access, cs, req);
7066 if (resp->status != NFS4_OK) {
7067 return;
7068 }
7069 }
7070
7071 /*
7072 * cinfo on a CLAIM_PREVIOUS is undefined, initialize to zero
7073 */
7074 cinfo->before = 0;
7075 cinfo->after = 0;
7076 cinfo->atomic = FALSE;
7077
7078 rfs4_do_open(cs, req, oo,
7079 NFS4_DELEG4TYPE2REQTYPE(args->open_claim4_u.delegate_type),
7080 args->share_access, args->share_deny, resp, 0);
7081 }
7082
7083 static void
7084 rfs4_do_opendelcur(struct compound_state *cs, struct svc_req *req,
7085 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7086 {
7087 int error;
7088 nfsstat4 status;
7089 stateid4 stateid =
7090 args->open_claim4_u.delegate_cur_info.delegate_stateid;
7091 rfs4_deleg_state_t *dsp;
7092
7093 /*
7094 * Find the state info from the stateid and confirm that the
7095 * file is delegated. If the state openowner is the same as
7096 * the supplied openowner we're done. If not, get the file
7097 * info from the found state info. Use that file info to
7098 * create the state for this lock owner. Note solaris doen't
7099 * really need the pathname to find the file. We may want to
7100 * lookup the pathname and make sure that the vp exist and
7101 * matches the vp in the file structure. However it is
7102 * possible that the pathname nolonger exists (local process
7103 * unlinks the file), so this may not be that useful.
7104 */
7105
7106 status = rfs4_get_deleg_state(&stateid, &dsp);
7107 if (status != NFS4_OK) {
7108 resp->status = status;
7109 return;
7110 }
7111
7112 ASSERT(dsp->rds_finfo->rf_dinfo.rd_dtype != OPEN_DELEGATE_NONE);
7113
7114 /*
7115 * New lock owner, create state. Since this was probably called
7116 * in response to a CB_RECALL we set deleg to DELEG_NONE
7117 */
7118
7119 ASSERT(cs->vp != NULL);
7120 VN_RELE(cs->vp);
7121 VN_HOLD(dsp->rds_finfo->rf_vp);
7122 cs->vp = dsp->rds_finfo->rf_vp;
7123
7124 if (error = makefh4(&cs->fh, cs->vp, cs->exi)) {
7125 rfs4_deleg_state_rele(dsp);
7126 *cs->statusp = resp->status = puterrno4(error);
7127 return;
7128 }
7129
7130 /* Mark progress for delegation returns */
7131 dsp->rds_finfo->rf_dinfo.rd_time_lastwrite = gethrestime_sec();
7132 rfs4_deleg_state_rele(dsp);
7133 rfs4_do_open(cs, req, oo, DELEG_NONE,
7134 args->share_access, args->share_deny, resp, 1);
7135 }
7136
7137 /*ARGSUSED*/
7138 static void
7139 rfs4_do_opendelprev(struct compound_state *cs, struct svc_req *req,
7140 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7141 {
7142 /*
7143 * Lookup the pathname, it must already exist since this file
7144 * was delegated.
7145 *
7146 * Find the file and state info for this vp and open owner pair.
7147 * check that they are in fact delegated.
7148 * check that the state access and deny modes are the same.
7149 *
7150 * Return the delgation possibly seting the recall flag.
7151 */
7152 rfs4_file_t *fp;
7153 rfs4_state_t *sp;
7154 bool_t create = FALSE;
7155 bool_t dcreate = FALSE;
7156 rfs4_deleg_state_t *dsp;
7157 nfsace4 *ace;
7158
7159 /* Note we ignore oflags */
7160 resp->status = rfs4_lookupfile(&args->open_claim4_u.file_delegate_prev,
7161 req, cs, args->share_access, &resp->cinfo);
7162
7163 if (resp->status != NFS4_OK) {
7164 return;
7165 }
7166
7167 /* get the file struct and hold a lock on it during initial open */
7168 fp = rfs4_findfile_withlock(cs->vp, NULL, &create);
7169 if (fp == NULL) {
7170 resp->status = NFS4ERR_RESOURCE;
7171 DTRACE_PROBE1(nfss__e__do_opendelprev1, nfsstat4, resp->status);
7172 return;
7173 }
7174
7175 sp = rfs4_findstate_by_owner_file(oo, fp, &create);
7176 if (sp == NULL) {
7177 resp->status = NFS4ERR_SERVERFAULT;
7178 DTRACE_PROBE1(nfss__e__do_opendelprev2, nfsstat4, resp->status);
7179 rw_exit(&fp->rf_file_rwlock);
7180 rfs4_file_rele(fp);
7181 return;
7182 }
7183
7184 rfs4_dbe_lock(sp->rs_dbe);
7185 rfs4_dbe_lock(fp->rf_dbe);
7186 if (args->share_access != sp->rs_share_access ||
7187 args->share_deny != sp->rs_share_deny ||
7188 sp->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
7189 NFS4_DEBUG(rfs4_debug,
7190 (CE_NOTE, "rfs4_do_opendelprev: state mixup"));
7191 rfs4_dbe_unlock(fp->rf_dbe);
7192 rfs4_dbe_unlock(sp->rs_dbe);
7193 rfs4_file_rele(fp);
7194 rfs4_state_rele(sp);
7195 resp->status = NFS4ERR_SERVERFAULT;
7196 return;
7197 }
7198 rfs4_dbe_unlock(fp->rf_dbe);
7199 rfs4_dbe_unlock(sp->rs_dbe);
7200
7201 dsp = rfs4_finddeleg(sp, &dcreate);
7202 if (dsp == NULL) {
7203 rfs4_state_rele(sp);
7204 rfs4_file_rele(fp);
7205 resp->status = NFS4ERR_SERVERFAULT;
7206 return;
7207 }
7208
7209 next_stateid(&sp->rs_stateid);
7210
7211 resp->stateid = sp->rs_stateid.stateid;
7212
7213 resp->delegation.delegation_type = dsp->rds_dtype;
7214
7215 if (dsp->rds_dtype == OPEN_DELEGATE_READ) {
7216 open_read_delegation4 *rv =
7217 &resp->delegation.open_delegation4_u.read;
7218
7219 rv->stateid = dsp->rds_delegid.stateid;
7220 rv->recall = FALSE; /* no policy in place to set to TRUE */
7221 ace = &rv->permissions;
7222 } else {
7223 open_write_delegation4 *rv =
7224 &resp->delegation.open_delegation4_u.write;
7225
7226 rv->stateid = dsp->rds_delegid.stateid;
7227 rv->recall = FALSE; /* no policy in place to set to TRUE */
7228 ace = &rv->permissions;
7229 rv->space_limit.limitby = NFS_LIMIT_SIZE;
7230 rv->space_limit.nfs_space_limit4_u.filesize = UINT64_MAX;
7231 }
7232
7233 /* XXX For now */
7234 ace->type = ACE4_ACCESS_ALLOWED_ACE_TYPE;
7235 ace->flag = 0;
7236 ace->access_mask = 0;
7237 ace->who.utf8string_len = 0;
7238 ace->who.utf8string_val = 0;
7239
7240 rfs4_deleg_state_rele(dsp);
7241 rfs4_state_rele(sp);
7242 rfs4_file_rele(fp);
7243 }
7244
/*
 * Result of a sequence id check (see rfs4_check_seqid()).
 *
 * NFS4_CHKSEQ_OKAY must remain 0: rfs4_op_open_confirm() compares the
 * result of rfs4_check_open_seqid() against a literal 0.
 */
typedef enum {
	/* request seqid is the next one expected (stored seqid + 1) */
	NFS4_CHKSEQ_OKAY = 0,
	/* request seqid equals the stored seqid and the operation matches */
	NFS4_CHKSEQ_REPLAY = 1,
	/* anything else */
	NFS4_CHKSEQ_BAD = 2
} rfs4_chkseq_t;
7250
7251 /*
7252 * Generic function for sequence number checks.
7253 */
7254 static rfs4_chkseq_t
7255 rfs4_check_seqid(seqid4 seqid, nfs_resop4 *lastop,
7256 seqid4 rqst_seq, nfs_resop4 *resop, bool_t copyres)
7257 {
7258 /* Same sequence ids and matching operations? */
7259 if (seqid == rqst_seq && resop->resop == lastop->resop) {
7260 if (copyres == TRUE) {
7261 rfs4_free_reply(resop);
7262 rfs4_copy_reply(resop, lastop);
7263 }
7264 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
7265 "Replayed SEQID %d\n", seqid));
7266 return (NFS4_CHKSEQ_REPLAY);
7267 }
7268
7269 /* If the incoming sequence is not the next expected then it is bad */
7270 if (rqst_seq != seqid + 1) {
7271 if (rqst_seq == seqid) {
7272 NFS4_DEBUG(rfs4_debug,
7273 (CE_NOTE, "BAD SEQID: Replayed sequence id "
7274 "but last op was %d current op is %d\n",
7275 lastop->resop, resop->resop));
7276 return (NFS4_CHKSEQ_BAD);
7277 }
7278 NFS4_DEBUG(rfs4_debug,
7279 (CE_NOTE, "BAD SEQID: got %u expecting %u\n",
7280 rqst_seq, seqid));
7281 return (NFS4_CHKSEQ_BAD);
7282 }
7283
7284 /* Everything okay -- next expected */
7285 return (NFS4_CHKSEQ_OKAY);
7286 }
7287
7288
7289 static rfs4_chkseq_t
7290 rfs4_check_open_seqid(seqid4 seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7291 {
7292 rfs4_chkseq_t rc;
7293
7294 rfs4_dbe_lock(op->ro_dbe);
7295 rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply, seqid, resop,
7296 TRUE);
7297 rfs4_dbe_unlock(op->ro_dbe);
7298
7299 if (rc == NFS4_CHKSEQ_OKAY)
7300 rfs4_update_lease(op->ro_client);
7301
7302 return (rc);
7303 }
7304
7305 static rfs4_chkseq_t
7306 rfs4_check_olo_seqid(seqid4 olo_seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7307 {
7308 rfs4_chkseq_t rc;
7309
7310 rfs4_dbe_lock(op->ro_dbe);
7311 rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply,
7312 olo_seqid, resop, FALSE);
7313 rfs4_dbe_unlock(op->ro_dbe);
7314
7315 return (rc);
7316 }
7317
7318 static rfs4_chkseq_t
7319 rfs4_check_lock_seqid(seqid4 seqid, rfs4_lo_state_t *lsp, nfs_resop4 *resop)
7320 {
7321 rfs4_chkseq_t rc = NFS4_CHKSEQ_OKAY;
7322
7323 rfs4_dbe_lock(lsp->rls_dbe);
7324 if (!lsp->rls_skip_seqid_check)
7325 rc = rfs4_check_seqid(lsp->rls_seqid, &lsp->rls_reply, seqid,
7326 resop, TRUE);
7327 rfs4_dbe_unlock(lsp->rls_dbe);
7328
7329 return (rc);
7330 }
7331
/*
 * rfs4_op_open - handler for the NFSv4 OPEN operation.
 *
 *	argop	operation arguments (the opopen arm is used)
 *	resop	operation result (the opopen arm is filled in)
 *	req	RPC request, passed to the per-claim helpers
 *	cs	compound state; cs->vp/cs->fh are the current filehandle and
 *		*cs->statusp receives the final status
 *
 * Outline:
 *  1. Require a current filehandle, a known client id and a live lease.
 *  2. Find or create the open owner and enter its ro_sw so that use of
 *     the owner's sequence space is serialized for the rest of the call.
 *  3. For a pre-existing owner (unless confirmation was postponed) check
 *     the request seqid: bad, replay, or next expected.
 *  4. Apply the grace-period rules for the claim type, then validate the
 *     mount-point and share access/deny arguments.
 *  5. Dispatch to the rfs4_do_open*() helper for the claim type.
 *  6. Do the sequence id / saved-reply bookkeeping that depends on the
 *     resulting status, including restoring the filehandle on a replay.
 *
 * Exit labels, in order of fall-through:
 *	out	releases the client reference, then does step 6
 *	finish	publishes resp->status to *cs->statusp, leaves ro_sw and
 *		releases the open owner
 *	end	fires the op__open__done probe only
 */
static void
rfs4_op_open(nfs_argop4 *argop, nfs_resop4 *resop,
    struct svc_req *req, struct compound_state *cs)
{
	OPEN4args *args = &argop->nfs_argop4_u.opopen;
	OPEN4res *resp = &resop->nfs_resop4_u.opopen;
	open_owner4 *owner = &args->owner;
	open_claim_type4 claim = args->claim;
	rfs4_client_t *cp;
	rfs4_openowner_t *oo;
	bool_t create;
	bool_t replay = FALSE;
	int can_reclaim;

	DTRACE_NFSV4_2(op__open__start, struct compound_state *, cs,
	    OPEN4args *, args);

	if (cs->vp == NULL) {
		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
		goto end;
	}

	/*
	 * Need to check clientid and lease expiration first based on
	 * error ordering and incrementing sequence id.
	 */
	cp = rfs4_findclient_by_id(owner->clientid, FALSE);
	if (cp == NULL) {
		*cs->statusp = resp->status =
		    rfs4_check_clientid(&owner->clientid, 0);
		goto end;
	}

	if (rfs4_lease_expired(cp)) {
		/*
		 * NOTE(review): cp is not passed to rfs4_client_rele() on
		 * this path; presumably rfs4_client_close() disposes of the
		 * reference -- confirm.
		 */
		rfs4_client_close(cp);
		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
		goto end;
	}
	/* Sampled once here; used by the CLAIM_PREVIOUS grace check below */
	can_reclaim = cp->rc_can_reclaim;

	/*
	 * Find the open_owner for use from this point forward.  Take
	 * care in updating the sequence id based on the type of error
	 * being returned.
	 */
retry:
	/* in: allow creation; out: tested below as "owner was just created" */
	create = TRUE;
	oo = rfs4_findopenowner(owner, &create, args->seqid);
	if (oo == NULL) {
		*cs->statusp = resp->status = NFS4ERR_RESOURCE;
		rfs4_client_rele(cp);
		goto end;
	}

	/* Hold off access to the sequence space while the open is done */
	rfs4_sw_enter(&oo->ro_sw);

	/*
	 * If the open_owner existed before at the server, then check
	 * the sequence id.
	 */
	if (!create && !oo->ro_postpone_confirm) {
		switch (rfs4_check_open_seqid(args->seqid, oo, resop)) {
		case NFS4_CHKSEQ_BAD:
			/*
			 * An unconfirmed owner that is being used with a
			 * later seqid has its opens freed and the owner
			 * lookup is redone from scratch.
			 */
			if ((args->seqid > oo->ro_open_seqid) &&
			    oo->ro_need_confirm) {
				rfs4_free_opens(oo, TRUE, FALSE);
				rfs4_sw_exit(&oo->ro_sw);
				rfs4_openowner_rele(oo);
				goto retry;
			}
			resp->status = NFS4ERR_BAD_SEQID;
			goto out;
		case NFS4_CHKSEQ_REPLAY: /* replay of previous request */
			/*
			 * rfs4_check_open_seqid() has already copied the
			 * saved reply (and thus resp->status) into resop.
			 */
			replay = TRUE;
			goto out;
		default:
			break;
		}

		/*
		 * Sequence was ok and open owner exists
		 * check to see if we have yet to see an
		 * open_confirm.
		 */
		if (oo->ro_need_confirm) {
			rfs4_free_opens(oo, TRUE, FALSE);
			rfs4_sw_exit(&oo->ro_sw);
			rfs4_openowner_rele(oo);
			goto retry;
		}
	}
	/* Grace only applies to regular-type OPENs */
	if (rfs4_clnt_in_grace(cp) &&
	    (claim == CLAIM_NULL || claim == CLAIM_DELEGATE_CUR)) {
		*cs->statusp = resp->status = NFS4ERR_GRACE;
		goto out;
	}

	/*
	 * If previous state at the server existed then can_reclaim
	 * will be set.  If not reply NFS4ERR_NO_GRACE to the
	 * client.
	 */
	if (rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS && !can_reclaim) {
		*cs->statusp = resp->status = NFS4ERR_NO_GRACE;
		goto out;
	}


	/*
	 * Reject the open if the client has missed the grace period
	 */
	if (!rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS) {
		*cs->statusp = resp->status = NFS4ERR_NO_GRACE;
		goto out;
	}

	/* Couple of up-front bookkeeping items */
	if (oo->ro_need_confirm) {
		/*
		 * If this is a reclaim OPEN then we should not ask
		 * for a confirmation of the open_owner per the
		 * protocol specification.
		 */
		if (claim == CLAIM_PREVIOUS)
			oo->ro_need_confirm = FALSE;
		else
			resp->rflags |= OPEN4_RESULT_CONFIRM;
	}
	resp->rflags |= OPEN4_RESULT_LOCKTYPE_POSIX;

	/*
	 * If there is an unshared filesystem mounted on this vnode,
	 * do not allow to open/create in this directory.
	 */
	if (vn_ismntpt(cs->vp)) {
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		goto out;
	}

	/*
	 * access must be READ, WRITE, or BOTH.  No access is invalid.
	 * deny can be READ, WRITE, BOTH, or NONE.
	 * bits not defined for access/deny are invalid.
	 */
	if (! (args->share_access & OPEN4_SHARE_ACCESS_BOTH) ||
	    (args->share_access & ~OPEN4_SHARE_ACCESS_BOTH) ||
	    (args->share_deny & ~OPEN4_SHARE_DENY_BOTH)) {
		*cs->statusp = resp->status = NFS4ERR_INVAL;
		goto out;
	}


	/*
	 * make sure attrset is zero before response is built.
	 */
	resp->attrset = 0;

	/* Each helper reports its outcome in resp->status */
	switch (claim) {
	case CLAIM_NULL:
		rfs4_do_opennull(cs, req, args, oo, resp);
		break;
	case CLAIM_PREVIOUS:
		rfs4_do_openprev(cs, req, args, oo, resp);
		break;
	case CLAIM_DELEGATE_CUR:
		rfs4_do_opendelcur(cs, req, args, oo, resp);
		break;
	case CLAIM_DELEGATE_PREV:
		rfs4_do_opendelprev(cs, req, args, oo, resp);
		break;
	default:
		resp->status = NFS4ERR_INVAL;
		break;
	}

out:
	rfs4_client_rele(cp);

	/* Catch sequence id handling here to make it a little easier */
	switch (resp->status) {
	case NFS4ERR_BADXDR:
	case NFS4ERR_BAD_SEQID:
	case NFS4ERR_BAD_STATEID:
	case NFS4ERR_NOFILEHANDLE:
	case NFS4ERR_RESOURCE:
	case NFS4ERR_STALE_CLIENTID:
	case NFS4ERR_STALE_STATEID:
		/*
		 * The protocol states that if any of these errors are
		 * being returned, the sequence id should not be
		 * incremented.  Any other return requires an
		 * increment.
		 */
		break;
	default:
		/* Always update the lease in this case */
		rfs4_update_lease(oo->ro_client);

		/* Regular response - copy the result */
		if (!replay)
			rfs4_update_open_resp(oo, resop, &cs->fh);

		/*
		 * REPLAY case: Only if the previous response was OK
		 * do we copy the filehandle.  If not OK, no
		 * filehandle to copy.
		 */
		if (replay == TRUE &&
		    resp->status == NFS4_OK &&
		    oo->ro_reply_fh.nfs_fh4_val) {
			/*
			 * If this is a replay, we must restore the
			 * current filehandle/vp to that of what was
			 * returned originally.  Try our best to do
			 * it.
			 */
			nfs_fh4_fmt_t *fh_fmtp =
			    (nfs_fh4_fmt_t *)oo->ro_reply_fh.nfs_fh4_val;

			cs->exi = checkexport4(&fh_fmtp->fh4_fsid,
			    (fid_t *)&fh_fmtp->fh4_xlen, NULL);

			if (cs->exi == NULL) {
				resp->status = NFS4ERR_STALE;
				goto finish;
			}

			/* Drop the old vnode before looking up the saved one */
			VN_RELE(cs->vp);

			/* On failure nfs4_fhtovp() is handed resp->status */
			cs->vp = nfs4_fhtovp(&oo->ro_reply_fh, cs->exi,
			    &resp->status);

			if (cs->vp == NULL)
				goto finish;

			nfs_fh4_copy(&oo->ro_reply_fh, &cs->fh);
		}

		/*
		 * If this was a replay, no need to update the
		 * sequence id.  If the open_owner was not created on
		 * this pass, then update.  The first use of an
		 * open_owner will not bump the sequence id.
		 */
		if (replay == FALSE && !create)
			rfs4_update_open_sequence(oo);
		/*
		 * If the client is receiving an error and the
		 * open_owner needs to be confirmed, there is no way
		 * to notify the client of this fact ignoring the fact
		 * that the server has no method of returning a
		 * stateid to confirm.  Therefore, the server needs to
		 * mark this open_owner in a way as to avoid the
		 * sequence id checking the next time the client uses
		 * this open_owner.
		 */
		if (resp->status != NFS4_OK && oo->ro_need_confirm)
			oo->ro_postpone_confirm = TRUE;
		/*
		 * If OK response then clear the postpone flag and
		 * reset the sequence id to keep in sync with the
		 * client.
		 */
		if (resp->status == NFS4_OK && oo->ro_postpone_confirm) {
			oo->ro_postpone_confirm = FALSE;
			oo->ro_open_seqid = args->seqid;
		}
		break;
	}

finish:
	*cs->statusp = resp->status;

	rfs4_sw_exit(&oo->ro_sw);
	rfs4_openowner_rele(oo);

end:
	DTRACE_NFSV4_2(op__open__done, struct compound_state *, cs,
	    OPEN4res *, resp);
}
7614
7615 /*ARGSUSED*/
7616 void
7617 rfs4_op_open_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
7618 struct svc_req *req, struct compound_state *cs)
7619 {
7620 OPEN_CONFIRM4args *args = &argop->nfs_argop4_u.opopen_confirm;
7621 OPEN_CONFIRM4res *resp = &resop->nfs_resop4_u.opopen_confirm;
7622 rfs4_state_t *sp;
7623 nfsstat4 status;
7624
7625 DTRACE_NFSV4_2(op__open__confirm__start, struct compound_state *, cs,
7626 OPEN_CONFIRM4args *, args);
7627
7628 if (cs->vp == NULL) {
7629 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7630 goto out;
7631 }
7632
7633 if (cs->vp->v_type != VREG) {
7634 *cs->statusp = resp->status =
7635 cs->vp->v_type == VDIR ? NFS4ERR_ISDIR : NFS4ERR_INVAL;
7636 return;
7637 }
7638
7639 status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7640 if (status != NFS4_OK) {
7641 *cs->statusp = resp->status = status;
7642 goto out;
7643 }
7644
7645 /* Ensure specified filehandle matches */
7646 if (cs->vp != sp->rs_finfo->rf_vp) {
7647 rfs4_state_rele(sp);
7648 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7649 goto out;
7650 }
7651
7652 /* hold off other access to open_owner while we tinker */
7653 rfs4_sw_enter(&sp->rs_owner->ro_sw);
7654
7655 switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7656 case NFS4_CHECK_STATEID_OKAY:
7657 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7658 resop) != 0) {
7659 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7660 break;
7661 }
7662 /*
7663 * If it is the appropriate stateid and determined to
7664 * be "OKAY" then this means that the stateid does not
7665 * need to be confirmed and the client is in error for
7666 * sending an OPEN_CONFIRM.
7667 */
7668 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7669 break;
7670 case NFS4_CHECK_STATEID_OLD:
7671 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7672 break;
7673 case NFS4_CHECK_STATEID_BAD:
7674 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7675 break;
7676 case NFS4_CHECK_STATEID_EXPIRED:
7677 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7678 break;
7679 case NFS4_CHECK_STATEID_CLOSED:
7680 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7681 break;
7682 case NFS4_CHECK_STATEID_REPLAY:
7683 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7684 resop)) {
7685 case NFS4_CHKSEQ_OKAY:
7686 /*
7687 * This is replayed stateid; if seqid matches
7688 * next expected, then client is using wrong seqid.
7689 */
7690 /* fall through */
7691 case NFS4_CHKSEQ_BAD:
7692 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7693 break;
7694 case NFS4_CHKSEQ_REPLAY:
7695 /*
7696 * Note this case is the duplicate case so
7697 * resp->status is already set.
7698 */
7699 *cs->statusp = resp->status;
7700 rfs4_update_lease(sp->rs_owner->ro_client);
7701 break;
7702 }
7703 break;
7704 case NFS4_CHECK_STATEID_UNCONFIRMED:
7705 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7706 resop) != NFS4_CHKSEQ_OKAY) {
7707 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7708 break;
7709 }
7710 *cs->statusp = resp->status = NFS4_OK;
7711
7712 next_stateid(&sp->rs_stateid);
7713 resp->open_stateid = sp->rs_stateid.stateid;
7714 sp->rs_owner->ro_need_confirm = FALSE;
7715 rfs4_update_lease(sp->rs_owner->ro_client);
7716 rfs4_update_open_sequence(sp->rs_owner);
7717 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7718 break;
7719 default:
7720 ASSERT(FALSE);
7721 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7722 break;
7723 }
7724 rfs4_sw_exit(&sp->rs_owner->ro_sw);
7725 rfs4_state_rele(sp);
7726
7727 out:
7728 DTRACE_NFSV4_2(op__open__confirm__done, struct compound_state *, cs,
7729 OPEN_CONFIRM4res *, resp);
7730 }
7731
7732 /*ARGSUSED*/
7733 void
7734 rfs4_op_open_downgrade(nfs_argop4 *argop, nfs_resop4 *resop,
7735 struct svc_req *req, struct compound_state *cs)
7736 {
7737 OPEN_DOWNGRADE4args *args = &argop->nfs_argop4_u.opopen_downgrade;
7738 OPEN_DOWNGRADE4res *resp = &resop->nfs_resop4_u.opopen_downgrade;
7739 uint32_t access = args->share_access;
7740 uint32_t deny = args->share_deny;
7741 nfsstat4 status;
7742 rfs4_state_t *sp;
7743 rfs4_file_t *fp;
7744 int fflags = 0;
7745
7746 DTRACE_NFSV4_2(op__open__downgrade__start, struct compound_state *, cs,
7747 OPEN_DOWNGRADE4args *, args);
7748
7749 if (cs->vp == NULL) {
7750 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7751 goto out;
7752 }
7753
7754 if (cs->vp->v_type != VREG) {
7755 *cs->statusp = resp->status = NFS4ERR_INVAL;
7756 return;
7757 }
7758
7759 status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7760 if (status != NFS4_OK) {
7761 *cs->statusp = resp->status = status;
7762 goto out;
7763 }
7764
7765 /* Ensure specified filehandle matches */
7766 if (cs->vp != sp->rs_finfo->rf_vp) {
7767 rfs4_state_rele(sp);
7768 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7769 goto out;
7770 }
7771
7772 /* hold off other access to open_owner while we tinker */
7773 rfs4_sw_enter(&sp->rs_owner->ro_sw);
7774
7775 switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7776 case NFS4_CHECK_STATEID_OKAY:
7777 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7778 resop) != NFS4_CHKSEQ_OKAY) {
7779 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7780 goto end;
7781 }
7782 break;
7783 case NFS4_CHECK_STATEID_OLD:
7784 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7785 goto end;
7786 case NFS4_CHECK_STATEID_BAD:
7787 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7788 goto end;
7789 case NFS4_CHECK_STATEID_EXPIRED:
7790 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7791 goto end;
7792 case NFS4_CHECK_STATEID_CLOSED:
7793 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7794 goto end;
7795 case NFS4_CHECK_STATEID_UNCONFIRMED:
7796 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7797 goto end;
7798 case NFS4_CHECK_STATEID_REPLAY:
7799 /* Check the sequence id for the open owner */
7800 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7801 resop)) {
7802 case NFS4_CHKSEQ_OKAY:
7803 /*
7804 * This is replayed stateid; if seqid matches
7805 * next expected, then client is using wrong seqid.
7806 */
7807 /* fall through */
7808 case NFS4_CHKSEQ_BAD:
7809 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7810 goto end;
7811 case NFS4_CHKSEQ_REPLAY:
7812 /*
7813 * Note this case is the duplicate case so
7814 * resp->status is already set.
7815 */
7816 *cs->statusp = resp->status;
7817 rfs4_update_lease(sp->rs_owner->ro_client);
7818 goto end;
7819 }
7820 break;
7821 default:
7822 ASSERT(FALSE);
7823 break;
7824 }
7825
7826 rfs4_dbe_lock(sp->rs_dbe);
7827 /*
7828 * Check that the new access modes and deny modes are valid.
7829 * Check that no invalid bits are set.
7830 */
7831 if ((access & ~(OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) ||
7832 (deny & ~(OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE))) {
7833 *cs->statusp = resp->status = NFS4ERR_INVAL;
7834 rfs4_update_open_sequence(sp->rs_owner);
7835 rfs4_dbe_unlock(sp->rs_dbe);
7836 goto end;
7837 }
7838
7839 /*
7840 * The new modes must be a subset of the current modes and
7841 * the access must specify at least one mode. To test that
7842 * the new mode is a subset of the current modes we bitwise
7843 * AND them together and check that the result equals the new
7844 * mode. For example:
7845 * New mode, access == R and current mode, sp->rs_open_access == RW
7846 * access & sp->rs_open_access == R == access, so the new access mode
7847 * is valid. Consider access == RW, sp->rs_open_access = R
7848 * access & sp->rs_open_access == R != access, so the new access mode
7849 * is invalid.
7850 */
7851 if ((access & sp->rs_open_access) != access ||
7852 (deny & sp->rs_open_deny) != deny ||
7853 (access &
7854 (OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) == 0) {
7855 *cs->statusp = resp->status = NFS4ERR_INVAL;
7856 rfs4_update_open_sequence(sp->rs_owner);
7857 rfs4_dbe_unlock(sp->rs_dbe);
7858 goto end;
7859 }
7860
7861 /*
7862 * Release any share locks associated with this stateID.
7863 * Strictly speaking, this violates the spec because the
7864 * spec effectively requires that open downgrade be atomic.
7865 * At present, fs_shrlock does not have this capability.
7866 */
7867 (void) rfs4_unshare(sp);
7868
7869 status = rfs4_share(sp, access, deny);
7870 if (status != NFS4_OK) {
7871 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7872 rfs4_update_open_sequence(sp->rs_owner);
7873 rfs4_dbe_unlock(sp->rs_dbe);
7874 goto end;
7875 }
7876
7877 fp = sp->rs_finfo;
7878 rfs4_dbe_lock(fp->rf_dbe);
7879
7880 /*
7881 * If the current mode has deny read and the new mode
7882 * does not, decrement the number of deny read mode bits
7883 * and if it goes to zero turn off the deny read bit
7884 * on the file.
7885 */
7886 if ((sp->rs_open_deny & OPEN4_SHARE_DENY_READ) &&
7887 (deny & OPEN4_SHARE_DENY_READ) == 0) {
7888 fp->rf_deny_read--;
7889 if (fp->rf_deny_read == 0)
7890 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
7891 }
7892
7893 /*
7894 * If the current mode has deny write and the new mode
7895 * does not, decrement the number of deny write mode bits
7896 * and if it goes to zero turn off the deny write bit
7897 * on the file.
7898 */
7899 if ((sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) &&
7900 (deny & OPEN4_SHARE_DENY_WRITE) == 0) {
7901 fp->rf_deny_write--;
7902 if (fp->rf_deny_write == 0)
7903 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
7904 }
7905
7906 /*
7907 * If the current mode has access read and the new mode
7908 * does not, decrement the number of access read mode bits
7909 * and if it goes to zero turn off the access read bit
7910 * on the file. set fflags to FREAD for the call to
7911 * vn_open_downgrade().
7912 */
7913 if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) &&
7914 (access & OPEN4_SHARE_ACCESS_READ) == 0) {
7915 fp->rf_access_read--;
7916 if (fp->rf_access_read == 0)
7917 fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
7918 fflags |= FREAD;
7919 }
7920
7921 /*
7922 * If the current mode has access write and the new mode
7923 * does not, decrement the number of access write mode bits
7924 * and if it goes to zero turn off the access write bit
7925 * on the file. set fflags to FWRITE for the call to
7926 * vn_open_downgrade().
7927 */
7928 if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) &&
7929 (access & OPEN4_SHARE_ACCESS_WRITE) == 0) {
7930 fp->rf_access_write--;
7931 if (fp->rf_access_write == 0)
7932 fp->rf_share_deny &= ~OPEN4_SHARE_ACCESS_WRITE;
7933 fflags |= FWRITE;
7934 }
7935
7936 /* Check that the file is still accessible */
7937 ASSERT(fp->rf_share_access);
7938
7939 rfs4_dbe_unlock(fp->rf_dbe);
7940
7941 /* now set the new open access and deny modes */
7942 sp->rs_open_access = access;
7943 sp->rs_open_deny = deny;
7944
7945 /*
7946 * we successfully downgraded the share lock, now we need to downgrade
7947 * the open. it is possible that the downgrade was only for a deny
7948 * mode and we have nothing else to do.
7949 */
7950 if ((fflags & (FREAD|FWRITE)) != 0)
7951 vn_open_downgrade(cs->vp, fflags);
7952
7953 /* Update the stateid */
7954 next_stateid(&sp->rs_stateid);
7955 resp->open_stateid = sp->rs_stateid.stateid;
7956
7957 rfs4_dbe_unlock(sp->rs_dbe);
7958
7959 *cs->statusp = resp->status = NFS4_OK;
7960 /* Update the lease */
7961 rfs4_update_lease(sp->rs_owner->ro_client);
7962 /* And the sequence */
7963 rfs4_update_open_sequence(sp->rs_owner);
7964 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7965
7966 end:
7967 rfs4_sw_exit(&sp->rs_owner->ro_sw);
7968 rfs4_state_rele(sp);
7969 out:
7970 DTRACE_NFSV4_2(op__open__downgrade__done, struct compound_state *, cs,
7971 OPEN_DOWNGRADE4res *, resp);
7972 }
7973
/*
 * Search the first n bytes of the buffer s1 for the NUL-terminated
 * string s2.  The buffer itself does not have to be NUL-terminated.
 *
 * Returns a pointer to the first byte of the first match, or NULL when
 * s2 does not occur within those n bytes.
 */
static void *
memstr(const void *s1, const char *s2, size_t n)
{
	char *cursor = (char *)s1;
	size_t patlen = strlen(s2);
	size_t remaining;

	/* Give up as soon as fewer bytes remain than the pattern needs. */
	for (remaining = n; remaining >= patlen; remaining--, cursor++) {
		if (bcmp(cursor, s2, patlen) == 0)
			return (cursor);
	}

	return (NULL);
}
7989
7990 /*
7991 * The logic behind this function is detailed in the NFSv4 RFC in the
7992 * SETCLIENTID operation description under IMPLEMENTATION. Refer to
7993 * that section for explicit guidance to server behavior for
7994 * SETCLIENTID.
7995 */
7996 void
7997 rfs4_op_setclientid(nfs_argop4 *argop, nfs_resop4 *resop,
7998 struct svc_req *req, struct compound_state *cs)
7999 {
8000 SETCLIENTID4args *args = &argop->nfs_argop4_u.opsetclientid;
8001 SETCLIENTID4res *res = &resop->nfs_resop4_u.opsetclientid;
8002 rfs4_client_t *cp, *newcp, *cp_confirmed, *cp_unconfirmed;
8003 rfs4_clntip_t *ci;
8004 bool_t create;
8005 char *addr, *netid;
8006 int len;
8007
8008 DTRACE_NFSV4_2(op__setclientid__start, struct compound_state *, cs,
8009 SETCLIENTID4args *, args);
8010 retry:
8011 newcp = cp_confirmed = cp_unconfirmed = NULL;
8012
8013 /*
8014 * Save the caller's IP address
8015 */
8016 args->client.cl_addr =
8017 (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
8018
8019 /*
8020 * Record if it is a Solaris client that cannot handle referrals.
8021 */
8022 if (memstr(args->client.id_val, "Solaris", args->client.id_len) &&
8023 !memstr(args->client.id_val, "+referrals", args->client.id_len)) {
8024 /* Add a "yes, it's downrev" record */
8025 create = TRUE;
8026 ci = rfs4_find_clntip(args->client.cl_addr, &create);
8027 ASSERT(ci != NULL);
8028 rfs4_dbe_rele(ci->ri_dbe);
8029 } else {
8030 /* Remove any previous record */
8031 rfs4_invalidate_clntip(args->client.cl_addr);
8032 }
8033
8034 /*
8035 * In search of an EXISTING client matching the incoming
8036 * request to establish a new client identifier at the server
8037 */
8038 create = TRUE;
8039 cp = rfs4_findclient(&args->client, &create, NULL);
8040
8041 /* Should never happen */
8042 ASSERT(cp != NULL);
8043
8044 if (cp == NULL) {
8045 *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8046 goto out;
8047 }
8048
8049 /*
8050 * Easiest case. Client identifier is newly created and is
8051 * unconfirmed. Also note that for this case, no other
8052 * entries exist for the client identifier. Nothing else to
8053 * check. Just setup the response and respond.
8054 */
8055 if (create) {
8056 *cs->statusp = res->status = NFS4_OK;
8057 res->SETCLIENTID4res_u.resok4.clientid = cp->rc_clientid;
8058 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8059 cp->rc_confirm_verf;
8060 /* Setup callback information; CB_NULL confirmation later */
8061 rfs4_client_setcb(cp, &args->callback, args->callback_ident);
8062
8063 rfs4_client_rele(cp);
8064 goto out;
8065 }
8066
8067 /*
8068 * An existing, confirmed client may exist but it may not have
8069 * been active for at least one lease period. If so, then
8070 * "close" the client and create a new client identifier
8071 */
8072 if (rfs4_lease_expired(cp)) {
8073 rfs4_client_close(cp);
8074 goto retry;
8075 }
8076
8077 if (cp->rc_need_confirm == TRUE)
8078 cp_unconfirmed = cp;
8079 else
8080 cp_confirmed = cp;
8081
8082 cp = NULL;
8083
8084 /*
8085 * We have a confirmed client, now check for an
8086 * unconfimred entry
8087 */
8088 if (cp_confirmed) {
8089 /* If creds don't match then client identifier is inuse */
8090 if (!creds_ok(cp_confirmed->rc_cr_set, req, cs)) {
8091 rfs4_cbinfo_t *cbp;
8092 /*
8093 * Some one else has established this client
8094 * id. Try and say * who they are. We will use
8095 * the call back address supplied by * the
8096 * first client.
8097 */
8098 *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8099
8100 addr = netid = NULL;
8101
8102 cbp = &cp_confirmed->rc_cbinfo;
8103 if (cbp->cb_callback.cb_location.r_addr &&
8104 cbp->cb_callback.cb_location.r_netid) {
8105 cb_client4 *cbcp = &cbp->cb_callback;
8106
8107 len = strlen(cbcp->cb_location.r_addr)+1;
8108 addr = kmem_alloc(len, KM_SLEEP);
8109 bcopy(cbcp->cb_location.r_addr, addr, len);
8110 len = strlen(cbcp->cb_location.r_netid)+1;
8111 netid = kmem_alloc(len, KM_SLEEP);
8112 bcopy(cbcp->cb_location.r_netid, netid, len);
8113 }
8114
8115 res->SETCLIENTID4res_u.client_using.r_addr = addr;
8116 res->SETCLIENTID4res_u.client_using.r_netid = netid;
8117
8118 rfs4_client_rele(cp_confirmed);
8119 }
8120
8121 /*
8122 * Confirmed, creds match, and verifier matches; must
8123 * be an update of the callback info
8124 */
8125 if (cp_confirmed->rc_nfs_client.verifier ==
8126 args->client.verifier) {
8127 /* Setup callback information */
8128 rfs4_client_setcb(cp_confirmed, &args->callback,
8129 args->callback_ident);
8130
8131 /* everything okay -- move ahead */
8132 *cs->statusp = res->status = NFS4_OK;
8133 res->SETCLIENTID4res_u.resok4.clientid =
8134 cp_confirmed->rc_clientid;
8135
8136 /* update the confirm_verifier and return it */
8137 rfs4_client_scv_next(cp_confirmed);
8138 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8139 cp_confirmed->rc_confirm_verf;
8140
8141 rfs4_client_rele(cp_confirmed);
8142 goto out;
8143 }
8144
8145 /*
8146 * Creds match but the verifier doesn't. Must search
8147 * for an unconfirmed client that would be replaced by
8148 * this request.
8149 */
8150 create = FALSE;
8151 cp_unconfirmed = rfs4_findclient(&args->client, &create,
8152 cp_confirmed);
8153 }
8154
8155 /*
8156 * At this point, we have taken care of the brand new client
8157 * struct, INUSE case, update of an existing, and confirmed
8158 * client struct.
8159 */
8160
8161 /*
8162 * check to see if things have changed while we originally
8163 * picked up the client struct. If they have, then return and
8164 * retry the processing of this SETCLIENTID request.
8165 */
8166 if (cp_unconfirmed) {
8167 rfs4_dbe_lock(cp_unconfirmed->rc_dbe);
8168 if (!cp_unconfirmed->rc_need_confirm) {
8169 rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8170 rfs4_client_rele(cp_unconfirmed);
8171 if (cp_confirmed)
8172 rfs4_client_rele(cp_confirmed);
8173 goto retry;
8174 }
8175 /* do away with the old unconfirmed one */
8176 rfs4_dbe_invalidate(cp_unconfirmed->rc_dbe);
8177 rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8178 rfs4_client_rele(cp_unconfirmed);
8179 cp_unconfirmed = NULL;
8180 }
8181
8182 /*
8183 * This search will temporarily hide the confirmed client
8184 * struct while a new client struct is created as the
8185 * unconfirmed one.
8186 */
8187 create = TRUE;
8188 newcp = rfs4_findclient(&args->client, &create, cp_confirmed);
8189
8190 ASSERT(newcp != NULL);
8191
8192 if (newcp == NULL) {
8193 *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8194 rfs4_client_rele(cp_confirmed);
8195 goto out;
8196 }
8197
8198 /*
8199 * If one was not created, then a similar request must be in
8200 * process so release and start over with this one
8201 */
8202 if (create != TRUE) {
8203 rfs4_client_rele(newcp);
8204 if (cp_confirmed)
8205 rfs4_client_rele(cp_confirmed);
8206 goto retry;
8207 }
8208
8209 *cs->statusp = res->status = NFS4_OK;
8210 res->SETCLIENTID4res_u.resok4.clientid = newcp->rc_clientid;
8211 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8212 newcp->rc_confirm_verf;
8213 /* Setup callback information; CB_NULL confirmation later */
8214 rfs4_client_setcb(newcp, &args->callback, args->callback_ident);
8215
8216 newcp->rc_cp_confirmed = cp_confirmed;
8217
8218 rfs4_client_rele(newcp);
8219
8220 out:
8221 DTRACE_NFSV4_2(op__setclientid__done, struct compound_state *, cs,
8222 SETCLIENTID4res *, res);
8223 }
8224
8225 /*ARGSUSED*/
8226 void
8227 rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
8228 struct svc_req *req, struct compound_state *cs)
8229 {
8230 SETCLIENTID_CONFIRM4args *args =
8231 &argop->nfs_argop4_u.opsetclientid_confirm;
8232 SETCLIENTID_CONFIRM4res *res =
8233 &resop->nfs_resop4_u.opsetclientid_confirm;
8234 rfs4_client_t *cp, *cptoclose = NULL;
8235 nfs4_srv_t *nsrv4;
8236
8237 DTRACE_NFSV4_2(op__setclientid__confirm__start,
8238 struct compound_state *, cs,
8239 SETCLIENTID_CONFIRM4args *, args);
8240
8241 nsrv4 = nfs4_get_srv();
8242 *cs->statusp = res->status = NFS4_OK;
8243
8244 cp = rfs4_findclient_by_id(args->clientid, TRUE);
8245
8246 if (cp == NULL) {
8247 *cs->statusp = res->status =
8248 rfs4_check_clientid(&args->clientid, 1);
8249 goto out;
8250 }
8251
8252 if (!creds_ok(cp, req, cs)) {
8253 *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8254 rfs4_client_rele(cp);
8255 goto out;
8256 }
8257
8258 /* If the verifier doesn't match, the record doesn't match */
8259 if (cp->rc_confirm_verf != args->setclientid_confirm) {
8260 *cs->statusp = res->status = NFS4ERR_STALE_CLIENTID;
8261 rfs4_client_rele(cp);
8262 goto out;
8263 }
8264
8265 rfs4_dbe_lock(cp->rc_dbe);
8266 cp->rc_need_confirm = FALSE;
8267 if (cp->rc_cp_confirmed) {
8268 cptoclose = cp->rc_cp_confirmed;
8269 cptoclose->rc_ss_remove = 1;
8270 cp->rc_cp_confirmed = NULL;
8271 }
8272
8273 /*
8274 * Update the client's associated server instance, if it's changed
8275 * since the client was created.
8276 */
8277 if (rfs4_servinst(cp) != nsrv4->nfs4_cur_servinst)
8278 rfs4_servinst_assign(nsrv4, cp, nsrv4->nfs4_cur_servinst);
8279
8280 /*
8281 * Record clientid in stable storage.
8282 * Must be done after server instance has been assigned.
8283 */
8284 rfs4_ss_clid(nsrv4, cp);
8285
8286 rfs4_dbe_unlock(cp->rc_dbe);
8287
8288 if (cptoclose)
8289 /* don't need to rele, client_close does it */
8290 rfs4_client_close(cptoclose);
8291
8292 /* If needed, initiate CB_NULL call for callback path */
8293 rfs4_deleg_cb_check(cp);
8294 rfs4_update_lease(cp);
8295
8296 /*
8297 * Check to see if client can perform reclaims
8298 */
8299 rfs4_ss_chkclid(nsrv4, cp);
8300
8301 rfs4_client_rele(cp);
8302
8303 out:
8304 DTRACE_NFSV4_2(op__setclientid__confirm__done,
8305 struct compound_state *, cs,
8306 SETCLIENTID_CONFIRM4 *, res);
8307 }
8308
8309
8310 /*ARGSUSED*/
8311 void
8312 rfs4_op_close(nfs_argop4 *argop, nfs_resop4 *resop,
8313 struct svc_req *req, struct compound_state *cs)
8314 {
8315 CLOSE4args *args = &argop->nfs_argop4_u.opclose;
8316 CLOSE4res *resp = &resop->nfs_resop4_u.opclose;
8317 rfs4_state_t *sp;
8318 nfsstat4 status;
8319
8320 DTRACE_NFSV4_2(op__close__start, struct compound_state *, cs,
8321 CLOSE4args *, args);
8322
8323 if (cs->vp == NULL) {
8324 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8325 goto out;
8326 }
8327
8328 status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_INVALID);
8329 if (status != NFS4_OK) {
8330 *cs->statusp = resp->status = status;
8331 goto out;
8332 }
8333
8334 /* Ensure specified filehandle matches */
8335 if (cs->vp != sp->rs_finfo->rf_vp) {
8336 rfs4_state_rele(sp);
8337 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8338 goto out;
8339 }
8340
8341 /* hold off other access to open_owner while we tinker */
8342 rfs4_sw_enter(&sp->rs_owner->ro_sw);
8343
8344 switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
8345 case NFS4_CHECK_STATEID_OKAY:
8346 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8347 resop) != NFS4_CHKSEQ_OKAY) {
8348 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8349 goto end;
8350 }
8351 break;
8352 case NFS4_CHECK_STATEID_OLD:
8353 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8354 goto end;
8355 case NFS4_CHECK_STATEID_BAD:
8356 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8357 goto end;
8358 case NFS4_CHECK_STATEID_EXPIRED:
8359 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8360 goto end;
8361 case NFS4_CHECK_STATEID_CLOSED:
8362 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8363 goto end;
8364 case NFS4_CHECK_STATEID_UNCONFIRMED:
8365 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8366 goto end;
8367 case NFS4_CHECK_STATEID_REPLAY:
8368 /* Check the sequence id for the open owner */
8369 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8370 resop)) {
8371 case NFS4_CHKSEQ_OKAY:
8372 /*
8373 * This is replayed stateid; if seqid matches
8374 * next expected, then client is using wrong seqid.
8375 */
8376 /* FALL THROUGH */
8377 case NFS4_CHKSEQ_BAD:
8378 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8379 goto end;
8380 case NFS4_CHKSEQ_REPLAY:
8381 /*
8382 * Note this case is the duplicate case so
8383 * resp->status is already set.
8384 */
8385 *cs->statusp = resp->status;
8386 rfs4_update_lease(sp->rs_owner->ro_client);
8387 goto end;
8388 }
8389 break;
8390 default:
8391 ASSERT(FALSE);
8392 break;
8393 }
8394
8395 rfs4_dbe_lock(sp->rs_dbe);
8396
8397 /* Update the stateid. */
8398 next_stateid(&sp->rs_stateid);
8399 resp->open_stateid = sp->rs_stateid.stateid;
8400
8401 rfs4_dbe_unlock(sp->rs_dbe);
8402
8403 rfs4_update_lease(sp->rs_owner->ro_client);
8404 rfs4_update_open_sequence(sp->rs_owner);
8405 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8406
8407 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
8408
8409 *cs->statusp = resp->status = status;
8410
8411 end:
8412 rfs4_sw_exit(&sp->rs_owner->ro_sw);
8413 rfs4_state_rele(sp);
8414 out:
8415 DTRACE_NFSV4_2(op__close__done, struct compound_state *, cs,
8416 CLOSE4res *, resp);
8417 }
8418
8419 /*
8420 * Manage the counts on the file struct and close all file locks
8421 */
8422 /*ARGSUSED*/
8423 void
8424 rfs4_release_share_lock_state(rfs4_state_t *sp, cred_t *cr,
8425 bool_t close_of_client)
8426 {
8427 rfs4_file_t *fp = sp->rs_finfo;
8428 rfs4_lo_state_t *lsp;
8429 int fflags = 0;
8430
8431 /*
8432 * If this call is part of the larger closing down of client
8433 * state then it is just easier to release all locks
8434 * associated with this client instead of going through each
8435 * individual file and cleaning locks there.
8436 */
8437 if (close_of_client) {
8438 if (sp->rs_owner->ro_client->rc_unlksys_completed == FALSE &&
8439 !list_is_empty(&sp->rs_lostatelist) &&
8440 sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID) {
8441 /* Is the PxFS kernel module loaded? */
8442 if (lm_remove_file_locks != NULL) {
8443 int new_sysid;
8444
8445 /* Encode the cluster nodeid in new sysid */
8446 new_sysid = sp->rs_owner->ro_client->rc_sysidt;
8447 lm_set_nlmid_flk(&new_sysid);
8448
8449 /*
8450 * This PxFS routine removes file locks for a
8451 * client over all nodes of a cluster.
8452 */
8453 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
8454 "lm_remove_file_locks(sysid=0x%x)\n",
8455 new_sysid));
8456 (*lm_remove_file_locks)(new_sysid);
8457 } else {
8458 struct flock64 flk;
8459
8460 /* Release all locks for this client */
8461 flk.l_type = F_UNLKSYS;
8462 flk.l_whence = 0;
8463 flk.l_start = 0;
8464 flk.l_len = 0;
8465 flk.l_sysid =
8466 sp->rs_owner->ro_client->rc_sysidt;
8467 flk.l_pid = 0;
8468 (void) VOP_FRLOCK(sp->rs_finfo->rf_vp, F_SETLK,
8469 &flk, F_REMOTELOCK | FREAD | FWRITE,
8470 (u_offset_t)0, NULL, CRED(), NULL);
8471 }
8472
8473 sp->rs_owner->ro_client->rc_unlksys_completed = TRUE;
8474 }
8475 }
8476
8477 /*
8478 * Release all locks on this file by this lock owner or at
8479 * least mark the locks as having been released
8480 */
8481 for (lsp = list_head(&sp->rs_lostatelist); lsp != NULL;
8482 lsp = list_next(&sp->rs_lostatelist, lsp)) {
8483 lsp->rls_locks_cleaned = TRUE;
8484
8485 /* Was this already taken care of above? */
8486 if (!close_of_client &&
8487 sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8488 (void) cleanlocks(sp->rs_finfo->rf_vp,
8489 lsp->rls_locker->rl_pid,
8490 lsp->rls_locker->rl_client->rc_sysidt);
8491 }
8492
8493 /*
8494 * Release any shrlocks associated with this open state ID.
8495 * This must be done before the rfs4_state gets marked closed.
8496 */
8497 if (sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8498 (void) rfs4_unshare(sp);
8499
8500 if (sp->rs_open_access) {
8501 rfs4_dbe_lock(fp->rf_dbe);
8502
8503 /*
8504 * Decrement the count for each access and deny bit that this
8505 * state has contributed to the file.
8506 * If the file counts go to zero
8507 * clear the appropriate bit in the appropriate mask.
8508 */
8509 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) {
8510 fp->rf_access_read--;
8511 fflags |= FREAD;
8512 if (fp->rf_access_read == 0)
8513 fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
8514 }
8515 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) {
8516 fp->rf_access_write--;
8517 fflags |= FWRITE;
8518 if (fp->rf_access_write == 0)
8519 fp->rf_share_access &=
8520 ~OPEN4_SHARE_ACCESS_WRITE;
8521 }
8522 if (sp->rs_open_deny & OPEN4_SHARE_DENY_READ) {
8523 fp->rf_deny_read--;
8524 if (fp->rf_deny_read == 0)
8525 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
8526 }
8527 if (sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) {
8528 fp->rf_deny_write--;
8529 if (fp->rf_deny_write == 0)
8530 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
8531 }
8532
8533 (void) VOP_CLOSE(fp->rf_vp, fflags, 1, (offset_t)0, cr, NULL);
8534
8535 rfs4_dbe_unlock(fp->rf_dbe);
8536
8537 sp->rs_open_access = 0;
8538 sp->rs_open_deny = 0;
8539 }
8540 }
8541
8542 /*
8543 * lock_denied: Fill in a LOCK4deneid structure given an flock64 structure.
8544 */
8545 static nfsstat4
8546 lock_denied(LOCK4denied *dp, struct flock64 *flk)
8547 {
8548 rfs4_lockowner_t *lo;
8549 rfs4_client_t *cp;
8550 uint32_t len;
8551
8552 lo = rfs4_findlockowner_by_pid(flk->l_pid);
8553 if (lo != NULL) {
8554 cp = lo->rl_client;
8555 if (rfs4_lease_expired(cp)) {
8556 rfs4_lockowner_rele(lo);
8557 rfs4_dbe_hold(cp->rc_dbe);
8558 rfs4_client_close(cp);
8559 return (NFS4ERR_EXPIRED);
8560 }
8561 dp->owner.clientid = lo->rl_owner.clientid;
8562 len = lo->rl_owner.owner_len;
8563 dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8564 bcopy(lo->rl_owner.owner_val, dp->owner.owner_val, len);
8565 dp->owner.owner_len = len;
8566 rfs4_lockowner_rele(lo);
8567 goto finish;
8568 }
8569
8570 /*
8571 * Its not a NFS4 lock. We take advantage that the upper 32 bits
8572 * of the client id contain the boot time for a NFS4 lock. So we
8573 * fabricate and identity by setting clientid to the sysid, and
8574 * the lock owner to the pid.
8575 */
8576 dp->owner.clientid = flk->l_sysid;
8577 len = sizeof (pid_t);
8578 dp->owner.owner_len = len;
8579 dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8580 bcopy(&flk->l_pid, dp->owner.owner_val, len);
8581 finish:
8582 dp->offset = flk->l_start;
8583 dp->length = flk->l_len;
8584
8585 if (flk->l_type == F_RDLCK)
8586 dp->locktype = READ_LT;
8587 else if (flk->l_type == F_WRLCK)
8588 dp->locktype = WRITE_LT;
8589 else
8590 return (NFS4ERR_INVAL); /* no mapping from POSIX ltype to v4 */
8591
8592 return (NFS4_OK);
8593 }
8594
8595 /*
8596 * The NFSv4.0 LOCK operation does not support the blocking lock (at the
8597 * NFSv4.0 protocol level) so the client needs to resend the LOCK request in a
8598 * case the lock is denied by the NFSv4.0 server. NFSv4.0 clients are prepared
8599 * for that (obviously); they are sending the LOCK requests with some delays
8600 * between the attempts. See nfs4frlock() and nfs4_block_and_wait() for the
8601 * locking and delay implementation at the client side.
8602 *
8603 * To make the life of the clients easier, the NFSv4.0 server tries to do some
8604 * fast retries on its own (the for loop below) in a hope the lock will be
8605 * available soon. And if not, the client won't need to resend the LOCK
8606 * requests so fast to check the lock availability. This basically saves some
8607 * network traffic and tries to make sure the client gets the lock ASAP.
8608 */
8609 static int
8610 setlock(vnode_t *vp, struct flock64 *flock, int flag, cred_t *cred)
8611 {
8612 int error;
8613 struct flock64 flk;
8614 int i;
8615 clock_t delaytime;
8616 int cmd;
8617 int spin_cnt = 0;
8618
8619 cmd = nbl_need_check(vp) ? F_SETLK_NBMAND : F_SETLK;
8620 retry:
8621 delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
8622
8623 for (i = 0; i < rfs4_maxlock_tries; i++) {
8624 LOCK_PRINT(rfs4_debug, "setlock", cmd, flock);
8625 error = VOP_FRLOCK(vp, cmd,
8626 flock, flag, (u_offset_t)0, NULL, cred, NULL);
8627
8628 if (error != EAGAIN && error != EACCES)
8629 break;
8630
8631 if (i < rfs4_maxlock_tries - 1) {
8632 delay(delaytime);
8633 delaytime *= 2;
8634 }
8635 }
8636
8637 if (error == EAGAIN || error == EACCES) {
8638 /* Get the owner of the lock */
8639 flk = *flock;
8640 LOCK_PRINT(rfs4_debug, "setlock", F_GETLK, &flk);
8641 if (VOP_FRLOCK(vp, F_GETLK, &flk, flag, 0, NULL, cred,
8642 NULL) == 0) {
8643 /*
8644 * There's a race inherent in the current VOP_FRLOCK
8645 * design where:
8646 * a: "other guy" takes a lock that conflicts with a
8647 * lock we want
8648 * b: we attempt to take our lock (non-blocking) and
8649 * the attempt fails.
8650 * c: "other guy" releases the conflicting lock
8651 * d: we ask what lock conflicts with the lock we want,
8652 * getting F_UNLCK (no lock blocks us)
8653 *
8654 * If we retry the non-blocking lock attempt in this
8655 * case (restart at step 'b') there's some possibility
8656 * that many such attempts might fail. However a test
8657 * designed to actually provoke this race shows that
8658 * the vast majority of cases require no retry, and
8659 * only a few took as many as three retries. Here's
8660 * the test outcome:
8661 *
8662 * number of retries how many times we needed
8663 * that many retries
8664 * 0 79461
8665 * 1 862
8666 * 2 49
8667 * 3 5
8668 *
8669 * Given those empirical results, we arbitrarily limit
8670 * the retry count to ten.
8671 *
8672 * If we actually make to ten retries and give up,
8673 * nothing catastrophic happens, but we're unable to
8674 * return the information about the conflicting lock to
8675 * the NFS client. That's an acceptable trade off vs.
8676 * letting this retry loop run forever.
8677 */
8678 if (flk.l_type == F_UNLCK) {
8679 if (spin_cnt++ < 10) {
8680 /* No longer locked, retry */
8681 goto retry;
8682 }
8683 } else {
8684 *flock = flk;
8685 LOCK_PRINT(rfs4_debug, "setlock(blocking lock)",
8686 F_GETLK, &flk);
8687 }
8688 }
8689 }
8690
8691 return (error);
8692 }
8693
8694 /*ARGSUSED*/
8695 static nfsstat4
8696 rfs4_do_lock(rfs4_lo_state_t *lsp, nfs_lock_type4 locktype,
8697 offset4 offset, length4 length, cred_t *cred, nfs_resop4 *resop)
8698 {
8699 nfsstat4 status;
8700 rfs4_lockowner_t *lo = lsp->rls_locker;
8701 rfs4_state_t *sp = lsp->rls_state;
8702 struct flock64 flock;
8703 int16_t ltype;
8704 int flag;
8705 int error;
8706 sysid_t sysid;
8707 LOCK4res *lres;
8708 vnode_t *vp;
8709
8710 if (rfs4_lease_expired(lo->rl_client)) {
8711 return (NFS4ERR_EXPIRED);
8712 }
8713
8714 if ((status = rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
8715 return (status);
8716
8717 /* Check for zero length. To lock to end of file use all ones for V4 */
8718 if (length == 0)
8719 return (NFS4ERR_INVAL);
8720 else if (length == (length4)(~0))
8721 length = 0; /* Posix to end of file */
8722
8723 retry:
8724 rfs4_dbe_lock(sp->rs_dbe);
8725 if (sp->rs_closed == TRUE) {
8726 rfs4_dbe_unlock(sp->rs_dbe);
8727 return (NFS4ERR_OLD_STATEID);
8728 }
8729
8730 if (resop->resop != OP_LOCKU) {
8731 switch (locktype) {
8732 case READ_LT:
8733 case READW_LT:
8734 if ((sp->rs_share_access
8735 & OPEN4_SHARE_ACCESS_READ) == 0) {
8736 rfs4_dbe_unlock(sp->rs_dbe);
8737
8738 return (NFS4ERR_OPENMODE);
8739 }
8740 ltype = F_RDLCK;
8741 break;
8742 case WRITE_LT:
8743 case WRITEW_LT:
8744 if ((sp->rs_share_access
8745 & OPEN4_SHARE_ACCESS_WRITE) == 0) {
8746 rfs4_dbe_unlock(sp->rs_dbe);
8747
8748 return (NFS4ERR_OPENMODE);
8749 }
8750 ltype = F_WRLCK;
8751 break;
8752 }
8753 } else
8754 ltype = F_UNLCK;
8755
8756 flock.l_type = ltype;
8757 flock.l_whence = 0; /* SEEK_SET */
8758 flock.l_start = offset;
8759 flock.l_len = length;
8760 flock.l_sysid = sysid;
8761 flock.l_pid = lsp->rls_locker->rl_pid;
8762
8763 /* Note that length4 is uint64_t but l_len and l_start are off64_t */
8764 if (flock.l_len < 0 || flock.l_start < 0) {
8765 rfs4_dbe_unlock(sp->rs_dbe);
8766 return (NFS4ERR_INVAL);
8767 }
8768
8769 /*
8770 * N.B. FREAD has the same value as OPEN4_SHARE_ACCESS_READ and
8771 * FWRITE has the same value as OPEN4_SHARE_ACCESS_WRITE.
8772 */
8773 flag = (int)sp->rs_share_access | F_REMOTELOCK;
8774
8775 vp = sp->rs_finfo->rf_vp;
8776 VN_HOLD(vp);
8777
8778 /*
8779 * We need to unlock sp before we call the underlying filesystem to
8780 * acquire the file lock.
8781 */
8782 rfs4_dbe_unlock(sp->rs_dbe);
8783
8784 error = setlock(vp, &flock, flag, cred);
8785
8786 /*
8787 * Make sure the file is still open. In a case the file was closed in
8788 * the meantime, clean the lock we acquired using the setlock() call
8789 * above, and return the appropriate error.
8790 */
8791 rfs4_dbe_lock(sp->rs_dbe);
8792 if (sp->rs_closed == TRUE) {
8793 cleanlocks(vp, lsp->rls_locker->rl_pid, sysid);
8794 rfs4_dbe_unlock(sp->rs_dbe);
8795
8796 VN_RELE(vp);
8797
8798 return (NFS4ERR_OLD_STATEID);
8799 }
8800 rfs4_dbe_unlock(sp->rs_dbe);
8801
8802 VN_RELE(vp);
8803
8804 if (error == 0) {
8805 rfs4_dbe_lock(lsp->rls_dbe);
8806 next_stateid(&lsp->rls_lockid);
8807 rfs4_dbe_unlock(lsp->rls_dbe);
8808 }
8809
8810 /*
8811 * N.B. We map error values to nfsv4 errors. This is differrent
8812 * than puterrno4 routine.
8813 */
8814 switch (error) {
8815 case 0:
8816 status = NFS4_OK;
8817 break;
8818 case EAGAIN:
8819 case EACCES: /* Old value */
8820 /* Can only get here if op is OP_LOCK */
8821 ASSERT(resop->resop == OP_LOCK);
8822 lres = &resop->nfs_resop4_u.oplock;
8823 status = NFS4ERR_DENIED;
8824 if (lock_denied(&lres->LOCK4res_u.denied, &flock)
8825 == NFS4ERR_EXPIRED)
8826 goto retry;
8827 break;
8828 case ENOLCK:
8829 status = NFS4ERR_DELAY;
8830 break;
8831 case EOVERFLOW:
8832 status = NFS4ERR_INVAL;
8833 break;
8834 case EINVAL:
8835 status = NFS4ERR_NOTSUPP;
8836 break;
8837 default:
8838 status = NFS4ERR_SERVERFAULT;
8839 break;
8840 }
8841
8842 return (status);
8843 }
8844
8845 /*ARGSUSED*/
8846 void
8847 rfs4_op_lock(nfs_argop4 *argop, nfs_resop4 *resop,
8848 struct svc_req *req, struct compound_state *cs)
8849 {
8850 LOCK4args *args = &argop->nfs_argop4_u.oplock;
8851 LOCK4res *resp = &resop->nfs_resop4_u.oplock;
8852 nfsstat4 status;
8853 stateid4 *stateid;
8854 rfs4_lockowner_t *lo;
8855 rfs4_client_t *cp;
8856 rfs4_state_t *sp = NULL;
8857 rfs4_lo_state_t *lsp = NULL;
8858 bool_t ls_sw_held = FALSE;
8859 bool_t create = TRUE;
8860 bool_t lcreate = TRUE;
8861 bool_t dup_lock = FALSE;
8862 int rc;
8863
8864 DTRACE_NFSV4_2(op__lock__start, struct compound_state *, cs,
8865 LOCK4args *, args);
8866
8867 if (cs->vp == NULL) {
8868 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8869 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8870 cs, LOCK4res *, resp);
8871 return;
8872 }
8873
8874 if (args->locker.new_lock_owner) {
8875 /* Create a new lockowner for this instance */
8876 open_to_lock_owner4 *olo = &args->locker.locker4_u.open_owner;
8877
8878 NFS4_DEBUG(rfs4_debug, (CE_NOTE, "Creating new lock owner"));
8879
8880 stateid = &olo->open_stateid;
8881 status = rfs4_get_state(stateid, &sp, RFS4_DBS_VALID);
8882 if (status != NFS4_OK) {
8883 NFS4_DEBUG(rfs4_debug,
8884 (CE_NOTE, "Get state failed in lock %d", status));
8885 *cs->statusp = resp->status = status;
8886 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8887 cs, LOCK4res *, resp);
8888 return;
8889 }
8890
8891 /* Ensure specified filehandle matches */
8892 if (cs->vp != sp->rs_finfo->rf_vp) {
8893 rfs4_state_rele(sp);
8894 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8895 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8896 cs, LOCK4res *, resp);
8897 return;
8898 }
8899
8900 /* hold off other access to open_owner while we tinker */
8901 rfs4_sw_enter(&sp->rs_owner->ro_sw);
8902
8903 switch (rc = rfs4_check_stateid_seqid(sp, stateid)) {
8904 case NFS4_CHECK_STATEID_OLD:
8905 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8906 goto end;
8907 case NFS4_CHECK_STATEID_BAD:
8908 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8909 goto end;
8910 case NFS4_CHECK_STATEID_EXPIRED:
8911 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8912 goto end;
8913 case NFS4_CHECK_STATEID_UNCONFIRMED:
8914 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8915 goto end;
8916 case NFS4_CHECK_STATEID_CLOSED:
8917 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8918 goto end;
8919 case NFS4_CHECK_STATEID_OKAY:
8920 case NFS4_CHECK_STATEID_REPLAY:
8921 switch (rfs4_check_olo_seqid(olo->open_seqid,
8922 sp->rs_owner, resop)) {
8923 case NFS4_CHKSEQ_OKAY:
8924 if (rc == NFS4_CHECK_STATEID_OKAY)
8925 break;
8926 /*
8927 * This is replayed stateid; if seqid
8928 * matches next expected, then client
8929 * is using wrong seqid.
8930 */
8931 /* FALLTHROUGH */
8932 case NFS4_CHKSEQ_BAD:
8933 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8934 goto end;
8935 case NFS4_CHKSEQ_REPLAY:
8936 /* This is a duplicate LOCK request */
8937 dup_lock = TRUE;
8938
8939 /*
8940 * For a duplicate we do not want to
8941 * create a new lockowner as it should
8942 * already exist.
8943 * Turn off the lockowner create flag.
8944 */
8945 lcreate = FALSE;
8946 }
8947 break;
8948 }
8949
8950 lo = rfs4_findlockowner(&olo->lock_owner, &lcreate);
8951 if (lo == NULL) {
8952 NFS4_DEBUG(rfs4_debug,
8953 (CE_NOTE, "rfs4_op_lock: no lock owner"));
8954 *cs->statusp = resp->status = NFS4ERR_RESOURCE;
8955 goto end;
8956 }
8957
8958 lsp = rfs4_findlo_state_by_owner(lo, sp, &create);
8959 if (lsp == NULL) {
8960 rfs4_update_lease(sp->rs_owner->ro_client);
8961 /*
8962 * Only update theh open_seqid if this is not
8963 * a duplicate request
8964 */
8965 if (dup_lock == FALSE) {
8966 rfs4_update_open_sequence(sp->rs_owner);
8967 }
8968
8969 NFS4_DEBUG(rfs4_debug,
8970 (CE_NOTE, "rfs4_op_lock: no state"));
8971 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
8972 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8973 rfs4_lockowner_rele(lo);
8974 goto end;
8975 }
8976
8977 /*
8978 * This is the new_lock_owner branch and the client is
8979 * supposed to be associating a new lock_owner with
8980 * the open file at this point. If we find that a
8981 * lock_owner/state association already exists and a
8982 * successful LOCK request was returned to the client,
8983 * an error is returned to the client since this is
8984 * not appropriate. The client should be using the
8985 * existing lock_owner branch.
8986 */
8987 if (dup_lock == FALSE && create == FALSE) {
8988 if (lsp->rls_lock_completed == TRUE) {
8989 *cs->statusp =
8990 resp->status = NFS4ERR_BAD_SEQID;
8991 rfs4_lockowner_rele(lo);
8992 goto end;
8993 }
8994 }
8995
8996 rfs4_update_lease(sp->rs_owner->ro_client);
8997
8998 /*
8999 * Only update theh open_seqid if this is not
9000 * a duplicate request
9001 */
9002 if (dup_lock == FALSE) {
9003 rfs4_update_open_sequence(sp->rs_owner);
9004 }
9005
9006 /*
9007 * If this is a duplicate lock request, just copy the
9008 * previously saved reply and return.
9009 */
9010 if (dup_lock == TRUE) {
9011 /* verify that lock_seqid's match */
9012 if (lsp->rls_seqid != olo->lock_seqid) {
9013 NFS4_DEBUG(rfs4_debug,
9014 (CE_NOTE, "rfs4_op_lock: Dup-Lock seqid bad"
9015 "lsp->seqid=%d old->seqid=%d",
9016 lsp->rls_seqid, olo->lock_seqid));
9017 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9018 } else {
9019 rfs4_copy_reply(resop, &lsp->rls_reply);
9020 /*
9021 * Make sure to copy the just
9022 * retrieved reply status into the
9023 * overall compound status
9024 */
9025 *cs->statusp = resp->status;
9026 }
9027 rfs4_lockowner_rele(lo);
9028 goto end;
9029 }
9030
9031 rfs4_dbe_lock(lsp->rls_dbe);
9032
9033 /* Make sure to update the lock sequence id */
9034 lsp->rls_seqid = olo->lock_seqid;
9035
9036 NFS4_DEBUG(rfs4_debug,
9037 (CE_NOTE, "Lock seqid established as %d", lsp->rls_seqid));
9038
9039 /*
9040 * This is used to signify the newly created lockowner
9041 * stateid and its sequence number. The checks for
9042 * sequence number and increment don't occur on the
9043 * very first lock request for a lockowner.
9044 */
9045 lsp->rls_skip_seqid_check = TRUE;
9046
9047 /* hold off other access to lsp while we tinker */
9048 rfs4_sw_enter(&lsp->rls_sw);
9049 ls_sw_held = TRUE;
9050
9051 rfs4_dbe_unlock(lsp->rls_dbe);
9052
9053 rfs4_lockowner_rele(lo);
9054 } else {
9055 stateid = &args->locker.locker4_u.lock_owner.lock_stateid;
9056 /* get lsp and hold the lock on the underlying file struct */
9057 if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE))
9058 != NFS4_OK) {
9059 *cs->statusp = resp->status = status;
9060 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9061 cs, LOCK4res *, resp);
9062 return;
9063 }
9064 create = FALSE; /* We didn't create lsp */
9065
9066 /* Ensure specified filehandle matches */
9067 if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
9068 rfs4_lo_state_rele(lsp, TRUE);
9069 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9070 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9071 cs, LOCK4res *, resp);
9072 return;
9073 }
9074
9075 /* hold off other access to lsp while we tinker */
9076 rfs4_sw_enter(&lsp->rls_sw);
9077 ls_sw_held = TRUE;
9078
9079 switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
9080 /*
9081 * The stateid looks like it was okay (expected to be
9082 * the next one)
9083 */
9084 case NFS4_CHECK_STATEID_OKAY:
9085 /*
9086 * The sequence id is now checked. Determine
9087 * if this is a replay or if it is in the
9088 * expected (next) sequence. In the case of a
9089 * replay, there are two replay conditions
9090 * that may occur. The first is the normal
9091 * condition where a LOCK is done with a
9092 * NFS4_OK response and the stateid is
9093 * updated. That case is handled below when
9094 * the stateid is identified as a REPLAY. The
9095 * second is the case where an error is
9096 * returned, like NFS4ERR_DENIED, and the
9097 * sequence number is updated but the stateid
9098 * is not updated. This second case is dealt
9099 * with here. So it may seem odd that the
9100 * stateid is okay but the sequence id is a
9101 * replay but it is okay.
9102 */
9103 switch (rfs4_check_lock_seqid(
9104 args->locker.locker4_u.lock_owner.lock_seqid,
9105 lsp, resop)) {
9106 case NFS4_CHKSEQ_REPLAY:
9107 if (resp->status != NFS4_OK) {
9108 /*
9109 * Here is our replay and need
9110 * to verify that the last
9111 * response was an error.
9112 */
9113 *cs->statusp = resp->status;
9114 goto end;
9115 }
9116 /*
9117 * This is done since the sequence id
9118 * looked like a replay but it didn't
9119 * pass our check so a BAD_SEQID is
9120 * returned as a result.
9121 */
9122 /*FALLTHROUGH*/
9123 case NFS4_CHKSEQ_BAD:
9124 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9125 goto end;
9126 case NFS4_CHKSEQ_OKAY:
9127 /* Everything looks okay move ahead */
9128 break;
9129 }
9130 break;
9131 case NFS4_CHECK_STATEID_OLD:
9132 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9133 goto end;
9134 case NFS4_CHECK_STATEID_BAD:
9135 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9136 goto end;
9137 case NFS4_CHECK_STATEID_EXPIRED:
9138 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9139 goto end;
9140 case NFS4_CHECK_STATEID_CLOSED:
9141 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9142 goto end;
9143 case NFS4_CHECK_STATEID_REPLAY:
9144 switch (rfs4_check_lock_seqid(
9145 args->locker.locker4_u.lock_owner.lock_seqid,
9146 lsp, resop)) {
9147 case NFS4_CHKSEQ_OKAY:
9148 /*
9149 * This is a replayed stateid; if
9150 * seqid matches the next expected,
9151 * then client is using wrong seqid.
9152 */
9153 case NFS4_CHKSEQ_BAD:
9154 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9155 goto end;
9156 case NFS4_CHKSEQ_REPLAY:
9157 rfs4_update_lease(lsp->rls_locker->rl_client);
9158 *cs->statusp = status = resp->status;
9159 goto end;
9160 }
9161 break;
9162 default:
9163 ASSERT(FALSE);
9164 break;
9165 }
9166
9167 rfs4_update_lock_sequence(lsp);
9168 rfs4_update_lease(lsp->rls_locker->rl_client);
9169 }
9170
9171 /*
9172 * NFS4 only allows locking on regular files, so
9173 * verify type of object.
9174 */
9175 if (cs->vp->v_type != VREG) {
9176 if (cs->vp->v_type == VDIR)
9177 status = NFS4ERR_ISDIR;
9178 else
9179 status = NFS4ERR_INVAL;
9180 goto out;
9181 }
9182
9183 cp = lsp->rls_state->rs_owner->ro_client;
9184
9185 if (rfs4_clnt_in_grace(cp) && !args->reclaim) {
9186 status = NFS4ERR_GRACE;
9187 goto out;
9188 }
9189
9190 if (rfs4_clnt_in_grace(cp) && args->reclaim && !cp->rc_can_reclaim) {
9191 status = NFS4ERR_NO_GRACE;
9192 goto out;
9193 }
9194
9195 if (!rfs4_clnt_in_grace(cp) && args->reclaim) {
9196 status = NFS4ERR_NO_GRACE;
9197 goto out;
9198 }
9199
9200 if (lsp->rls_state->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE)
9201 cs->deleg = TRUE;
9202
9203 status = rfs4_do_lock(lsp, args->locktype,
9204 args->offset, args->length, cs->cr, resop);
9205
9206 out:
9207 lsp->rls_skip_seqid_check = FALSE;
9208
9209 *cs->statusp = resp->status = status;
9210
9211 if (status == NFS4_OK) {
9212 resp->LOCK4res_u.lock_stateid = lsp->rls_lockid.stateid;
9213 lsp->rls_lock_completed = TRUE;
9214 }
9215 /*
9216 * Only update the "OPEN" response here if this was a new
9217 * lock_owner
9218 */
9219 if (sp)
9220 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
9221
9222 rfs4_update_lock_resp(lsp, resop);
9223
9224 end:
9225 if (lsp) {
9226 if (ls_sw_held)
9227 rfs4_sw_exit(&lsp->rls_sw);
9228 /*
9229 * If an sp obtained, then the lsp does not represent
9230 * a lock on the file struct.
9231 */
9232 if (sp != NULL)
9233 rfs4_lo_state_rele(lsp, FALSE);
9234 else
9235 rfs4_lo_state_rele(lsp, TRUE);
9236 }
9237 if (sp) {
9238 rfs4_sw_exit(&sp->rs_owner->ro_sw);
9239 rfs4_state_rele(sp);
9240 }
9241
9242 DTRACE_NFSV4_2(op__lock__done, struct compound_state *, cs,
9243 LOCK4res *, resp);
9244 }
9245
9246 /* free function for LOCK/LOCKT */
9247 static void
9248 lock_denied_free(nfs_resop4 *resop)
9249 {
9250 LOCK4denied *dp = NULL;
9251
9252 switch (resop->resop) {
9253 case OP_LOCK:
9254 if (resop->nfs_resop4_u.oplock.status == NFS4ERR_DENIED)
9255 dp = &resop->nfs_resop4_u.oplock.LOCK4res_u.denied;
9256 break;
9257 case OP_LOCKT:
9258 if (resop->nfs_resop4_u.oplockt.status == NFS4ERR_DENIED)
9259 dp = &resop->nfs_resop4_u.oplockt.denied;
9260 break;
9261 default:
9262 break;
9263 }
9264
9265 if (dp)
9266 kmem_free(dp->owner.owner_val, dp->owner.owner_len);
9267 }
9268
9269 /*ARGSUSED*/
9270 void
9271 rfs4_op_locku(nfs_argop4 *argop, nfs_resop4 *resop,
9272 struct svc_req *req, struct compound_state *cs)
9273 {
9274 LOCKU4args *args = &argop->nfs_argop4_u.oplocku;
9275 LOCKU4res *resp = &resop->nfs_resop4_u.oplocku;
9276 nfsstat4 status;
9277 stateid4 *stateid = &args->lock_stateid;
9278 rfs4_lo_state_t *lsp;
9279
9280 DTRACE_NFSV4_2(op__locku__start, struct compound_state *, cs,
9281 LOCKU4args *, args);
9282
9283 if (cs->vp == NULL) {
9284 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9285 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9286 LOCKU4res *, resp);
9287 return;
9288 }
9289
9290 if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE)) != NFS4_OK) {
9291 *cs->statusp = resp->status = status;
9292 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9293 LOCKU4res *, resp);
9294 return;
9295 }
9296
9297 /* Ensure specified filehandle matches */
9298 if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
9299 rfs4_lo_state_rele(lsp, TRUE);
9300 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9301 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9302 LOCKU4res *, resp);
9303 return;
9304 }
9305
9306 /* hold off other access to lsp while we tinker */
9307 rfs4_sw_enter(&lsp->rls_sw);
9308
9309 switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
9310 case NFS4_CHECK_STATEID_OKAY:
9311 if (rfs4_check_lock_seqid(args->seqid, lsp, resop)
9312 != NFS4_CHKSEQ_OKAY) {
9313 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9314 goto end;
9315 }
9316 break;
9317 case NFS4_CHECK_STATEID_OLD:
9318 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9319 goto end;
9320 case NFS4_CHECK_STATEID_BAD:
9321 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9322 goto end;
9323 case NFS4_CHECK_STATEID_EXPIRED:
9324 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9325 goto end;
9326 case NFS4_CHECK_STATEID_CLOSED:
9327 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9328 goto end;
9329 case NFS4_CHECK_STATEID_REPLAY:
9330 switch (rfs4_check_lock_seqid(args->seqid, lsp, resop)) {
9331 case NFS4_CHKSEQ_OKAY:
9332 /*
9333 * This is a replayed stateid; if
9334 * seqid matches the next expected,
9335 * then client is using wrong seqid.
9336 */
9337 case NFS4_CHKSEQ_BAD:
9338 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9339 goto end;
9340 case NFS4_CHKSEQ_REPLAY:
9341 rfs4_update_lease(lsp->rls_locker->rl_client);
9342 *cs->statusp = status = resp->status;
9343 goto end;
9344 }
9345 break;
9346 default:
9347 ASSERT(FALSE);
9348 break;
9349 }
9350
9351 rfs4_update_lock_sequence(lsp);
9352 rfs4_update_lease(lsp->rls_locker->rl_client);
9353
9354 /*
9355 * NFS4 only allows locking on regular files, so
9356 * verify type of object.
9357 */
9358 if (cs->vp->v_type != VREG) {
9359 if (cs->vp->v_type == VDIR)
9360 status = NFS4ERR_ISDIR;
9361 else
9362 status = NFS4ERR_INVAL;
9363 goto out;
9364 }
9365
9366 if (rfs4_clnt_in_grace(lsp->rls_state->rs_owner->ro_client)) {
9367 status = NFS4ERR_GRACE;
9368 goto out;
9369 }
9370
9371 status = rfs4_do_lock(lsp, args->locktype,
9372 args->offset, args->length, cs->cr, resop);
9373
9374 out:
9375 *cs->statusp = resp->status = status;
9376
9377 if (status == NFS4_OK)
9378 resp->lock_stateid = lsp->rls_lockid.stateid;
9379
9380 rfs4_update_lock_resp(lsp, resop);
9381
9382 end:
9383 rfs4_sw_exit(&lsp->rls_sw);
9384 rfs4_lo_state_rele(lsp, TRUE);
9385
9386 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9387 LOCKU4res *, resp);
9388 }
9389
9390 /*
9391 * LOCKT is a best effort routine, the client can not be guaranteed that
9392 * the status return is still in effect by the time the reply is received.
9393 * They are numerous race conditions in this routine, but we are not required
9394 * and can not be accurate.
9395 */
9396 /*ARGSUSED*/
9397 void
9398 rfs4_op_lockt(nfs_argop4 *argop, nfs_resop4 *resop,
9399 struct svc_req *req, struct compound_state *cs)
9400 {
9401 LOCKT4args *args = &argop->nfs_argop4_u.oplockt;
9402 LOCKT4res *resp = &resop->nfs_resop4_u.oplockt;
9403 rfs4_lockowner_t *lo;
9404 rfs4_client_t *cp;
9405 bool_t create = FALSE;
9406 struct flock64 flk;
9407 int error;
9408 int flag = FREAD | FWRITE;
9409 int ltype;
9410 length4 posix_length;
9411 sysid_t sysid;
9412 pid_t pid;
9413
9414 DTRACE_NFSV4_2(op__lockt__start, struct compound_state *, cs,
9415 LOCKT4args *, args);
9416
9417 if (cs->vp == NULL) {
9418 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9419 goto out;
9420 }
9421
9422 /*
9423 * NFS4 only allows locking on regular files, so
9424 * verify type of object.
9425 */
9426 if (cs->vp->v_type != VREG) {
9427 if (cs->vp->v_type == VDIR)
9428 *cs->statusp = resp->status = NFS4ERR_ISDIR;
9429 else
9430 *cs->statusp = resp->status = NFS4ERR_INVAL;
9431 goto out;
9432 }
9433
9434 /*
9435 * Check out the clientid to ensure the server knows about it
9436 * so that we correctly inform the client of a server reboot.
9437 */
9438 if ((cp = rfs4_findclient_by_id(args->owner.clientid, FALSE))
9439 == NULL) {
9440 *cs->statusp = resp->status =
9441 rfs4_check_clientid(&args->owner.clientid, 0);
9442 goto out;
9443 }
9444 if (rfs4_lease_expired(cp)) {
9445 rfs4_client_close(cp);
9446 /*
9447 * Protocol doesn't allow returning NFS4ERR_STALE as
9448 * other operations do on this check so STALE_CLIENTID
9449 * is returned instead
9450 */
9451 *cs->statusp = resp->status = NFS4ERR_STALE_CLIENTID;
9452 goto out;
9453 }
9454
9455 if (rfs4_clnt_in_grace(cp) && !(cp->rc_can_reclaim)) {
9456 *cs->statusp = resp->status = NFS4ERR_GRACE;
9457 rfs4_client_rele(cp);
9458 goto out;
9459 }
9460 rfs4_client_rele(cp);
9461
9462 resp->status = NFS4_OK;
9463
9464 switch (args->locktype) {
9465 case READ_LT:
9466 case READW_LT:
9467 ltype = F_RDLCK;
9468 break;
9469 case WRITE_LT:
9470 case WRITEW_LT:
9471 ltype = F_WRLCK;
9472 break;
9473 }
9474
9475 posix_length = args->length;
9476 /* Check for zero length. To lock to end of file use all ones for V4 */
9477 if (posix_length == 0) {
9478 *cs->statusp = resp->status = NFS4ERR_INVAL;
9479 goto out;
9480 } else if (posix_length == (length4)(~0)) {
9481 posix_length = 0; /* Posix to end of file */
9482 }
9483
9484 /* Find or create a lockowner */
9485 lo = rfs4_findlockowner(&args->owner, &create);
9486
9487 if (lo) {
9488 pid = lo->rl_pid;
9489 if ((resp->status =
9490 rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
9491 goto err;
9492 } else {
9493 pid = 0;
9494 sysid = lockt_sysid;
9495 }
9496 retry:
9497 flk.l_type = ltype;
9498 flk.l_whence = 0; /* SEEK_SET */
9499 flk.l_start = args->offset;
9500 flk.l_len = posix_length;
9501 flk.l_sysid = sysid;
9502 flk.l_pid = pid;
9503 flag |= F_REMOTELOCK;
9504
9505 LOCK_PRINT(rfs4_debug, "rfs4_op_lockt", F_GETLK, &flk);
9506
9507 /* Note that length4 is uint64_t but l_len and l_start are off64_t */
9508 if (flk.l_len < 0 || flk.l_start < 0) {
9509 resp->status = NFS4ERR_INVAL;
9510 goto err;
9511 }
9512 error = VOP_FRLOCK(cs->vp, F_GETLK, &flk, flag, (u_offset_t)0,
9513 NULL, cs->cr, NULL);
9514
9515 /*
9516 * N.B. We map error values to nfsv4 errors. This is differrent
9517 * than puterrno4 routine.
9518 */
9519 switch (error) {
9520 case 0:
9521 if (flk.l_type == F_UNLCK)
9522 resp->status = NFS4_OK;
9523 else {
9524 if (lock_denied(&resp->denied, &flk) == NFS4ERR_EXPIRED)
9525 goto retry;
9526 resp->status = NFS4ERR_DENIED;
9527 }
9528 break;
9529 case EOVERFLOW:
9530 resp->status = NFS4ERR_INVAL;
9531 break;
9532 case EINVAL:
9533 resp->status = NFS4ERR_NOTSUPP;
9534 break;
9535 default:
9536 cmn_err(CE_WARN, "rfs4_op_lockt: unexpected errno (%d)",
9537 error);
9538 resp->status = NFS4ERR_SERVERFAULT;
9539 break;
9540 }
9541
9542 err:
9543 if (lo)
9544 rfs4_lockowner_rele(lo);
9545 *cs->statusp = resp->status;
9546 out:
9547 DTRACE_NFSV4_2(op__lockt__done, struct compound_state *, cs,
9548 LOCKT4res *, resp);
9549 }
9550
9551 int
9552 rfs4_share(rfs4_state_t *sp, uint32_t access, uint32_t deny)
9553 {
9554 int err;
9555 int cmd;
9556 vnode_t *vp;
9557 struct shrlock shr;
9558 struct shr_locowner shr_loco;
9559 int fflags = 0;
9560
9561 ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9562 ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9563
9564 if (sp->rs_closed)
9565 return (NFS4ERR_OLD_STATEID);
9566
9567 vp = sp->rs_finfo->rf_vp;
9568 ASSERT(vp);
9569
9570 shr.s_access = shr.s_deny = 0;
9571
9572 if (access & OPEN4_SHARE_ACCESS_READ) {
9573 fflags |= FREAD;
9574 shr.s_access |= F_RDACC;
9575 }
9576 if (access & OPEN4_SHARE_ACCESS_WRITE) {
9577 fflags |= FWRITE;
9578 shr.s_access |= F_WRACC;
9579 }
9580 ASSERT(shr.s_access);
9581
9582 if (deny & OPEN4_SHARE_DENY_READ)
9583 shr.s_deny |= F_RDDNY;
9584 if (deny & OPEN4_SHARE_DENY_WRITE)
9585 shr.s_deny |= F_WRDNY;
9586
9587 shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9588 shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9589 shr_loco.sl_pid = shr.s_pid;
9590 shr_loco.sl_id = shr.s_sysid;
9591 shr.s_owner = (caddr_t)&shr_loco;
9592 shr.s_own_len = sizeof (shr_loco);
9593
9594 cmd = nbl_need_check(vp) ? F_SHARE_NBMAND : F_SHARE;
9595
9596 err = VOP_SHRLOCK(vp, cmd, &shr, fflags, CRED(), NULL);
9597 if (err != 0) {
9598 if (err == EAGAIN)
9599 err = NFS4ERR_SHARE_DENIED;
9600 else
9601 err = puterrno4(err);
9602 return (err);
9603 }
9604
9605 sp->rs_share_access |= access;
9606 sp->rs_share_deny |= deny;
9607
9608 return (0);
9609 }
9610
9611 int
9612 rfs4_unshare(rfs4_state_t *sp)
9613 {
9614 int err;
9615 struct shrlock shr;
9616 struct shr_locowner shr_loco;
9617
9618 ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9619
9620 if (sp->rs_closed || sp->rs_share_access == 0)
9621 return (0);
9622
9623 ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9624 ASSERT(sp->rs_finfo->rf_vp);
9625
9626 shr.s_access = shr.s_deny = 0;
9627 shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9628 shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9629 shr_loco.sl_pid = shr.s_pid;
9630 shr_loco.sl_id = shr.s_sysid;
9631 shr.s_owner = (caddr_t)&shr_loco;
9632 shr.s_own_len = sizeof (shr_loco);
9633
9634 err = VOP_SHRLOCK(sp->rs_finfo->rf_vp, F_UNSHARE, &shr, 0, CRED(),
9635 NULL);
9636 if (err != 0) {
9637 err = puterrno4(err);
9638 return (err);
9639 }
9640
9641 sp->rs_share_access = 0;
9642 sp->rs_share_deny = 0;
9643
9644 return (0);
9645
9646 }
9647
9648 static int
9649 rdma_setup_read_data4(READ4args *args, READ4res *rok)
9650 {
9651 struct clist *wcl;
9652 count4 count = rok->data_len;
9653 int wlist_len;
9654
9655 wcl = args->wlist;
9656 if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
9657 return (FALSE);
9658 }
9659 wcl = args->wlist;
9660 rok->wlist_len = wlist_len;
9661 rok->wlist = wcl;
9662 return (TRUE);
9663 }
9664
9665 /* tunable to disable server referrals */
9666 int rfs4_no_referrals = 0;
9667
9668 /*
9669 * Find an NFS record in reparse point data.
9670 * Returns 0 for success and <0 or an errno value on failure.
9671 */
9672 int
9673 vn_find_nfs_record(vnode_t *vp, nvlist_t **nvlp, char **svcp, char **datap)
9674 {
9675 int err;
9676 char *stype, *val;
9677 nvlist_t *nvl;
9678 nvpair_t *curr;
9679
9680 if ((nvl = reparse_init()) == NULL)
9681 return (-1);
9682
9683 if ((err = reparse_vnode_parse(vp, nvl)) != 0) {
9684 reparse_free(nvl);
9685 return (err);
9686 }
9687
9688 curr = NULL;
9689 while ((curr = nvlist_next_nvpair(nvl, curr)) != NULL) {
9690 if ((stype = nvpair_name(curr)) == NULL) {
9691 reparse_free(nvl);
9692 return (-2);
9693 }
9694 if (strncasecmp(stype, "NFS", 3) == 0)
9695 break;
9696 }
9697
9698 if ((curr == NULL) ||
9699 (nvpair_value_string(curr, &val))) {
9700 reparse_free(nvl);
9701 return (-3);
9702 }
9703 *nvlp = nvl;
9704 *svcp = stype;
9705 *datap = val;
9706 return (0);
9707 }
9708
9709 int
9710 vn_is_nfs_reparse(vnode_t *vp, cred_t *cr)
9711 {
9712 nvlist_t *nvl;
9713 char *s, *d;
9714
9715 if (rfs4_no_referrals != 0)
9716 return (B_FALSE);
9717
9718 if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9719 return (B_FALSE);
9720
9721 if (vn_find_nfs_record(vp, &nvl, &s, &d) != 0)
9722 return (B_FALSE);
9723
9724 reparse_free(nvl);
9725
9726 return (B_TRUE);
9727 }
9728
9729 /*
9730 * There is a user-level copy of this routine in ref_subr.c.
9731 * Changes should be kept in sync.
9732 */
9733 static int
9734 nfs4_create_components(char *path, component4 *comp4)
9735 {
9736 int slen, plen, ncomp;
9737 char *ori_path, *nxtc, buf[MAXNAMELEN];
9738
9739 if (path == NULL)
9740 return (0);
9741
9742 plen = strlen(path) + 1; /* include the terminator */
9743 ori_path = path;
9744 ncomp = 0;
9745
9746 /* count number of components in the path */
9747 for (nxtc = path; nxtc < ori_path + plen; nxtc++) {
9748 if (*nxtc == '/' || *nxtc == '\0' || *nxtc == '\n') {
9749 if ((slen = nxtc - path) == 0) {
9750 path = nxtc + 1;
9751 continue;
9752 }
9753
9754 if (comp4 != NULL) {
9755 bcopy(path, buf, slen);
9756 buf[slen] = '\0';
9757 (void) str_to_utf8(buf, &comp4[ncomp]);
9758 }
9759
9760 ncomp++; /* 1 valid component */
9761 path = nxtc + 1;
9762 }
9763 if (*nxtc == '\0' || *nxtc == '\n')
9764 break;
9765 }
9766
9767 return (ncomp);
9768 }
9769
9770 /*
9771 * There is a user-level copy of this routine in ref_subr.c.
9772 * Changes should be kept in sync.
9773 */
9774 static int
9775 make_pathname4(char *path, pathname4 *pathname)
9776 {
9777 int ncomp;
9778 component4 *comp4;
9779
9780 if (pathname == NULL)
9781 return (0);
9782
9783 if (path == NULL) {
9784 pathname->pathname4_val = NULL;
9785 pathname->pathname4_len = 0;
9786 return (0);
9787 }
9788
9789 /* count number of components to alloc buffer */
9790 if ((ncomp = nfs4_create_components(path, NULL)) == 0) {
9791 pathname->pathname4_val = NULL;
9792 pathname->pathname4_len = 0;
9793 return (0);
9794 }
9795 comp4 = kmem_zalloc(ncomp * sizeof (component4), KM_SLEEP);
9796
9797 /* copy components into allocated buffer */
9798 ncomp = nfs4_create_components(path, comp4);
9799
9800 pathname->pathname4_val = comp4;
9801 pathname->pathname4_len = ncomp;
9802
9803 return (ncomp);
9804 }
9805
9806 #define xdr_fs_locations4 xdr_fattr4_fs_locations
9807
9808 fs_locations4 *
9809 fetch_referral(vnode_t *vp, cred_t *cr)
9810 {
9811 nvlist_t *nvl;
9812 char *stype, *sdata;
9813 fs_locations4 *result;
9814 char buf[1024];
9815 size_t bufsize;
9816 XDR xdr;
9817 int err;
9818
9819 /*
9820 * Check attrs to ensure it's a reparse point
9821 */
9822 if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9823 return (NULL);
9824
9825 /*
9826 * Look for an NFS record and get the type and data
9827 */
9828 if (vn_find_nfs_record(vp, &nvl, &stype, &sdata) != 0)
9829 return (NULL);
9830
9831 /*
9832 * With the type and data, upcall to get the referral
9833 */
9834 bufsize = sizeof (buf);
9835 bzero(buf, sizeof (buf));
9836 err = reparse_kderef((const char *)stype, (const char *)sdata,
9837 buf, &bufsize);
9838 reparse_free(nvl);
9839
9840 DTRACE_PROBE4(nfs4serv__func__referral__upcall,
9841 char *, stype, char *, sdata, char *, buf, int, err);
9842 if (err) {
9843 cmn_err(CE_NOTE,
9844 "reparsed daemon not running: unable to get referral (%d)",
9845 err);
9846 return (NULL);
9847 }
9848
9849 /*
9850 * We get an XDR'ed record back from the kderef call
9851 */
9852 xdrmem_create(&xdr, buf, bufsize, XDR_DECODE);
9853 result = kmem_alloc(sizeof (fs_locations4), KM_SLEEP);
9854 err = xdr_fs_locations4(&xdr, result);
9855 XDR_DESTROY(&xdr);
9856 if (err != TRUE) {
9857 DTRACE_PROBE1(nfs4serv__func__referral__upcall__xdrfail,
9858 int, err);
9859 return (NULL);
9860 }
9861
9862 /*
9863 * Look at path to recover fs_root, ignoring the leading '/'
9864 */
9865 (void) make_pathname4(vp->v_path, &result->fs_root);
9866
9867 return (result);
9868 }
9869
9870 char *
9871 build_symlink(vnode_t *vp, cred_t *cr, size_t *strsz)
9872 {
9873 fs_locations4 *fsl;
9874 fs_location4 *fs;
9875 char *server, *path, *symbuf;
9876 static char *prefix = "/net/";
9877 int i, size, npaths;
9878 uint_t len;
9879
9880 /* Get the referral */
9881 if ((fsl = fetch_referral(vp, cr)) == NULL)
9882 return (NULL);
9883
9884 /* Deal with only the first location and first server */
9885 fs = &fsl->locations_val[0];
9886 server = utf8_to_str(&fs->server_val[0], &len, NULL);
9887 if (server == NULL) {
9888 rfs4_free_fs_locations4(fsl);
9889 kmem_free(fsl, sizeof (fs_locations4));
9890 return (NULL);
9891 }
9892
9893 /* Figure out size for "/net/" + host + /path/path/path + NULL */
9894 size = strlen(prefix) + len;
9895 for (i = 0; i < fs->rootpath.pathname4_len; i++)
9896 size += fs->rootpath.pathname4_val[i].utf8string_len + 1;
9897
9898 /* Allocate the symlink buffer and fill it */
9899 symbuf = kmem_zalloc(size, KM_SLEEP);
9900 (void) strcat(symbuf, prefix);
9901 (void) strcat(symbuf, server);
9902 kmem_free(server, len);
9903
9904 npaths = 0;
9905 for (i = 0; i < fs->rootpath.pathname4_len; i++) {
9906 path = utf8_to_str(&fs->rootpath.pathname4_val[i], &len, NULL);
9907 if (path == NULL)
9908 continue;
9909 (void) strcat(symbuf, "/");
9910 (void) strcat(symbuf, path);
9911 npaths++;
9912 kmem_free(path, len);
9913 }
9914
9915 rfs4_free_fs_locations4(fsl);
9916 kmem_free(fsl, sizeof (fs_locations4));
9917
9918 if (strsz != NULL)
9919 *strsz = size;
9920 return (symbuf);
9921 }
9922
9923 /*
9924 * Check to see if we have a downrev Solaris client, so that we
9925 * can send it a symlink instead of a referral.
9926 */
9927 int
9928 client_is_downrev(struct svc_req *req)
9929 {
9930 struct sockaddr *ca;
9931 rfs4_clntip_t *ci;
9932 bool_t create = FALSE;
9933 int is_downrev;
9934
9935 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
9936 ASSERT(ca);
9937 ci = rfs4_find_clntip(ca, &create);
9938 if (ci == NULL)
9939 return (0);
9940 is_downrev = ci->ri_no_referrals;
9941 rfs4_dbe_rele(ci->ri_dbe);
9942 return (is_downrev);
9943 }
9944
9945 /*
9946 * Do the main work of handling HA-NFSv4 Resource Group failover on
9947 * Sun Cluster.
9948 * We need to detect whether any RG admin paths have been added or removed,
9949 * and adjust resources accordingly.
9950 * Currently we're using a very inefficient algorithm, ~ 2 * O(n**2). In
9951 * order to scale, the list and array of paths need to be held in more
9952 * suitable data structures.
9953 */
9954 static void
9955 hanfsv4_failover(nfs4_srv_t *nsrv4)
9956 {
9957 int i, start_grace, numadded_paths = 0;
9958 char **added_paths = NULL;
9959 rfs4_dss_path_t *dss_path;
9960
9961 /*
9962 * Note: currently, dss_pathlist cannot be NULL, since
9963 * it will always include an entry for NFS4_DSS_VAR_DIR. If we
9964 * make the latter dynamically specified too, the following will
9965 * need to be adjusted.
9966 */
9967
9968 /*
9969 * First, look for removed paths: RGs that have been failed-over
9970 * away from this node.
9971 * Walk the "currently-serving" dss_pathlist and, for each
9972 * path, check if it is on the "passed-in" rfs4_dss_newpaths array
9973 * from nfsd. If not, that RG path has been removed.
9974 *
9975 * Note that nfsd has sorted rfs4_dss_newpaths for us, and removed
9976 * any duplicates.
9977 */
9978 dss_path = nsrv4->dss_pathlist;
9979 do {
9980 int found = 0;
9981 char *path = dss_path->path;
9982
9983 /* used only for non-HA so may not be removed */
9984 if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
9985 dss_path = dss_path->next;
9986 continue;
9987 }
9988
9989 for (i = 0; i < rfs4_dss_numnewpaths; i++) {
9990 int cmpret;
9991 char *newpath = rfs4_dss_newpaths[i];
9992
9993 /*
9994 * Since nfsd has sorted rfs4_dss_newpaths for us,
9995 * once the return from strcmp is negative we know
9996 * we've passed the point where "path" should be,
9997 * and can stop searching: "path" has been removed.
9998 */
9999 cmpret = strcmp(path, newpath);
10000 if (cmpret < 0)
10001 break;
10002 if (cmpret == 0) {
10003 found = 1;
10004 break;
10005 }
10006 }
10007
10008 if (found == 0) {
10009 unsigned index = dss_path->index;
10010 rfs4_servinst_t *sip = dss_path->sip;
10011 rfs4_dss_path_t *path_next = dss_path->next;
10012
10013 /*
10014 * This path has been removed.
10015 * We must clear out the servinst reference to
10016 * it, since it's now owned by another
10017 * node: we should not attempt to touch it.
10018 */
10019 ASSERT(dss_path == sip->dss_paths[index]);
10020 sip->dss_paths[index] = NULL;
10021
10022 /* remove from "currently-serving" list, and destroy */
10023 remque(dss_path);
10024 /* allow for NUL */
10025 kmem_free(dss_path->path, strlen(dss_path->path) + 1);
10026 kmem_free(dss_path, sizeof (rfs4_dss_path_t));
10027
10028 dss_path = path_next;
10029 } else {
10030 /* path was found; not removed */
10031 dss_path = dss_path->next;
10032 }
10033 } while (dss_path != nsrv4->dss_pathlist);
10034
10035 /*
10036 * Now, look for added paths: RGs that have been failed-over
10037 * to this node.
10038 * Walk the "passed-in" rfs4_dss_newpaths array from nfsd and,
10039 * for each path, check if it is on the "currently-serving"
10040 * dss_pathlist. If not, that RG path has been added.
10041 *
10042 * Note: we don't do duplicate detection here; nfsd does that for us.
10043 *
10044 * Note: numadded_paths <= rfs4_dss_numnewpaths, which gives us
10045 * an upper bound for the size needed for added_paths[numadded_paths].
10046 */
10047
10048 /* probably more space than we need, but guaranteed to be enough */
10049 if (rfs4_dss_numnewpaths > 0) {
10050 size_t sz = rfs4_dss_numnewpaths * sizeof (char *);
10051 added_paths = kmem_zalloc(sz, KM_SLEEP);
10052 }
10053
10054 /* walk the "passed-in" rfs4_dss_newpaths array from nfsd */
10055 for (i = 0; i < rfs4_dss_numnewpaths; i++) {
10056 int found = 0;
10057 char *newpath = rfs4_dss_newpaths[i];
10058
10059 dss_path = nsrv4->dss_pathlist;
10060 do {
10061 char *path = dss_path->path;
10062
10063 /* used only for non-HA */
10064 if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
10065 dss_path = dss_path->next;
10066 continue;
10067 }
10068
10069 if (strncmp(path, newpath, strlen(path)) == 0) {
10070 found = 1;
10071 break;
10072 }
10073
10074 dss_path = dss_path->next;
10075 } while (dss_path != nsrv4->dss_pathlist);
10076
10077 if (found == 0) {
10078 added_paths[numadded_paths] = newpath;
10079 numadded_paths++;
10080 }
10081 }
10082
10083 /* did we find any added paths? */
10084 if (numadded_paths > 0) {
10085
10086 /* create a new server instance, and start its grace period */
10087 start_grace = 1;
10088 /* CSTYLED */
10089 rfs4_servinst_create(nsrv4, start_grace, numadded_paths, added_paths);
10090
10091 /* read in the stable storage state from these paths */
10092 rfs4_dss_readstate(nsrv4, numadded_paths, added_paths);
10093
10094 /*
10095 * Multiple failovers during a grace period will cause
10096 * clients of the same resource group to be partitioned
10097 * into different server instances, with different
10098 * grace periods. Since clients of the same resource
10099 * group must be subject to the same grace period,
10100 * we need to reset all currently active grace periods.
10101 */
10102 rfs4_grace_reset_all(nsrv4);
10103 }
10104
10105 if (rfs4_dss_numnewpaths > 0)
10106 kmem_free(added_paths, rfs4_dss_numnewpaths * sizeof (char *));
10107 }