1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26 /*
27 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
28 * All Rights Reserved
29 */
30
31 /*
32 * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
33 * Copyright 2019 Nexenta Systems, Inc.
34 * Copyright 2019 Nexenta by DDN, Inc.
35 */
36
37 #include <sys/param.h>
38 #include <sys/types.h>
39 #include <sys/systm.h>
40 #include <sys/cred.h>
41 #include <sys/buf.h>
42 #include <sys/vfs.h>
43 #include <sys/vfs_opreg.h>
44 #include <sys/vnode.h>
45 #include <sys/uio.h>
46 #include <sys/errno.h>
47 #include <sys/sysmacros.h>
48 #include <sys/statvfs.h>
49 #include <sys/kmem.h>
50 #include <sys/dirent.h>
51 #include <sys/cmn_err.h>
52 #include <sys/debug.h>
53 #include <sys/systeminfo.h>
54 #include <sys/flock.h>
55 #include <sys/pathname.h>
56 #include <sys/nbmlock.h>
57 #include <sys/share.h>
58 #include <sys/atomic.h>
59 #include <sys/policy.h>
60 #include <sys/fem.h>
61 #include <sys/sdt.h>
62 #include <sys/ddi.h>
63 #include <sys/zone.h>
64
65 #include <fs/fs_reparse.h>
66
67 #include <rpc/types.h>
68 #include <rpc/auth.h>
69 #include <rpc/rpcsec_gss.h>
70 #include <rpc/svc.h>
71
72 #include <nfs/nfs.h>
73 #include <nfs/nfssys.h>
74 #include <nfs/export.h>
75 #include <nfs/nfs_cmd.h>
76 #include <nfs/lm.h>
77 #include <nfs/nfs4.h>
78 #include <nfs/nfs4_drc.h>
79
80 #include <sys/strsubr.h>
81 #include <sys/strsun.h>
82
83 #include <inet/common.h>
84 #include <inet/ip.h>
85 #include <inet/ip6.h>
86
87 #include <sys/tsol/label.h>
88 #include <sys/tsol/tndb.h>
89
/*
 * Tunables.  The two static variables below start out at the values of
 * their RFS4_* default macros; the extern declarations refer to objects
 * defined outside this file.
 */
#define	RFS4_MAXLOCK_TRIES 4	/* Try to get the lock this many times */
static int rfs4_maxlock_tries = RFS4_MAXLOCK_TRIES;
#define	RFS4_LOCK_DELAY 10	/* Milliseconds */
static clock_t rfs4_lock_delay = RFS4_LOCK_DELAY;
extern struct svc_ops rdma_svc_ops;
extern int nfs_loaned_buffers;
/* End of Tunables */
97
98 static int rdma_setup_read_data4(READ4args *, READ4res *);
99
/*
 * Used to bump the stateid4.seqid value and show changes in the stateid.
 *
 * Pre-increments (sp)->bits.chgseq in place and evaluates to the new value.
 * The argument is expanded exactly once.
 */
#define	next_stateid(sp) (++(sp)->bits.chgseq)
104
105 /*
106 * RFS4_MINLEN_ENTRY4: XDR-encoded size of smallest possible dirent.
107 * This is used to return NFS4ERR_TOOSMALL when clients specify
108 * maxcount that isn't large enough to hold the smallest possible
109 * XDR encoded dirent.
110 *
111 * sizeof cookie (8 bytes) +
112 * sizeof name_len (4 bytes) +
113 * sizeof smallest (padded) name (4 bytes) +
114 * sizeof bitmap4_len (12 bytes) + NOTE: we always encode len=2 bm4
115 * sizeof attrlist4_len (4 bytes) +
116 * sizeof next boolean (4 bytes)
117 *
118 * RFS4_MINLEN_RDDIR4: XDR-encoded size of READDIR op reply containing
119 * the smallest possible entry4 (assumes no attrs requested).
120 * sizeof nfsstat4 (4 bytes) +
121 * sizeof verifier4 (8 bytes) +
122 * sizeof entry4list bool (4 bytes) +
123 * sizeof entry4 (36 bytes) +
124 * sizeof eof bool (4 bytes)
125 *
126 * RFS4_MINLEN_RDDIR_BUF: minimum length of buffer server will provide to
127 * VOP_READDIR. Its value is the size of the maximum possible dirent
128 * for solaris. The DIRENT64_RECLEN macro returns the size of dirent
129 * required for a given name length. MAXNAMELEN is the maximum
130 * filename length allowed in Solaris. The first two DIRENT64_RECLEN()
131 * macros are to allow for . and .. entries -- just a minor tweak to try
132 * and guarantee that buffer we give to VOP_READDIR will be large enough
133 * to hold ., .., and the largest possible solaris dirent64.
134 */
/* 8 + 4 + 4 + 12 + 4 + 4 bytes, per the breakdown in the comment above */
#define	RFS4_MINLEN_ENTRY4 36
/* status + verifier + entry-list bool + smallest entry4 + eof bool */
#define	RFS4_MINLEN_RDDIR4 (4 + NFS4_VERIFIER_SIZE + 4 + RFS4_MINLEN_ENTRY4 + 4)
/* room for ".", ".." and one dirent64 with a MAXNAMELEN name */
#define	RFS4_MINLEN_RDDIR_BUF \
	(DIRENT64_RECLEN(1) + DIRENT64_RECLEN(2) + DIRENT64_RECLEN(MAXNAMELEN))
139
/*
 * It would be better to pad to 4 bytes since that's what XDR would do,
 * but the dirents UFS gives us are already padded to 8, so just take
 * what we're given.  Dircount is only a hint anyway.  Currently the
 * solaris kernel is ASCII only, so there's no point in calling the
 * UTF8 functions.
 *
 * dirent64: name padded to provide 8 byte struct alignment
 *	d_ino(8) + d_off(8) + d_reclen(2) + d_name(namelen + null(1) + pad)
 *
 * cookie: uint64_t   +  utf8namelen: uint_t  +   utf8name padded to 8 bytes
 *
 * The macro therefore charges three XDR units (cookie plus name length)
 * and the name length derived from the dirent's d_reclen.
 */
#define	DIRENT64_TO_DIRCOUNT(dp) \
	(3 * BYTES_PER_XDR_UNIT + DIRENT64_NAMELEN((dp)->d_reclen))
155
156
/* Allocated in rfs4_srvrinit() and released in rfs4_srvrfini(). */
static sysid_t		lockt_sysid;	/* dummy sysid for all LOCKT calls */

/* Caller id obtained from fs_new_caller_id() in rfs4_srvrinit(). */
u_longlong_t	nfs4_srv_caller_id;
/* VSD key; filled in by vsd_create() in rfs4_srvrinit(). */
uint_t		nfs4_srv_vkey = 0;
161
162 void rfs4_init_compound_state(struct compound_state *);
163
164 static void nullfree(caddr_t);
165 static void rfs4_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
166 struct compound_state *);
167 static void rfs4_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
168 struct compound_state *);
169 static void rfs4_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
170 struct compound_state *);
171 static void rfs4_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
172 struct compound_state *);
173 static void rfs4_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
174 struct compound_state *);
175 static void rfs4_op_create_free(nfs_resop4 *resop);
176 static void rfs4_op_delegreturn(nfs_argop4 *, nfs_resop4 *,
177 struct svc_req *, struct compound_state *);
178 static void rfs4_op_delegpurge(nfs_argop4 *, nfs_resop4 *,
179 struct svc_req *, struct compound_state *);
180 static void rfs4_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
181 struct compound_state *);
182 static void rfs4_op_getattr_free(nfs_resop4 *);
183 static void rfs4_op_getfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
184 struct compound_state *);
185 static void rfs4_op_getfh_free(nfs_resop4 *);
186 static void rfs4_op_illegal(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
187 struct compound_state *);
188 static void rfs4_op_link(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
189 struct compound_state *);
190 static void rfs4_op_lock(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
191 struct compound_state *);
192 static void lock_denied_free(nfs_resop4 *);
193 static void rfs4_op_locku(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
194 struct compound_state *);
195 static void rfs4_op_lockt(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
196 struct compound_state *);
197 static void rfs4_op_lookup(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
198 struct compound_state *);
199 static void rfs4_op_lookupp(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
200 struct compound_state *);
201 static void rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop,
202 struct svc_req *req, struct compound_state *cs);
203 static void rfs4_op_nverify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
204 struct compound_state *);
205 static void rfs4_op_open(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
206 struct compound_state *);
207 static void rfs4_op_open_confirm(nfs_argop4 *, nfs_resop4 *,
208 struct svc_req *, struct compound_state *);
209 static void rfs4_op_open_downgrade(nfs_argop4 *, nfs_resop4 *,
210 struct svc_req *, struct compound_state *);
211 static void rfs4_op_putfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
212 struct compound_state *);
213 static void rfs4_op_putpubfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
214 struct compound_state *);
215 static void rfs4_op_putrootfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
216 struct compound_state *);
217 static void rfs4_op_read(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
218 struct compound_state *);
219 static void rfs4_op_read_free(nfs_resop4 *);
220 static void rfs4_op_readdir_free(nfs_resop4 *resop);
221 static void rfs4_op_readlink(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
222 struct compound_state *);
223 static void rfs4_op_readlink_free(nfs_resop4 *);
224 static void rfs4_op_release_lockowner(nfs_argop4 *, nfs_resop4 *,
225 struct svc_req *, struct compound_state *);
226 static void rfs4_op_remove(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
227 struct compound_state *);
228 static void rfs4_op_rename(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
229 struct compound_state *);
230 static void rfs4_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
231 struct compound_state *);
232 static void rfs4_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
233 struct compound_state *);
234 static void rfs4_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
235 struct compound_state *);
236 static void rfs4_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
237 struct compound_state *);
238 static void rfs4_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
239 struct compound_state *);
240 static void rfs4_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
241 struct compound_state *);
242 static void rfs4_op_setclientid(nfs_argop4 *, nfs_resop4 *,
243 struct svc_req *, struct compound_state *);
244 static void rfs4_op_setclientid_confirm(nfs_argop4 *, nfs_resop4 *,
245 struct svc_req *req, struct compound_state *);
246 static void rfs4_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
247 struct compound_state *);
248 static void rfs4_op_secinfo_free(nfs_resop4 *);
249
250 static nfsstat4 check_open_access(uint32_t, struct compound_state *,
251 struct svc_req *);
252 nfsstat4 rfs4_client_sysid(rfs4_client_t *, sysid_t *);
253 void rfs4_ss_clid(nfs4_srv_t *, rfs4_client_t *);
254
255
/*
 * translation table for attrs
 *
 * NOTE(review): the users of this structure are outside this part of the
 * file; the per-field notes below are inferred from the names and should
 * be confirmed against nfs4_ntov_table_init()/nfs4_ntov_table_free().
 */
struct nfs4_ntov_table {
	union nfs4_attr_u *na;		/* presumably the attribute values */
	uint8_t amap[NFS4_MAXNUM_ATTRS]; /* presumably na[] slot -> attr map */
	int attrcnt;			/* presumably number of slots in use */
	bool_t vfsstat;			/* presumably "statvfs data needed" */
};
265
266 static void nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp);
267 static void nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
268 struct nfs4_svgetit_arg *sargp);
269
270 static nfsstat4 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp,
271 struct compound_state *cs, struct nfs4_svgetit_arg *sargp,
272 struct nfs4_ntov_table *ntovp, nfs4_attr_cmd_t cmd);
273
274 static void hanfsv4_failover(nfs4_srv_t *);
275
/*
 * FEM monitors built by rfs4_srvrinit() from nfs4_rd_deleg_tmpl[] and
 * nfs4_wr_deleg_tmpl[] respectively, and released by rfs4_srvrfini().
 */
fem_t		*deleg_rdops;
fem_t		*deleg_wrops;
278
/*
 * NFS4 op dispatch table
 *
 * One entry per operation number.  The handlers installed in
 * rfsv4disptab[] are declared with the signature
 * (nfs_argop4 *, nfs_resop4 *, struct svc_req *, struct compound_state *);
 * the function pointers here are left unprototyped.
 */

struct rfsv4disp {
	void	(*dis_proc)();		/* proc to call */
	void	(*dis_resfree)();	/* frees space allocated by proc */
	int	dis_flags;		/* RPC_IDEMPOTENT, etc... */
};
288
/*
 * Dispatch table indexed by operation number.  Slots 0-2 have no
 * operation assigned and route to rfs4_op_illegal.  Entries whose handler
 * needs no result cleanup use nullfree as the dis_resfree routine.
 */
static struct rfsv4disp rfsv4disptab[] = {
	/*
	 * NFS VERSION 4
	 */

	/* RFS_NULL = 0 */
	{rfs4_op_illegal, nullfree, 0},

	/* UNUSED = 1 */
	{rfs4_op_illegal, nullfree, 0},

	/* UNUSED = 2 */
	{rfs4_op_illegal, nullfree, 0},

	/* OP_ACCESS = 3 */
	{rfs4_op_access, nullfree, RPC_IDEMPOTENT},

	/* OP_CLOSE = 4 */
	{rfs4_op_close, nullfree, 0},

	/* OP_COMMIT = 5 */
	{rfs4_op_commit, nullfree, RPC_IDEMPOTENT},

	/* OP_CREATE = 6 */
	{rfs4_op_create, nullfree, 0},

	/* OP_DELEGPURGE = 7 */
	{rfs4_op_delegpurge, nullfree, 0},

	/* OP_DELEGRETURN = 8 */
	{rfs4_op_delegreturn, nullfree, 0},

	/* OP_GETATTR = 9 */
	{rfs4_op_getattr, rfs4_op_getattr_free, RPC_IDEMPOTENT},

	/* OP_GETFH = 10 */
	{rfs4_op_getfh, rfs4_op_getfh_free, RPC_ALL},

	/* OP_LINK = 11 */
	{rfs4_op_link, nullfree, 0},

	/* OP_LOCK = 12 */
	{rfs4_op_lock, lock_denied_free, 0},

	/* OP_LOCKT = 13 */
	{rfs4_op_lockt, lock_denied_free, 0},

	/* OP_LOCKU = 14 */
	{rfs4_op_locku, nullfree, 0},

	/* OP_LOOKUP = 15 */
	{rfs4_op_lookup, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},

	/* OP_LOOKUPP = 16 */
	{rfs4_op_lookupp, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},

	/* OP_NVERIFY = 17 */
	{rfs4_op_nverify, nullfree, RPC_IDEMPOTENT},

	/* OP_OPEN = 18 */
	{rfs4_op_open, rfs4_free_reply, 0},

	/* OP_OPENATTR = 19 */
	{rfs4_op_openattr, nullfree, 0},

	/* OP_OPEN_CONFIRM = 20 */
	{rfs4_op_open_confirm, nullfree, 0},

	/* OP_OPEN_DOWNGRADE = 21 */
	{rfs4_op_open_downgrade, nullfree, 0},

	/* OP_PUTFH = 22 */
	{rfs4_op_putfh, nullfree, RPC_ALL},

	/* OP_PUTPUBFH = 23 */
	{rfs4_op_putpubfh, nullfree, RPC_ALL},

	/* OP_PUTROOTFH = 24 */
	{rfs4_op_putrootfh, nullfree, RPC_ALL},

	/* OP_READ = 25 */
	{rfs4_op_read, rfs4_op_read_free, RPC_IDEMPOTENT},

	/* OP_READDIR = 26 */
	{rfs4_op_readdir, rfs4_op_readdir_free, RPC_IDEMPOTENT},

	/* OP_READLINK = 27 */
	{rfs4_op_readlink, rfs4_op_readlink_free, RPC_IDEMPOTENT},

	/* OP_REMOVE = 28 */
	{rfs4_op_remove, nullfree, 0},

	/* OP_RENAME = 29 */
	{rfs4_op_rename, nullfree, 0},

	/* OP_RENEW = 30 */
	{rfs4_op_renew, nullfree, 0},

	/* OP_RESTOREFH = 31 */
	{rfs4_op_restorefh, nullfree, RPC_ALL},

	/* OP_SAVEFH = 32 */
	{rfs4_op_savefh, nullfree, RPC_ALL},

	/* OP_SECINFO = 33 */
	{rfs4_op_secinfo, rfs4_op_secinfo_free, 0},

	/* OP_SETATTR = 34 */
	{rfs4_op_setattr, nullfree, 0},

	/* OP_SETCLIENTID = 35 */
	{rfs4_op_setclientid, nullfree, 0},

	/* OP_SETCLIENTID_CONFIRM = 36 */
	{rfs4_op_setclientid_confirm, nullfree, 0},

	/* OP_VERIFY = 37 */
	{rfs4_op_verify, nullfree, RPC_IDEMPOTENT},

	/* OP_WRITE = 38 */
	{rfs4_op_write, nullfree, 0},

	/* OP_RELEASE_LOCKOWNER = 39 */
	{rfs4_op_release_lockowner, nullfree, 0},
};
414
/* Number of entries in rfsv4disptab[]. */
static uint_t rfsv4disp_cnt = sizeof (rfsv4disptab) / sizeof (rfsv4disptab[0]);

/*
 * One past the last valid rfsv4disptab[] index.  In DEBUG kernels this is
 * also the index of the trailing "rfs4_op_illegal" name in
 * rfs4_op_string[].
 */
#define	OP_ILLEGAL_IDX (rfsv4disp_cnt)
418
#ifdef DEBUG

/* DEBUG-only knobs; their consumers are outside this part of the file. */
int		rfs4_fillone_debug = 0;
int		rfs4_no_stub_access = 1;
int		rfs4_rddir_debug = 0;

/*
 * Printable operation names, in the same order as rfsv4disptab[], plus one
 * extra trailing entry for illegal operations.
 */
static char    *rfs4_op_string[] = {
	"rfs4_op_null",
	"rfs4_op_1 unused",
	"rfs4_op_2 unused",
	"rfs4_op_access",
	"rfs4_op_close",
	"rfs4_op_commit",
	"rfs4_op_create",
	"rfs4_op_delegpurge",
	"rfs4_op_delegreturn",
	"rfs4_op_getattr",
	"rfs4_op_getfh",
	"rfs4_op_link",
	"rfs4_op_lock",
	"rfs4_op_lockt",
	"rfs4_op_locku",
	"rfs4_op_lookup",
	"rfs4_op_lookupp",
	"rfs4_op_nverify",
	"rfs4_op_open",
	"rfs4_op_openattr",
	"rfs4_op_open_confirm",
	"rfs4_op_open_downgrade",
	"rfs4_op_putfh",
	"rfs4_op_putpubfh",
	"rfs4_op_putrootfh",
	"rfs4_op_read",
	"rfs4_op_readdir",
	"rfs4_op_readlink",
	"rfs4_op_remove",
	"rfs4_op_rename",
	"rfs4_op_renew",
	"rfs4_op_restorefh",
	"rfs4_op_savefh",
	"rfs4_op_secinfo",
	"rfs4_op_setattr",
	"rfs4_op_setclientid",
	/*
	 * NOTE(review): the handler is named rfs4_op_setclientid_confirm;
	 * this string omits the "id".  Left as is in case anything matches
	 * on the existing spelling.
	 */
	"rfs4_op_setclient_confirm",
	"rfs4_op_verify",
	"rfs4_op_write",
	"rfs4_op_release_lockowner",
	"rfs4_op_illegal"
};
#endif
469
470 void rfs4_ss_chkclid(nfs4_srv_t *, rfs4_client_t *);
471
472 extern size_t strlcpy(char *dst, const char *src, size_t dstsize);
473
474 extern void rfs4_free_fs_locations4(fs_locations4 *);
475
/*
 * Step from one dirent64 to the next one in a buffer by advancing
 * d_reclen bytes.  Any earlier definition of nextdp is replaced.
 * Note that dp is expanded twice.
 */
#ifdef	nextdp
#undef nextdp
#endif
#define	nextdp(dp)	((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
480
/*
 * Operation template passed to fem_create() for "deleg_rdops" in
 * rfs4_srvrinit().  Each pair names a vnode operation and the deleg_rd_*
 * routine installed for it; the list ends with a NULL pair.
 */
static const fs_operation_def_t nfs4_rd_deleg_tmpl[] = {
	VOPNAME_OPEN,		{ .femop_open = deleg_rd_open },
	VOPNAME_WRITE,		{ .femop_write = deleg_rd_write },
	VOPNAME_SETATTR,	{ .femop_setattr = deleg_rd_setattr },
	VOPNAME_RWLOCK,		{ .femop_rwlock = deleg_rd_rwlock },
	VOPNAME_SPACE,		{ .femop_space = deleg_rd_space },
	VOPNAME_SETSECATTR,	{ .femop_setsecattr = deleg_rd_setsecattr },
	VOPNAME_VNEVENT,	{ .femop_vnevent = deleg_rd_vnevent },
	NULL,			NULL
};
/*
 * Operation template passed to fem_create() for "deleg_wrops" in
 * rfs4_srvrinit().  Same layout as the read template, with an additional
 * VOPNAME_READ entry; the list ends with a NULL pair.
 */
static const fs_operation_def_t nfs4_wr_deleg_tmpl[] = {
	VOPNAME_OPEN,		{ .femop_open = deleg_wr_open },
	VOPNAME_READ,		{ .femop_read = deleg_wr_read },
	VOPNAME_WRITE,		{ .femop_write = deleg_wr_write },
	VOPNAME_SETATTR,	{ .femop_setattr = deleg_wr_setattr },
	VOPNAME_RWLOCK,		{ .femop_rwlock = deleg_wr_rwlock },
	VOPNAME_SPACE,		{ .femop_space = deleg_wr_space },
	VOPNAME_SETSECATTR,	{ .femop_setsecattr = deleg_wr_setsecattr },
	VOPNAME_VNEVENT,	{ .femop_vnevent = deleg_wr_vnevent },
	NULL,			NULL
};
502
503 nfs4_srv_t *
504 nfs4_get_srv(void)
505 {
506 nfs_globals_t *ng = zone_getspecific(nfssrv_zone_key, curzone);
507 nfs4_srv_t *srv = ng->nfs4_srv;
508 ASSERT(srv != NULL);
509 return (srv);
510 }
511
512 void
513 rfs4_srv_zone_init(nfs_globals_t *ng)
514 {
515 nfs4_srv_t *nsrv4;
516 timespec32_t verf;
517
518 nsrv4 = kmem_zalloc(sizeof (*nsrv4), KM_SLEEP);
519
520 /*
521 * The following algorithm attempts to find a unique verifier
522 * to be used as the write verifier returned from the server
523 * to the client. It is important that this verifier change
524 * whenever the server reboots. Of secondary importance, it
525 * is important for the verifier to be unique between two
526 * different servers.
527 *
528 * Thus, an attempt is made to use the system hostid and the
529 * current time in seconds when the nfssrv kernel module is
530 * loaded. It is assumed that an NFS server will not be able
531 * to boot and then to reboot in less than a second. If the
532 * hostid has not been set, then the current high resolution
533 * time is used. This will ensure different verifiers each
534 * time the server reboots and minimize the chances that two
535 * different servers will have the same verifier.
536 * XXX - this is broken on LP64 kernels.
537 */
538 verf.tv_sec = (time_t)zone_get_hostid(NULL);
539 if (verf.tv_sec != 0) {
540 verf.tv_nsec = gethrestime_sec();
541 } else {
542 timespec_t tverf;
543
544 gethrestime(&tverf);
545 verf.tv_sec = (time_t)tverf.tv_sec;
546 verf.tv_nsec = tverf.tv_nsec;
547 }
548 nsrv4->write4verf = *(uint64_t *)&verf;
549
550 /* Used to manage create/destroy of server state */
551 nsrv4->nfs4_server_state = NULL;
552 nsrv4->nfs4_cur_servinst = NULL;
553 nsrv4->nfs4_deleg_policy = SRV_NEVER_DELEGATE;
554 mutex_init(&nsrv4->deleg_lock, NULL, MUTEX_DEFAULT, NULL);
555 mutex_init(&nsrv4->state_lock, NULL, MUTEX_DEFAULT, NULL);
556 mutex_init(&nsrv4->servinst_lock, NULL, MUTEX_DEFAULT, NULL);
557 rw_init(&nsrv4->deleg_policy_lock, NULL, RW_DEFAULT, NULL);
558
559 ng->nfs4_srv = nsrv4;
560 }
561
562 void
563 rfs4_srv_zone_fini(nfs_globals_t *ng)
564 {
565 nfs4_srv_t *nsrv4 = ng->nfs4_srv;
566
567 ng->nfs4_srv = NULL;
568
569 mutex_destroy(&nsrv4->deleg_lock);
570 mutex_destroy(&nsrv4->state_lock);
571 mutex_destroy(&nsrv4->servinst_lock);
572 rw_destroy(&nsrv4->deleg_policy_lock);
573
574 kmem_free(nsrv4, sizeof (*nsrv4));
575 }
576
577 void
578 rfs4_srvrinit(void)
579 {
580 extern void rfs4_attr_init();
581
582 rfs4_attr_init();
583
584 if (fem_create("deleg_rdops", nfs4_rd_deleg_tmpl, &deleg_rdops) != 0) {
585 rfs4_disable_delegation();
586 } else if (fem_create("deleg_wrops", nfs4_wr_deleg_tmpl,
587 &deleg_wrops) != 0) {
588 rfs4_disable_delegation();
589 fem_free(deleg_rdops);
590 }
591
592 nfs4_srv_caller_id = fs_new_caller_id();
593 lockt_sysid = lm_alloc_sysidt();
594 vsd_create(&nfs4_srv_vkey, NULL);
595 rfs4_state_g_init();
596 }
597
598 void
599 rfs4_srvrfini(void)
600 {
601 if (lockt_sysid != LM_NOSYSID) {
602 lm_free_sysidt(lockt_sysid);
603 lockt_sysid = LM_NOSYSID;
604 }
605
606 rfs4_state_g_fini();
607
608 fem_free(deleg_rdops);
609 fem_free(deleg_wrops);
610 }
611
612 void
613 rfs4_do_server_start(int server_upordown,
614 int srv_delegation, int cluster_booted)
615 {
616 nfs4_srv_t *nsrv4 = nfs4_get_srv();
617
618 /* Is this a warm start? */
619 if (server_upordown == NFS_SERVER_QUIESCED) {
620 cmn_err(CE_NOTE, "nfs4_srv: "
621 "server was previously quiesced; "
622 "existing NFSv4 state will be re-used");
623
624 /*
625 * HA-NFSv4: this is also the signal
626 * that a Resource Group failover has
627 * occurred.
628 */
629 if (cluster_booted)
630 hanfsv4_failover(nsrv4);
631 } else {
632 /* Cold start */
633 nsrv4->rfs4_start_time = 0;
634 rfs4_state_zone_init(nsrv4);
635 nsrv4->nfs4_drc = rfs4_init_drc(nfs4_drc_max,
636 nfs4_drc_hash);
637
638 /*
639 * The nfsd service was started with the -s option
640 * we need to pull in any state from the paths indicated.
641 */
642 if (curzone == global_zone && rfs4_dss_numnewpaths > 0) {
643 /* read in the stable storage state from these paths */
644 rfs4_dss_readstate(nsrv4, rfs4_dss_numnewpaths,
645 rfs4_dss_newpaths);
646 }
647 }
648
649 /* Check if delegation is to be enabled */
650 if (srv_delegation != FALSE)
651 rfs4_set_deleg_policy(nsrv4, SRV_NORMAL_DELEGATE);
652 }
653
654 void
655 rfs4_init_compound_state(struct compound_state *cs)
656 {
657 bzero(cs, sizeof (*cs));
658 cs->cont = TRUE;
659 cs->access = CS_ACCESS_DENIED;
660 cs->deleg = FALSE;
661 cs->mandlock = FALSE;
662 cs->fh.nfs_fh4_val = cs->fhbuf;
663 }
664
665 void
666 rfs4_grace_start(rfs4_servinst_t *sip)
667 {
668 rw_enter(&sip->rwlock, RW_WRITER);
669 sip->start_time = (time_t)TICK_TO_SEC(ddi_get_lbolt());
670 sip->grace_period = rfs4_grace_period;
671 rw_exit(&sip->rwlock);
672 }
673
674 /*
675 * returns true if the instance's grace period has never been started
676 */
677 int
678 rfs4_servinst_grace_new(rfs4_servinst_t *sip)
679 {
680 time_t start_time;
681
682 rw_enter(&sip->rwlock, RW_READER);
683 start_time = sip->start_time;
684 rw_exit(&sip->rwlock);
685
686 return (start_time == 0);
687 }
688
689 /*
690 * Indicates if server instance is within the
691 * grace period.
692 */
693 int
694 rfs4_servinst_in_grace(rfs4_servinst_t *sip)
695 {
696 time_t grace_expiry;
697
698 rw_enter(&sip->rwlock, RW_READER);
699 grace_expiry = sip->start_time + sip->grace_period;
700 rw_exit(&sip->rwlock);
701
702 return (((time_t)TICK_TO_SEC(ddi_get_lbolt())) < grace_expiry);
703 }
704
705 int
706 rfs4_clnt_in_grace(rfs4_client_t *cp)
707 {
708 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
709
710 return (rfs4_servinst_in_grace(cp->rc_server_instance));
711 }
712
713 /*
714 * reset all currently active grace periods
715 */
716 void
717 rfs4_grace_reset_all(nfs4_srv_t *nsrv4)
718 {
719 rfs4_servinst_t *sip;
720
721 mutex_enter(&nsrv4->servinst_lock);
722 for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev)
723 if (rfs4_servinst_in_grace(sip))
724 rfs4_grace_start(sip);
725 mutex_exit(&nsrv4->servinst_lock);
726 }
727
728 /*
729 * start any new instances' grace periods
730 */
731 void
732 rfs4_grace_start_new(nfs4_srv_t *nsrv4)
733 {
734 rfs4_servinst_t *sip;
735
736 mutex_enter(&nsrv4->servinst_lock);
737 for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev)
738 if (rfs4_servinst_grace_new(sip))
739 rfs4_grace_start(sip);
740 mutex_exit(&nsrv4->servinst_lock);
741 }
742
743 static rfs4_dss_path_t *
744 rfs4_dss_newpath(nfs4_srv_t *nsrv4, rfs4_servinst_t *sip,
745 char *path, unsigned index)
746 {
747 size_t len;
748 rfs4_dss_path_t *dss_path;
749
750 dss_path = kmem_alloc(sizeof (rfs4_dss_path_t), KM_SLEEP);
751
752 /*
753 * Take a copy of the string, since the original may be overwritten.
754 * Sadly, no strdup() in the kernel.
755 */
756 /* allow for NUL */
757 len = strlen(path) + 1;
758 dss_path->path = kmem_alloc(len, KM_SLEEP);
759 (void) strlcpy(dss_path->path, path, len);
760
761 /* associate with servinst */
762 dss_path->sip = sip;
763 dss_path->index = index;
764
765 /*
766 * Add to list of served paths.
767 * No locking required, as we're only ever called at startup.
768 */
769 if (nsrv4->dss_pathlist == NULL) {
770 /* this is the first dss_path_t */
771
772 /* needed for insque/remque */
773 dss_path->next = dss_path->prev = dss_path;
774
775 nsrv4->dss_pathlist = dss_path;
776 } else {
777 insque(dss_path, nsrv4->dss_pathlist);
778 }
779
780 return (dss_path);
781 }
782
783 /*
784 * Create a new server instance, and make it the currently active instance.
785 * Note that starting the grace period too early will reduce the clients'
786 * recovery window.
787 */
788 void
789 rfs4_servinst_create(nfs4_srv_t *nsrv4, int start_grace,
790 int dss_npaths, char **dss_paths)
791 {
792 unsigned i;
793 rfs4_servinst_t *sip;
794 rfs4_oldstate_t *oldstate;
795
796 sip = kmem_alloc(sizeof (rfs4_servinst_t), KM_SLEEP);
797 rw_init(&sip->rwlock, NULL, RW_DEFAULT, NULL);
798
799 sip->start_time = (time_t)0;
800 sip->grace_period = (time_t)0;
801 sip->next = NULL;
802 sip->prev = NULL;
803
804 rw_init(&sip->oldstate_lock, NULL, RW_DEFAULT, NULL);
805 /*
806 * This initial dummy entry is required to setup for insque/remque.
807 * It must be skipped over whenever the list is traversed.
808 */
809 oldstate = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP);
810 /* insque/remque require initial list entry to be self-terminated */
811 oldstate->next = oldstate;
812 oldstate->prev = oldstate;
813 sip->oldstate = oldstate;
814
815
816 sip->dss_npaths = dss_npaths;
817 sip->dss_paths = kmem_alloc(dss_npaths *
818 sizeof (rfs4_dss_path_t *), KM_SLEEP);
819
820 for (i = 0; i < dss_npaths; i++) {
821 sip->dss_paths[i] =
822 rfs4_dss_newpath(nsrv4, sip, dss_paths[i], i);
823 }
824
825 mutex_enter(&nsrv4->servinst_lock);
826 if (nsrv4->nfs4_cur_servinst != NULL) {
827 /* add to linked list */
828 sip->prev = nsrv4->nfs4_cur_servinst;
829 nsrv4->nfs4_cur_servinst->next = sip;
830 }
831 if (start_grace)
832 rfs4_grace_start(sip);
833 /* make the new instance "current" */
834 nsrv4->nfs4_cur_servinst = sip;
835
836 mutex_exit(&nsrv4->servinst_lock);
837 }
838
839 /*
840 * In future, we might add a rfs4_servinst_destroy(sip) but, for now, destroy
841 * all instances directly.
842 */
843 void
844 rfs4_servinst_destroy_all(nfs4_srv_t *nsrv4)
845 {
846 rfs4_servinst_t *sip, *prev, *current;
847 #ifdef DEBUG
848 int n = 0;
849 #endif
850
851 mutex_enter(&nsrv4->servinst_lock);
852 ASSERT(nsrv4->nfs4_cur_servinst != NULL);
853 current = nsrv4->nfs4_cur_servinst;
854 nsrv4->nfs4_cur_servinst = NULL;
855 for (sip = current; sip != NULL; sip = prev) {
856 prev = sip->prev;
857 rw_destroy(&sip->rwlock);
858 if (sip->oldstate)
859 kmem_free(sip->oldstate, sizeof (rfs4_oldstate_t));
860 if (sip->dss_paths) {
861 int i = sip->dss_npaths;
862
863 while (i > 0) {
864 i--;
865 if (sip->dss_paths[i] != NULL) {
866 char *path = sip->dss_paths[i]->path;
867
868 if (path != NULL) {
869 kmem_free(path,
870 strlen(path) + 1);
871 }
872 kmem_free(sip->dss_paths[i],
873 sizeof (rfs4_dss_path_t));
874 }
875 }
876 kmem_free(sip->dss_paths,
877 sip->dss_npaths * sizeof (rfs4_dss_path_t *));
878 }
879 kmem_free(sip, sizeof (rfs4_servinst_t));
880 #ifdef DEBUG
881 n++;
882 #endif
883 }
884 mutex_exit(&nsrv4->servinst_lock);
885 }
886
887 /*
888 * Assign the current server instance to a client_t.
889 * Should be called with cp->rc_dbe held.
890 */
891 void
892 rfs4_servinst_assign(nfs4_srv_t *nsrv4, rfs4_client_t *cp,
893 rfs4_servinst_t *sip)
894 {
895 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
896
897 /*
898 * The lock ensures that if the current instance is in the process
899 * of changing, we will see the new one.
900 */
901 mutex_enter(&nsrv4->servinst_lock);
902 cp->rc_server_instance = sip;
903 mutex_exit(&nsrv4->servinst_lock);
904 }
905
906 rfs4_servinst_t *
907 rfs4_servinst(rfs4_client_t *cp)
908 {
909 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
910
911 return (cp->rc_server_instance);
912 }
913
/*
 * Result-free routine that does nothing.  Installed as dis_resfree in
 * rfsv4disptab[] for operations that need no result cleanup.
 */
/* ARGSUSED */
static void
nullfree(caddr_t resop)
{
}
919
920 /*
921 * This is a fall-through for invalid or not implemented (yet) ops
922 */
923 /* ARGSUSED */
924 static void
925 rfs4_op_inval(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
926 struct compound_state *cs)
927 {
928 *cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_INVAL;
929 }
930
931 /*
932 * Check if the security flavor, nfsnum, is in the flavor_list.
933 */
934 bool_t
935 in_flavor_list(int nfsnum, int *flavor_list, int count)
936 {
937 int i;
938
939 for (i = 0; i < count; i++) {
940 if (nfsnum == flavor_list[i])
941 return (TRUE);
942 }
943 return (FALSE);
944 }
945
946 /*
947 * Used by rfs4_op_secinfo to get the security information from the
948 * export structure associated with the component.
949 */
950 /* ARGSUSED */
951 static nfsstat4
952 do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
953 {
954 int error, different_export = 0;
955 vnode_t *dvp, *vp;
956 struct exportinfo *exi = NULL;
957 fid_t fid;
958 uint_t count, i;
959 secinfo4 *resok_val;
960 struct secinfo *secp;
961 seconfig_t *si;
962 bool_t did_traverse = FALSE;
963 int dotdot, walk;
964 nfs_export_t *ne = nfs_get_export();
965
966 dvp = cs->vp;
967 dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
968
969 /*
970 * If dotdotting, then need to check whether it's above the
971 * root of a filesystem, or above an export point.
972 */
973 if (dotdot) {
974
975 /*
976 * If dotdotting at the root of a filesystem, then
977 * need to traverse back to the mounted-on filesystem
978 * and do the dotdot lookup there.
979 */
980 if ((cs->vp->v_flag & VROOT) || VN_IS_CURZONEROOT(cs->vp)) {
981
982 /*
983 * If at the system root, then can
984 * go up no further.
985 */
986 if (VN_CMP(dvp, ZONE_ROOTVP()))
987 return (puterrno4(ENOENT));
988
989 /*
990 * Traverse back to the mounted-on filesystem
991 */
992 dvp = untraverse(cs->vp);
993
994 /*
995 * Set the different_export flag so we remember
996 * to pick up a new exportinfo entry for
997 * this new filesystem.
998 */
999 different_export = 1;
1000 } else {
1001
1002 /*
1003 * If dotdotting above an export point then set
1004 * the different_export to get new export info.
1005 */
1006 different_export = nfs_exported(cs->exi, cs->vp);
1007 }
1008 }
1009
1010 /*
1011 * Get the vnode for the component "nm".
1012 */
1013 error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cs->cr,
1014 NULL, NULL, NULL);
1015 if (error)
1016 return (puterrno4(error));
1017
1018 /*
1019 * If the vnode is in a pseudo filesystem, or if the security flavor
1020 * used in the request is valid but not an explicitly shared flavor,
1021 * or the access bit indicates that this is a limited access,
1022 * check whether this vnode is visible.
1023 */
1024 if (!different_export &&
1025 (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
1026 cs->access & CS_ACCESS_LIMITED)) {
1027 if (! nfs_visible(cs->exi, vp, &different_export)) {
1028 VN_RELE(vp);
1029 return (puterrno4(ENOENT));
1030 }
1031 }
1032
1033 /*
1034 * If it's a mountpoint, then traverse it.
1035 */
1036 if (vn_ismntpt(vp)) {
1037 if ((error = traverse(&vp)) != 0) {
1038 VN_RELE(vp);
1039 return (puterrno4(error));
1040 }
1041 /* remember that we had to traverse mountpoint */
1042 did_traverse = TRUE;
1043 different_export = 1;
1044 } else if (vp->v_vfsp != dvp->v_vfsp) {
1045 /*
1046 * If vp isn't a mountpoint and the vfs ptrs aren't the same,
1047 * then vp is probably an LOFS object. We don't need the
1048 * realvp, we just need to know that we might have crossed
1049 * a server fs boundary and need to call checkexport4.
1050 * (LOFS lookup hides server fs mountpoints, and actually calls
1051 * traverse)
1052 */
1053 different_export = 1;
1054 }
1055
1056 /*
1057 * Get the export information for it.
1058 */
1059 if (different_export) {
1060
1061 bzero(&fid, sizeof (fid));
1062 fid.fid_len = MAXFIDSZ;
1063 error = vop_fid_pseudo(vp, &fid);
1064 if (error) {
1065 VN_RELE(vp);
1066 return (puterrno4(error));
1067 }
1068
1069 if (dotdot)
1070 exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
1071 else
1072 exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
1073
1074 if (exi == NULL) {
1075 if (did_traverse == TRUE) {
1076 /*
1077 * If this vnode is a mounted-on vnode,
1078 * but the mounted-on file system is not
1079 * exported, send back the secinfo for
1080 * the exported node that the mounted-on
1081 * vnode lives in.
1082 */
1083 exi = cs->exi;
1084 } else {
1085 VN_RELE(vp);
1086 return (puterrno4(EACCES));
1087 }
1088 }
1089 } else {
1090 exi = cs->exi;
1091 }
1092 ASSERT(exi != NULL);
1093
1094
1095 /*
1096 * Create the secinfo result based on the security information
1097 * from the exportinfo structure (exi).
1098 *
1099 * Return all flavors for a pseudo node.
1100 * For a real export node, return the flavor that the client
1101 * has access with.
1102 */
1103 ASSERT(RW_LOCK_HELD(&ne->exported_lock));
1104 if (PSEUDO(exi)) {
1105 count = exi->exi_export.ex_seccnt; /* total sec count */
1106 resok_val = kmem_alloc(count * sizeof (secinfo4), KM_SLEEP);
1107 secp = exi->exi_export.ex_secinfo;
1108
1109 for (i = 0; i < count; i++) {
1110 si = &secp[i].s_secinfo;
1111 resok_val[i].flavor = si->sc_rpcnum;
1112 if (resok_val[i].flavor == RPCSEC_GSS) {
1113 rpcsec_gss_info *info;
1114
1115 info = &resok_val[i].flavor_info;
1116 info->qop = si->sc_qop;
1117 info->service = (rpc_gss_svc_t)si->sc_service;
1118
1119 /* get oid opaque data */
1120 info->oid.sec_oid4_len =
1121 si->sc_gss_mech_type->length;
1122 info->oid.sec_oid4_val = kmem_alloc(
1123 si->sc_gss_mech_type->length, KM_SLEEP);
1124 bcopy(
1125 si->sc_gss_mech_type->elements,
1126 info->oid.sec_oid4_val,
1127 info->oid.sec_oid4_len);
1128 }
1129 }
1130 resp->SECINFO4resok_len = count;
1131 resp->SECINFO4resok_val = resok_val;
1132 } else {
1133 int ret_cnt = 0, k = 0;
1134 int *flavor_list;
1135
1136 count = exi->exi_export.ex_seccnt; /* total sec count */
1137 secp = exi->exi_export.ex_secinfo;
1138
1139 flavor_list = kmem_alloc(count * sizeof (int), KM_SLEEP);
1140 /* find out which flavors to return */
1141 for (i = 0; i < count; i ++) {
1142 int access, flavor, perm;
1143
1144 flavor = secp[i].s_secinfo.sc_nfsnum;
1145 perm = secp[i].s_flags;
1146
1147 access = nfsauth4_secinfo_access(exi, cs->req,
1148 flavor, perm, cs->basecr);
1149
1150 if (! (access & NFSAUTH_DENIED) &&
1151 ! (access & NFSAUTH_WRONGSEC)) {
1152 flavor_list[ret_cnt] = flavor;
1153 ret_cnt++;
1154 }
1155 }
1156
1157 /* Create the returning SECINFO value */
1158 resok_val = kmem_alloc(ret_cnt * sizeof (secinfo4), KM_SLEEP);
1159
1160 for (i = 0; i < count; i++) {
1161 /*
1162 * If the flavor is in the flavor list,
1163 * fill in resok_val.
1164 */
1165 si = &secp[i].s_secinfo;
1166 if (in_flavor_list(si->sc_nfsnum,
1167 flavor_list, ret_cnt)) {
1168 resok_val[k].flavor = si->sc_rpcnum;
1169 if (resok_val[k].flavor == RPCSEC_GSS) {
1170 rpcsec_gss_info *info;
1171
1172 info = &resok_val[k].flavor_info;
1173 info->qop = si->sc_qop;
1174 info->service = (rpc_gss_svc_t)
1175 si->sc_service;
1176
1177 /* get oid opaque data */
1178 info->oid.sec_oid4_len =
1179 si->sc_gss_mech_type->length;
1180 info->oid.sec_oid4_val = kmem_alloc(
1181 si->sc_gss_mech_type->length,
1182 KM_SLEEP);
1183 bcopy(si->sc_gss_mech_type->elements,
1184 info->oid.sec_oid4_val,
1185 info->oid.sec_oid4_len);
1186 }
1187 k++;
1188 }
1189 if (k >= ret_cnt)
1190 break;
1191 }
1192 resp->SECINFO4resok_len = ret_cnt;
1193 resp->SECINFO4resok_val = resok_val;
1194 kmem_free(flavor_list, count * sizeof (int));
1195 }
1196
1197 VN_RELE(vp);
1198 return (NFS4_OK);
1199 }
1200
1201 /*
1202 * SECINFO (Operation 33): Obtain required security information on
1203 * the component name in the format of (security-mechanism-oid, qop, service)
1204 * triplets.
1205 */
1206 /* ARGSUSED */
1207 static void
1208 rfs4_op_secinfo(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1209 struct compound_state *cs)
1210 {
1211 SECINFO4args *args = &argop->nfs_argop4_u.opsecinfo;
1212 SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1213 utf8string *utfnm = &args->name;
1214 uint_t len;
1215 char *nm;
1216 struct sockaddr *ca;
1217 char *name = NULL;
1218 nfsstat4 status = NFS4_OK;
1219
1220 DTRACE_NFSV4_2(op__secinfo__start, struct compound_state *, cs,
1221 SECINFO4args *, args);
1222
1223 /*
1224 * Current file handle (cfh) should have been set before getting
1225 * into this function. If not, return error.
1226 */
1227 if (cs->vp == NULL) {
1228 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1229 goto out;
1230 }
1231
1232 if (cs->vp->v_type != VDIR) {
1233 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1234 goto out;
1235 }
1236
1237 /*
1238 * Verify the component name. If failed, error out, but
1239 * do not error out if the component name is a "..".
1240 * SECINFO will return its parents secinfo data for SECINFO "..".
1241 */
1242 status = utf8_dir_verify(utfnm);
1243 if (status != NFS4_OK) {
1244 if (utfnm->utf8string_len != 2 ||
1245 utfnm->utf8string_val[0] != '.' ||
1246 utfnm->utf8string_val[1] != '.') {
1247 *cs->statusp = resp->status = status;
1248 goto out;
1249 }
1250 }
1251
1252 nm = utf8_to_str(utfnm, &len, NULL);
1253 if (nm == NULL) {
1254 *cs->statusp = resp->status = NFS4ERR_INVAL;
1255 goto out;
1256 }
1257
1258 if (len > MAXNAMELEN) {
1259 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1260 kmem_free(nm, len);
1261 goto out;
1262 }
1263
1264 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1265 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1266 MAXPATHLEN + 1);
1267
1268 if (name == NULL) {
1269 *cs->statusp = resp->status = NFS4ERR_INVAL;
1270 kmem_free(nm, len);
1271 goto out;
1272 }
1273
1274
1275 *cs->statusp = resp->status = do_rfs4_op_secinfo(cs, name, resp);
1276
1277 if (name != nm)
1278 kmem_free(name, MAXPATHLEN + 1);
1279 kmem_free(nm, len);
1280
1281 out:
1282 DTRACE_NFSV4_2(op__secinfo__done, struct compound_state *, cs,
1283 SECINFO4res *, resp);
1284 }
1285
1286 /*
1287 * Free SECINFO result.
1288 */
1289 /* ARGSUSED */
1290 static void
1291 rfs4_op_secinfo_free(nfs_resop4 *resop)
1292 {
1293 SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1294 int count, i;
1295 secinfo4 *resok_val;
1296
1297 /* If this is not an Ok result, nothing to free. */
1298 if (resp->status != NFS4_OK) {
1299 return;
1300 }
1301
1302 count = resp->SECINFO4resok_len;
1303 resok_val = resp->SECINFO4resok_val;
1304
1305 for (i = 0; i < count; i++) {
1306 if (resok_val[i].flavor == RPCSEC_GSS) {
1307 rpcsec_gss_info *info;
1308
1309 info = &resok_val[i].flavor_info;
1310 kmem_free(info->oid.sec_oid4_val,
1311 info->oid.sec_oid4_len);
1312 }
1313 }
1314 kmem_free(resok_val, count * sizeof (secinfo4));
1315 resp->SECINFO4resok_len = 0;
1316 resp->SECINFO4resok_val = NULL;
1317 }
1318
1319 /* ARGSUSED */
1320 static void
1321 rfs4_op_access(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1322 struct compound_state *cs)
1323 {
1324 ACCESS4args *args = &argop->nfs_argop4_u.opaccess;
1325 ACCESS4res *resp = &resop->nfs_resop4_u.opaccess;
1326 int error;
1327 vnode_t *vp;
1328 struct vattr va;
1329 int checkwriteperm;
1330 cred_t *cr = cs->cr;
1331 bslabel_t *clabel, *slabel;
1332 ts_label_t *tslabel;
1333 boolean_t admin_low_client;
1334
1335 DTRACE_NFSV4_2(op__access__start, struct compound_state *, cs,
1336 ACCESS4args *, args);
1337
1338 #if 0 /* XXX allow access even if !cs->access. Eventually only pseudo fs */
1339 if (cs->access == CS_ACCESS_DENIED) {
1340 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1341 goto out;
1342 }
1343 #endif
1344 if (cs->vp == NULL) {
1345 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1346 goto out;
1347 }
1348
1349 ASSERT(cr != NULL);
1350
1351 vp = cs->vp;
1352
1353 /*
1354 * If the file system is exported read only, it is not appropriate
1355 * to check write permissions for regular files and directories.
1356 * Special files are interpreted by the client, so the underlying
1357 * permissions are sent back to the client for interpretation.
1358 */
1359 if (rdonly4(req, cs) &&
1360 (vp->v_type == VREG || vp->v_type == VDIR))
1361 checkwriteperm = 0;
1362 else
1363 checkwriteperm = 1;
1364
1365 /*
1366 * XXX
1367 * We need the mode so that we can correctly determine access
1368 * permissions relative to a mandatory lock file. Access to
1369 * mandatory lock files is denied on the server, so it might
1370 * as well be reflected to the server during the open.
1371 */
1372 va.va_mask = AT_MODE;
1373 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1374 if (error) {
1375 *cs->statusp = resp->status = puterrno4(error);
1376 goto out;
1377 }
1378 resp->access = 0;
1379 resp->supported = 0;
1380
1381 if (is_system_labeled()) {
1382 ASSERT(req->rq_label != NULL);
1383 clabel = req->rq_label;
1384 DTRACE_PROBE2(tx__rfs4__log__info__opaccess__clabel, char *,
1385 "got client label from request(1)",
1386 struct svc_req *, req);
1387 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1388 if ((tslabel = nfs_getflabel(vp, cs->exi)) == NULL) {
1389 *cs->statusp = resp->status = puterrno4(EACCES);
1390 goto out;
1391 }
1392 slabel = label2bslabel(tslabel);
1393 DTRACE_PROBE3(tx__rfs4__log__info__opaccess__slabel,
1394 char *, "got server label(1) for vp(2)",
1395 bslabel_t *, slabel, vnode_t *, vp);
1396
1397 admin_low_client = B_FALSE;
1398 } else
1399 admin_low_client = B_TRUE;
1400 }
1401
1402 if (args->access & ACCESS4_READ) {
1403 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
1404 if (!error && !MANDLOCK(vp, va.va_mode) &&
1405 (!is_system_labeled() || admin_low_client ||
1406 bldominates(clabel, slabel)))
1407 resp->access |= ACCESS4_READ;
1408 resp->supported |= ACCESS4_READ;
1409 }
1410 if ((args->access & ACCESS4_LOOKUP) && vp->v_type == VDIR) {
1411 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1412 if (!error && (!is_system_labeled() || admin_low_client ||
1413 bldominates(clabel, slabel)))
1414 resp->access |= ACCESS4_LOOKUP;
1415 resp->supported |= ACCESS4_LOOKUP;
1416 }
1417 if (checkwriteperm &&
1418 (args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND))) {
1419 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1420 if (!error && !MANDLOCK(vp, va.va_mode) &&
1421 (!is_system_labeled() || admin_low_client ||
1422 blequal(clabel, slabel)))
1423 resp->access |=
1424 (args->access & (ACCESS4_MODIFY | ACCESS4_EXTEND));
1425 resp->supported |=
1426 resp->access & (ACCESS4_MODIFY | ACCESS4_EXTEND);
1427 }
1428
1429 if (checkwriteperm &&
1430 (args->access & ACCESS4_DELETE) && vp->v_type == VDIR) {
1431 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1432 if (!error && (!is_system_labeled() || admin_low_client ||
1433 blequal(clabel, slabel)))
1434 resp->access |= ACCESS4_DELETE;
1435 resp->supported |= ACCESS4_DELETE;
1436 }
1437 if (args->access & ACCESS4_EXECUTE && vp->v_type != VDIR) {
1438 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1439 if (!error && !MANDLOCK(vp, va.va_mode) &&
1440 (!is_system_labeled() || admin_low_client ||
1441 bldominates(clabel, slabel)))
1442 resp->access |= ACCESS4_EXECUTE;
1443 resp->supported |= ACCESS4_EXECUTE;
1444 }
1445
1446 if (is_system_labeled() && !admin_low_client)
1447 label_rele(tslabel);
1448
1449 *cs->statusp = resp->status = NFS4_OK;
1450 out:
1451 DTRACE_NFSV4_2(op__access__done, struct compound_state *, cs,
1452 ACCESS4res *, resp);
1453 }
1454
1455 /* ARGSUSED */
1456 static void
1457 rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1458 struct compound_state *cs)
1459 {
1460 COMMIT4args *args = &argop->nfs_argop4_u.opcommit;
1461 COMMIT4res *resp = &resop->nfs_resop4_u.opcommit;
1462 int error;
1463 vnode_t *vp = cs->vp;
1464 cred_t *cr = cs->cr;
1465 vattr_t va;
1466 nfs4_srv_t *nsrv4;
1467
1468 DTRACE_NFSV4_2(op__commit__start, struct compound_state *, cs,
1469 COMMIT4args *, args);
1470
1471 if (vp == NULL) {
1472 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1473 goto out;
1474 }
1475 if (cs->access == CS_ACCESS_DENIED) {
1476 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1477 goto out;
1478 }
1479
1480 if (args->offset + args->count < args->offset) {
1481 *cs->statusp = resp->status = NFS4ERR_INVAL;
1482 goto out;
1483 }
1484
1485 va.va_mask = AT_UID;
1486 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1487
1488 /*
1489 * If we can't get the attributes, then we can't do the
1490 * right access checking. So, we'll fail the request.
1491 */
1492 if (error) {
1493 *cs->statusp = resp->status = puterrno4(error);
1494 goto out;
1495 }
1496 if (rdonly4(req, cs)) {
1497 *cs->statusp = resp->status = NFS4ERR_ROFS;
1498 goto out;
1499 }
1500
1501 if (vp->v_type != VREG) {
1502 if (vp->v_type == VDIR)
1503 resp->status = NFS4ERR_ISDIR;
1504 else
1505 resp->status = NFS4ERR_INVAL;
1506 *cs->statusp = resp->status;
1507 goto out;
1508 }
1509
1510 if (crgetuid(cr) != va.va_uid &&
1511 (error = VOP_ACCESS(vp, VWRITE, 0, cs->cr, NULL))) {
1512 *cs->statusp = resp->status = puterrno4(error);
1513 goto out;
1514 }
1515
1516 error = VOP_FSYNC(vp, FSYNC, cr, NULL);
1517
1518 if (error) {
1519 *cs->statusp = resp->status = puterrno4(error);
1520 goto out;
1521 }
1522
1523 nsrv4 = nfs4_get_srv();
1524 *cs->statusp = resp->status = NFS4_OK;
1525 resp->writeverf = nsrv4->write4verf;
1526 out:
1527 DTRACE_NFSV4_2(op__commit__done, struct compound_state *, cs,
1528 COMMIT4res *, resp);
1529 }
1530
1531 /*
1532 * rfs4_op_mknod is called from rfs4_op_create after all initial verification
1533 * was completed. It does the nfsv4 create for special files.
1534 */
1535 /* ARGSUSED */
1536 static vnode_t *
1537 do_rfs4_op_mknod(CREATE4args *args, CREATE4res *resp, struct svc_req *req,
1538 struct compound_state *cs, vattr_t *vap, char *nm)
1539 {
1540 int error;
1541 cred_t *cr = cs->cr;
1542 vnode_t *dvp = cs->vp;
1543 vnode_t *vp = NULL;
1544 int mode;
1545 enum vcexcl excl;
1546
1547 switch (args->type) {
1548 case NF4CHR:
1549 case NF4BLK:
1550 if (secpolicy_sys_devices(cr) != 0) {
1551 *cs->statusp = resp->status = NFS4ERR_PERM;
1552 return (NULL);
1553 }
1554 if (args->type == NF4CHR)
1555 vap->va_type = VCHR;
1556 else
1557 vap->va_type = VBLK;
1558 vap->va_rdev = makedevice(args->ftype4_u.devdata.specdata1,
1559 args->ftype4_u.devdata.specdata2);
1560 vap->va_mask |= AT_RDEV;
1561 break;
1562 case NF4SOCK:
1563 vap->va_type = VSOCK;
1564 break;
1565 case NF4FIFO:
1566 vap->va_type = VFIFO;
1567 break;
1568 default:
1569 *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1570 return (NULL);
1571 }
1572
1573 /*
1574 * Must specify the mode.
1575 */
1576 if (!(vap->va_mask & AT_MODE)) {
1577 *cs->statusp = resp->status = NFS4ERR_INVAL;
1578 return (NULL);
1579 }
1580
1581 excl = EXCL;
1582
1583 mode = 0;
1584
1585 error = VOP_CREATE(dvp, nm, vap, excl, mode, &vp, cr, 0, NULL, NULL);
1586 if (error) {
1587 *cs->statusp = resp->status = puterrno4(error);
1588 return (NULL);
1589 }
1590 return (vp);
1591 }
1592
1593 /*
1594 * nfsv4 create is used to create non-regular files. For regular files,
1595 * use nfsv4 open.
1596 */
1597 /* ARGSUSED */
1598 static void
1599 rfs4_op_create(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1600 struct compound_state *cs)
1601 {
1602 CREATE4args *args = &argop->nfs_argop4_u.opcreate;
1603 CREATE4res *resp = &resop->nfs_resop4_u.opcreate;
1604 int error;
1605 struct vattr bva, iva, iva2, ava, *vap;
1606 cred_t *cr = cs->cr;
1607 vnode_t *dvp = cs->vp;
1608 vnode_t *vp = NULL;
1609 vnode_t *realvp;
1610 char *nm, *lnm;
1611 uint_t len, llen;
1612 int syncval = 0;
1613 struct nfs4_svgetit_arg sarg;
1614 struct nfs4_ntov_table ntov;
1615 struct statvfs64 sb;
1616 nfsstat4 status;
1617 struct sockaddr *ca;
1618 char *name = NULL;
1619 char *lname = NULL;
1620
1621 DTRACE_NFSV4_2(op__create__start, struct compound_state *, cs,
1622 CREATE4args *, args);
1623
1624 resp->attrset = 0;
1625
1626 if (dvp == NULL) {
1627 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1628 goto out;
1629 }
1630
1631 /*
1632 * If there is an unshared filesystem mounted on this vnode,
1633 * do not allow to create an object in this directory.
1634 */
1635 if (vn_ismntpt(dvp)) {
1636 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1637 goto out;
1638 }
1639
1640 /* Verify that type is correct */
1641 switch (args->type) {
1642 case NF4LNK:
1643 case NF4BLK:
1644 case NF4CHR:
1645 case NF4SOCK:
1646 case NF4FIFO:
1647 case NF4DIR:
1648 break;
1649 default:
1650 *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1651 goto out;
1652 };
1653
1654 if (cs->access == CS_ACCESS_DENIED) {
1655 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1656 goto out;
1657 }
1658 if (dvp->v_type != VDIR) {
1659 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1660 goto out;
1661 }
1662 status = utf8_dir_verify(&args->objname);
1663 if (status != NFS4_OK) {
1664 *cs->statusp = resp->status = status;
1665 goto out;
1666 }
1667
1668 if (rdonly4(req, cs)) {
1669 *cs->statusp = resp->status = NFS4ERR_ROFS;
1670 goto out;
1671 }
1672
1673 /*
1674 * Name of newly created object
1675 */
1676 nm = utf8_to_fn(&args->objname, &len, NULL);
1677 if (nm == NULL) {
1678 *cs->statusp = resp->status = NFS4ERR_INVAL;
1679 goto out;
1680 }
1681
1682 if (len > MAXNAMELEN) {
1683 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1684 kmem_free(nm, len);
1685 goto out;
1686 }
1687
1688 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1689 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1690 MAXPATHLEN + 1);
1691
1692 if (name == NULL) {
1693 *cs->statusp = resp->status = NFS4ERR_INVAL;
1694 kmem_free(nm, len);
1695 goto out;
1696 }
1697
1698 resp->attrset = 0;
1699
1700 sarg.sbp = &sb;
1701 sarg.is_referral = B_FALSE;
1702 nfs4_ntov_table_init(&ntov);
1703
1704 status = do_rfs4_set_attrs(&resp->attrset,
1705 &args->createattrs, cs, &sarg, &ntov, NFS4ATTR_SETIT);
1706
1707 if (sarg.vap->va_mask == 0 && status == NFS4_OK)
1708 status = NFS4ERR_INVAL;
1709
1710 if (status != NFS4_OK) {
1711 *cs->statusp = resp->status = status;
1712 if (name != nm)
1713 kmem_free(name, MAXPATHLEN + 1);
1714 kmem_free(nm, len);
1715 nfs4_ntov_table_free(&ntov, &sarg);
1716 resp->attrset = 0;
1717 goto out;
1718 }
1719
1720 /* Get "before" change value */
1721 bva.va_mask = AT_CTIME|AT_SEQ|AT_MODE;
1722 error = VOP_GETATTR(dvp, &bva, 0, cr, NULL);
1723 if (error) {
1724 *cs->statusp = resp->status = puterrno4(error);
1725 if (name != nm)
1726 kmem_free(name, MAXPATHLEN + 1);
1727 kmem_free(nm, len);
1728 nfs4_ntov_table_free(&ntov, &sarg);
1729 resp->attrset = 0;
1730 goto out;
1731 }
1732 NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bva.va_ctime)
1733
1734 vap = sarg.vap;
1735
1736 /*
1737 * Set the default initial values for attributes when the parent
1738 * directory does not have the VSUID/VSGID bit set and they have
1739 * not been specified in createattrs.
1740 */
1741 if (!(bva.va_mode & VSUID) && (vap->va_mask & AT_UID) == 0) {
1742 vap->va_uid = crgetuid(cr);
1743 vap->va_mask |= AT_UID;
1744 }
1745 if (!(bva.va_mode & VSGID) && (vap->va_mask & AT_GID) == 0) {
1746 vap->va_gid = crgetgid(cr);
1747 vap->va_mask |= AT_GID;
1748 }
1749
1750 vap->va_mask |= AT_TYPE;
1751 switch (args->type) {
1752 case NF4DIR:
1753 vap->va_type = VDIR;
1754 if ((vap->va_mask & AT_MODE) == 0) {
1755 vap->va_mode = 0700; /* default: owner rwx only */
1756 vap->va_mask |= AT_MODE;
1757 }
1758 error = VOP_MKDIR(dvp, name, vap, &vp, cr, NULL, 0, NULL);
1759 if (error)
1760 break;
1761
1762 /*
1763 * Get the initial "after" sequence number, if it fails,
1764 * set to zero
1765 */
1766 iva.va_mask = AT_SEQ;
1767 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1768 iva.va_seq = 0;
1769 break;
1770 case NF4LNK:
1771 vap->va_type = VLNK;
1772 if ((vap->va_mask & AT_MODE) == 0) {
1773 vap->va_mode = 0700; /* default: owner rwx only */
1774 vap->va_mask |= AT_MODE;
1775 }
1776
1777 /*
1778 * symlink names must be treated as data
1779 */
1780 lnm = utf8_to_str((utf8string *)&args->ftype4_u.linkdata,
1781 &llen, NULL);
1782
1783 if (lnm == NULL) {
1784 *cs->statusp = resp->status = NFS4ERR_INVAL;
1785 if (name != nm)
1786 kmem_free(name, MAXPATHLEN + 1);
1787 kmem_free(nm, len);
1788 nfs4_ntov_table_free(&ntov, &sarg);
1789 resp->attrset = 0;
1790 goto out;
1791 }
1792
1793 if (llen > MAXPATHLEN) {
1794 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1795 if (name != nm)
1796 kmem_free(name, MAXPATHLEN + 1);
1797 kmem_free(nm, len);
1798 kmem_free(lnm, llen);
1799 nfs4_ntov_table_free(&ntov, &sarg);
1800 resp->attrset = 0;
1801 goto out;
1802 }
1803
1804 lname = nfscmd_convname(ca, cs->exi, lnm,
1805 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
1806
1807 if (lname == NULL) {
1808 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
1809 if (name != nm)
1810 kmem_free(name, MAXPATHLEN + 1);
1811 kmem_free(nm, len);
1812 kmem_free(lnm, llen);
1813 nfs4_ntov_table_free(&ntov, &sarg);
1814 resp->attrset = 0;
1815 goto out;
1816 }
1817
1818 error = VOP_SYMLINK(dvp, name, vap, lname, cr, NULL, 0);
1819 if (lname != lnm)
1820 kmem_free(lname, MAXPATHLEN + 1);
1821 kmem_free(lnm, llen);
1822 if (error)
1823 break;
1824
1825 /*
1826 * Get the initial "after" sequence number, if it fails,
1827 * set to zero
1828 */
1829 iva.va_mask = AT_SEQ;
1830 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1831 iva.va_seq = 0;
1832
1833 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
1834 NULL, NULL, NULL);
1835 if (error)
1836 break;
1837
1838 /*
1839 * va_seq is not safe over VOP calls, check it again
1840 * if it has changed zero out iva to force atomic = FALSE.
1841 */
1842 iva2.va_mask = AT_SEQ;
1843 if (VOP_GETATTR(dvp, &iva2, 0, cs->cr, NULL) ||
1844 iva2.va_seq != iva.va_seq)
1845 iva.va_seq = 0;
1846 break;
1847 default:
1848 /*
1849 * probably a special file.
1850 */
1851 if ((vap->va_mask & AT_MODE) == 0) {
1852 vap->va_mode = 0600; /* default: owner rw only */
1853 vap->va_mask |= AT_MODE;
1854 }
1855 syncval = FNODSYNC;
1856 /*
1857 * We know this will only generate one VOP call
1858 */
1859 vp = do_rfs4_op_mknod(args, resp, req, cs, vap, name);
1860
1861 if (vp == NULL) {
1862 if (name != nm)
1863 kmem_free(name, MAXPATHLEN + 1);
1864 kmem_free(nm, len);
1865 nfs4_ntov_table_free(&ntov, &sarg);
1866 resp->attrset = 0;
1867 goto out;
1868 }
1869
1870 /*
1871 * Get the initial "after" sequence number, if it fails,
1872 * set to zero
1873 */
1874 iva.va_mask = AT_SEQ;
1875 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1876 iva.va_seq = 0;
1877
1878 break;
1879 }
1880 if (name != nm)
1881 kmem_free(name, MAXPATHLEN + 1);
1882 kmem_free(nm, len);
1883
1884 if (error) {
1885 *cs->statusp = resp->status = puterrno4(error);
1886 }
1887
1888 /*
1889 * Force modified data and metadata out to stable storage.
1890 */
1891 (void) VOP_FSYNC(dvp, 0, cr, NULL);
1892
1893 if (resp->status != NFS4_OK) {
1894 if (vp != NULL)
1895 VN_RELE(vp);
1896 nfs4_ntov_table_free(&ntov, &sarg);
1897 resp->attrset = 0;
1898 goto out;
1899 }
1900
1901 /*
1902 * Finish setup of cinfo response, "before" value already set.
1903 * Get "after" change value, if it fails, simply return the
1904 * before value.
1905 */
1906 ava.va_mask = AT_CTIME|AT_SEQ;
1907 if (VOP_GETATTR(dvp, &ava, 0, cr, NULL)) {
1908 ava.va_ctime = bva.va_ctime;
1909 ava.va_seq = 0;
1910 }
1911 NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, ava.va_ctime);
1912
1913 /*
1914 * True verification that object was created with correct
1915 * attrs is impossible. The attrs could have been changed
1916 * immediately after object creation. If attributes did
1917 * not verify, the only recourse for the server is to
1918 * destroy the object. Maybe if some attrs (like gid)
1919 * are set incorrectly, the object should be destroyed;
1920 * however, seems bad as a default policy. Do we really
1921 * want to destroy an object over one of the times not
1922 * verifying correctly? For these reasons, the server
1923 * currently sets bits in attrset for createattrs
1924 * that were set; however, no verification is done.
1925 *
1926 * vmask_to_nmask accounts for vattr bits set on create
1927 * [do_rfs4_set_attrs() only sets resp bits for
1928 * non-vattr/vfs bits.]
1929 * Mask off any bits set by default so as not to return
1930 * more attrset bits than were requested in createattrs
1931 */
1932 nfs4_vmask_to_nmask(sarg.vap->va_mask, &resp->attrset);
1933 resp->attrset &= args->createattrs.attrmask;
1934 nfs4_ntov_table_free(&ntov, &sarg);
1935
1936 error = makefh4(&cs->fh, vp, cs->exi);
1937 if (error) {
1938 *cs->statusp = resp->status = puterrno4(error);
1939 }
1940
1941 /*
1942 * The cinfo.atomic = TRUE only if we got no errors, we have
1943 * non-zero va_seq's, and it has incremented by exactly one
1944 * during the creation and it didn't change during the VOP_LOOKUP
1945 * or VOP_FSYNC.
1946 */
1947 if (!error && bva.va_seq && iva.va_seq && ava.va_seq &&
1948 iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
1949 resp->cinfo.atomic = TRUE;
1950 else
1951 resp->cinfo.atomic = FALSE;
1952
1953 /*
1954 * Force modified metadata out to stable storage.
1955 *
1956 * if a underlying vp exists, pass it to VOP_FSYNC
1957 */
1958 if (VOP_REALVP(vp, &realvp, NULL) == 0)
1959 (void) VOP_FSYNC(realvp, syncval, cr, NULL);
1960 else
1961 (void) VOP_FSYNC(vp, syncval, cr, NULL);
1962
1963 if (resp->status != NFS4_OK) {
1964 VN_RELE(vp);
1965 goto out;
1966 }
1967 if (cs->vp)
1968 VN_RELE(cs->vp);
1969
1970 cs->vp = vp;
1971 *cs->statusp = resp->status = NFS4_OK;
1972 out:
1973 DTRACE_NFSV4_2(op__create__done, struct compound_state *, cs,
1974 CREATE4res *, resp);
1975 }
1976
1977 /*ARGSUSED*/
1978 static void
1979 rfs4_op_delegpurge(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1980 struct compound_state *cs)
1981 {
1982 DTRACE_NFSV4_2(op__delegpurge__start, struct compound_state *, cs,
1983 DELEGPURGE4args *, &argop->nfs_argop4_u.opdelegpurge);
1984
1985 rfs4_op_inval(argop, resop, req, cs);
1986
1987 DTRACE_NFSV4_2(op__delegpurge__done, struct compound_state *, cs,
1988 DELEGPURGE4res *, &resop->nfs_resop4_u.opdelegpurge);
1989 }
1990
1991 /*ARGSUSED*/
1992 static void
1993 rfs4_op_delegreturn(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1994 struct compound_state *cs)
1995 {
1996 DELEGRETURN4args *args = &argop->nfs_argop4_u.opdelegreturn;
1997 DELEGRETURN4res *resp = &resop->nfs_resop4_u.opdelegreturn;
1998 rfs4_deleg_state_t *dsp;
1999 nfsstat4 status;
2000
2001 DTRACE_NFSV4_2(op__delegreturn__start, struct compound_state *, cs,
2002 DELEGRETURN4args *, args);
2003
2004 status = rfs4_get_deleg_state(&args->deleg_stateid, &dsp);
2005 resp->status = *cs->statusp = status;
2006 if (status != NFS4_OK)
2007 goto out;
2008
2009 /* Ensure specified filehandle matches */
2010 if (cs->vp != dsp->rds_finfo->rf_vp) {
2011 resp->status = *cs->statusp = NFS4ERR_BAD_STATEID;
2012 } else
2013 rfs4_return_deleg(dsp, FALSE);
2014
2015 rfs4_update_lease(dsp->rds_client);
2016
2017 rfs4_deleg_state_rele(dsp);
2018 out:
2019 DTRACE_NFSV4_2(op__delegreturn__done, struct compound_state *, cs,
2020 DELEGRETURN4res *, resp);
2021 }
2022
2023 /*
2024 * Check to see if a given "flavor" is an explicitly shared flavor.
2025 * The assumption of this routine is the "flavor" is already a valid
2026 * flavor in the secinfo list of "exi".
2027 *
2028 * e.g.
2029 * # share -o sec=flavor1 /export
2030 * # share -o sec=flavor2 /export/home
2031 *
2032 * flavor2 is not an explicitly shared flavor for /export,
2033 * however it is in the secinfo list for /export thru the
2034 * server namespace setup.
2035 */
2036 int
2037 is_exported_sec(int flavor, struct exportinfo *exi)
2038 {
2039 int i;
2040 struct secinfo *sp;
2041
2042 sp = exi->exi_export.ex_secinfo;
2043 for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
2044 if (flavor == sp[i].s_secinfo.sc_nfsnum ||
2045 sp[i].s_secinfo.sc_nfsnum == AUTH_NONE) {
2046 return (SEC_REF_EXPORTED(&sp[i]));
2047 }
2048 }
2049
2050 /* Should not reach this point based on the assumption */
2051 return (0);
2052 }
2053
2054 /*
2055 * Check if the security flavor used in the request matches what is
2056 * required at the export point or at the root pseudo node (exi_root).
2057 *
2058 * returns 1 if there's a match or if exported with AUTH_NONE; 0 otherwise.
2059 *
2060 */
2061 static int
2062 secinfo_match_or_authnone(struct compound_state *cs)
2063 {
2064 int i;
2065 struct secinfo *sp;
2066
2067 /*
2068 * Check cs->nfsflavor (from the request) against
2069 * the current export data in cs->exi.
2070 */
2071 sp = cs->exi->exi_export.ex_secinfo;
2072 for (i = 0; i < cs->exi->exi_export.ex_seccnt; i++) {
2073 if (cs->nfsflavor == sp[i].s_secinfo.sc_nfsnum ||
2074 sp[i].s_secinfo.sc_nfsnum == AUTH_NONE)
2075 return (1);
2076 }
2077
2078 return (0);
2079 }
2080
2081 /*
2082 * Check the access authority for the client and return the correct error.
2083 */
2084 nfsstat4
2085 call_checkauth4(struct compound_state *cs, struct svc_req *req)
2086 {
2087 int authres;
2088
2089 /*
2090 * First, check if the security flavor used in the request
2091 * are among the flavors set in the server namespace.
2092 */
2093 if (!secinfo_match_or_authnone(cs)) {
2094 *cs->statusp = NFS4ERR_WRONGSEC;
2095 return (*cs->statusp);
2096 }
2097
2098 authres = checkauth4(cs, req);
2099
2100 if (authres > 0) {
2101 *cs->statusp = NFS4_OK;
2102 if (! (cs->access & CS_ACCESS_LIMITED))
2103 cs->access = CS_ACCESS_OK;
2104 } else if (authres == 0) {
2105 *cs->statusp = NFS4ERR_ACCESS;
2106 } else if (authres == -2) {
2107 *cs->statusp = NFS4ERR_WRONGSEC;
2108 } else {
2109 *cs->statusp = NFS4ERR_DELAY;
2110 }
2111 return (*cs->statusp);
2112 }
2113
2114 /*
2115 * bitmap4_to_attrmask is called by getattr and readdir.
2116 * It sets up the vattr mask and determines whether vfsstat call is needed
2117 * based on the input bitmap.
2118 * Returns nfsv4 status.
2119 */
2120 static nfsstat4
2121 bitmap4_to_attrmask(bitmap4 breq, struct nfs4_svgetit_arg *sargp)
2122 {
2123 int i;
2124 uint_t va_mask;
2125 struct statvfs64 *sbp = sargp->sbp;
2126
2127 sargp->sbp = NULL;
2128 sargp->flag = 0;
2129 sargp->rdattr_error = NFS4_OK;
2130 sargp->mntdfid_set = FALSE;
2131 if (sargp->cs->vp)
2132 sargp->xattr = get_fh4_flag(&sargp->cs->fh,
2133 FH4_ATTRDIR | FH4_NAMEDATTR);
2134 else
2135 sargp->xattr = 0;
2136
2137 /*
2138 * Set rdattr_error_req to true if return error per
2139 * failed entry rather than fail the readdir.
2140 */
2141 if (breq & FATTR4_RDATTR_ERROR_MASK)
2142 sargp->rdattr_error_req = 1;
2143 else
2144 sargp->rdattr_error_req = 0;
2145
2146 /*
2147 * generate the va_mask
2148 * Handle the easy cases first
2149 */
2150 switch (breq) {
2151 case NFS4_NTOV_ATTR_MASK:
2152 sargp->vap->va_mask = NFS4_NTOV_ATTR_AT_MASK;
2153 return (NFS4_OK);
2154
2155 case NFS4_FS_ATTR_MASK:
2156 sargp->vap->va_mask = NFS4_FS_ATTR_AT_MASK;
2157 sargp->sbp = sbp;
2158 return (NFS4_OK);
2159
2160 case NFS4_NTOV_ATTR_CACHE_MASK:
2161 sargp->vap->va_mask = NFS4_NTOV_ATTR_CACHE_AT_MASK;
2162 return (NFS4_OK);
2163
2164 case FATTR4_LEASE_TIME_MASK:
2165 sargp->vap->va_mask = 0;
2166 return (NFS4_OK);
2167
2168 default:
2169 va_mask = 0;
2170 for (i = 0; i < nfs4_ntov_map_size; i++) {
2171 if ((breq & nfs4_ntov_map[i].fbit) &&
2172 nfs4_ntov_map[i].vbit)
2173 va_mask |= nfs4_ntov_map[i].vbit;
2174 }
2175
2176 /*
2177 * Check is vfsstat is needed
2178 */
2179 if (breq & NFS4_FS_ATTR_MASK)
2180 sargp->sbp = sbp;
2181
2182 sargp->vap->va_mask = va_mask;
2183 return (NFS4_OK);
2184 }
2185 /* NOTREACHED */
2186 }
2187
2188 /*
2189 * bitmap4_get_sysattrs is called by getattr and readdir.
2190 * It calls both VOP_GETATTR and VFS_STATVFS calls to get the attrs.
2191 * Returns nfsv4 status.
2192 */
2193 static nfsstat4
2194 bitmap4_get_sysattrs(struct nfs4_svgetit_arg *sargp)
2195 {
2196 int error;
2197 struct compound_state *cs = sargp->cs;
2198 vnode_t *vp = cs->vp;
2199
2200 if (sargp->sbp != NULL) {
2201 if (error = VFS_STATVFS(vp->v_vfsp, sargp->sbp)) {
2202 sargp->sbp = NULL; /* to identify error */
2203 return (puterrno4(error));
2204 }
2205 }
2206
2207 return (rfs4_vop_getattr(vp, sargp->vap, 0, cs->cr));
2208 }
2209
2210 static void
2211 nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp)
2212 {
2213 ntovp->na = kmem_zalloc(sizeof (union nfs4_attr_u) * nfs4_ntov_map_size,
2214 KM_SLEEP);
2215 ntovp->attrcnt = 0;
2216 ntovp->vfsstat = FALSE;
2217 }
2218
2219 static void
2220 nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
2221 struct nfs4_svgetit_arg *sargp)
2222 {
2223 int i;
2224 union nfs4_attr_u *na;
2225 uint8_t *amap;
2226
2227 /*
2228 * XXX Should do the same checks for whether the bit is set
2229 */
2230 for (i = 0, na = ntovp->na, amap = ntovp->amap;
2231 i < ntovp->attrcnt; i++, na++, amap++) {
2232 (void) (*nfs4_ntov_map[*amap].sv_getit)(
2233 NFS4ATTR_FREEIT, sargp, na);
2234 }
2235 if ((sargp->op == NFS4ATTR_SETIT) || (sargp->op == NFS4ATTR_VERIT)) {
2236 /*
2237 * xdr_free for getattr will be done later
2238 */
2239 for (i = 0, na = ntovp->na, amap = ntovp->amap;
2240 i < ntovp->attrcnt; i++, na++, amap++) {
2241 xdr_free(nfs4_ntov_map[*amap].xfunc, (caddr_t)na);
2242 }
2243 }
2244 kmem_free(ntovp->na, sizeof (union nfs4_attr_u) * nfs4_ntov_map_size);
2245 }
2246
2247 /*
2248 * do_rfs4_op_getattr gets the system attrs and converts into fattr4.
2249 */
2250 static nfsstat4
2251 do_rfs4_op_getattr(bitmap4 breq, fattr4 *fattrp,
2252 struct nfs4_svgetit_arg *sargp)
2253 {
2254 int error = 0;
2255 int i, k;
2256 struct nfs4_ntov_table ntov;
2257 XDR xdr;
2258 ulong_t xdr_size;
2259 char *xdr_attrs;
2260 nfsstat4 status = NFS4_OK;
2261 nfsstat4 prev_rdattr_error = sargp->rdattr_error;
2262 union nfs4_attr_u *na;
2263 uint8_t *amap;
2264
2265 sargp->op = NFS4ATTR_GETIT;
2266 sargp->flag = 0;
2267
2268 fattrp->attrmask = 0;
2269 /* if no bits requested, then return empty fattr4 */
2270 if (breq == 0) {
2271 fattrp->attrlist4_len = 0;
2272 fattrp->attrlist4 = NULL;
2273 return (NFS4_OK);
2274 }
2275
2276 /*
2277 * return NFS4ERR_INVAL when client requests write-only attrs
2278 */
2279 if (breq & (FATTR4_TIME_ACCESS_SET_MASK | FATTR4_TIME_MODIFY_SET_MASK))
2280 return (NFS4ERR_INVAL);
2281
2282 nfs4_ntov_table_init(&ntov);
2283 na = ntov.na;
2284 amap = ntov.amap;
2285
2286 /*
2287 * Now loop to get or verify the attrs
2288 */
2289 for (i = 0; i < nfs4_ntov_map_size; i++) {
2290 if (breq & nfs4_ntov_map[i].fbit) {
2291 if ((*nfs4_ntov_map[i].sv_getit)(
2292 NFS4ATTR_SUPPORTED, sargp, NULL) == 0) {
2293
2294 error = (*nfs4_ntov_map[i].sv_getit)(
2295 NFS4ATTR_GETIT, sargp, na);
2296
2297 /*
2298 * Possible error values:
2299 * >0 if sv_getit failed to
2300 * get the attr; 0 if succeeded;
2301 * <0 if rdattr_error and the
2302 * attribute cannot be returned.
2303 */
2304 if (error && !(sargp->rdattr_error_req))
2305 goto done;
2306 /*
2307 * If error then just for entry
2308 */
2309 if (error == 0) {
2310 fattrp->attrmask |=
2311 nfs4_ntov_map[i].fbit;
2312 *amap++ =
2313 (uint8_t)nfs4_ntov_map[i].nval;
2314 na++;
2315 (ntov.attrcnt)++;
2316 } else if ((error > 0) &&
2317 (sargp->rdattr_error == NFS4_OK)) {
2318 sargp->rdattr_error = puterrno4(error);
2319 }
2320 error = 0;
2321 }
2322 }
2323 }
2324
2325 /*
2326 * If rdattr_error was set after the return value for it was assigned,
2327 * update it.
2328 */
2329 if (prev_rdattr_error != sargp->rdattr_error) {
2330 na = ntov.na;
2331 amap = ntov.amap;
2332 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2333 k = *amap;
2334 if (k < FATTR4_RDATTR_ERROR) {
2335 continue;
2336 }
2337 if ((k == FATTR4_RDATTR_ERROR) &&
2338 ((*nfs4_ntov_map[k].sv_getit)(
2339 NFS4ATTR_SUPPORTED, sargp, NULL) == 0)) {
2340
2341 (void) (*nfs4_ntov_map[k].sv_getit)(
2342 NFS4ATTR_GETIT, sargp, na);
2343 }
2344 break;
2345 }
2346 }
2347
2348 xdr_size = 0;
2349 na = ntov.na;
2350 amap = ntov.amap;
2351 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2352 xdr_size += xdr_sizeof(nfs4_ntov_map[*amap].xfunc, na);
2353 }
2354
2355 fattrp->attrlist4_len = xdr_size;
2356 if (xdr_size) {
2357 /* freed by rfs4_op_getattr_free() */
2358 fattrp->attrlist4 = xdr_attrs = kmem_zalloc(xdr_size, KM_SLEEP);
2359
2360 xdrmem_create(&xdr, xdr_attrs, xdr_size, XDR_ENCODE);
2361
2362 na = ntov.na;
2363 amap = ntov.amap;
2364 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2365 if (!(*nfs4_ntov_map[*amap].xfunc)(&xdr, na)) {
2366 DTRACE_PROBE1(nfss__e__getattr4_encfail,
2367 int, *amap);
2368 status = NFS4ERR_SERVERFAULT;
2369 break;
2370 }
2371 }
2372 /* xdrmem_destroy(&xdrs); */ /* NO-OP */
2373 } else {
2374 fattrp->attrlist4 = NULL;
2375 }
2376 done:
2377
2378 nfs4_ntov_table_free(&ntov, sargp);
2379
2380 if (error != 0)
2381 status = puterrno4(error);
2382
2383 return (status);
2384 }
2385
2386 /* ARGSUSED */
2387 static void
2388 rfs4_op_getattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2389 struct compound_state *cs)
2390 {
2391 GETATTR4args *args = &argop->nfs_argop4_u.opgetattr;
2392 GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2393 struct nfs4_svgetit_arg sarg;
2394 struct statvfs64 sb;
2395 nfsstat4 status;
2396
2397 DTRACE_NFSV4_2(op__getattr__start, struct compound_state *, cs,
2398 GETATTR4args *, args);
2399
2400 if (cs->vp == NULL) {
2401 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2402 goto out;
2403 }
2404
2405 if (cs->access == CS_ACCESS_DENIED) {
2406 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2407 goto out;
2408 }
2409
2410 sarg.sbp = &sb;
2411 sarg.cs = cs;
2412 sarg.is_referral = B_FALSE;
2413
2414 status = bitmap4_to_attrmask(args->attr_request, &sarg);
2415 if (status == NFS4_OK) {
2416
2417 status = bitmap4_get_sysattrs(&sarg);
2418 if (status == NFS4_OK) {
2419
2420 /* Is this a referral? */
2421 if (vn_is_nfs_reparse(cs->vp, cs->cr)) {
2422 /* Older V4 Solaris client sees a link */
2423 if (client_is_downrev(req))
2424 sarg.vap->va_type = VLNK;
2425 else
2426 sarg.is_referral = B_TRUE;
2427 }
2428
2429 status = do_rfs4_op_getattr(args->attr_request,
2430 &resp->obj_attributes, &sarg);
2431 }
2432 }
2433 *cs->statusp = resp->status = status;
2434 out:
2435 DTRACE_NFSV4_2(op__getattr__done, struct compound_state *, cs,
2436 GETATTR4res *, resp);
2437 }
2438
2439 static void
2440 rfs4_op_getattr_free(nfs_resop4 *resop)
2441 {
2442 GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2443
2444 nfs4_fattr4_free(&resp->obj_attributes);
2445 }
2446
2447 /* ARGSUSED */
2448 static void
2449 rfs4_op_getfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2450 struct compound_state *cs)
2451 {
2452 GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2453
2454 DTRACE_NFSV4_1(op__getfh__start, struct compound_state *, cs);
2455
2456 if (cs->vp == NULL) {
2457 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2458 goto out;
2459 }
2460 if (cs->access == CS_ACCESS_DENIED) {
2461 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2462 goto out;
2463 }
2464
2465 /* check for reparse point at the share point */
2466 if (cs->exi->exi_moved || vn_is_nfs_reparse(cs->exi->exi_vp, cs->cr)) {
2467 /* it's all bad */
2468 cs->exi->exi_moved = 1;
2469 *cs->statusp = resp->status = NFS4ERR_MOVED;
2470 DTRACE_PROBE2(nfs4serv__func__referral__shared__moved,
2471 vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2472 return;
2473 }
2474
2475 /* check for reparse point at vp */
2476 if (vn_is_nfs_reparse(cs->vp, cs->cr) && !client_is_downrev(req)) {
2477 /* it's not all bad */
2478 *cs->statusp = resp->status = NFS4ERR_MOVED;
2479 DTRACE_PROBE2(nfs4serv__func__referral__moved,
2480 vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2481 return;
2482 }
2483
2484 resp->object.nfs_fh4_val =
2485 kmem_alloc(cs->fh.nfs_fh4_len, KM_SLEEP);
2486 nfs_fh4_copy(&cs->fh, &resp->object);
2487 *cs->statusp = resp->status = NFS4_OK;
2488 out:
2489 DTRACE_NFSV4_2(op__getfh__done, struct compound_state *, cs,
2490 GETFH4res *, resp);
2491 }
2492
2493 static void
2494 rfs4_op_getfh_free(nfs_resop4 *resop)
2495 {
2496 GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2497
2498 if (resp->status == NFS4_OK &&
2499 resp->object.nfs_fh4_val != NULL) {
2500 kmem_free(resp->object.nfs_fh4_val, resp->object.nfs_fh4_len);
2501 resp->object.nfs_fh4_val = NULL;
2502 resp->object.nfs_fh4_len = 0;
2503 }
2504 }
2505
2506 /*
2507 * illegal: args: void
2508 * res : status (NFS4ERR_OP_ILLEGAL)
2509 */
2510 /* ARGSUSED */
2511 static void
2512 rfs4_op_illegal(nfs_argop4 *argop, nfs_resop4 *resop,
2513 struct svc_req *req, struct compound_state *cs)
2514 {
2515 ILLEGAL4res *resp = &resop->nfs_resop4_u.opillegal;
2516
2517 resop->resop = OP_ILLEGAL;
2518 *cs->statusp = resp->status = NFS4ERR_OP_ILLEGAL;
2519 }
2520
2521 /*
2522 * link: args: SAVED_FH: file, CURRENT_FH: target directory
2523 * res: status. If success - CURRENT_FH unchanged, return change_info
2524 */
2525 /* ARGSUSED */
2526 static void
2527 rfs4_op_link(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2528 struct compound_state *cs)
2529 {
2530 LINK4args *args = &argop->nfs_argop4_u.oplink;
2531 LINK4res *resp = &resop->nfs_resop4_u.oplink;
2532 int error;
2533 vnode_t *vp;
2534 vnode_t *dvp;
2535 struct vattr bdva, idva, adva;
2536 char *nm;
2537 uint_t len;
2538 struct sockaddr *ca;
2539 char *name = NULL;
2540 nfsstat4 status;
2541
2542 DTRACE_NFSV4_2(op__link__start, struct compound_state *, cs,
2543 LINK4args *, args);
2544
2545 /* SAVED_FH: source object */
2546 vp = cs->saved_vp;
2547 if (vp == NULL) {
2548 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2549 goto out;
2550 }
2551
2552 /* CURRENT_FH: target directory */
2553 dvp = cs->vp;
2554 if (dvp == NULL) {
2555 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2556 goto out;
2557 }
2558
2559 /*
2560 * If there is a non-shared filesystem mounted on this vnode,
2561 * do not allow to link any file in this directory.
2562 */
2563 if (vn_ismntpt(dvp)) {
2564 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2565 goto out;
2566 }
2567
2568 if (cs->access == CS_ACCESS_DENIED) {
2569 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2570 goto out;
2571 }
2572
2573 /* Check source object's type validity */
2574 if (vp->v_type == VDIR) {
2575 *cs->statusp = resp->status = NFS4ERR_ISDIR;
2576 goto out;
2577 }
2578
2579 /* Check target directory's type */
2580 if (dvp->v_type != VDIR) {
2581 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2582 goto out;
2583 }
2584
2585 if (cs->saved_exi != cs->exi) {
2586 *cs->statusp = resp->status = NFS4ERR_XDEV;
2587 goto out;
2588 }
2589
2590 status = utf8_dir_verify(&args->newname);
2591 if (status != NFS4_OK) {
2592 *cs->statusp = resp->status = status;
2593 goto out;
2594 }
2595
2596 nm = utf8_to_fn(&args->newname, &len, NULL);
2597 if (nm == NULL) {
2598 *cs->statusp = resp->status = NFS4ERR_INVAL;
2599 goto out;
2600 }
2601
2602 if (len > MAXNAMELEN) {
2603 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2604 kmem_free(nm, len);
2605 goto out;
2606 }
2607
2608 if (rdonly4(req, cs)) {
2609 *cs->statusp = resp->status = NFS4ERR_ROFS;
2610 kmem_free(nm, len);
2611 goto out;
2612 }
2613
2614 /* Get "before" change value */
2615 bdva.va_mask = AT_CTIME|AT_SEQ;
2616 error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
2617 if (error) {
2618 *cs->statusp = resp->status = puterrno4(error);
2619 kmem_free(nm, len);
2620 goto out;
2621 }
2622
2623 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2624 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
2625 MAXPATHLEN + 1);
2626
2627 if (name == NULL) {
2628 *cs->statusp = resp->status = NFS4ERR_INVAL;
2629 kmem_free(nm, len);
2630 goto out;
2631 }
2632
2633 NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
2634
2635 error = VOP_LINK(dvp, vp, name, cs->cr, NULL, 0);
2636
2637 if (nm != name)
2638 kmem_free(name, MAXPATHLEN + 1);
2639 kmem_free(nm, len);
2640
2641 /*
2642 * Get the initial "after" sequence number, if it fails, set to zero
2643 */
2644 idva.va_mask = AT_SEQ;
2645 if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
2646 idva.va_seq = 0;
2647
2648 /*
2649 * Force modified data and metadata out to stable storage.
2650 */
2651 (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
2652 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
2653
2654 if (error) {
2655 *cs->statusp = resp->status = puterrno4(error);
2656 goto out;
2657 }
2658
2659 /*
2660 * Get "after" change value, if it fails, simply return the
2661 * before value.
2662 */
2663 adva.va_mask = AT_CTIME|AT_SEQ;
2664 if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
2665 adva.va_ctime = bdva.va_ctime;
2666 adva.va_seq = 0;
2667 }
2668
2669 NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
2670
2671 /*
2672 * The cinfo.atomic = TRUE only if we have
2673 * non-zero va_seq's, and it has incremented by exactly one
2674 * during the VOP_LINK and it didn't change during the VOP_FSYNC.
2675 */
2676 if (bdva.va_seq && idva.va_seq && adva.va_seq &&
2677 idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
2678 resp->cinfo.atomic = TRUE;
2679 else
2680 resp->cinfo.atomic = FALSE;
2681
2682 *cs->statusp = resp->status = NFS4_OK;
2683 out:
2684 DTRACE_NFSV4_2(op__link__done, struct compound_state *, cs,
2685 LINK4res *, resp);
2686 }
2687
2688 /*
2689 * Used by rfs4_op_lookup and rfs4_op_lookupp to do the actual work.
2690 */
2691
2692 /* ARGSUSED */
2693 static nfsstat4
2694 do_rfs4_op_lookup(char *nm, struct svc_req *req, struct compound_state *cs)
2695 {
2696 int error;
2697 int different_export = 0;
2698 vnode_t *vp, *pre_tvp = NULL, *oldvp = NULL;
2699 struct exportinfo *exi = NULL, *pre_exi = NULL;
2700 nfsstat4 stat;
2701 fid_t fid;
2702 int attrdir, dotdot, walk;
2703 bool_t is_newvp = FALSE;
2704
2705 if (cs->vp->v_flag & V_XATTRDIR) {
2706 attrdir = 1;
2707 ASSERT(get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2708 } else {
2709 attrdir = 0;
2710 ASSERT(! get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2711 }
2712
2713 dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
2714
2715 /*
2716 * If dotdotting, then need to check whether it's
2717 * above the root of a filesystem, or above an
2718 * export point.
2719 */
2720 if (dotdot) {
2721
2722 /*
2723 * If dotdotting at the root of a filesystem, then
2724 * need to traverse back to the mounted-on filesystem
2725 * and do the dotdot lookup there.
2726 */
2727 if ((cs->vp->v_flag & VROOT) || VN_IS_CURZONEROOT(cs->vp)) {
2728
2729 /*
2730 * If at the system root, then can
2731 * go up no further.
2732 */
2733 if (VN_CMP(cs->vp, ZONE_ROOTVP()))
2734 return (puterrno4(ENOENT));
2735
2736 /*
2737 * Traverse back to the mounted-on filesystem
2738 */
2739 cs->vp = untraverse(cs->vp);
2740
2741 /*
2742 * Set the different_export flag so we remember
2743 * to pick up a new exportinfo entry for
2744 * this new filesystem.
2745 */
2746 different_export = 1;
2747 } else {
2748
2749 /*
2750 * If dotdotting above an export point then set
2751 * the different_export to get new export info.
2752 */
2753 different_export = nfs_exported(cs->exi, cs->vp);
2754 }
2755 }
2756
2757 error = VOP_LOOKUP(cs->vp, nm, &vp, NULL, 0, NULL, cs->cr,
2758 NULL, NULL, NULL);
2759 if (error)
2760 return (puterrno4(error));
2761
2762 /*
2763 * If the vnode is in a pseudo filesystem, check whether it is visible.
2764 *
2765 * XXX if the vnode is a symlink and it is not visible in
2766 * a pseudo filesystem, return ENOENT (not following symlink).
2767 * V4 client can not mount such symlink. This is a regression
2768 * from V2/V3.
2769 *
2770 * In the same exported filesystem, if the security flavor used
2771 * is not an explicitly shared flavor, limit the view to the visible
2772 * list entries only. This is not a WRONGSEC case because it's already
2773 * checked via PUTROOTFH/PUTPUBFH or PUTFH.
2774 */
2775 if (!different_export &&
2776 (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
2777 cs->access & CS_ACCESS_LIMITED)) {
2778 if (! nfs_visible(cs->exi, vp, &different_export)) {
2779 VN_RELE(vp);
2780 return (puterrno4(ENOENT));
2781 }
2782 }
2783
2784 /*
2785 * If it's a mountpoint, then traverse it.
2786 */
2787 if (vn_ismntpt(vp)) {
2788 pre_exi = cs->exi; /* save pre-traversed exportinfo */
2789 pre_tvp = vp; /* save pre-traversed vnode */
2790
2791 /*
2792 * hold pre_tvp to counteract rele by traverse. We will
2793 * need pre_tvp below if checkexport4 fails
2794 */
2795 VN_HOLD(pre_tvp);
2796 if ((error = traverse(&vp)) != 0) {
2797 VN_RELE(vp);
2798 VN_RELE(pre_tvp);
2799 return (puterrno4(error));
2800 }
2801 different_export = 1;
2802 } else if (vp->v_vfsp != cs->vp->v_vfsp) {
2803 /*
2804 * The vfsp comparison is to handle the case where
2805 * a LOFS mount is shared. lo_lookup traverses mount points,
2806 * and NFS is unaware of local fs transistions because
2807 * v_vfsmountedhere isn't set. For this special LOFS case,
2808 * the dir and the obj returned by lookup will have different
2809 * vfs ptrs.
2810 */
2811 different_export = 1;
2812 }
2813
2814 if (different_export) {
2815
2816 bzero(&fid, sizeof (fid));
2817 fid.fid_len = MAXFIDSZ;
2818 error = vop_fid_pseudo(vp, &fid);
2819 if (error) {
2820 VN_RELE(vp);
2821 if (pre_tvp)
2822 VN_RELE(pre_tvp);
2823 return (puterrno4(error));
2824 }
2825
2826 if (dotdot)
2827 exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
2828 else
2829 exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
2830
2831 if (exi == NULL) {
2832 if (pre_tvp) {
2833 /*
2834 * If this vnode is a mounted-on vnode,
2835 * but the mounted-on file system is not
2836 * exported, send back the filehandle for
2837 * the mounted-on vnode, not the root of
2838 * the mounted-on file system.
2839 */
2840 VN_RELE(vp);
2841 vp = pre_tvp;
2842 exi = pre_exi;
2843 } else {
2844 VN_RELE(vp);
2845 return (puterrno4(EACCES));
2846 }
2847 } else if (pre_tvp) {
2848 /* we're done with pre_tvp now. release extra hold */
2849 VN_RELE(pre_tvp);
2850 }
2851
2852 cs->exi = exi;
2853
2854 /*
2855 * Now we do a checkauth4. The reason is that
2856 * this client/user may not have access to the new
2857 * exported file system, and if they do,
2858 * the client/user may be mapped to a different uid.
2859 *
2860 * We start with a new cr, because the checkauth4 done
2861 * in the PUT*FH operation over wrote the cred's uid,
2862 * gid, etc, and we want the real thing before calling
2863 * checkauth4()
2864 */
2865 crfree(cs->cr);
2866 cs->cr = crdup(cs->basecr);
2867
2868 oldvp = cs->vp;
2869 cs->vp = vp;
2870 is_newvp = TRUE;
2871
2872 stat = call_checkauth4(cs, req);
2873 if (stat != NFS4_OK) {
2874 VN_RELE(cs->vp);
2875 cs->vp = oldvp;
2876 return (stat);
2877 }
2878 }
2879
2880 /*
2881 * After various NFS checks, do a label check on the path
2882 * component. The label on this path should either be the
2883 * global zone's label or a zone's label. We are only
2884 * interested in the zone's label because exported files
2885 * in global zone is accessible (though read-only) to
2886 * clients. The exportability/visibility check is already
2887 * done before reaching this code.
2888 */
2889 if (is_system_labeled()) {
2890 bslabel_t *clabel;
2891
2892 ASSERT(req->rq_label != NULL);
2893 clabel = req->rq_label;
2894 DTRACE_PROBE2(tx__rfs4__log__info__oplookup__clabel, char *,
2895 "got client label from request(1)", struct svc_req *, req);
2896
2897 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2898 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
2899 cs->exi)) {
2900 error = EACCES;
2901 goto err_out;
2902 }
2903 } else {
2904 /*
2905 * We grant access to admin_low label clients
2906 * only if the client is trusted, i.e. also
2907 * running Solaris Trusted Extension.
2908 */
2909 struct sockaddr *ca;
2910 int addr_type;
2911 void *ipaddr;
2912 tsol_tpc_t *tp;
2913
2914 ca = (struct sockaddr *)svc_getrpccaller(
2915 req->rq_xprt)->buf;
2916 if (ca->sa_family == AF_INET) {
2917 addr_type = IPV4_VERSION;
2918 ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
2919 } else if (ca->sa_family == AF_INET6) {
2920 addr_type = IPV6_VERSION;
2921 ipaddr = &((struct sockaddr_in6 *)
2922 ca)->sin6_addr;
2923 }
2924 tp = find_tpc(ipaddr, addr_type, B_FALSE);
2925 if (tp == NULL || tp->tpc_tp.tp_doi !=
2926 l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
2927 SUN_CIPSO) {
2928 if (tp != NULL)
2929 TPC_RELE(tp);
2930 error = EACCES;
2931 goto err_out;
2932 }
2933 TPC_RELE(tp);
2934 }
2935 }
2936
2937 error = makefh4(&cs->fh, vp, cs->exi);
2938
2939 err_out:
2940 if (error) {
2941 if (is_newvp) {
2942 VN_RELE(cs->vp);
2943 cs->vp = oldvp;
2944 } else
2945 VN_RELE(vp);
2946 return (puterrno4(error));
2947 }
2948
2949 if (!is_newvp) {
2950 if (cs->vp)
2951 VN_RELE(cs->vp);
2952 cs->vp = vp;
2953 } else if (oldvp)
2954 VN_RELE(oldvp);
2955
2956 /*
2957 * if did lookup on attrdir and didn't lookup .., set named
2958 * attr fh flag
2959 */
2960 if (attrdir && ! dotdot)
2961 set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
2962
2963 /* Assume false for now, open proc will set this */
2964 cs->mandlock = FALSE;
2965
2966 return (NFS4_OK);
2967 }
2968
2969 /* ARGSUSED */
2970 static void
2971 rfs4_op_lookup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2972 struct compound_state *cs)
2973 {
2974 LOOKUP4args *args = &argop->nfs_argop4_u.oplookup;
2975 LOOKUP4res *resp = &resop->nfs_resop4_u.oplookup;
2976 char *nm;
2977 uint_t len;
2978 struct sockaddr *ca;
2979 char *name = NULL;
2980 nfsstat4 status;
2981
2982 DTRACE_NFSV4_2(op__lookup__start, struct compound_state *, cs,
2983 LOOKUP4args *, args);
2984
2985 if (cs->vp == NULL) {
2986 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2987 goto out;
2988 }
2989
2990 if (cs->vp->v_type == VLNK) {
2991 *cs->statusp = resp->status = NFS4ERR_SYMLINK;
2992 goto out;
2993 }
2994
2995 if (cs->vp->v_type != VDIR) {
2996 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2997 goto out;
2998 }
2999
3000 status = utf8_dir_verify(&args->objname);
3001 if (status != NFS4_OK) {
3002 *cs->statusp = resp->status = status;
3003 goto out;
3004 }
3005
3006 nm = utf8_to_str(&args->objname, &len, NULL);
3007 if (nm == NULL) {
3008 *cs->statusp = resp->status = NFS4ERR_INVAL;
3009 goto out;
3010 }
3011
3012 if (len > MAXNAMELEN) {
3013 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
3014 kmem_free(nm, len);
3015 goto out;
3016 }
3017
3018 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3019 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
3020 MAXPATHLEN + 1);
3021
3022 if (name == NULL) {
3023 *cs->statusp = resp->status = NFS4ERR_INVAL;
3024 kmem_free(nm, len);
3025 goto out;
3026 }
3027
3028 *cs->statusp = resp->status = do_rfs4_op_lookup(name, req, cs);
3029
3030 if (name != nm)
3031 kmem_free(name, MAXPATHLEN + 1);
3032 kmem_free(nm, len);
3033
3034 out:
3035 DTRACE_NFSV4_2(op__lookup__done, struct compound_state *, cs,
3036 LOOKUP4res *, resp);
3037 }
3038
3039 /* ARGSUSED */
3040 static void
3041 rfs4_op_lookupp(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3042 struct compound_state *cs)
3043 {
3044 LOOKUPP4res *resp = &resop->nfs_resop4_u.oplookupp;
3045
3046 DTRACE_NFSV4_1(op__lookupp__start, struct compound_state *, cs);
3047
3048 if (cs->vp == NULL) {
3049 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3050 goto out;
3051 }
3052
3053 if (cs->vp->v_type != VDIR) {
3054 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
3055 goto out;
3056 }
3057
3058 *cs->statusp = resp->status = do_rfs4_op_lookup("..", req, cs);
3059
3060 /*
3061 * From NFSV4 Specification, LOOKUPP should not check for
3062 * NFS4ERR_WRONGSEC. Retrun NFS4_OK instead.
3063 */
3064 if (resp->status == NFS4ERR_WRONGSEC) {
3065 *cs->statusp = resp->status = NFS4_OK;
3066 }
3067
3068 out:
3069 DTRACE_NFSV4_2(op__lookupp__done, struct compound_state *, cs,
3070 LOOKUPP4res *, resp);
3071 }
3072
3073
3074 /*ARGSUSED2*/
3075 static void
3076 rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3077 struct compound_state *cs)
3078 {
3079 OPENATTR4args *args = &argop->nfs_argop4_u.opopenattr;
3080 OPENATTR4res *resp = &resop->nfs_resop4_u.opopenattr;
3081 vnode_t *avp = NULL;
3082 int lookup_flags = LOOKUP_XATTR, error;
3083 int exp_ro = 0;
3084
3085 DTRACE_NFSV4_2(op__openattr__start, struct compound_state *, cs,
3086 OPENATTR4args *, args);
3087
3088 if (cs->vp == NULL) {
3089 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3090 goto out;
3091 }
3092
3093 if ((cs->vp->v_vfsp->vfs_flag & VFS_XATTR) == 0 &&
3094 !vfs_has_feature(cs->vp->v_vfsp, VFSFT_SYSATTR_VIEWS)) {
3095 *cs->statusp = resp->status = puterrno4(ENOTSUP);
3096 goto out;
3097 }
3098
3099 /*
3100 * If file system supports passing ACE mask to VOP_ACCESS then
3101 * check for ACE_READ_NAMED_ATTRS, otherwise do legacy checks
3102 */
3103
3104 if (vfs_has_feature(cs->vp->v_vfsp, VFSFT_ACEMASKONACCESS))
3105 error = VOP_ACCESS(cs->vp, ACE_READ_NAMED_ATTRS,
3106 V_ACE_MASK, cs->cr, NULL);
3107 else
3108 error = ((VOP_ACCESS(cs->vp, VREAD, 0, cs->cr, NULL) != 0) &&
3109 (VOP_ACCESS(cs->vp, VWRITE, 0, cs->cr, NULL) != 0) &&
3110 (VOP_ACCESS(cs->vp, VEXEC, 0, cs->cr, NULL) != 0));
3111
3112 if (error) {
3113 *cs->statusp = resp->status = puterrno4(EACCES);
3114 goto out;
3115 }
3116
3117 /*
3118 * The CREATE_XATTR_DIR VOP flag cannot be specified if
3119 * the file system is exported read-only -- regardless of
3120 * createdir flag. Otherwise the attrdir would be created
3121 * (assuming server fs isn't mounted readonly locally). If
3122 * VOP_LOOKUP returns ENOENT in this case, the error will
3123 * be translated into EROFS. ENOSYS is mapped to ENOTSUP
3124 * because specfs has no VOP_LOOKUP op, so the macro would
3125 * return ENOSYS. EINVAL is returned by all (current)
3126 * Solaris file system implementations when any of their
3127 * restrictions are violated (xattr(dir) can't have xattrdir).
3128 * Returning NOTSUPP is more appropriate in this case
3129 * because the object will never be able to have an attrdir.
3130 */
3131 if (args->createdir && ! (exp_ro = rdonly4(req, cs)))
3132 lookup_flags |= CREATE_XATTR_DIR;
3133
3134 error = VOP_LOOKUP(cs->vp, "", &avp, NULL, lookup_flags, NULL, cs->cr,
3135 NULL, NULL, NULL);
3136
3137 if (error) {
3138 if (error == ENOENT && args->createdir && exp_ro)
3139 *cs->statusp = resp->status = puterrno4(EROFS);
3140 else if (error == EINVAL || error == ENOSYS)
3141 *cs->statusp = resp->status = puterrno4(ENOTSUP);
3142 else
3143 *cs->statusp = resp->status = puterrno4(error);
3144 goto out;
3145 }
3146
3147 ASSERT(avp->v_flag & V_XATTRDIR);
3148
3149 error = makefh4(&cs->fh, avp, cs->exi);
3150
3151 if (error) {
3152 VN_RELE(avp);
3153 *cs->statusp = resp->status = puterrno4(error);
3154 goto out;
3155 }
3156
3157 VN_RELE(cs->vp);
3158 cs->vp = avp;
3159
3160 /*
3161 * There is no requirement for an attrdir fh flag
3162 * because the attrdir has a vnode flag to distinguish
3163 * it from regular (non-xattr) directories. The
3164 * FH4_ATTRDIR flag is set for future sanity checks.
3165 */
3166 set_fh4_flag(&cs->fh, FH4_ATTRDIR);
3167 *cs->statusp = resp->status = NFS4_OK;
3168
3169 out:
3170 DTRACE_NFSV4_2(op__openattr__done, struct compound_state *, cs,
3171 OPENATTR4res *, resp);
3172 }
3173
3174 static int
3175 do_io(int direction, vnode_t *vp, struct uio *uio, int ioflag, cred_t *cred,
3176 caller_context_t *ct)
3177 {
3178 int error;
3179 int i;
3180 clock_t delaytime;
3181
3182 delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
3183
3184 /*
3185 * Don't block on mandatory locks. If this routine returns
3186 * EAGAIN, the caller should return NFS4ERR_LOCKED.
3187 */
3188 uio->uio_fmode = FNONBLOCK;
3189
3190 for (i = 0; i < rfs4_maxlock_tries; i++) {
3191
3192
3193 if (direction == FREAD) {
3194 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, ct);
3195 error = VOP_READ(vp, uio, ioflag, cred, ct);
3196 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, ct);
3197 } else {
3198 (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, ct);
3199 error = VOP_WRITE(vp, uio, ioflag, cred, ct);
3200 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, ct);
3201 }
3202
3203 if (error != EAGAIN)
3204 break;
3205
3206 if (i < rfs4_maxlock_tries - 1) {
3207 delay(delaytime);
3208 delaytime *= 2;
3209 }
3210 }
3211
3212 return (error);
3213 }
3214
3215 /* ARGSUSED */
3216 static void
3217 rfs4_op_read(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3218 struct compound_state *cs)
3219 {
3220 READ4args *args = &argop->nfs_argop4_u.opread;
3221 READ4res *resp = &resop->nfs_resop4_u.opread;
3222 int error;
3223 int verror;
3224 vnode_t *vp;
3225 struct vattr va;
3226 struct iovec iov, *iovp = NULL;
3227 int iovcnt;
3228 struct uio uio;
3229 u_offset_t offset;
3230 bool_t *deleg = &cs->deleg;
3231 nfsstat4 stat;
3232 int in_crit = 0;
3233 mblk_t *mp = NULL;
3234 int alloc_err = 0;
3235 int rdma_used = 0;
3236 int loaned_buffers;
3237 caller_context_t ct;
3238 struct uio *uiop;
3239
3240 DTRACE_NFSV4_2(op__read__start, struct compound_state *, cs,
3241 READ4args, args);
3242
3243 vp = cs->vp;
3244 if (vp == NULL) {
3245 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3246 goto out;
3247 }
3248 if (cs->access == CS_ACCESS_DENIED) {
3249 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3250 goto out;
3251 }
3252
3253 if ((stat = rfs4_check_stateid(FREAD, vp, &args->stateid, FALSE,
3254 deleg, TRUE, &ct)) != NFS4_OK) {
3255 *cs->statusp = resp->status = stat;
3256 goto out;
3257 }
3258
3259 /*
3260 * Enter the critical region before calling VOP_RWLOCK
3261 * to avoid a deadlock with write requests.
3262 */
3263 if (nbl_need_check(vp)) {
3264 nbl_start_crit(vp, RW_READER);
3265 in_crit = 1;
3266 if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
3267 &ct)) {
3268 *cs->statusp = resp->status = NFS4ERR_LOCKED;
3269 goto out;
3270 }
3271 }
3272
3273 if (args->wlist) {
3274 if (args->count > clist_len(args->wlist)) {
3275 *cs->statusp = resp->status = NFS4ERR_INVAL;
3276 goto out;
3277 }
3278 rdma_used = 1;
3279 }
3280
3281 /* use loaned buffers for TCP */
3282 loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;
3283
3284 va.va_mask = AT_MODE|AT_SIZE|AT_UID;
3285 verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3286
3287 /*
3288 * If we can't get the attributes, then we can't do the
3289 * right access checking. So, we'll fail the request.
3290 */
3291 if (verror) {
3292 *cs->statusp = resp->status = puterrno4(verror);
3293 goto out;
3294 }
3295
3296 if (vp->v_type != VREG) {
3297 *cs->statusp = resp->status =
3298 ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
3299 goto out;
3300 }
3301
3302 if (crgetuid(cs->cr) != va.va_uid &&
3303 (error = VOP_ACCESS(vp, VREAD, 0, cs->cr, &ct)) &&
3304 (error = VOP_ACCESS(vp, VEXEC, 0, cs->cr, &ct))) {
3305 *cs->statusp = resp->status = puterrno4(error);
3306 goto out;
3307 }
3308
3309 if (MANDLOCK(vp, va.va_mode)) { /* XXX - V4 supports mand locking */
3310 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3311 goto out;
3312 }
3313
3314 offset = args->offset;
3315 if (offset >= va.va_size) {
3316 *cs->statusp = resp->status = NFS4_OK;
3317 resp->eof = TRUE;
3318 resp->data_len = 0;
3319 resp->data_val = NULL;
3320 resp->mblk = NULL;
3321 /* RDMA */
3322 resp->wlist = args->wlist;
3323 resp->wlist_len = resp->data_len;
3324 *cs->statusp = resp->status = NFS4_OK;
3325 if (resp->wlist)
3326 clist_zero_len(resp->wlist);
3327 goto out;
3328 }
3329
3330 if (args->count == 0) {
3331 *cs->statusp = resp->status = NFS4_OK;
3332 resp->eof = FALSE;
3333 resp->data_len = 0;
3334 resp->data_val = NULL;
3335 resp->mblk = NULL;
3336 /* RDMA */
3337 resp->wlist = args->wlist;
3338 resp->wlist_len = resp->data_len;
3339 if (resp->wlist)
3340 clist_zero_len(resp->wlist);
3341 goto out;
3342 }
3343
3344 /*
3345 * Do not allocate memory more than maximum allowed
3346 * transfer size
3347 */
3348 if (args->count > rfs4_tsize(req))
3349 args->count = rfs4_tsize(req);
3350
3351 if (loaned_buffers) {
3352 uiop = (uio_t *)rfs_setup_xuio(vp);
3353 ASSERT(uiop != NULL);
3354 uiop->uio_segflg = UIO_SYSSPACE;
3355 uiop->uio_loffset = args->offset;
3356 uiop->uio_resid = args->count;
3357
3358 /* Jump to do the read if successful */
3359 if (!VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cs->cr, &ct)) {
3360 /*
3361 * Need to hold the vnode until after VOP_RETZCBUF()
3362 * is called.
3363 */
3364 VN_HOLD(vp);
3365 goto doio_read;
3366 }
3367
3368 DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
3369 uiop->uio_loffset, int, uiop->uio_resid);
3370
3371 uiop->uio_extflg = 0;
3372
3373 /* failure to setup for zero copy */
3374 rfs_free_xuio((void *)uiop);
3375 loaned_buffers = 0;
3376 }
3377
3378 /*
3379 * If returning data via RDMA Write, then grab the chunk list. If we
3380 * aren't returning READ data w/RDMA_WRITE, then grab a mblk.
3381 */
3382 if (rdma_used) {
3383 mp = NULL;
3384 (void) rdma_get_wchunk(req, &iov, args->wlist);
3385 uio.uio_iov = &iov;
3386 uio.uio_iovcnt = 1;
3387 } else {
3388 /*
3389 * mp will contain the data to be sent out in the read reply.
3390 * It will be freed after the reply has been sent.
3391 */
3392 mp = rfs_read_alloc(args->count, &iovp, &iovcnt);
3393 ASSERT(mp != NULL);
3394 ASSERT(alloc_err == 0);
3395 uio.uio_iov = iovp;
3396 uio.uio_iovcnt = iovcnt;
3397 }
3398
3399 uio.uio_segflg = UIO_SYSSPACE;
3400 uio.uio_extflg = UIO_COPY_CACHED;
3401 uio.uio_loffset = args->offset;
3402 uio.uio_resid = args->count;
3403 uiop = &uio;
3404
3405 doio_read:
3406 error = do_io(FREAD, vp, uiop, 0, cs->cr, &ct);
3407
3408 va.va_mask = AT_SIZE;
3409 verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3410
3411 if (error) {
3412 if (mp)
3413 freemsg(mp);
3414 *cs->statusp = resp->status = puterrno4(error);
3415 goto out;
3416 }
3417
3418 /* make mblk using zc buffers */
3419 if (loaned_buffers) {
3420 mp = uio_to_mblk(uiop);
3421 ASSERT(mp != NULL);
3422 }
3423
3424 *cs->statusp = resp->status = NFS4_OK;
3425
3426 ASSERT(uiop->uio_resid >= 0);
3427 resp->data_len = args->count - uiop->uio_resid;
3428 if (mp) {
3429 resp->data_val = (char *)mp->b_datap->db_base;
3430 rfs_rndup_mblks(mp, resp->data_len, loaned_buffers);
3431 } else {
3432 resp->data_val = (caddr_t)iov.iov_base;
3433 }
3434
3435 resp->mblk = mp;
3436
3437 if (!verror && offset + resp->data_len == va.va_size)
3438 resp->eof = TRUE;
3439 else
3440 resp->eof = FALSE;
3441
3442 if (rdma_used) {
3443 if (!rdma_setup_read_data4(args, resp)) {
3444 *cs->statusp = resp->status = NFS4ERR_INVAL;
3445 }
3446 } else {
3447 resp->wlist = NULL;
3448 }
3449
3450 out:
3451 if (in_crit)
3452 nbl_end_crit(vp);
3453
3454 if (iovp != NULL)
3455 kmem_free(iovp, iovcnt * sizeof (struct iovec));
3456
3457 DTRACE_NFSV4_2(op__read__done, struct compound_state *, cs,
3458 READ4res *, resp);
3459 }
3460
3461 static void
3462 rfs4_op_read_free(nfs_resop4 *resop)
3463 {
3464 READ4res *resp = &resop->nfs_resop4_u.opread;
3465
3466 if (resp->status == NFS4_OK && resp->mblk != NULL) {
3467 freemsg(resp->mblk);
3468 resp->mblk = NULL;
3469 resp->data_val = NULL;
3470 resp->data_len = 0;
3471 }
3472 }
3473
3474 static void
3475 rfs4_op_readdir_free(nfs_resop4 * resop)
3476 {
3477 READDIR4res *resp = &resop->nfs_resop4_u.opreaddir;
3478
3479 if (resp->status == NFS4_OK && resp->mblk != NULL) {
3480 freeb(resp->mblk);
3481 resp->mblk = NULL;
3482 resp->data_len = 0;
3483 }
3484 }
3485
3486
3487 /* ARGSUSED */
3488 static void
3489 rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3490 struct compound_state *cs)
3491 {
3492 PUTPUBFH4res *resp = &resop->nfs_resop4_u.opputpubfh;
3493 int error;
3494 vnode_t *vp;
3495 struct exportinfo *exi, *sav_exi;
3496 nfs_fh4_fmt_t *fh_fmtp;
3497 nfs_export_t *ne = nfs_get_export();
3498
3499 DTRACE_NFSV4_1(op__putpubfh__start, struct compound_state *, cs);
3500
3501 if (cs->vp) {
3502 VN_RELE(cs->vp);
3503 cs->vp = NULL;
3504 }
3505
3506 if (cs->cr)
3507 crfree(cs->cr);
3508
3509 cs->cr = crdup(cs->basecr);
3510
3511 vp = ne->exi_public->exi_vp;
3512 if (vp == NULL) {
3513 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3514 goto out;
3515 }
3516
3517 error = makefh4(&cs->fh, vp, ne->exi_public);
3518 if (error != 0) {
3519 *cs->statusp = resp->status = puterrno4(error);
3520 goto out;
3521 }
3522 sav_exi = cs->exi;
3523 if (ne->exi_public == ne->exi_root) {
3524 /*
3525 * No filesystem is actually shared public, so we default
3526 * to exi_root. In this case, we must check whether root
3527 * is exported.
3528 */
3529 fh_fmtp = (nfs_fh4_fmt_t *)cs->fh.nfs_fh4_val;
3530
3531 /*
3532 * if root filesystem is exported, the exportinfo struct that we
3533 * should use is what checkexport4 returns, because root_exi is
3534 * actually a mostly empty struct.
3535 */
3536 exi = checkexport4(&fh_fmtp->fh4_fsid,
3537 (fid_t *)&fh_fmtp->fh4_xlen, NULL);
3538 cs->exi = ((exi != NULL) ? exi : ne->exi_public);
3539 } else {
3540 /*
3541 * it's a properly shared filesystem
3542 */
3543 cs->exi = ne->exi_public;
3544 }
3545
3546 if (is_system_labeled()) {
3547 bslabel_t *clabel;
3548
3549 ASSERT(req->rq_label != NULL);
3550 clabel = req->rq_label;
3551 DTRACE_PROBE2(tx__rfs4__log__info__opputpubfh__clabel, char *,
3552 "got client label from request(1)",
3553 struct svc_req *, req);
3554 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3555 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3556 cs->exi)) {
3557 *cs->statusp = resp->status =
3558 NFS4ERR_SERVERFAULT;
3559 goto out;
3560 }
3561 }
3562 }
3563
3564 VN_HOLD(vp);
3565 cs->vp = vp;
3566
3567 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3568 VN_RELE(cs->vp);
3569 cs->vp = NULL;
3570 cs->exi = sav_exi;
3571 goto out;
3572 }
3573
3574 *cs->statusp = resp->status = NFS4_OK;
3575 out:
3576 DTRACE_NFSV4_2(op__putpubfh__done, struct compound_state *, cs,
3577 PUTPUBFH4res *, resp);
3578 }
3579
3580 /*
3581 * XXX - issue with put*fh operations. Suppose /export/home is exported.
3582 * Suppose an NFS client goes to mount /export/home/joe. If /export, home,
3583 * or joe have restrictive search permissions, then we shouldn't let
3584 * the client get a file handle. This is easy to enforce. However, we
3585 * don't know what security flavor should be used until we resolve the
3586 * path name. Another complication is uid mapping. If root is
3587 * the user, then it will be mapped to the anonymous user by default,
3588 * but we won't know that till we've resolved the path name. And we won't
3589 * know what the anonymous user is.
3590 * Luckily, SECINFO is specified to take a full filename.
3591 * So what we will have to in rfs4_op_lookup is check that flavor of
3592 * the target object matches that of the request, and if root was the
3593 * caller, check for the root= and anon= options, and if necessary,
3594 * repeat the lookup using the right cred_t. But that's not done yet.
3595 */
3596 /* ARGSUSED */
3597 static void
3598 rfs4_op_putfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3599 struct compound_state *cs)
3600 {
3601 PUTFH4args *args = &argop->nfs_argop4_u.opputfh;
3602 PUTFH4res *resp = &resop->nfs_resop4_u.opputfh;
3603 nfs_fh4_fmt_t *fh_fmtp;
3604
3605 DTRACE_NFSV4_2(op__putfh__start, struct compound_state *, cs,
3606 PUTFH4args *, args);
3607
3608 if (cs->vp) {
3609 VN_RELE(cs->vp);
3610 cs->vp = NULL;
3611 }
3612
3613 if (cs->cr) {
3614 crfree(cs->cr);
3615 cs->cr = NULL;
3616 }
3617
3618
3619 if (args->object.nfs_fh4_len < NFS_FH4_LEN) {
3620 *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
3621 goto out;
3622 }
3623
3624 fh_fmtp = (nfs_fh4_fmt_t *)args->object.nfs_fh4_val;
3625 cs->exi = checkexport4(&fh_fmtp->fh4_fsid, (fid_t *)&fh_fmtp->fh4_xlen,
3626 NULL);
3627
3628 if (cs->exi == NULL) {
3629 *cs->statusp = resp->status = NFS4ERR_STALE;
3630 goto out;
3631 }
3632
3633 cs->cr = crdup(cs->basecr);
3634
3635 ASSERT(cs->cr != NULL);
3636
3637 if (! (cs->vp = nfs4_fhtovp(&args->object, cs->exi, &resp->status))) {
3638 *cs->statusp = resp->status;
3639 goto out;
3640 }
3641
3642 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3643 VN_RELE(cs->vp);
3644 cs->vp = NULL;
3645 goto out;
3646 }
3647
3648 nfs_fh4_copy(&args->object, &cs->fh);
3649 *cs->statusp = resp->status = NFS4_OK;
3650 cs->deleg = FALSE;
3651
3652 out:
3653 DTRACE_NFSV4_2(op__putfh__done, struct compound_state *, cs,
3654 PUTFH4res *, resp);
3655 }
3656
3657 /* ARGSUSED */
3658 static void
3659 rfs4_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3660 struct compound_state *cs)
3661 {
3662 PUTROOTFH4res *resp = &resop->nfs_resop4_u.opputrootfh;
3663 int error;
3664 fid_t fid;
3665 struct exportinfo *exi, *sav_exi;
3666
3667 DTRACE_NFSV4_1(op__putrootfh__start, struct compound_state *, cs);
3668
3669 if (cs->vp) {
3670 VN_RELE(cs->vp);
3671 cs->vp = NULL;
3672 }
3673
3674 if (cs->cr)
3675 crfree(cs->cr);
3676
3677 cs->cr = crdup(cs->basecr);
3678
3679 /*
3680 * Using rootdir, the system root vnode,
3681 * get its fid.
3682 */
3683 bzero(&fid, sizeof (fid));
3684 fid.fid_len = MAXFIDSZ;
3685 error = vop_fid_pseudo(ZONE_ROOTVP(), &fid);
3686 if (error != 0) {
3687 *cs->statusp = resp->status = puterrno4(error);
3688 goto out;
3689 }
3690
3691 /*
3692 * Then use the root fsid & fid it to find out if it's exported
3693 *
3694 * If the server root isn't exported directly, then
3695 * it should at least be a pseudo export based on
3696 * one or more exports further down in the server's
3697 * file tree.
3698 */
3699 exi = checkexport4(&ZONE_ROOTVP()->v_vfsp->vfs_fsid, &fid, NULL);
3700 if (exi == NULL || exi->exi_export.ex_flags & EX_PUBLIC) {
3701 NFS4_DEBUG(rfs4_debug,
3702 (CE_WARN, "rfs4_op_putrootfh: export check failure"));
3703 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3704 goto out;
3705 }
3706
3707 /*
3708 * Now make a filehandle based on the root
3709 * export and root vnode.
3710 */
3711 error = makefh4(&cs->fh, ZONE_ROOTVP(), exi);
3712 if (error != 0) {
3713 *cs->statusp = resp->status = puterrno4(error);
3714 goto out;
3715 }
3716
3717 sav_exi = cs->exi;
3718 cs->exi = exi;
3719
3720 VN_HOLD(ZONE_ROOTVP());
3721 cs->vp = ZONE_ROOTVP();
3722
3723 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3724 VN_RELE(cs->vp);
3725 cs->vp = NULL;
3726 cs->exi = sav_exi;
3727 goto out;
3728 }
3729
3730 *cs->statusp = resp->status = NFS4_OK;
3731 cs->deleg = FALSE;
3732 out:
3733 DTRACE_NFSV4_2(op__putrootfh__done, struct compound_state *, cs,
3734 PUTROOTFH4res *, resp);
3735 }
3736
3737 /*
3738 * readlink: args: CURRENT_FH.
3739 * res: status. If success - CURRENT_FH unchanged, return linktext.
3740 */
3741
3742 /* ARGSUSED */
3743 static void
3744 rfs4_op_readlink(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3745 struct compound_state *cs)
3746 {
3747 READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3748 int error;
3749 vnode_t *vp;
3750 struct iovec iov;
3751 struct vattr va;
3752 struct uio uio;
3753 char *data;
3754 struct sockaddr *ca;
3755 char *name = NULL;
3756 int is_referral;
3757
3758 DTRACE_NFSV4_1(op__readlink__start, struct compound_state *, cs);
3759
3760 /* CURRENT_FH: directory */
3761 vp = cs->vp;
3762 if (vp == NULL) {
3763 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3764 goto out;
3765 }
3766
3767 if (cs->access == CS_ACCESS_DENIED) {
3768 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3769 goto out;
3770 }
3771
3772 /* Is it a referral? */
3773 if (vn_is_nfs_reparse(vp, cs->cr) && client_is_downrev(req)) {
3774
3775 is_referral = 1;
3776
3777 } else {
3778
3779 is_referral = 0;
3780
3781 if (vp->v_type == VDIR) {
3782 *cs->statusp = resp->status = NFS4ERR_ISDIR;
3783 goto out;
3784 }
3785
3786 if (vp->v_type != VLNK) {
3787 *cs->statusp = resp->status = NFS4ERR_INVAL;
3788 goto out;
3789 }
3790
3791 }
3792
3793 va.va_mask = AT_MODE;
3794 error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
3795 if (error) {
3796 *cs->statusp = resp->status = puterrno4(error);
3797 goto out;
3798 }
3799
3800 if (MANDLOCK(vp, va.va_mode)) {
3801 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3802 goto out;
3803 }
3804
3805 data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
3806
3807 if (is_referral) {
3808 char *s;
3809 size_t strsz;
3810
3811 /* Get an artificial symlink based on a referral */
3812 s = build_symlink(vp, cs->cr, &strsz);
3813 global_svstat_ptr[4][NFS_REFERLINKS].value.ui64++;
3814 DTRACE_PROBE2(nfs4serv__func__referral__reflink,
3815 vnode_t *, vp, char *, s);
3816 if (s == NULL)
3817 error = EINVAL;
3818 else {
3819 error = 0;
3820 (void) strlcpy(data, s, MAXPATHLEN + 1);
3821 kmem_free(s, strsz);
3822 }
3823
3824 } else {
3825
3826 iov.iov_base = data;
3827 iov.iov_len = MAXPATHLEN;
3828 uio.uio_iov = &iov;
3829 uio.uio_iovcnt = 1;
3830 uio.uio_segflg = UIO_SYSSPACE;
3831 uio.uio_extflg = UIO_COPY_CACHED;
3832 uio.uio_loffset = 0;
3833 uio.uio_resid = MAXPATHLEN;
3834
3835 error = VOP_READLINK(vp, &uio, cs->cr, NULL);
3836
3837 if (!error)
3838 *(data + MAXPATHLEN - uio.uio_resid) = '\0';
3839 }
3840
3841 if (error) {
3842 kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3843 *cs->statusp = resp->status = puterrno4(error);
3844 goto out;
3845 }
3846
3847 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3848 name = nfscmd_convname(ca, cs->exi, data, NFSCMD_CONV_OUTBOUND,
3849 MAXPATHLEN + 1);
3850
3851 if (name == NULL) {
3852 /*
3853 * Even though the conversion failed, we return
3854 * something. We just don't translate it.
3855 */
3856 name = data;
3857 }
3858
3859 /*
3860 * treat link name as data
3861 */
3862 (void) str_to_utf8(name, (utf8string *)&resp->link);
3863
3864 if (name != data)
3865 kmem_free(name, MAXPATHLEN + 1);
3866 kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3867 *cs->statusp = resp->status = NFS4_OK;
3868
3869 out:
3870 DTRACE_NFSV4_2(op__readlink__done, struct compound_state *, cs,
3871 READLINK4res *, resp);
3872 }
3873
3874 static void
3875 rfs4_op_readlink_free(nfs_resop4 *resop)
3876 {
3877 READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3878 utf8string *symlink = (utf8string *)&resp->link;
3879
3880 if (symlink->utf8string_val) {
3881 UTF8STRING_FREE(*symlink)
3882 }
3883 }
3884
3885 /*
3886 * release_lockowner:
3887 * Release any state associated with the supplied
3888 * lockowner. Note if any lo_state is holding locks we will not
3889 * rele that lo_state and thus the lockowner will not be destroyed.
3890 * A client using lock after the lock owner stateid has been released
3891 * will suffer the consequence of NFS4ERR_BAD_STATEID and would have
3892 * to reissue the lock with new_lock_owner set to TRUE.
3893 * args: lock_owner
3894 * res: status
3895 */
3896 /* ARGSUSED */
3897 static void
3898 rfs4_op_release_lockowner(nfs_argop4 *argop, nfs_resop4 *resop,
3899 struct svc_req *req, struct compound_state *cs)
3900 {
3901 RELEASE_LOCKOWNER4args *ap = &argop->nfs_argop4_u.oprelease_lockowner;
3902 RELEASE_LOCKOWNER4res *resp = &resop->nfs_resop4_u.oprelease_lockowner;
3903 rfs4_lockowner_t *lo;
3904 rfs4_openowner_t *oo;
3905 rfs4_state_t *sp;
3906 rfs4_lo_state_t *lsp;
3907 rfs4_client_t *cp;
3908 bool_t create = FALSE;
3909 locklist_t *llist;
3910 sysid_t sysid;
3911
3912 DTRACE_NFSV4_2(op__release__lockowner__start, struct compound_state *,
3913 cs, RELEASE_LOCKOWNER4args *, ap);
3914
3915 /* Make sure there is a clientid around for this request */
3916 cp = rfs4_findclient_by_id(ap->lock_owner.clientid, FALSE);
3917
3918 if (cp == NULL) {
3919 *cs->statusp = resp->status =
3920 rfs4_check_clientid(&ap->lock_owner.clientid, 0);
3921 goto out;
3922 }
3923 rfs4_client_rele(cp);
3924
3925 lo = rfs4_findlockowner(&ap->lock_owner, &create);
3926 if (lo == NULL) {
3927 *cs->statusp = resp->status = NFS4_OK;
3928 goto out;
3929 }
3930 ASSERT(lo->rl_client != NULL);
3931
3932 /*
3933 * Check for EXPIRED client. If so will reap state with in a lease
3934 * period or on next set_clientid_confirm step
3935 */
3936 if (rfs4_lease_expired(lo->rl_client)) {
3937 rfs4_lockowner_rele(lo);
3938 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
3939 goto out;
3940 }
3941
3942 /*
3943 * If no sysid has been assigned, then no locks exist; just return.
3944 */
3945 rfs4_dbe_lock(lo->rl_client->rc_dbe);
3946 if (lo->rl_client->rc_sysidt == LM_NOSYSID) {
3947 rfs4_lockowner_rele(lo);
3948 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3949 goto out;
3950 }
3951
3952 sysid = lo->rl_client->rc_sysidt;
3953 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3954
3955 /*
3956 * Mark the lockowner invalid.
3957 */
3958 rfs4_dbe_hide(lo->rl_dbe);
3959
3960 /*
3961 * sysid-pid pair should now not be used since the lockowner is
3962 * invalid. If the client were to instantiate the lockowner again
3963 * it would be assigned a new pid. Thus we can get the list of
3964 * current locks.
3965 */
3966
3967 llist = flk_get_active_locks(sysid, lo->rl_pid);
3968 /* If we are still holding locks fail */
3969 if (llist != NULL) {
3970
3971 *cs->statusp = resp->status = NFS4ERR_LOCKS_HELD;
3972
3973 flk_free_locklist(llist);
3974 /*
3975 * We need to unhide the lockowner so the client can
3976 * try it again. The bad thing here is if the client
3977 * has a logic error that took it here in the first place
3978 * they probably have lost accounting of the locks that it
3979 * is holding. So we may have dangling state until the
3980 * open owner state is reaped via close. One scenario
3981 * that could possibly occur is that the client has
3982 * sent the unlock request(s) in separate threads
3983 * and has not waited for the replies before sending the
3984 * RELEASE_LOCKOWNER request. Presumably, it would expect
3985 * and deal appropriately with NFS4ERR_LOCKS_HELD, by
3986 * reissuing the request.
3987 */
3988 rfs4_dbe_unhide(lo->rl_dbe);
3989 rfs4_lockowner_rele(lo);
3990 goto out;
3991 }
3992
3993 /*
3994 * For the corresponding client we need to check each open
3995 * owner for any opens that have lockowner state associated
3996 * with this lockowner.
3997 */
3998
3999 rfs4_dbe_lock(lo->rl_client->rc_dbe);
4000 for (oo = list_head(&lo->rl_client->rc_openownerlist); oo != NULL;
4001 oo = list_next(&lo->rl_client->rc_openownerlist, oo)) {
4002
4003 rfs4_dbe_lock(oo->ro_dbe);
4004 for (sp = list_head(&oo->ro_statelist); sp != NULL;
4005 sp = list_next(&oo->ro_statelist, sp)) {
4006
4007 rfs4_dbe_lock(sp->rs_dbe);
4008 for (lsp = list_head(&sp->rs_lostatelist);
4009 lsp != NULL;
4010 lsp = list_next(&sp->rs_lostatelist, lsp)) {
4011 if (lsp->rls_locker == lo) {
4012 rfs4_dbe_lock(lsp->rls_dbe);
4013 rfs4_dbe_invalidate(lsp->rls_dbe);
4014 rfs4_dbe_unlock(lsp->rls_dbe);
4015 }
4016 }
4017 rfs4_dbe_unlock(sp->rs_dbe);
4018 }
4019 rfs4_dbe_unlock(oo->ro_dbe);
4020 }
4021 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
4022
4023 rfs4_lockowner_rele(lo);
4024
4025 *cs->statusp = resp->status = NFS4_OK;
4026
4027 out:
4028 DTRACE_NFSV4_2(op__release__lockowner__done, struct compound_state *,
4029 cs, RELEASE_LOCKOWNER4res *, resp);
4030 }
4031
4032 /*
4033 * short utility function to lookup a file and recall the delegation
4034 */
4035 static rfs4_file_t *
4036 rfs4_lookup_and_findfile(vnode_t *dvp, char *nm, vnode_t **vpp,
4037 int *lkup_error, cred_t *cr)
4038 {
4039 vnode_t *vp;
4040 rfs4_file_t *fp = NULL;
4041 bool_t fcreate = FALSE;
4042 int error;
4043
4044 if (vpp)
4045 *vpp = NULL;
4046
4047 if ((error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cr, NULL, NULL,
4048 NULL)) == 0) {
4049 if (vp->v_type == VREG)
4050 fp = rfs4_findfile(vp, NULL, &fcreate);
4051 if (vpp)
4052 *vpp = vp;
4053 else
4054 VN_RELE(vp);
4055 }
4056
4057 if (lkup_error)
4058 *lkup_error = error;
4059
4060 return (fp);
4061 }
4062
4063 /*
4064 * remove: args: CURRENT_FH: directory; name.
4065 * res: status. If success - CURRENT_FH unchanged, return change_info
4066 * for directory.
4067 */
4068 /* ARGSUSED */
4069 static void
4070 rfs4_op_remove(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4071 struct compound_state *cs)
4072 {
4073 REMOVE4args *args = &argop->nfs_argop4_u.opremove;
4074 REMOVE4res *resp = &resop->nfs_resop4_u.opremove;
4075 int error;
4076 vnode_t *dvp, *vp;
4077 struct vattr bdva, idva, adva;
4078 char *nm;
4079 uint_t len;
4080 rfs4_file_t *fp;
4081 int in_crit = 0;
4082 bslabel_t *clabel;
4083 struct sockaddr *ca;
4084 char *name = NULL;
4085 nfsstat4 status;
4086
4087 DTRACE_NFSV4_2(op__remove__start, struct compound_state *, cs,
4088 REMOVE4args *, args);
4089
4090 /* CURRENT_FH: directory */
4091 dvp = cs->vp;
4092 if (dvp == NULL) {
4093 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4094 goto out;
4095 }
4096
4097 if (cs->access == CS_ACCESS_DENIED) {
4098 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4099 goto out;
4100 }
4101
4102 /*
4103 * If there is an unshared filesystem mounted on this vnode,
4104 * Do not allow to remove anything in this directory.
4105 */
4106 if (vn_ismntpt(dvp)) {
4107 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4108 goto out;
4109 }
4110
4111 if (dvp->v_type != VDIR) {
4112 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4113 goto out;
4114 }
4115
4116 status = utf8_dir_verify(&args->target);
4117 if (status != NFS4_OK) {
4118 *cs->statusp = resp->status = status;
4119 goto out;
4120 }
4121
4122 /*
4123 * Lookup the file so that we can check if it's a directory
4124 */
4125 nm = utf8_to_fn(&args->target, &len, NULL);
4126 if (nm == NULL) {
4127 *cs->statusp = resp->status = NFS4ERR_INVAL;
4128 goto out;
4129 }
4130
4131 if (len > MAXNAMELEN) {
4132 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4133 kmem_free(nm, len);
4134 goto out;
4135 }
4136
4137 if (rdonly4(req, cs)) {
4138 *cs->statusp = resp->status = NFS4ERR_ROFS;
4139 kmem_free(nm, len);
4140 goto out;
4141 }
4142
4143 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4144 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
4145 MAXPATHLEN + 1);
4146
4147 if (name == NULL) {
4148 *cs->statusp = resp->status = NFS4ERR_INVAL;
4149 kmem_free(nm, len);
4150 goto out;
4151 }
4152
4153 /*
4154 * Lookup the file to determine type and while we are see if
4155 * there is a file struct around and check for delegation.
4156 * We don't need to acquire va_seq before this lookup, if
4157 * it causes an update, cinfo.before will not match, which will
4158 * trigger a cache flush even if atomic is TRUE.
4159 */
4160 if (fp = rfs4_lookup_and_findfile(dvp, name, &vp, &error, cs->cr)) {
4161 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4162 NULL)) {
4163 VN_RELE(vp);
4164 rfs4_file_rele(fp);
4165 *cs->statusp = resp->status = NFS4ERR_DELAY;
4166 if (nm != name)
4167 kmem_free(name, MAXPATHLEN + 1);
4168 kmem_free(nm, len);
4169 goto out;
4170 }
4171 }
4172
4173 /* Didn't find anything to remove */
4174 if (vp == NULL) {
4175 *cs->statusp = resp->status = error;
4176 if (nm != name)
4177 kmem_free(name, MAXPATHLEN + 1);
4178 kmem_free(nm, len);
4179 goto out;
4180 }
4181
4182 if (nbl_need_check(vp)) {
4183 nbl_start_crit(vp, RW_READER);
4184 in_crit = 1;
4185 if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0, NULL)) {
4186 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4187 if (nm != name)
4188 kmem_free(name, MAXPATHLEN + 1);
4189 kmem_free(nm, len);
4190 nbl_end_crit(vp);
4191 VN_RELE(vp);
4192 if (fp) {
4193 rfs4_clear_dont_grant(fp);
4194 rfs4_file_rele(fp);
4195 }
4196 goto out;
4197 }
4198 }
4199
4200 /* check label before allowing removal */
4201 if (is_system_labeled()) {
4202 ASSERT(req->rq_label != NULL);
4203 clabel = req->rq_label;
4204 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
4205 "got client label from request(1)",
4206 struct svc_req *, req);
4207 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4208 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4209 cs->exi)) {
4210 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4211 if (name != nm)
4212 kmem_free(name, MAXPATHLEN + 1);
4213 kmem_free(nm, len);
4214 if (in_crit)
4215 nbl_end_crit(vp);
4216 VN_RELE(vp);
4217 if (fp) {
4218 rfs4_clear_dont_grant(fp);
4219 rfs4_file_rele(fp);
4220 }
4221 goto out;
4222 }
4223 }
4224 }
4225
4226 /* Get dir "before" change value */
4227 bdva.va_mask = AT_CTIME|AT_SEQ;
4228 error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
4229 if (error) {
4230 *cs->statusp = resp->status = puterrno4(error);
4231 if (nm != name)
4232 kmem_free(name, MAXPATHLEN + 1);
4233 kmem_free(nm, len);
4234 if (in_crit)
4235 nbl_end_crit(vp);
4236 VN_RELE(vp);
4237 if (fp) {
4238 rfs4_clear_dont_grant(fp);
4239 rfs4_file_rele(fp);
4240 }
4241 goto out;
4242 }
4243 NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
4244
4245 /* Actually do the REMOVE operation */
4246 if (vp->v_type == VDIR) {
4247 /*
4248 * Can't remove a directory that has a mounted-on filesystem.
4249 */
4250 if (vn_ismntpt(vp)) {
4251 error = EACCES;
4252 } else {
4253 /*
4254 * System V defines rmdir to return EEXIST,
4255 * not ENOTEMPTY, if the directory is not
4256 * empty. A System V NFS server needs to map
4257 * NFS4ERR_EXIST to NFS4ERR_NOTEMPTY to
4258 * transmit over the wire.
4259 */
4260 if ((error = VOP_RMDIR(dvp, name, ZONE_ROOTVP(), cs->cr,
4261 NULL, 0)) == EEXIST)
4262 error = ENOTEMPTY;
4263 }
4264 } else {
4265 if ((error = VOP_REMOVE(dvp, name, cs->cr, NULL, 0)) == 0 &&
4266 fp != NULL) {
4267 struct vattr va;
4268 vnode_t *tvp;
4269
4270 rfs4_dbe_lock(fp->rf_dbe);
4271 tvp = fp->rf_vp;
4272 if (tvp)
4273 VN_HOLD(tvp);
4274 rfs4_dbe_unlock(fp->rf_dbe);
4275
4276 if (tvp) {
4277 /*
4278 * This is va_seq safe because we are not
4279 * manipulating dvp.
4280 */
4281 va.va_mask = AT_NLINK;
4282 if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4283 va.va_nlink == 0) {
4284 /* Remove state on file remove */
4285 if (in_crit) {
4286 nbl_end_crit(vp);
4287 in_crit = 0;
4288 }
4289 rfs4_close_all_state(fp);
4290 }
4291 VN_RELE(tvp);
4292 }
4293 }
4294 }
4295
4296 if (in_crit)
4297 nbl_end_crit(vp);
4298 VN_RELE(vp);
4299
4300 if (fp) {
4301 rfs4_clear_dont_grant(fp);
4302 rfs4_file_rele(fp);
4303 }
4304 if (nm != name)
4305 kmem_free(name, MAXPATHLEN + 1);
4306 kmem_free(nm, len);
4307
4308 if (error) {
4309 *cs->statusp = resp->status = puterrno4(error);
4310 goto out;
4311 }
4312
4313 /*
4314 * Get the initial "after" sequence number, if it fails, set to zero
4315 */
4316 idva.va_mask = AT_SEQ;
4317 if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
4318 idva.va_seq = 0;
4319
4320 /*
4321 * Force modified data and metadata out to stable storage.
4322 */
4323 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
4324
4325 /*
4326 * Get "after" change value, if it fails, simply return the
4327 * before value.
4328 */
4329 adva.va_mask = AT_CTIME|AT_SEQ;
4330 if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
4331 adva.va_ctime = bdva.va_ctime;
4332 adva.va_seq = 0;
4333 }
4334
4335 NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
4336
4337 /*
4338 * The cinfo.atomic = TRUE only if we have
4339 * non-zero va_seq's, and it has incremented by exactly one
4340 * during the VOP_REMOVE/RMDIR and it didn't change during
4341 * the VOP_FSYNC.
4342 */
4343 if (bdva.va_seq && idva.va_seq && adva.va_seq &&
4344 idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
4345 resp->cinfo.atomic = TRUE;
4346 else
4347 resp->cinfo.atomic = FALSE;
4348
4349 *cs->statusp = resp->status = NFS4_OK;
4350
4351 out:
4352 DTRACE_NFSV4_2(op__remove__done, struct compound_state *, cs,
4353 REMOVE4res *, resp);
4354 }
4355
4356 /*
4357 * rename: args: SAVED_FH: from directory, CURRENT_FH: target directory,
4358 * oldname and newname.
4359 * res: status. If success - CURRENT_FH unchanged, return change_info
4360 * for both from and target directories.
4361 */
4362 /* ARGSUSED */
4363 static void
4364 rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4365 struct compound_state *cs)
4366 {
4367 RENAME4args *args = &argop->nfs_argop4_u.oprename;
4368 RENAME4res *resp = &resop->nfs_resop4_u.oprename;
4369 int error;
4370 vnode_t *odvp;
4371 vnode_t *ndvp;
4372 vnode_t *srcvp, *targvp, *tvp;
4373 struct vattr obdva, oidva, oadva;
4374 struct vattr nbdva, nidva, nadva;
4375 char *onm, *nnm;
4376 uint_t olen, nlen;
4377 rfs4_file_t *fp, *sfp;
4378 int in_crit_src, in_crit_targ;
4379 int fp_rele_grant_hold, sfp_rele_grant_hold;
4380 int unlinked;
4381 bslabel_t *clabel;
4382 struct sockaddr *ca;
4383 char *converted_onm = NULL;
4384 char *converted_nnm = NULL;
4385 nfsstat4 status;
4386
4387 DTRACE_NFSV4_2(op__rename__start, struct compound_state *, cs,
4388 RENAME4args *, args);
4389
4390 fp = sfp = NULL;
4391 srcvp = targvp = tvp = NULL;
4392 in_crit_src = in_crit_targ = 0;
4393 fp_rele_grant_hold = sfp_rele_grant_hold = 0;
4394 unlinked = 0;
4395
4396 /* CURRENT_FH: target directory */
4397 ndvp = cs->vp;
4398 if (ndvp == NULL) {
4399 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4400 goto out;
4401 }
4402
4403 /* SAVED_FH: from directory */
4404 odvp = cs->saved_vp;
4405 if (odvp == NULL) {
4406 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4407 goto out;
4408 }
4409
4410 if (cs->access == CS_ACCESS_DENIED) {
4411 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4412 goto out;
4413 }
4414
4415 /*
4416 * If there is an unshared filesystem mounted on this vnode,
4417 * do not allow to rename objects in this directory.
4418 */
4419 if (vn_ismntpt(odvp)) {
4420 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4421 goto out;
4422 }
4423
4424 /*
4425 * If there is an unshared filesystem mounted on this vnode,
4426 * do not allow to rename to this directory.
4427 */
4428 if (vn_ismntpt(ndvp)) {
4429 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4430 goto out;
4431 }
4432
4433 if (odvp->v_type != VDIR || ndvp->v_type != VDIR) {
4434 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4435 goto out;
4436 }
4437
4438 if (cs->saved_exi != cs->exi) {
4439 *cs->statusp = resp->status = NFS4ERR_XDEV;
4440 goto out;
4441 }
4442
4443 status = utf8_dir_verify(&args->oldname);
4444 if (status != NFS4_OK) {
4445 *cs->statusp = resp->status = status;
4446 goto out;
4447 }
4448
4449 status = utf8_dir_verify(&args->newname);
4450 if (status != NFS4_OK) {
4451 *cs->statusp = resp->status = status;
4452 goto out;
4453 }
4454
4455 onm = utf8_to_fn(&args->oldname, &olen, NULL);
4456 if (onm == NULL) {
4457 *cs->statusp = resp->status = NFS4ERR_INVAL;
4458 goto out;
4459 }
4460 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4461 nlen = MAXPATHLEN + 1;
4462 converted_onm = nfscmd_convname(ca, cs->exi, onm, NFSCMD_CONV_INBOUND,
4463 nlen);
4464
4465 if (converted_onm == NULL) {
4466 *cs->statusp = resp->status = NFS4ERR_INVAL;
4467 kmem_free(onm, olen);
4468 goto out;
4469 }
4470
4471 nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4472 if (nnm == NULL) {
4473 *cs->statusp = resp->status = NFS4ERR_INVAL;
4474 if (onm != converted_onm)
4475 kmem_free(converted_onm, MAXPATHLEN + 1);
4476 kmem_free(onm, olen);
4477 goto out;
4478 }
4479 converted_nnm = nfscmd_convname(ca, cs->exi, nnm, NFSCMD_CONV_INBOUND,
4480 MAXPATHLEN + 1);
4481
4482 if (converted_nnm == NULL) {
4483 *cs->statusp = resp->status = NFS4ERR_INVAL;
4484 kmem_free(nnm, nlen);
4485 nnm = NULL;
4486 if (onm != converted_onm)
4487 kmem_free(converted_onm, MAXPATHLEN + 1);
4488 kmem_free(onm, olen);
4489 goto out;
4490 }
4491
4492
4493 if (olen > MAXNAMELEN || nlen > MAXNAMELEN) {
4494 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4495 kmem_free(onm, olen);
4496 kmem_free(nnm, nlen);
4497 goto out;
4498 }
4499
4500
4501 if (rdonly4(req, cs)) {
4502 *cs->statusp = resp->status = NFS4ERR_ROFS;
4503 if (onm != converted_onm)
4504 kmem_free(converted_onm, MAXPATHLEN + 1);
4505 kmem_free(onm, olen);
4506 if (nnm != converted_nnm)
4507 kmem_free(converted_nnm, MAXPATHLEN + 1);
4508 kmem_free(nnm, nlen);
4509 goto out;
4510 }
4511
4512 /* check label of the target dir */
4513 if (is_system_labeled()) {
4514 ASSERT(req->rq_label != NULL);
4515 clabel = req->rq_label;
4516 DTRACE_PROBE2(tx__rfs4__log__info__oprename__clabel, char *,
4517 "got client label from request(1)",
4518 struct svc_req *, req);
4519 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4520 if (!do_rfs_label_check(clabel, ndvp,
4521 EQUALITY_CHECK, cs->exi)) {
4522 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4523 goto err_out;
4524 }
4525 }
4526 }
4527
4528 /*
4529 * Is the source a file and have a delegation?
4530 * We don't need to acquire va_seq before these lookups, if
4531 * it causes an update, cinfo.before will not match, which will
4532 * trigger a cache flush even if atomic is TRUE.
4533 */
4534 if (sfp = rfs4_lookup_and_findfile(odvp, converted_onm, &srcvp,
4535 &error, cs->cr)) {
4536 if (rfs4_check_delegated_byfp(FWRITE, sfp, TRUE, TRUE, TRUE,
4537 NULL)) {
4538 *cs->statusp = resp->status = NFS4ERR_DELAY;
4539 goto err_out;
4540 }
4541 }
4542
4543 if (srcvp == NULL) {
4544 *cs->statusp = resp->status = puterrno4(error);
4545 if (onm != converted_onm)
4546 kmem_free(converted_onm, MAXPATHLEN + 1);
4547 kmem_free(onm, olen);
4548 if (nnm != converted_nnm)
4549 kmem_free(converted_nnm, MAXPATHLEN + 1);
4550 kmem_free(nnm, nlen);
4551 goto out;
4552 }
4553
4554 sfp_rele_grant_hold = 1;
4555
4556 /* Does the destination exist and a file and have a delegation? */
4557 if (fp = rfs4_lookup_and_findfile(ndvp, converted_nnm, &targvp,
4558 NULL, cs->cr)) {
4559 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4560 NULL)) {
4561 *cs->statusp = resp->status = NFS4ERR_DELAY;
4562 goto err_out;
4563 }
4564 }
4565 fp_rele_grant_hold = 1;
4566
4567 /* Check for NBMAND lock on both source and target */
4568 if (nbl_need_check(srcvp)) {
4569 nbl_start_crit(srcvp, RW_READER);
4570 in_crit_src = 1;
4571 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
4572 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4573 goto err_out;
4574 }
4575 }
4576
4577 if (targvp && nbl_need_check(targvp)) {
4578 nbl_start_crit(targvp, RW_READER);
4579 in_crit_targ = 1;
4580 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
4581 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4582 goto err_out;
4583 }
4584 }
4585
4586 /* Get source "before" change value */
4587 obdva.va_mask = AT_CTIME|AT_SEQ;
4588 error = VOP_GETATTR(odvp, &obdva, 0, cs->cr, NULL);
4589 if (!error) {
4590 nbdva.va_mask = AT_CTIME|AT_SEQ;
4591 error = VOP_GETATTR(ndvp, &nbdva, 0, cs->cr, NULL);
4592 }
4593 if (error) {
4594 *cs->statusp = resp->status = puterrno4(error);
4595 goto err_out;
4596 }
4597
4598 NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.before, obdva.va_ctime)
4599 NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.before, nbdva.va_ctime)
4600
4601 error = VOP_RENAME(odvp, converted_onm, ndvp, converted_nnm, cs->cr,
4602 NULL, 0);
4603
4604 /*
4605 * If target existed and was unlinked by VOP_RENAME, state will need
4606 * closed. To avoid deadlock, rfs4_close_all_state will be done after
4607 * any necessary nbl_end_crit on srcvp and tgtvp.
4608 */
4609 if (error == 0 && fp != NULL) {
4610 rfs4_dbe_lock(fp->rf_dbe);
4611 tvp = fp->rf_vp;
4612 if (tvp)
4613 VN_HOLD(tvp);
4614 rfs4_dbe_unlock(fp->rf_dbe);
4615
4616 if (tvp) {
4617 struct vattr va;
4618 va.va_mask = AT_NLINK;
4619
4620 if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4621 va.va_nlink == 0) {
4622 unlinked = 1;
4623
4624 /* DEBUG data */
4625 if ((srcvp == targvp) || (tvp != targvp)) {
4626 cmn_err(CE_WARN, "rfs4_op_rename: "
4627 "srcvp %p, targvp: %p, tvp: %p",
4628 (void *)srcvp, (void *)targvp,
4629 (void *)tvp);
4630 }
4631 } else {
4632 VN_RELE(tvp);
4633 }
4634 }
4635 }
4636 if (error == 0)
4637 vn_renamepath(ndvp, srcvp, nnm, nlen - 1);
4638
4639 if (in_crit_src)
4640 nbl_end_crit(srcvp);
4641 if (srcvp)
4642 VN_RELE(srcvp);
4643 if (in_crit_targ)
4644 nbl_end_crit(targvp);
4645 if (targvp)
4646 VN_RELE(targvp);
4647
4648 if (unlinked) {
4649 ASSERT(fp != NULL);
4650 ASSERT(tvp != NULL);
4651
4652 /* DEBUG data */
4653 if (RW_READ_HELD(&tvp->v_nbllock)) {
4654 cmn_err(CE_WARN, "rfs4_op_rename: "
4655 "RW_READ_HELD(%p)", (void *)tvp);
4656 }
4657
4658 /* The file is gone and so should the state */
4659 rfs4_close_all_state(fp);
4660 VN_RELE(tvp);
4661 }
4662
4663 if (sfp) {
4664 rfs4_clear_dont_grant(sfp);
4665 rfs4_file_rele(sfp);
4666 }
4667 if (fp) {
4668 rfs4_clear_dont_grant(fp);
4669 rfs4_file_rele(fp);
4670 }
4671
4672 if (converted_onm != onm)
4673 kmem_free(converted_onm, MAXPATHLEN + 1);
4674 kmem_free(onm, olen);
4675 if (converted_nnm != nnm)
4676 kmem_free(converted_nnm, MAXPATHLEN + 1);
4677 kmem_free(nnm, nlen);
4678
4679 /*
4680 * Get the initial "after" sequence number, if it fails, set to zero
4681 */
4682 oidva.va_mask = AT_SEQ;
4683 if (VOP_GETATTR(odvp, &oidva, 0, cs->cr, NULL))
4684 oidva.va_seq = 0;
4685
4686 nidva.va_mask = AT_SEQ;
4687 if (VOP_GETATTR(ndvp, &nidva, 0, cs->cr, NULL))
4688 nidva.va_seq = 0;
4689
4690 /*
4691 * Force modified data and metadata out to stable storage.
4692 */
4693 (void) VOP_FSYNC(odvp, 0, cs->cr, NULL);
4694 (void) VOP_FSYNC(ndvp, 0, cs->cr, NULL);
4695
4696 if (error) {
4697 *cs->statusp = resp->status = puterrno4(error);
4698 goto out;
4699 }
4700
4701 /*
4702 * Get "after" change values, if it fails, simply return the
4703 * before value.
4704 */
4705 oadva.va_mask = AT_CTIME|AT_SEQ;
4706 if (VOP_GETATTR(odvp, &oadva, 0, cs->cr, NULL)) {
4707 oadva.va_ctime = obdva.va_ctime;
4708 oadva.va_seq = 0;
4709 }
4710
4711 nadva.va_mask = AT_CTIME|AT_SEQ;
4712 if (VOP_GETATTR(odvp, &nadva, 0, cs->cr, NULL)) {
4713 nadva.va_ctime = nbdva.va_ctime;
4714 nadva.va_seq = 0;
4715 }
4716
4717 NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.after, oadva.va_ctime)
4718 NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.after, nadva.va_ctime)
4719
4720 /*
4721 * The cinfo.atomic = TRUE only if we have
4722 * non-zero va_seq's, and it has incremented by exactly one
4723 * during the VOP_RENAME and it didn't change during the VOP_FSYNC.
4724 */
4725 if (obdva.va_seq && oidva.va_seq && oadva.va_seq &&
4726 oidva.va_seq == (obdva.va_seq + 1) && oidva.va_seq == oadva.va_seq)
4727 resp->source_cinfo.atomic = TRUE;
4728 else
4729 resp->source_cinfo.atomic = FALSE;
4730
4731 if (nbdva.va_seq && nidva.va_seq && nadva.va_seq &&
4732 nidva.va_seq == (nbdva.va_seq + 1) && nidva.va_seq == nadva.va_seq)
4733 resp->target_cinfo.atomic = TRUE;
4734 else
4735 resp->target_cinfo.atomic = FALSE;
4736
4737 #ifdef VOLATILE_FH_TEST
4738 {
4739 extern void add_volrnm_fh(struct exportinfo *, vnode_t *);
4740
4741 /*
4742 * Add the renamed file handle to the volatile rename list
4743 */
4744 if (cs->exi->exi_export.ex_flags & EX_VOLRNM) {
4745 /* file handles may expire on rename */
4746 vnode_t *vp;
4747
4748 nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4749 /*
4750 * Already know that nnm will be a valid string
4751 */
4752 error = VOP_LOOKUP(ndvp, nnm, &vp, NULL, 0, NULL, cs->cr,
4753 NULL, NULL, NULL);
4754 kmem_free(nnm, nlen);
4755 if (!error) {
4756 add_volrnm_fh(cs->exi, vp);
4757 VN_RELE(vp);
4758 }
4759 }
4760 }
4761 #endif /* VOLATILE_FH_TEST */
4762
4763 *cs->statusp = resp->status = NFS4_OK;
4764 out:
4765 DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4766 RENAME4res *, resp);
4767 return;
4768
4769 err_out:
4770 if (onm != converted_onm)
4771 kmem_free(converted_onm, MAXPATHLEN + 1);
4772 if (onm != NULL)
4773 kmem_free(onm, olen);
4774 if (nnm != converted_nnm)
4775 kmem_free(converted_nnm, MAXPATHLEN + 1);
4776 if (nnm != NULL)
4777 kmem_free(nnm, nlen);
4778
4779 if (in_crit_src) nbl_end_crit(srcvp);
4780 if (in_crit_targ) nbl_end_crit(targvp);
4781 if (targvp) VN_RELE(targvp);
4782 if (srcvp) VN_RELE(srcvp);
4783 if (sfp) {
4784 if (sfp_rele_grant_hold) rfs4_clear_dont_grant(sfp);
4785 rfs4_file_rele(sfp);
4786 }
4787 if (fp) {
4788 if (fp_rele_grant_hold) rfs4_clear_dont_grant(fp);
4789 rfs4_file_rele(fp);
4790 }
4791
4792 DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4793 RENAME4res *, resp);
4794 }
4795
4796 /* ARGSUSED */
4797 static void
4798 rfs4_op_renew(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4799 struct compound_state *cs)
4800 {
4801 RENEW4args *args = &argop->nfs_argop4_u.oprenew;
4802 RENEW4res *resp = &resop->nfs_resop4_u.oprenew;
4803 rfs4_client_t *cp;
4804
4805 DTRACE_NFSV4_2(op__renew__start, struct compound_state *, cs,
4806 RENEW4args *, args);
4807
4808 if ((cp = rfs4_findclient_by_id(args->clientid, FALSE)) == NULL) {
4809 *cs->statusp = resp->status =
4810 rfs4_check_clientid(&args->clientid, 0);
4811 goto out;
4812 }
4813
4814 if (rfs4_lease_expired(cp)) {
4815 rfs4_client_rele(cp);
4816 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
4817 goto out;
4818 }
4819
4820 rfs4_update_lease(cp);
4821
4822 mutex_enter(cp->rc_cbinfo.cb_lock);
4823 if (cp->rc_cbinfo.cb_notified_of_cb_path_down == FALSE) {
4824 cp->rc_cbinfo.cb_notified_of_cb_path_down = TRUE;
4825 *cs->statusp = resp->status = NFS4ERR_CB_PATH_DOWN;
4826 } else {
4827 *cs->statusp = resp->status = NFS4_OK;
4828 }
4829 mutex_exit(cp->rc_cbinfo.cb_lock);
4830
4831 rfs4_client_rele(cp);
4832
4833 out:
4834 DTRACE_NFSV4_2(op__renew__done, struct compound_state *, cs,
4835 RENEW4res *, resp);
4836 }
4837
4838 /* ARGSUSED */
4839 static void
4840 rfs4_op_restorefh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
4841 struct compound_state *cs)
4842 {
4843 RESTOREFH4res *resp = &resop->nfs_resop4_u.oprestorefh;
4844
4845 DTRACE_NFSV4_1(op__restorefh__start, struct compound_state *, cs);
4846
4847 /* No need to check cs->access - we are not accessing any object */
4848 if ((cs->saved_vp == NULL) || (cs->saved_fh.nfs_fh4_val == NULL)) {
4849 *cs->statusp = resp->status = NFS4ERR_RESTOREFH;
4850 goto out;
4851 }
4852 if (cs->vp != NULL) {
4853 VN_RELE(cs->vp);
4854 }
4855 cs->vp = cs->saved_vp;
4856 cs->saved_vp = NULL;
4857 cs->exi = cs->saved_exi;
4858 nfs_fh4_copy(&cs->saved_fh, &cs->fh);
4859 *cs->statusp = resp->status = NFS4_OK;
4860 cs->deleg = FALSE;
4861
4862 out:
4863 DTRACE_NFSV4_2(op__restorefh__done, struct compound_state *, cs,
4864 RESTOREFH4res *, resp);
4865 }
4866
4867 /* ARGSUSED */
4868 static void
4869 rfs4_op_savefh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4870 struct compound_state *cs)
4871 {
4872 SAVEFH4res *resp = &resop->nfs_resop4_u.opsavefh;
4873
4874 DTRACE_NFSV4_1(op__savefh__start, struct compound_state *, cs);
4875
4876 /* No need to check cs->access - we are not accessing any object */
4877 if (cs->vp == NULL) {
4878 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4879 goto out;
4880 }
4881 if (cs->saved_vp != NULL) {
4882 VN_RELE(cs->saved_vp);
4883 }
4884 cs->saved_vp = cs->vp;
4885 VN_HOLD(cs->saved_vp);
4886 cs->saved_exi = cs->exi;
4887 /*
4888 * since SAVEFH is fairly rare, don't alloc space for its fh
4889 * unless necessary.
4890 */
4891 if (cs->saved_fh.nfs_fh4_val == NULL) {
4892 cs->saved_fh.nfs_fh4_val = kmem_alloc(NFS4_FHSIZE, KM_SLEEP);
4893 }
4894 nfs_fh4_copy(&cs->fh, &cs->saved_fh);
4895 *cs->statusp = resp->status = NFS4_OK;
4896
4897 out:
4898 DTRACE_NFSV4_2(op__savefh__done, struct compound_state *, cs,
4899 SAVEFH4res *, resp);
4900 }
4901
4902 /*
4903 * rfs4_verify_attr is called when nfsv4 Setattr failed, but we wish to
4904 * return the bitmap of attrs that were set successfully. It is also
4905 * called by Verify/Nverify to test the vattr/vfsstat attrs. It should
4906 * always be called only after rfs4_do_set_attrs().
4907 *
4908 * Verify that the attributes are same as the expected ones. sargp->vap
4909 * and sargp->sbp contain the input attributes as translated from fattr4.
4910 *
4911 * This function verifies only the attrs that correspond to a vattr or
4912 * vfsstat struct. That is because of the extra step needed to get the
4913 * corresponding system structs. Other attributes have already been set or
4914 * verified by do_rfs4_set_attrs.
4915 *
4916 * Return 0 if all attrs match, -1 if some don't, error if error processing.
4917 */
4918 static int
4919 rfs4_verify_attr(struct nfs4_svgetit_arg *sargp,
4920 bitmap4 *resp, struct nfs4_ntov_table *ntovp)
4921 {
4922 int error, ret_error = 0;
4923 int i, k;
4924 uint_t sva_mask = sargp->vap->va_mask;
4925 uint_t vbit;
4926 union nfs4_attr_u *na;
4927 uint8_t *amap;
4928 bool_t getsb = ntovp->vfsstat;
4929
4930 if (sva_mask != 0) {
4931 /*
4932 * Okay to overwrite sargp->vap because we verify based
4933 * on the incoming values.
4934 */
4935 ret_error = VOP_GETATTR(sargp->cs->vp, sargp->vap, 0,
4936 sargp->cs->cr, NULL);
4937 if (ret_error) {
4938 if (resp == NULL)
4939 return (ret_error);
4940 /*
4941 * Must return bitmap of successful attrs
4942 */
4943 sva_mask = 0; /* to prevent checking vap later */
4944 } else {
4945 /*
4946 * Some file systems clobber va_mask. it is probably
4947 * wrong of them to do so, nonethless we practice
4948 * defensive coding.
4949 * See bug id 4276830.
4950 */
4951 sargp->vap->va_mask = sva_mask;
4952 }
4953 }
4954
4955 if (getsb) {
4956 /*
4957 * Now get the superblock and loop on the bitmap, as there is
4958 * no simple way of translating from superblock to bitmap4.
4959 */
4960 ret_error = VFS_STATVFS(sargp->cs->vp->v_vfsp, sargp->sbp);
4961 if (ret_error) {
4962 if (resp == NULL)
4963 goto errout;
4964 getsb = FALSE;
4965 }
4966 }
4967
4968 /*
4969 * Now loop and verify each attribute which getattr returned
4970 * whether it's the same as the input.
4971 */
4972 if (resp == NULL && !getsb && (sva_mask == 0))
4973 goto errout;
4974
4975 na = ntovp->na;
4976 amap = ntovp->amap;
4977 k = 0;
4978 for (i = 0; i < ntovp->attrcnt; i++, na++, amap++) {
4979 k = *amap;
4980 ASSERT(nfs4_ntov_map[k].nval == k);
4981 vbit = nfs4_ntov_map[k].vbit;
4982
4983 /*
4984 * If vattr attribute but VOP_GETATTR failed, or it's
4985 * superblock attribute but VFS_STATVFS failed, skip
4986 */
4987 if (vbit) {
4988 if ((vbit & sva_mask) == 0)
4989 continue;
4990 } else if (!(getsb && nfs4_ntov_map[k].vfsstat)) {
4991 continue;
4992 }
4993 error = (*nfs4_ntov_map[k].sv_getit)(NFS4ATTR_VERIT, sargp, na);
4994 if (resp != NULL) {
4995 if (error)
4996 ret_error = -1; /* not all match */
4997 else /* update response bitmap */
4998 *resp |= nfs4_ntov_map[k].fbit;
4999 continue;
5000 }
5001 if (error) {
5002 ret_error = -1; /* not all match */
5003 break;
5004 }
5005 }
5006 errout:
5007 return (ret_error);
5008 }
5009
5010 /*
5011 * Decode the attribute to be set/verified. If the attr requires a sys op
5012 * (VOP_GETATTR, VFS_VFSSTAT), and the request is to verify, then don't
5013 * call the sv_getit function for it, because the sys op hasn't yet been done.
5014 * Return 0 for success, error code if failed.
5015 *
5016 * Note: the decoded arg is not freed here but in nfs4_ntov_table_free.
5017 */
5018 static int
5019 decode_fattr4_attr(nfs4_attr_cmd_t cmd, struct nfs4_svgetit_arg *sargp,
5020 int k, XDR *xdrp, bitmap4 *resp_bval, union nfs4_attr_u *nap)
5021 {
5022 int error = 0;
5023 bool_t set_later;
5024
5025 sargp->vap->va_mask |= nfs4_ntov_map[k].vbit;
5026
5027 if ((*nfs4_ntov_map[k].xfunc)(xdrp, nap)) {
5028 set_later = nfs4_ntov_map[k].vbit || nfs4_ntov_map[k].vfsstat;
5029 /*
5030 * don't verify yet if a vattr or sb dependent attr,
5031 * because we don't have their sys values yet.
5032 * Will be done later.
5033 */
5034 if (! (set_later && (cmd == NFS4ATTR_VERIT))) {
5035 /*
5036 * ACLs are a special case, since setting the MODE
5037 * conflicts with setting the ACL. We delay setting
5038 * the ACL until all other attributes have been set.
5039 * The ACL gets set in do_rfs4_op_setattr().
5040 */
5041 if (nfs4_ntov_map[k].fbit != FATTR4_ACL_MASK) {
5042 error = (*nfs4_ntov_map[k].sv_getit)(cmd,
5043 sargp, nap);
5044 if (error) {
5045 xdr_free(nfs4_ntov_map[k].xfunc,
5046 (caddr_t)nap);
5047 }
5048 }
5049 }
5050 } else {
5051 #ifdef DEBUG
5052 cmn_err(CE_NOTE, "decode_fattr4_attr: error "
5053 "decoding attribute %d\n", k);
5054 #endif
5055 error = EINVAL;
5056 }
5057 if (!error && resp_bval && !set_later) {
5058 *resp_bval |= nfs4_ntov_map[k].fbit;
5059 }
5060
5061 return (error);
5062 }
5063
5064 /*
5065 * Set vattr based on incoming fattr4 attrs - used by setattr.
5066 * Set response mask. Ignore any values that are not writable vattr attrs.
5067 */
5068 static nfsstat4
5069 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5070 struct nfs4_svgetit_arg *sargp, struct nfs4_ntov_table *ntovp,
5071 nfs4_attr_cmd_t cmd)
5072 {
5073 int error = 0;
5074 int i;
5075 char *attrs = fattrp->attrlist4;
5076 uint32_t attrslen = fattrp->attrlist4_len;
5077 XDR xdr;
5078 nfsstat4 status = NFS4_OK;
5079 vnode_t *vp = cs->vp;
5080 union nfs4_attr_u *na;
5081 uint8_t *amap;
5082
5083 #ifndef lint
5084 /*
5085 * Make sure that maximum attribute number can be expressed as an
5086 * 8 bit quantity.
5087 */
5088 ASSERT(NFS4_MAXNUM_ATTRS <= (UINT8_MAX + 1));
5089 #endif
5090
5091 if (vp == NULL) {
5092 if (resp)
5093 *resp = 0;
5094 return (NFS4ERR_NOFILEHANDLE);
5095 }
5096 if (cs->access == CS_ACCESS_DENIED) {
5097 if (resp)
5098 *resp = 0;
5099 return (NFS4ERR_ACCESS);
5100 }
5101
5102 sargp->op = cmd;
5103 sargp->cs = cs;
5104 sargp->flag = 0; /* may be set later */
5105 sargp->vap->va_mask = 0;
5106 sargp->rdattr_error = NFS4_OK;
5107 sargp->rdattr_error_req = FALSE;
5108 /* sargp->sbp is set by the caller */
5109
5110 xdrmem_create(&xdr, attrs, attrslen, XDR_DECODE);
5111
5112 na = ntovp->na;
5113 amap = ntovp->amap;
5114
5115 /*
5116 * The following loop iterates on the nfs4_ntov_map checking
5117 * if the fbit is set in the requested bitmap.
5118 * If set then we process the arguments using the
5119 * rfs4_fattr4 conversion functions to populate the setattr
5120 * vattr and va_mask. Any settable attrs that are not using vattr
5121 * will be set in this loop.
5122 */
5123 for (i = 0; i < nfs4_ntov_map_size; i++) {
5124 if (!(fattrp->attrmask & nfs4_ntov_map[i].fbit)) {
5125 continue;
5126 }
5127 /*
5128 * If setattr, must be a writable attr.
5129 * If verify/nverify, must be a readable attr.
5130 */
5131 if ((error = (*nfs4_ntov_map[i].sv_getit)(
5132 NFS4ATTR_SUPPORTED, sargp, NULL)) != 0) {
5133 /*
5134 * Client tries to set/verify an
5135 * unsupported attribute, tries to set
5136 * a read only attr or verify a write
5137 * only one - error!
5138 */
5139 break;
5140 }
5141 /*
5142 * Decode the attribute to set/verify
5143 */
5144 error = decode_fattr4_attr(cmd, sargp, nfs4_ntov_map[i].nval,
5145 &xdr, resp ? resp : NULL, na);
5146 if (error)
5147 break;
5148 *amap++ = (uint8_t)nfs4_ntov_map[i].nval;
5149 na++;
5150 (ntovp->attrcnt)++;
5151 if (nfs4_ntov_map[i].vfsstat)
5152 ntovp->vfsstat = TRUE;
5153 }
5154
5155 if (error != 0)
5156 status = (error == ENOTSUP ? NFS4ERR_ATTRNOTSUPP :
5157 puterrno4(error));
5158 /* xdrmem_destroy(&xdrs); */ /* NO-OP */
5159 return (status);
5160 }
5161
5162 static nfsstat4
5163 do_rfs4_op_setattr(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5164 stateid4 *stateid)
5165 {
5166 int error = 0;
5167 struct nfs4_svgetit_arg sarg;
5168 bool_t trunc;
5169
5170 nfsstat4 status = NFS4_OK;
5171 cred_t *cr = cs->cr;
5172 vnode_t *vp = cs->vp;
5173 struct nfs4_ntov_table ntov;
5174 struct statvfs64 sb;
5175 struct vattr bva;
5176 struct flock64 bf;
5177 int in_crit = 0;
5178 uint_t saved_mask = 0;
5179 caller_context_t ct;
5180
5181 *resp = 0;
5182 sarg.sbp = &sb;
5183 sarg.is_referral = B_FALSE;
5184 nfs4_ntov_table_init(&ntov);
5185 status = do_rfs4_set_attrs(resp, fattrp, cs, &sarg, &ntov,
5186 NFS4ATTR_SETIT);
5187 if (status != NFS4_OK) {
5188 /*
5189 * failed set attrs
5190 */
5191 goto done;
5192 }
5193 if ((sarg.vap->va_mask == 0) &&
5194 (! (fattrp->attrmask & FATTR4_ACL_MASK))) {
5195 /*
5196 * no further work to be done
5197 */
5198 goto done;
5199 }
5200
5201 /*
5202 * If we got a request to set the ACL and the MODE, only
5203 * allow changing VSUID, VSGID, and VSVTX. Attempting
5204 * to change any other bits, along with setting an ACL,
5205 * gives NFS4ERR_INVAL.
5206 */
5207 if ((fattrp->attrmask & FATTR4_ACL_MASK) &&
5208 (fattrp->attrmask & FATTR4_MODE_MASK)) {
5209 vattr_t va;
5210
5211 va.va_mask = AT_MODE;
5212 error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
5213 if (error) {
5214 status = puterrno4(error);
5215 goto done;
5216 }
5217 if ((sarg.vap->va_mode ^ va.va_mode) &
5218 ~(VSUID | VSGID | VSVTX)) {
5219 status = NFS4ERR_INVAL;
5220 goto done;
5221 }
5222 }
5223
5224 /* Check stateid only if size has been set */
5225 if (sarg.vap->va_mask & AT_SIZE) {
5226 trunc = (sarg.vap->va_size == 0);
5227 status = rfs4_check_stateid(FWRITE, cs->vp, stateid,
5228 trunc, &cs->deleg, sarg.vap->va_mask & AT_SIZE, &ct);
5229 if (status != NFS4_OK)
5230 goto done;
5231 } else {
5232 ct.cc_sysid = 0;
5233 ct.cc_pid = 0;
5234 ct.cc_caller_id = nfs4_srv_caller_id;
5235 ct.cc_flags = CC_DONTBLOCK;
5236 }
5237
5238 /* XXX start of possible race with delegations */
5239
5240 /*
5241 * We need to specially handle size changes because it is
5242 * possible for the client to create a file with read-only
5243 * modes, but with the file opened for writing. If the client
5244 * then tries to set the file size, e.g. ftruncate(3C),
5245 * fcntl(F_FREESP), the normal access checking done in
5246 * VOP_SETATTR would prevent the client from doing it even though
5247 * it should be allowed to do so. To get around this, we do the
5248 * access checking for ourselves and use VOP_SPACE which doesn't
5249 * do the access checking.
5250 * Also the client should not be allowed to change the file
5251 * size if there is a conflicting non-blocking mandatory lock in
5252 * the region of the change.
5253 */
5254 if (vp->v_type == VREG && (sarg.vap->va_mask & AT_SIZE)) {
5255 u_offset_t offset;
5256 ssize_t length;
5257
5258 /*
5259 * ufs_setattr clears AT_SIZE from vap->va_mask, but
5260 * before returning, sarg.vap->va_mask is used to
5261 * generate the setattr reply bitmap. We also clear
5262 * AT_SIZE below before calling VOP_SPACE. For both
5263 * of these cases, the va_mask needs to be saved here
5264 * and restored after calling VOP_SETATTR.
5265 */
5266 saved_mask = sarg.vap->va_mask;
5267
5268 /*
5269 * Check any possible conflict due to NBMAND locks.
5270 * Get into critical region before VOP_GETATTR, so the
5271 * size attribute is valid when checking conflicts.
5272 */
5273 if (nbl_need_check(vp)) {
5274 nbl_start_crit(vp, RW_READER);
5275 in_crit = 1;
5276 }
5277
5278 bva.va_mask = AT_UID|AT_SIZE;
5279 if (error = VOP_GETATTR(vp, &bva, 0, cr, &ct)) {
5280 status = puterrno4(error);
5281 goto done;
5282 }
5283
5284 if (in_crit) {
5285 if (sarg.vap->va_size < bva.va_size) {
5286 offset = sarg.vap->va_size;
5287 length = bva.va_size - sarg.vap->va_size;
5288 } else {
5289 offset = bva.va_size;
5290 length = sarg.vap->va_size - bva.va_size;
5291 }
5292 if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
5293 &ct)) {
5294 status = NFS4ERR_LOCKED;
5295 goto done;
5296 }
5297 }
5298
5299 if (crgetuid(cr) == bva.va_uid) {
5300 sarg.vap->va_mask &= ~AT_SIZE;
5301 bf.l_type = F_WRLCK;
5302 bf.l_whence = 0;
5303 bf.l_start = (off64_t)sarg.vap->va_size;
5304 bf.l_len = 0;
5305 bf.l_sysid = 0;
5306 bf.l_pid = 0;
5307 error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
5308 (offset_t)sarg.vap->va_size, cr, &ct);
5309 }
5310 }
5311
5312 if (!error && sarg.vap->va_mask != 0)
5313 error = VOP_SETATTR(vp, sarg.vap, sarg.flag, cr, &ct);
5314
5315 /* restore va_mask -- ufs_setattr clears AT_SIZE */
5316 if (saved_mask & AT_SIZE)
5317 sarg.vap->va_mask |= AT_SIZE;
5318
5319 /*
5320 * If an ACL was being set, it has been delayed until now,
5321 * in order to set the mode (via the VOP_SETATTR() above) first.
5322 */
5323 if ((! error) && (fattrp->attrmask & FATTR4_ACL_MASK)) {
5324 int i;
5325
5326 for (i = 0; i < NFS4_MAXNUM_ATTRS; i++)
5327 if (ntov.amap[i] == FATTR4_ACL)
5328 break;
5329 if (i < NFS4_MAXNUM_ATTRS) {
5330 error = (*nfs4_ntov_map[FATTR4_ACL].sv_getit)(
5331 NFS4ATTR_SETIT, &sarg, &ntov.na[i]);
5332 if (error == 0) {
5333 *resp |= FATTR4_ACL_MASK;
5334 } else if (error == ENOTSUP) {
5335 (void) rfs4_verify_attr(&sarg, resp, &ntov);
5336 status = NFS4ERR_ATTRNOTSUPP;
5337 goto done;
5338 }
5339 } else {
5340 NFS4_DEBUG(rfs4_debug,
5341 (CE_NOTE, "do_rfs4_op_setattr: "
5342 "unable to find ACL in fattr4"));
5343 error = EINVAL;
5344 }
5345 }
5346
5347 if (error) {
5348 /* check if a monitor detected a delegation conflict */
5349 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
5350 status = NFS4ERR_DELAY;
5351 else
5352 status = puterrno4(error);
5353
5354 /*
5355 * Set the response bitmap when setattr failed.
5356 * If VOP_SETATTR partially succeeded, test by doing a
5357 * VOP_GETATTR on the object and comparing the data
5358 * to the setattr arguments.
5359 */
5360 (void) rfs4_verify_attr(&sarg, resp, &ntov);
5361 } else {
5362 /*
5363 * Force modified metadata out to stable storage.
5364 */
5365 (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
5366 /*
5367 * Set response bitmap
5368 */
5369 nfs4_vmask_to_nmask_set(sarg.vap->va_mask, resp);
5370 }
5371
5372 /* Return early and already have a NFSv4 error */
5373 done:
5374 /*
5375 * Except for nfs4_vmask_to_nmask_set(), vattr --> fattr
5376 * conversion sets both readable and writeable NFS4 attrs
5377 * for AT_MTIME and AT_ATIME. The line below masks out
5378 * unrequested attrs from the setattr result bitmap. This
5379 * is placed after the done: label to catch the ATTRNOTSUP
5380 * case.
5381 */
5382 *resp &= fattrp->attrmask;
5383
5384 if (in_crit)
5385 nbl_end_crit(vp);
5386
5387 nfs4_ntov_table_free(&ntov, &sarg);
5388
5389 return (status);
5390 }
5391
5392 /* ARGSUSED */
5393 static void
5394 rfs4_op_setattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5395 struct compound_state *cs)
5396 {
5397 SETATTR4args *args = &argop->nfs_argop4_u.opsetattr;
5398 SETATTR4res *resp = &resop->nfs_resop4_u.opsetattr;
5399 bslabel_t *clabel;
5400
5401 DTRACE_NFSV4_2(op__setattr__start, struct compound_state *, cs,
5402 SETATTR4args *, args);
5403
5404 if (cs->vp == NULL) {
5405 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5406 goto out;
5407 }
5408
5409 /*
5410 * If there is an unshared filesystem mounted on this vnode,
5411 * do not allow to setattr on this vnode.
5412 */
5413 if (vn_ismntpt(cs->vp)) {
5414 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5415 goto out;
5416 }
5417
5418 resp->attrsset = 0;
5419
5420 if (rdonly4(req, cs)) {
5421 *cs->statusp = resp->status = NFS4ERR_ROFS;
5422 goto out;
5423 }
5424
5425 /* check label before setting attributes */
5426 if (is_system_labeled()) {
5427 ASSERT(req->rq_label != NULL);
5428 clabel = req->rq_label;
5429 DTRACE_PROBE2(tx__rfs4__log__info__opsetattr__clabel, char *,
5430 "got client label from request(1)",
5431 struct svc_req *, req);
5432 if (!blequal(&l_admin_low->tsl_label, clabel)) {
5433 if (!do_rfs_label_check(clabel, cs->vp,
5434 EQUALITY_CHECK, cs->exi)) {
5435 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5436 goto out;
5437 }
5438 }
5439 }
5440
5441 *cs->statusp = resp->status =
5442 do_rfs4_op_setattr(&resp->attrsset, &args->obj_attributes, cs,
5443 &args->stateid);
5444
5445 out:
5446 DTRACE_NFSV4_2(op__setattr__done, struct compound_state *, cs,
5447 SETATTR4res *, resp);
5448 }
5449
5450 /* ARGSUSED */
5451 static void
5452 rfs4_op_verify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5453 struct compound_state *cs)
5454 {
5455 /*
5456 * verify and nverify are exactly the same, except that nverify
5457 * succeeds when some argument changed, and verify succeeds when
5458 * when none changed.
5459 */
5460
5461 VERIFY4args *args = &argop->nfs_argop4_u.opverify;
5462 VERIFY4res *resp = &resop->nfs_resop4_u.opverify;
5463
5464 int error;
5465 struct nfs4_svgetit_arg sarg;
5466 struct statvfs64 sb;
5467 struct nfs4_ntov_table ntov;
5468
5469 DTRACE_NFSV4_2(op__verify__start, struct compound_state *, cs,
5470 VERIFY4args *, args);
5471
5472 if (cs->vp == NULL) {
5473 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5474 goto out;
5475 }
5476
5477 sarg.sbp = &sb;
5478 sarg.is_referral = B_FALSE;
5479 nfs4_ntov_table_init(&ntov);
5480 resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5481 &sarg, &ntov, NFS4ATTR_VERIT);
5482 if (resp->status != NFS4_OK) {
5483 /*
5484 * do_rfs4_set_attrs will try to verify systemwide attrs,
5485 * so could return -1 for "no match".
5486 */
5487 if (resp->status == -1)
5488 resp->status = NFS4ERR_NOT_SAME;
5489 goto done;
5490 }
5491 error = rfs4_verify_attr(&sarg, NULL, &ntov);
5492 switch (error) {
5493 case 0:
5494 resp->status = NFS4_OK;
5495 break;
5496 case -1:
5497 resp->status = NFS4ERR_NOT_SAME;
5498 break;
5499 default:
5500 resp->status = puterrno4(error);
5501 break;
5502 }
5503 done:
5504 *cs->statusp = resp->status;
5505 nfs4_ntov_table_free(&ntov, &sarg);
5506 out:
5507 DTRACE_NFSV4_2(op__verify__done, struct compound_state *, cs,
5508 VERIFY4res *, resp);
5509 }
5510
5511 /* ARGSUSED */
5512 static void
5513 rfs4_op_nverify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5514 struct compound_state *cs)
5515 {
5516 /*
5517 * verify and nverify are exactly the same, except that nverify
5518 * succeeds when some argument changed, and verify succeeds when
5519 * when none changed.
5520 */
5521
5522 NVERIFY4args *args = &argop->nfs_argop4_u.opnverify;
5523 NVERIFY4res *resp = &resop->nfs_resop4_u.opnverify;
5524
5525 int error;
5526 struct nfs4_svgetit_arg sarg;
5527 struct statvfs64 sb;
5528 struct nfs4_ntov_table ntov;
5529
5530 DTRACE_NFSV4_2(op__nverify__start, struct compound_state *, cs,
5531 NVERIFY4args *, args);
5532
5533 if (cs->vp == NULL) {
5534 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5535 DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5536 NVERIFY4res *, resp);
5537 return;
5538 }
5539 sarg.sbp = &sb;
5540 sarg.is_referral = B_FALSE;
5541 nfs4_ntov_table_init(&ntov);
5542 resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5543 &sarg, &ntov, NFS4ATTR_VERIT);
5544 if (resp->status != NFS4_OK) {
5545 /*
5546 * do_rfs4_set_attrs will try to verify systemwide attrs,
5547 * so could return -1 for "no match".
5548 */
5549 if (resp->status == -1)
5550 resp->status = NFS4_OK;
5551 goto done;
5552 }
5553 error = rfs4_verify_attr(&sarg, NULL, &ntov);
5554 switch (error) {
5555 case 0:
5556 resp->status = NFS4ERR_SAME;
5557 break;
5558 case -1:
5559 resp->status = NFS4_OK;
5560 break;
5561 default:
5562 resp->status = puterrno4(error);
5563 break;
5564 }
5565 done:
5566 *cs->statusp = resp->status;
5567 nfs4_ntov_table_free(&ntov, &sarg);
5568
5569 DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5570 NVERIFY4res *, resp);
5571 }
5572
/*
 * Size of the on-stack iovec array used by rfs4_op_write(); an mblk chain
 * that needs more entries than this gets a kmem_alloc()ed array instead.
 *
 * XXX - This should live in an NFS header file.
 */
#define MAX_IOVECS 12
5577
5578 /* ARGSUSED */
5579 static void
5580 rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5581 struct compound_state *cs)
5582 {
5583 WRITE4args *args = &argop->nfs_argop4_u.opwrite;
5584 WRITE4res *resp = &resop->nfs_resop4_u.opwrite;
5585 int error;
5586 vnode_t *vp;
5587 struct vattr bva;
5588 u_offset_t rlimit;
5589 struct uio uio;
5590 struct iovec iov[MAX_IOVECS];
5591 struct iovec *iovp;
5592 int iovcnt;
5593 int ioflag;
5594 cred_t *savecred, *cr;
5595 bool_t *deleg = &cs->deleg;
5596 nfsstat4 stat;
5597 int in_crit = 0;
5598 caller_context_t ct;
5599 nfs4_srv_t *nsrv4;
5600
5601 DTRACE_NFSV4_2(op__write__start, struct compound_state *, cs,
5602 WRITE4args *, args);
5603
5604 vp = cs->vp;
5605 if (vp == NULL) {
5606 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5607 goto out;
5608 }
5609 if (cs->access == CS_ACCESS_DENIED) {
5610 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5611 goto out;
5612 }
5613
5614 cr = cs->cr;
5615
5616 if ((stat = rfs4_check_stateid(FWRITE, vp, &args->stateid, FALSE,
5617 deleg, TRUE, &ct)) != NFS4_OK) {
5618 *cs->statusp = resp->status = stat;
5619 goto out;
5620 }
5621
5622 /*
5623 * We have to enter the critical region before calling VOP_RWLOCK
5624 * to avoid a deadlock with ufs.
5625 */
5626 if (nbl_need_check(vp)) {
5627 nbl_start_crit(vp, RW_READER);
5628 in_crit = 1;
5629 if (nbl_conflict(vp, NBL_WRITE,
5630 args->offset, args->data_len, 0, &ct)) {
5631 *cs->statusp = resp->status = NFS4ERR_LOCKED;
5632 goto out;
5633 }
5634 }
5635
5636 bva.va_mask = AT_MODE | AT_UID;
5637 error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
5638
5639 /*
5640 * If we can't get the attributes, then we can't do the
5641 * right access checking. So, we'll fail the request.
5642 */
5643 if (error) {
5644 *cs->statusp = resp->status = puterrno4(error);
5645 goto out;
5646 }
5647
5648 if (rdonly4(req, cs)) {
5649 *cs->statusp = resp->status = NFS4ERR_ROFS;
5650 goto out;
5651 }
5652
5653 if (vp->v_type != VREG) {
5654 *cs->statusp = resp->status =
5655 ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
5656 goto out;
5657 }
5658
5659 if (crgetuid(cr) != bva.va_uid &&
5660 (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct))) {
5661 *cs->statusp = resp->status = puterrno4(error);
5662 goto out;
5663 }
5664
5665 if (MANDLOCK(vp, bva.va_mode)) {
5666 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5667 goto out;
5668 }
5669
5670 nsrv4 = nfs4_get_srv();
5671 if (args->data_len == 0) {
5672 *cs->statusp = resp->status = NFS4_OK;
5673 resp->count = 0;
5674 resp->committed = args->stable;
5675 resp->writeverf = nsrv4->write4verf;
5676 goto out;
5677 }
5678
5679 if (args->mblk != NULL) {
5680 mblk_t *m;
5681 uint_t bytes, round_len;
5682
5683 iovcnt = 0;
5684 bytes = 0;
5685 round_len = roundup(args->data_len, BYTES_PER_XDR_UNIT);
5686 for (m = args->mblk;
5687 m != NULL && bytes < round_len;
5688 m = m->b_cont) {
5689 iovcnt++;
5690 bytes += MBLKL(m);
5691 }
5692 #ifdef DEBUG
5693 /* should have ended on an mblk boundary */
5694 if (bytes != round_len) {
5695 printf("bytes=0x%x, round_len=0x%x, req len=0x%x\n",
5696 bytes, round_len, args->data_len);
5697 printf("args=%p, args->mblk=%p, m=%p", (void *)args,
5698 (void *)args->mblk, (void *)m);
5699 ASSERT(bytes == round_len);
5700 }
5701 #endif
5702 if (iovcnt <= MAX_IOVECS) {
5703 iovp = iov;
5704 } else {
5705 iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
5706 }
5707 mblk_to_iov(args->mblk, iovcnt, iovp);
5708 } else if (args->rlist != NULL) {
5709 iovcnt = 1;
5710 iovp = iov;
5711 iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
5712 iovp->iov_len = args->data_len;
5713 } else {
5714 iovcnt = 1;
5715 iovp = iov;
5716 iovp->iov_base = args->data_val;
5717 iovp->iov_len = args->data_len;
5718 }
5719
5720 uio.uio_iov = iovp;
5721 uio.uio_iovcnt = iovcnt;
5722
5723 uio.uio_segflg = UIO_SYSSPACE;
5724 uio.uio_extflg = UIO_COPY_DEFAULT;
5725 uio.uio_loffset = args->offset;
5726 uio.uio_resid = args->data_len;
5727 uio.uio_llimit = curproc->p_fsz_ctl;
5728 rlimit = uio.uio_llimit - args->offset;
5729 if (rlimit < (u_offset_t)uio.uio_resid)
5730 uio.uio_resid = (int)rlimit;
5731
5732 if (args->stable == UNSTABLE4)
5733 ioflag = 0;
5734 else if (args->stable == FILE_SYNC4)
5735 ioflag = FSYNC;
5736 else if (args->stable == DATA_SYNC4)
5737 ioflag = FDSYNC;
5738 else {
5739 if (iovp != iov)
5740 kmem_free(iovp, sizeof (*iovp) * iovcnt);
5741 *cs->statusp = resp->status = NFS4ERR_INVAL;
5742 goto out;
5743 }
5744
5745 /*
5746 * We're changing creds because VM may fault and we need
5747 * the cred of the current thread to be used if quota
5748 * checking is enabled.
5749 */
5750 savecred = curthread->t_cred;
5751 curthread->t_cred = cr;
5752 error = do_io(FWRITE, vp, &uio, ioflag, cr, &ct);
5753 curthread->t_cred = savecred;
5754
5755 if (iovp != iov)
5756 kmem_free(iovp, sizeof (*iovp) * iovcnt);
5757
5758 if (error) {
5759 *cs->statusp = resp->status = puterrno4(error);
5760 goto out;
5761 }
5762
5763 *cs->statusp = resp->status = NFS4_OK;
5764 resp->count = args->data_len - uio.uio_resid;
5765
5766 if (ioflag == 0)
5767 resp->committed = UNSTABLE4;
5768 else
5769 resp->committed = FILE_SYNC4;
5770
5771 resp->writeverf = nsrv4->write4verf;
5772
5773 out:
5774 if (in_crit)
5775 nbl_end_crit(vp);
5776
5777 DTRACE_NFSV4_2(op__write__done, struct compound_state *, cs,
5778 WRITE4res *, resp);
5779 }
5780
5781
5782 /* XXX put in a header file */
5783 extern int sec_svc_getcred(struct svc_req *, cred_t *, caddr_t *, int *);
5784
5785 void
5786 rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi,
5787 struct svc_req *req, cred_t *cr, int *rv)
5788 {
5789 uint_t i;
5790 struct compound_state cs;
5791 nfs4_srv_t *nsrv4;
5792 nfs_export_t *ne = nfs_get_export();
5793
5794 if (rv != NULL)
5795 *rv = 0;
5796 rfs4_init_compound_state(&cs);
5797 /*
5798 * Form a reply tag by copying over the reqeuest tag.
5799 */
5800 resp->tag.utf8string_val =
5801 kmem_alloc(args->tag.utf8string_len, KM_SLEEP);
5802 resp->tag.utf8string_len = args->tag.utf8string_len;
5803 bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
5804 resp->tag.utf8string_len);
5805
5806 cs.statusp = &resp->status;
5807 cs.req = req;
5808 resp->array = NULL;
5809 resp->array_len = 0;
5810
5811 /*
5812 * XXX for now, minorversion should be zero
5813 */
5814 if (args->minorversion != NFS4_MINORVERSION) {
5815 DTRACE_NFSV4_2(compound__start, struct compound_state *,
5816 &cs, COMPOUND4args *, args);
5817 resp->status = NFS4ERR_MINOR_VERS_MISMATCH;
5818 DTRACE_NFSV4_2(compound__done, struct compound_state *,
5819 &cs, COMPOUND4res *, resp);
5820 return;
5821 }
5822
5823 if (args->array_len == 0) {
5824 resp->status = NFS4_OK;
5825 return;
5826 }
5827
5828 ASSERT(exi == NULL);
5829 ASSERT(cr == NULL);
5830
5831 cr = crget();
5832 ASSERT(cr != NULL);
5833
5834 if (sec_svc_getcred(req, cr, &cs.principal, &cs.nfsflavor) == 0) {
5835 DTRACE_NFSV4_2(compound__start, struct compound_state *,
5836 &cs, COMPOUND4args *, args);
5837 crfree(cr);
5838 DTRACE_NFSV4_2(compound__done, struct compound_state *,
5839 &cs, COMPOUND4res *, resp);
5840 svcerr_badcred(req->rq_xprt);
5841 if (rv != NULL)
5842 *rv = 1;
5843 return;
5844 }
5845 resp->array_len = args->array_len;
5846 resp->array = kmem_zalloc(args->array_len * sizeof (nfs_resop4),
5847 KM_SLEEP);
5848
5849 cs.basecr = cr;
5850 nsrv4 = nfs4_get_srv();
5851
5852 DTRACE_NFSV4_2(compound__start, struct compound_state *, &cs,
5853 COMPOUND4args *, args);
5854
5855 /*
5856 * For now, NFS4 compound processing must be protected by
5857 * exported_lock because it can access more than one exportinfo
5858 * per compound and share/unshare can now change multiple
5859 * exinfo structs. The NFS2/3 code only refs 1 exportinfo
5860 * per proc (excluding public exinfo), and exi_count design
5861 * is sufficient to protect concurrent execution of NFS2/3
5862 * ops along with unexport. This lock will be removed as
5863 * part of the NFSv4 phase 2 namespace redesign work.
5864 */
5865 rw_enter(&ne->exported_lock, RW_READER);
5866
5867 /*
5868 * If this is the first compound we've seen, we need to start all
5869 * new instances' grace periods.
5870 */
5871 if (nsrv4->seen_first_compound == 0) {
5872 rfs4_grace_start_new(nsrv4);
5873 /*
5874 * This must be set after rfs4_grace_start_new(), otherwise
5875 * another thread could proceed past here before the former
5876 * is finished.
5877 */
5878 nsrv4->seen_first_compound = 1;
5879 }
5880
5881 for (i = 0; i < args->array_len && cs.cont; i++) {
5882 nfs_argop4 *argop;
5883 nfs_resop4 *resop;
5884 uint_t op;
5885
5886 argop = &args->array[i];
5887 resop = &resp->array[i];
5888 resop->resop = argop->argop;
5889 op = (uint_t)resop->resop;
5890
5891 if (op < rfsv4disp_cnt) {
5892 /*
5893 * Count the individual ops here; NULL and COMPOUND
5894 * are counted in common_dispatch()
5895 */
5896 rfsproccnt_v4_ptr[op].value.ui64++;
5897
5898 NFS4_DEBUG(rfs4_debug > 1,
5899 (CE_NOTE, "Executing %s", rfs4_op_string[op]));
5900 (*rfsv4disptab[op].dis_proc)(argop, resop, req, &cs);
5901 NFS4_DEBUG(rfs4_debug > 1, (CE_NOTE, "%s returned %d",
5902 rfs4_op_string[op], *cs.statusp));
5903 if (*cs.statusp != NFS4_OK)
5904 cs.cont = FALSE;
5905 } else {
5906 /*
5907 * This is effectively dead code since XDR code
5908 * will have already returned BADXDR if op doesn't
5909 * decode to legal value. This only done for a
5910 * day when XDR code doesn't verify v4 opcodes.
5911 */
5912 op = OP_ILLEGAL;
5913 rfsproccnt_v4_ptr[OP_ILLEGAL_IDX].value.ui64++;
5914
5915 rfs4_op_illegal(argop, resop, req, &cs);
5916 cs.cont = FALSE;
5917 }
5918
5919 /*
5920 * If not at last op, and if we are to stop, then
5921 * compact the results array.
5922 */
5923 if ((i + 1) < args->array_len && !cs.cont) {
5924 nfs_resop4 *new_res = kmem_alloc(
5925 (i+1) * sizeof (nfs_resop4), KM_SLEEP);
5926 bcopy(resp->array,
5927 new_res, (i+1) * sizeof (nfs_resop4));
5928 kmem_free(resp->array,
5929 args->array_len * sizeof (nfs_resop4));
5930
5931 resp->array_len = i + 1;
5932 resp->array = new_res;
5933 }
5934 }
5935
5936 rw_exit(&ne->exported_lock);
5937
5938 /*
5939 * clear exportinfo and vnode fields from compound_state before dtrace
5940 * probe, to avoid tracing residual values for path and share path.
5941 */
5942 if (cs.vp)
5943 VN_RELE(cs.vp);
5944 if (cs.saved_vp)
5945 VN_RELE(cs.saved_vp);
5946 cs.exi = cs.saved_exi = NULL;
5947 cs.vp = cs.saved_vp = NULL;
5948
5949 DTRACE_NFSV4_2(compound__done, struct compound_state *, &cs,
5950 COMPOUND4res *, resp);
5951
5952 if (cs.saved_fh.nfs_fh4_val)
5953 kmem_free(cs.saved_fh.nfs_fh4_val, NFS4_FHSIZE);
5954
5955 if (cs.basecr)
5956 crfree(cs.basecr);
5957 if (cs.cr)
5958 crfree(cs.cr);
5959 /*
5960 * done with this compound request, free the label
5961 */
5962
5963 if (req->rq_label != NULL) {
5964 kmem_free(req->rq_label, sizeof (bslabel_t));
5965 req->rq_label = NULL;
5966 }
5967 }
5968
5969 /*
5970 * XXX because of what appears to be duplicate calls to rfs4_compound_free
5971 * XXX zero out the tag and array values. Need to investigate why the
5972 * XXX calls occur, but at least prevent the panic for now.
5973 */
5974 void
5975 rfs4_compound_free(COMPOUND4res *resp)
5976 {
5977 uint_t i;
5978
5979 if (resp->tag.utf8string_val) {
5980 UTF8STRING_FREE(resp->tag)
5981 }
5982
5983 for (i = 0; i < resp->array_len; i++) {
5984 nfs_resop4 *resop;
5985 uint_t op;
5986
5987 resop = &resp->array[i];
5988 op = (uint_t)resop->resop;
5989 if (op < rfsv4disp_cnt) {
5990 (*rfsv4disptab[op].dis_resfree)(resop);
5991 }
5992 }
5993 if (resp->array != NULL) {
5994 kmem_free(resp->array, resp->array_len * sizeof (nfs_resop4));
5995 }
5996 }
5997
5998 /*
5999 * Process the value of the compound request rpc flags, as a bit-AND
6000 * of the individual per-op flags (idempotent, allowork, publicfh_ok)
6001 */
6002 void
6003 rfs4_compound_flagproc(COMPOUND4args *args, int *flagp)
6004 {
6005 int i;
6006 int flag = RPC_ALL;
6007
6008 for (i = 0; flag && i < args->array_len; i++) {
6009 uint_t op;
6010
6011 op = (uint_t)args->array[i].argop;
6012
6013 if (op < rfsv4disp_cnt)
6014 flag &= rfsv4disptab[op].dis_flags;
6015 else
6016 flag = 0;
6017 }
6018 *flagp = flag;
6019 }
6020
6021 nfsstat4
6022 rfs4_client_sysid(rfs4_client_t *cp, sysid_t *sp)
6023 {
6024 nfsstat4 e;
6025
6026 rfs4_dbe_lock(cp->rc_dbe);
6027
6028 if (cp->rc_sysidt != LM_NOSYSID) {
6029 *sp = cp->rc_sysidt;
6030 e = NFS4_OK;
6031
6032 } else if ((cp->rc_sysidt = lm_alloc_sysidt()) != LM_NOSYSID) {
6033 *sp = cp->rc_sysidt;
6034 e = NFS4_OK;
6035
6036 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
6037 "rfs4_client_sysid: allocated 0x%x\n", *sp));
6038 } else
6039 e = NFS4ERR_DELAY;
6040
6041 rfs4_dbe_unlock(cp->rc_dbe);
6042 return (e);
6043 }
6044
#if defined(DEBUG) && ! defined(lint)
/*
 * Debug aid: log one CE_NOTE line describing a file lock request.
 *
 * str		caller-supplied prefix for the message
 * operation	fcntl-style command (F_GETLK, F_SETLK, F_SETLK_NBMAND)
 * flk		the lock being requested; l_whence must be 0
 *
 * A zero l_len is printed as all ones.
 */
static void
lock_print(char *str, int operation, struct flock64 *flk)
{
	char *op, *type;

	switch (operation) {
	case F_GETLK:
		op = "F_GETLK";
		break;
	case F_SETLK:
		op = "F_SETLK";
		break;
	case F_SETLK_NBMAND:
		op = "F_SETLK_NBMAND";
		break;
	default:
		op = "F_UNKNOWN";
		break;
	}

	switch (flk->l_type) {
	case F_UNLCK:
		type = "F_UNLCK";
		break;
	case F_RDLCK:
		type = "F_RDLCK";
		break;
	case F_WRLCK:
		type = "F_WRLCK";
		break;
	default:
		type = "F_UNKNOWN";
		break;
	}

	ASSERT(flk->l_whence == 0);
	cmn_err(CE_NOTE, "%s: %s, type = %s, off = %llx len = %llx pid = %d",
	    str, op, type, (longlong_t)flk->l_start,
	    flk->l_len ? (longlong_t)flk->l_len : ~0LL, flk->l_pid);
}

/*
 * Wrapped in do/while so the macro behaves as a single statement and
 * cannot capture an "else" that follows its use.
 */
#define	LOCK_PRINT(d, s, t, f)	\
	do { if (d) lock_print((s), (t), (f)); } while (0)
#else
#define	LOCK_PRINT(d, s, t, f)
#endif
6081
/*
 * Credential check hook.  As written it accepts every request: none of
 * the arguments are examined and TRUE is returned unconditionally.
 */
/*ARGSUSED*/
static bool_t
creds_ok(cred_set_t cr_set, struct svc_req *req, struct compound_state *cs)
{
	return (TRUE);
}
6088
6089 /*
6090 * Look up the pathname using the vp in cs as the directory vnode.
6091 * cs->vp will be the vnode for the file on success
6092 */
6093
6094 static nfsstat4
6095 rfs4_lookup(component4 *component, struct svc_req *req,
6096 struct compound_state *cs)
6097 {
6098 char *nm;
6099 uint32_t len;
6100 nfsstat4 status;
6101 struct sockaddr *ca;
6102 char *name;
6103
6104 if (cs->vp == NULL) {
6105 return (NFS4ERR_NOFILEHANDLE);
6106 }
6107 if (cs->vp->v_type != VDIR) {
6108 return (NFS4ERR_NOTDIR);
6109 }
6110
6111 status = utf8_dir_verify(component);
6112 if (status != NFS4_OK)
6113 return (status);
6114
6115 nm = utf8_to_fn(component, &len, NULL);
6116 if (nm == NULL) {
6117 return (NFS4ERR_INVAL);
6118 }
6119
6120 if (len > MAXNAMELEN) {
6121 kmem_free(nm, len);
6122 return (NFS4ERR_NAMETOOLONG);
6123 }
6124
6125 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6126 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6127 MAXPATHLEN + 1);
6128
6129 if (name == NULL) {
6130 kmem_free(nm, len);
6131 return (NFS4ERR_INVAL);
6132 }
6133
6134 status = do_rfs4_op_lookup(name, req, cs);
6135
6136 if (name != nm)
6137 kmem_free(name, MAXPATHLEN + 1);
6138
6139 kmem_free(nm, len);
6140
6141 return (status);
6142 }
6143
6144 static nfsstat4
6145 rfs4_lookupfile(component4 *component, struct svc_req *req,
6146 struct compound_state *cs, uint32_t access, change_info4 *cinfo)
6147 {
6148 nfsstat4 status;
6149 vnode_t *dvp = cs->vp;
6150 vattr_t bva, ava, fva;
6151 int error;
6152
6153 /* Get "before" change value */
6154 bva.va_mask = AT_CTIME|AT_SEQ;
6155 error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6156 if (error)
6157 return (puterrno4(error));
6158
6159 /* rfs4_lookup may VN_RELE directory */
6160 VN_HOLD(dvp);
6161
6162 status = rfs4_lookup(component, req, cs);
6163 if (status != NFS4_OK) {
6164 VN_RELE(dvp);
6165 return (status);
6166 }
6167
6168 /*
6169 * Get "after" change value, if it fails, simply return the
6170 * before value.
6171 */
6172 ava.va_mask = AT_CTIME|AT_SEQ;
6173 if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6174 ava.va_ctime = bva.va_ctime;
6175 ava.va_seq = 0;
6176 }
6177 VN_RELE(dvp);
6178
6179 /*
6180 * Validate the file is a file
6181 */
6182 fva.va_mask = AT_TYPE|AT_MODE;
6183 error = VOP_GETATTR(cs->vp, &fva, 0, cs->cr, NULL);
6184 if (error)
6185 return (puterrno4(error));
6186
6187 if (fva.va_type != VREG) {
6188 if (fva.va_type == VDIR)
6189 return (NFS4ERR_ISDIR);
6190 if (fva.va_type == VLNK)
6191 return (NFS4ERR_SYMLINK);
6192 return (NFS4ERR_INVAL);
6193 }
6194
6195 NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime);
6196 NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6197
6198 /*
6199 * It is undefined if VOP_LOOKUP will change va_seq, so
6200 * cinfo.atomic = TRUE only if we have
6201 * non-zero va_seq's, and they have not changed.
6202 */
6203 if (bva.va_seq && ava.va_seq && ava.va_seq == bva.va_seq)
6204 cinfo->atomic = TRUE;
6205 else
6206 cinfo->atomic = FALSE;
6207
6208 /* Check for mandatory locking */
6209 cs->mandlock = MANDLOCK(cs->vp, fva.va_mode);
6210 return (check_open_access(access, cs, req));
6211 }
6212
6213 static nfsstat4
6214 create_vnode(vnode_t *dvp, char *nm, vattr_t *vap, createmode4 mode,
6215 cred_t *cr, vnode_t **vpp, bool_t *created)
6216 {
6217 int error;
6218 nfsstat4 status = NFS4_OK;
6219 vattr_t va;
6220
6221 tryagain:
6222
6223 /*
6224 * The file open mode used is VWRITE. If the client needs
6225 * some other semantic, then it should do the access checking
6226 * itself. It would have been nice to have the file open mode
6227 * passed as part of the arguments.
6228 */
6229
6230 *created = TRUE;
6231 error = VOP_CREATE(dvp, nm, vap, EXCL, VWRITE, vpp, cr, 0, NULL, NULL);
6232
6233 if (error) {
6234 *created = FALSE;
6235
6236 /*
6237 * If we got something other than file already exists
6238 * then just return this error. Otherwise, we got
6239 * EEXIST. If we were doing a GUARDED create, then
6240 * just return this error. Otherwise, we need to
6241 * make sure that this wasn't a duplicate of an
6242 * exclusive create request.
6243 *
6244 * The assumption is made that a non-exclusive create
6245 * request will never return EEXIST.
6246 */
6247
6248 if (error != EEXIST || mode == GUARDED4) {
6249 status = puterrno4(error);
6250 return (status);
6251 }
6252 error = VOP_LOOKUP(dvp, nm, vpp, NULL, 0, NULL, cr,
6253 NULL, NULL, NULL);
6254
6255 if (error) {
6256 /*
6257 * We couldn't find the file that we thought that
6258 * we just created. So, we'll just try creating
6259 * it again.
6260 */
6261 if (error == ENOENT)
6262 goto tryagain;
6263
6264 status = puterrno4(error);
6265 return (status);
6266 }
6267
6268 if (mode == UNCHECKED4) {
6269 /* existing object must be regular file */
6270 if ((*vpp)->v_type != VREG) {
6271 if ((*vpp)->v_type == VDIR)
6272 status = NFS4ERR_ISDIR;
6273 else if ((*vpp)->v_type == VLNK)
6274 status = NFS4ERR_SYMLINK;
6275 else
6276 status = NFS4ERR_INVAL;
6277 VN_RELE(*vpp);
6278 return (status);
6279 }
6280
6281 return (NFS4_OK);
6282 }
6283
6284 /* Check for duplicate request */
6285 va.va_mask = AT_MTIME;
6286 error = VOP_GETATTR(*vpp, &va, 0, cr, NULL);
6287 if (!error) {
6288 /* We found the file */
6289 const timestruc_t *mtime = &vap->va_mtime;
6290
6291 if (va.va_mtime.tv_sec != mtime->tv_sec ||
6292 va.va_mtime.tv_nsec != mtime->tv_nsec) {
6293 /* but its not our creation */
6294 VN_RELE(*vpp);
6295 return (NFS4ERR_EXIST);
6296 }
6297 *created = TRUE; /* retrans of create == created */
6298 return (NFS4_OK);
6299 }
6300 VN_RELE(*vpp);
6301 return (NFS4ERR_EXIST);
6302 }
6303
6304 return (NFS4_OK);
6305 }
6306
6307 static nfsstat4
6308 check_open_access(uint32_t access, struct compound_state *cs,
6309 struct svc_req *req)
6310 {
6311 int error;
6312 vnode_t *vp;
6313 bool_t readonly;
6314 cred_t *cr = cs->cr;
6315
6316 /* For now we don't allow mandatory locking as per V2/V3 */
6317 if (cs->access == CS_ACCESS_DENIED || cs->mandlock) {
6318 return (NFS4ERR_ACCESS);
6319 }
6320
6321 vp = cs->vp;
6322 ASSERT(cr != NULL && vp->v_type == VREG);
6323
6324 /*
6325 * If the file system is exported read only and we are trying
6326 * to open for write, then return NFS4ERR_ROFS
6327 */
6328
6329 readonly = rdonly4(req, cs);
6330
6331 if ((access & OPEN4_SHARE_ACCESS_WRITE) && readonly)
6332 return (NFS4ERR_ROFS);
6333
6334 if (access & OPEN4_SHARE_ACCESS_READ) {
6335 if ((VOP_ACCESS(vp, VREAD, 0, cr, NULL) != 0) &&
6336 (VOP_ACCESS(vp, VEXEC, 0, cr, NULL) != 0)) {
6337 return (NFS4ERR_ACCESS);
6338 }
6339 }
6340
6341 if (access & OPEN4_SHARE_ACCESS_WRITE) {
6342 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
6343 if (error)
6344 return (NFS4ERR_ACCESS);
6345 }
6346
6347 return (NFS4_OK);
6348 }
6349
6350 static nfsstat4
6351 rfs4_createfile(OPEN4args *args, struct svc_req *req, struct compound_state *cs,
6352 change_info4 *cinfo, bitmap4 *attrset, clientid4 clientid)
6353 {
6354 struct nfs4_svgetit_arg sarg;
6355 struct nfs4_ntov_table ntov;
6356
6357 bool_t ntov_table_init = FALSE;
6358 struct statvfs64 sb;
6359 nfsstat4 status;
6360 vnode_t *vp;
6361 vattr_t bva, ava, iva, cva, *vap;
6362 vnode_t *dvp;
6363 timespec32_t *mtime;
6364 char *nm = NULL;
6365 uint_t buflen;
6366 bool_t created;
6367 bool_t setsize = FALSE;
6368 len_t reqsize;
6369 int error;
6370 bool_t trunc;
6371 caller_context_t ct;
6372 component4 *component;
6373 bslabel_t *clabel;
6374 struct sockaddr *ca;
6375 char *name = NULL;
6376
6377 sarg.sbp = &sb;
6378 sarg.is_referral = B_FALSE;
6379
6380 dvp = cs->vp;
6381
6382 /* Check if the file system is read only */
6383 if (rdonly4(req, cs))
6384 return (NFS4ERR_ROFS);
6385
6386 /* check the label of including directory */
6387 if (is_system_labeled()) {
6388 ASSERT(req->rq_label != NULL);
6389 clabel = req->rq_label;
6390 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
6391 "got client label from request(1)",
6392 struct svc_req *, req);
6393 if (!blequal(&l_admin_low->tsl_label, clabel)) {
6394 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
6395 cs->exi)) {
6396 return (NFS4ERR_ACCESS);
6397 }
6398 }
6399 }
6400
6401 /*
6402 * Get the last component of path name in nm. cs will reference
6403 * the including directory on success.
6404 */
6405 component = &args->open_claim4_u.file;
6406 status = utf8_dir_verify(component);
6407 if (status != NFS4_OK)
6408 return (status);
6409
6410 nm = utf8_to_fn(component, &buflen, NULL);
6411
6412 if (nm == NULL)
6413 return (NFS4ERR_RESOURCE);
6414
6415 if (buflen > MAXNAMELEN) {
6416 kmem_free(nm, buflen);
6417 return (NFS4ERR_NAMETOOLONG);
6418 }
6419
6420 bva.va_mask = AT_TYPE|AT_CTIME|AT_SEQ;
6421 error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6422 if (error) {
6423 kmem_free(nm, buflen);
6424 return (puterrno4(error));
6425 }
6426
6427 if (bva.va_type != VDIR) {
6428 kmem_free(nm, buflen);
6429 return (NFS4ERR_NOTDIR);
6430 }
6431
6432 NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime)
6433
6434 switch (args->mode) {
6435 case GUARDED4:
6436 /*FALLTHROUGH*/
6437 case UNCHECKED4:
6438 nfs4_ntov_table_init(&ntov);
6439 ntov_table_init = TRUE;
6440
6441 *attrset = 0;
6442 status = do_rfs4_set_attrs(attrset,
6443 &args->createhow4_u.createattrs,
6444 cs, &sarg, &ntov, NFS4ATTR_SETIT);
6445
6446 if (status == NFS4_OK && (sarg.vap->va_mask & AT_TYPE) &&
6447 sarg.vap->va_type != VREG) {
6448 if (sarg.vap->va_type == VDIR)
6449 status = NFS4ERR_ISDIR;
6450 else if (sarg.vap->va_type == VLNK)
6451 status = NFS4ERR_SYMLINK;
6452 else
6453 status = NFS4ERR_INVAL;
6454 }
6455
6456 if (status != NFS4_OK) {
6457 kmem_free(nm, buflen);
6458 nfs4_ntov_table_free(&ntov, &sarg);
6459 *attrset = 0;
6460 return (status);
6461 }
6462
6463 vap = sarg.vap;
6464 vap->va_type = VREG;
6465 vap->va_mask |= AT_TYPE;
6466
6467 if ((vap->va_mask & AT_MODE) == 0) {
6468 vap->va_mask |= AT_MODE;
6469 vap->va_mode = (mode_t)0600;
6470 }
6471
6472 if (vap->va_mask & AT_SIZE) {
6473
6474 /* Disallow create with a non-zero size */
6475
6476 if ((reqsize = sarg.vap->va_size) != 0) {
6477 kmem_free(nm, buflen);
6478 nfs4_ntov_table_free(&ntov, &sarg);
6479 *attrset = 0;
6480 return (NFS4ERR_INVAL);
6481 }
6482 setsize = TRUE;
6483 }
6484 break;
6485
6486 case EXCLUSIVE4:
6487 /* prohibit EXCL create of named attributes */
6488 if (dvp->v_flag & V_XATTRDIR) {
6489 kmem_free(nm, buflen);
6490 *attrset = 0;
6491 return (NFS4ERR_INVAL);
6492 }
6493
6494 cva.va_mask = AT_TYPE | AT_MTIME | AT_MODE;
6495 cva.va_type = VREG;
6496 /*
6497 * Ensure no time overflows. Assumes underlying
6498 * filesystem supports at least 32 bits.
6499 * Truncate nsec to usec resolution to allow valid
6500 * compares even if the underlying filesystem truncates.
6501 */
6502 mtime = (timespec32_t *)&args->createhow4_u.createverf;
6503 cva.va_mtime.tv_sec = mtime->tv_sec % TIME32_MAX;
6504 cva.va_mtime.tv_nsec = (mtime->tv_nsec / 1000) * 1000;
6505 cva.va_mode = (mode_t)0;
6506 vap = &cva;
6507
6508 /*
6509 * For EXCL create, attrset is set to the server attr
6510 * used to cache the client's verifier.
6511 */
6512 *attrset = FATTR4_TIME_MODIFY_MASK;
6513 break;
6514 }
6515
6516 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6517 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6518 MAXPATHLEN + 1);
6519
6520 if (name == NULL) {
6521 kmem_free(nm, buflen);
6522 return (NFS4ERR_SERVERFAULT);
6523 }
6524
6525 status = create_vnode(dvp, name, vap, args->mode,
6526 cs->cr, &vp, &created);
6527 if (nm != name)
6528 kmem_free(name, MAXPATHLEN + 1);
6529 kmem_free(nm, buflen);
6530
6531 if (status != NFS4_OK) {
6532 if (ntov_table_init)
6533 nfs4_ntov_table_free(&ntov, &sarg);
6534 *attrset = 0;
6535 return (status);
6536 }
6537
6538 trunc = (setsize && !created);
6539
6540 if (args->mode != EXCLUSIVE4) {
6541 bitmap4 createmask = args->createhow4_u.createattrs.attrmask;
6542
6543 /*
6544 * True verification that object was created with correct
6545 * attrs is impossible. The attrs could have been changed
6546 * immediately after object creation. If attributes did
6547 * not verify, the only recourse for the server is to
6548 * destroy the object. Maybe if some attrs (like gid)
6549 * are set incorrectly, the object should be destroyed;
6550 * however, seems bad as a default policy. Do we really
6551 * want to destroy an object over one of the times not
6552 * verifying correctly? For these reasons, the server
6553 * currently sets bits in attrset for createattrs
6554 * that were set; however, no verification is done.
6555 *
6556 * vmask_to_nmask accounts for vattr bits set on create
6557 * [do_rfs4_set_attrs() only sets resp bits for
6558 * non-vattr/vfs bits.]
6559 * Mask off any bits we set by default so as not to return
6560 * more attrset bits than were requested in createattrs
6561 */
6562 if (created) {
6563 nfs4_vmask_to_nmask(sarg.vap->va_mask, attrset);
6564 *attrset &= createmask;
6565 } else {
6566 /*
6567 * We did not create the vnode (we tried but it
6568 * already existed). In this case, the only createattr
6569 * that the spec allows the server to set is size,
6570 * and even then, it can only be set if it is 0.
6571 */
6572 *attrset = 0;
6573 if (trunc)
6574 *attrset = FATTR4_SIZE_MASK;
6575 }
6576 }
6577 if (ntov_table_init)
6578 nfs4_ntov_table_free(&ntov, &sarg);
6579
6580 /*
6581 * Get the initial "after" sequence number, if it fails,
6582 * set to zero, time to before.
6583 */
6584 iva.va_mask = AT_CTIME|AT_SEQ;
6585 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL)) {
6586 iva.va_seq = 0;
6587 iva.va_ctime = bva.va_ctime;
6588 }
6589
6590 /*
6591 * create_vnode attempts to create the file exclusive,
6592 * if it already exists the VOP_CREATE will fail and
6593 * may not increase va_seq. It is atomic if
6594 * we haven't changed the directory, but if it has changed
6595 * we don't know what changed it.
6596 */
6597 if (!created) {
6598 if (bva.va_seq && iva.va_seq &&
6599 bva.va_seq == iva.va_seq)
6600 cinfo->atomic = TRUE;
6601 else
6602 cinfo->atomic = FALSE;
6603 NFS4_SET_FATTR4_CHANGE(cinfo->after, iva.va_ctime);
6604 } else {
6605 /*
6606 * The entry was created, we need to sync the
6607 * directory metadata.
6608 */
6609 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
6610
6611 /*
6612 * Get "after" change value, if it fails, simply return the
6613 * before value.
6614 */
6615 ava.va_mask = AT_CTIME|AT_SEQ;
6616 if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6617 ava.va_ctime = bva.va_ctime;
6618 ava.va_seq = 0;
6619 }
6620
6621 NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6622
6623 /*
6624 * The cinfo->atomic = TRUE only if we have
6625 * non-zero va_seq's, and it has incremented by exactly one
6626 * during the create_vnode and it didn't
6627 * change during the VOP_FSYNC.
6628 */
6629 if (bva.va_seq && iva.va_seq && ava.va_seq &&
6630 iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
6631 cinfo->atomic = TRUE;
6632 else
6633 cinfo->atomic = FALSE;
6634 }
6635
6636 /* Check for mandatory locking and that the size gets set. */
6637 cva.va_mask = AT_MODE;
6638 if (setsize)
6639 cva.va_mask |= AT_SIZE;
6640
6641 /* Assume the worst */
6642 cs->mandlock = TRUE;
6643
6644 if (VOP_GETATTR(vp, &cva, 0, cs->cr, NULL) == 0) {
6645 cs->mandlock = MANDLOCK(cs->vp, cva.va_mode);
6646
6647 /*
6648 * Truncate the file if necessary; this would be
6649 * the case for create over an existing file.
6650 */
6651
6652 if (trunc) {
6653 int in_crit = 0;
6654 rfs4_file_t *fp;
6655 nfs4_srv_t *nsrv4;
6656 bool_t create = FALSE;
6657
6658 /*
6659 * We are writing over an existing file.
6660 * Check to see if we need to recall a delegation.
6661 */
6662 nsrv4 = nfs4_get_srv();
6663 rfs4_hold_deleg_policy(nsrv4);
6664 if ((fp = rfs4_findfile(vp, NULL, &create)) != NULL) {
6665 if (rfs4_check_delegated_byfp(FWRITE, fp,
6666 (reqsize == 0), FALSE, FALSE, &clientid)) {
6667 rfs4_file_rele(fp);
6668 rfs4_rele_deleg_policy(nsrv4);
6669 VN_RELE(vp);
6670 *attrset = 0;
6671 return (NFS4ERR_DELAY);
6672 }
6673 rfs4_file_rele(fp);
6674 }
6675 rfs4_rele_deleg_policy(nsrv4);
6676
6677 if (nbl_need_check(vp)) {
6678 in_crit = 1;
6679
6680 ASSERT(reqsize == 0);
6681
6682 nbl_start_crit(vp, RW_READER);
6683 if (nbl_conflict(vp, NBL_WRITE, 0,
6684 cva.va_size, 0, NULL)) {
6685 in_crit = 0;
6686 nbl_end_crit(vp);
6687 VN_RELE(vp);
6688 *attrset = 0;
6689 return (NFS4ERR_ACCESS);
6690 }
6691 }
6692 ct.cc_sysid = 0;
6693 ct.cc_pid = 0;
6694 ct.cc_caller_id = nfs4_srv_caller_id;
6695 ct.cc_flags = CC_DONTBLOCK;
6696
6697 cva.va_mask = AT_SIZE;
6698 cva.va_size = reqsize;
6699 (void) VOP_SETATTR(vp, &cva, 0, cs->cr, &ct);
6700 if (in_crit)
6701 nbl_end_crit(vp);
6702 }
6703 }
6704
6705 error = makefh4(&cs->fh, vp, cs->exi);
6706
6707 /*
6708 * Force modified data and metadata out to stable storage.
6709 */
6710 (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
6711
6712 if (error) {
6713 VN_RELE(vp);
6714 *attrset = 0;
6715 return (puterrno4(error));
6716 }
6717
6718 /* if parent dir is attrdir, set namedattr fh flag */
6719 if (dvp->v_flag & V_XATTRDIR)
6720 set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
6721
6722 if (cs->vp)
6723 VN_RELE(cs->vp);
6724
6725 cs->vp = vp;
6726
6727 /*
6728 * if we did not create the file, we will need to check
6729 * the access bits on the file
6730 */
6731
6732 if (!created) {
6733 if (setsize)
6734 args->share_access |= OPEN4_SHARE_ACCESS_WRITE;
6735 status = check_open_access(args->share_access, cs, req);
6736 if (status != NFS4_OK)
6737 *attrset = 0;
6738 }
6739 return (status);
6740 }
6741
6742 /*ARGSUSED*/
6743 static void
6744 rfs4_do_open(struct compound_state *cs, struct svc_req *req,
6745 rfs4_openowner_t *oo, delegreq_t deleg,
6746 uint32_t access, uint32_t deny,
6747 OPEN4res *resp, int deleg_cur)
6748 {
6749 /* XXX Currently not using req */
6750 rfs4_state_t *sp;
6751 rfs4_file_t *fp;
6752 bool_t screate = TRUE;
6753 bool_t fcreate = TRUE;
6754 uint32_t open_a, share_a;
6755 uint32_t open_d, share_d;
6756 rfs4_deleg_state_t *dsp;
6757 sysid_t sysid;
6758 nfsstat4 status;
6759 caller_context_t ct;
6760 int fflags = 0;
6761 int recall = 0;
6762 int err;
6763 int first_open;
6764
6765 /* get the file struct and hold a lock on it during initial open */
6766 fp = rfs4_findfile_withlock(cs->vp, &cs->fh, &fcreate);
6767 if (fp == NULL) {
6768 resp->status = NFS4ERR_RESOURCE;
6769 DTRACE_PROBE1(nfss__e__do__open1, nfsstat4, resp->status);
6770 return;
6771 }
6772
6773 sp = rfs4_findstate_by_owner_file(oo, fp, &screate);
6774 if (sp == NULL) {
6775 resp->status = NFS4ERR_RESOURCE;
6776 DTRACE_PROBE1(nfss__e__do__open2, nfsstat4, resp->status);
6777 /* No need to keep any reference */
6778 rw_exit(&fp->rf_file_rwlock);
6779 rfs4_file_rele(fp);
6780 return;
6781 }
6782
6783 /* try to get the sysid before continuing */
6784 if ((status = rfs4_client_sysid(oo->ro_client, &sysid)) != NFS4_OK) {
6785 resp->status = status;
6786 rfs4_file_rele(fp);
6787 /* Not a fully formed open; "close" it */
6788 if (screate == TRUE)
6789 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6790 rfs4_state_rele(sp);
6791 return;
6792 }
6793
6794 /* Calculate the fflags for this OPEN. */
6795 if (access & OPEN4_SHARE_ACCESS_READ)
6796 fflags |= FREAD;
6797 if (access & OPEN4_SHARE_ACCESS_WRITE)
6798 fflags |= FWRITE;
6799
6800 rfs4_dbe_lock(sp->rs_dbe);
6801
6802 /*
6803 * Calculate the new deny and access mode that this open is adding to
6804 * the file for this open owner;
6805 */
6806 open_d = (deny & ~sp->rs_open_deny);
6807 open_a = (access & ~sp->rs_open_access);
6808
6809 /*
6810 * Calculate the new share access and share deny modes that this open
6811 * is adding to the file for this open owner;
6812 */
6813 share_a = (access & ~sp->rs_share_access);
6814 share_d = (deny & ~sp->rs_share_deny);
6815
6816 first_open = (sp->rs_open_access & OPEN4_SHARE_ACCESS_BOTH) == 0;
6817
6818 /*
6819 * Check to see the client has already sent an open for this
6820 * open owner on this file with the same share/deny modes.
6821 * If so, we don't need to check for a conflict and we don't
6822 * need to add another shrlock. If not, then we need to
6823 * check for conflicts in deny and access before checking for
6824 * conflicts in delegation. We don't want to recall a
6825 * delegation based on an open that will eventually fail based
6826 * on shares modes.
6827 */
6828
6829 if (share_a || share_d) {
6830 if ((err = rfs4_share(sp, access, deny)) != 0) {
6831 rfs4_dbe_unlock(sp->rs_dbe);
6832 resp->status = err;
6833
6834 rfs4_file_rele(fp);
6835 /* Not a fully formed open; "close" it */
6836 if (screate == TRUE)
6837 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6838 rfs4_state_rele(sp);
6839 return;
6840 }
6841 }
6842
6843 rfs4_dbe_lock(fp->rf_dbe);
6844
6845 /*
6846 * Check to see if this file is delegated and if so, if a
6847 * recall needs to be done.
6848 */
6849 if (rfs4_check_recall(sp, access)) {
6850 rfs4_dbe_unlock(fp->rf_dbe);
6851 rfs4_dbe_unlock(sp->rs_dbe);
6852 rfs4_recall_deleg(fp, FALSE, sp->rs_owner->ro_client);
6853 delay(NFS4_DELEGATION_CONFLICT_DELAY);
6854 rfs4_dbe_lock(sp->rs_dbe);
6855
6856 /* if state closed while lock was dropped */
6857 if (sp->rs_closed) {
6858 if (share_a || share_d)
6859 (void) rfs4_unshare(sp);
6860 rfs4_dbe_unlock(sp->rs_dbe);
6861 rfs4_file_rele(fp);
6862 /* Not a fully formed open; "close" it */
6863 if (screate == TRUE)
6864 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6865 rfs4_state_rele(sp);
6866 resp->status = NFS4ERR_OLD_STATEID;
6867 return;
6868 }
6869
6870 rfs4_dbe_lock(fp->rf_dbe);
6871 /* Let's see if the delegation was returned */
6872 if (rfs4_check_recall(sp, access)) {
6873 rfs4_dbe_unlock(fp->rf_dbe);
6874 if (share_a || share_d)
6875 (void) rfs4_unshare(sp);
6876 rfs4_dbe_unlock(sp->rs_dbe);
6877 rfs4_file_rele(fp);
6878 rfs4_update_lease(sp->rs_owner->ro_client);
6879
6880 /* Not a fully formed open; "close" it */
6881 if (screate == TRUE)
6882 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6883 rfs4_state_rele(sp);
6884 resp->status = NFS4ERR_DELAY;
6885 return;
6886 }
6887 }
6888 /*
6889 * the share check passed and any delegation conflict has been
6890 * taken care of, now call vop_open.
6891 * if this is the first open then call vop_open with fflags.
6892 * if not, call vn_open_upgrade with just the upgrade flags.
6893 *
6894 * if the file has been opened already, it will have the current
6895 * access mode in the state struct. if it has no share access, then
6896 * this is a new open.
6897 *
	 * However, if this is an open with CLAIM_DELEGATE_CUR, then don't
6899 * call VOP_OPEN(), just do the open upgrade.
6900 */
6901 if (first_open && !deleg_cur) {
6902 ct.cc_sysid = sysid;
6903 ct.cc_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
6904 ct.cc_caller_id = nfs4_srv_caller_id;
6905 ct.cc_flags = CC_DONTBLOCK;
6906 err = VOP_OPEN(&cs->vp, fflags, cs->cr, &ct);
6907 if (err) {
6908 rfs4_dbe_unlock(fp->rf_dbe);
6909 if (share_a || share_d)
6910 (void) rfs4_unshare(sp);
6911 rfs4_dbe_unlock(sp->rs_dbe);
6912 rfs4_file_rele(fp);
6913
6914 /* Not a fully formed open; "close" it */
6915 if (screate == TRUE)
6916 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6917 rfs4_state_rele(sp);
6918 /* check if a monitor detected a delegation conflict */
6919 if (err == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
6920 resp->status = NFS4ERR_DELAY;
6921 else
6922 resp->status = NFS4ERR_SERVERFAULT;
6923 return;
6924 }
6925 } else { /* open upgrade */
6926 /*
6927 * calculate the fflags for the new mode that is being added
6928 * by this upgrade.
6929 */
6930 fflags = 0;
6931 if (open_a & OPEN4_SHARE_ACCESS_READ)
6932 fflags |= FREAD;
6933 if (open_a & OPEN4_SHARE_ACCESS_WRITE)
6934 fflags |= FWRITE;
6935 vn_open_upgrade(cs->vp, fflags);
6936 }
6937 sp->rs_open_access |= access;
6938 sp->rs_open_deny |= deny;
6939
6940 if (open_d & OPEN4_SHARE_DENY_READ)
6941 fp->rf_deny_read++;
6942 if (open_d & OPEN4_SHARE_DENY_WRITE)
6943 fp->rf_deny_write++;
6944 fp->rf_share_deny |= deny;
6945
6946 if (open_a & OPEN4_SHARE_ACCESS_READ)
6947 fp->rf_access_read++;
6948 if (open_a & OPEN4_SHARE_ACCESS_WRITE)
6949 fp->rf_access_write++;
6950 fp->rf_share_access |= access;
6951
6952 /*
6953 * Check for delegation here. if the deleg argument is not
6954 * DELEG_ANY, then this is a reclaim from a client and
6955 * we must honor the delegation requested. If necessary we can
6956 * set the recall flag.
6957 */
6958
6959 dsp = rfs4_grant_delegation(deleg, sp, &recall);
6960
6961 cs->deleg = (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE);
6962
6963 next_stateid(&sp->rs_stateid);
6964
6965 resp->stateid = sp->rs_stateid.stateid;
6966
6967 rfs4_dbe_unlock(fp->rf_dbe);
6968 rfs4_dbe_unlock(sp->rs_dbe);
6969
6970 if (dsp) {
6971 rfs4_set_deleg_response(dsp, &resp->delegation, NULL, recall);
6972 rfs4_deleg_state_rele(dsp);
6973 }
6974
6975 rfs4_file_rele(fp);
6976 rfs4_state_rele(sp);
6977
6978 resp->status = NFS4_OK;
6979 }
6980
6981 /*ARGSUSED*/
6982 static void
6983 rfs4_do_opennull(struct compound_state *cs, struct svc_req *req,
6984 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6985 {
6986 change_info4 *cinfo = &resp->cinfo;
6987 bitmap4 *attrset = &resp->attrset;
6988
6989 if (args->opentype == OPEN4_NOCREATE)
6990 resp->status = rfs4_lookupfile(&args->open_claim4_u.file,
6991 req, cs, args->share_access, cinfo);
6992 else {
6993 /* inhibit delegation grants during exclusive create */
6994
6995 if (args->mode == EXCLUSIVE4)
6996 rfs4_disable_delegation();
6997
6998 resp->status = rfs4_createfile(args, req, cs, cinfo, attrset,
6999 oo->ro_client->rc_clientid);
7000 }
7001
7002 if (resp->status == NFS4_OK) {
7003
7004 /* cs->vp cs->fh now reference the desired file */
7005
7006 rfs4_do_open(cs, req, oo,
7007 oo->ro_need_confirm ? DELEG_NONE : DELEG_ANY,
7008 args->share_access, args->share_deny, resp, 0);
7009
7010 /*
7011 * If rfs4_createfile set attrset, we must
7012 * clear this attrset before the response is copied.
7013 */
7014 if (resp->status != NFS4_OK && resp->attrset) {
7015 resp->attrset = 0;
7016 }
7017 }
7018 else
7019 *cs->statusp = resp->status;
7020
7021 if (args->mode == EXCLUSIVE4)
7022 rfs4_enable_delegation();
7023 }
7024
7025 /*ARGSUSED*/
7026 static void
7027 rfs4_do_openprev(struct compound_state *cs, struct svc_req *req,
7028 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7029 {
7030 change_info4 *cinfo = &resp->cinfo;
7031 vattr_t va;
7032 vtype_t v_type = cs->vp->v_type;
7033 int error = 0;
7034
7035 /* Verify that we have a regular file */
7036 if (v_type != VREG) {
7037 if (v_type == VDIR)
7038 resp->status = NFS4ERR_ISDIR;
7039 else if (v_type == VLNK)
7040 resp->status = NFS4ERR_SYMLINK;
7041 else
7042 resp->status = NFS4ERR_INVAL;
7043 return;
7044 }
7045
7046 va.va_mask = AT_MODE|AT_UID;
7047 error = VOP_GETATTR(cs->vp, &va, 0, cs->cr, NULL);
7048 if (error) {
7049 resp->status = puterrno4(error);
7050 return;
7051 }
7052
7053 cs->mandlock = MANDLOCK(cs->vp, va.va_mode);
7054
7055 /*
7056 * Check if we have access to the file, Note the the file
7057 * could have originally been open UNCHECKED or GUARDED
7058 * with mode bits that will now fail, but there is nothing
7059 * we can really do about that except in the case that the
7060 * owner of the file is the one requesting the open.
7061 */
7062 if (crgetuid(cs->cr) != va.va_uid) {
7063 resp->status = check_open_access(args->share_access, cs, req);
7064 if (resp->status != NFS4_OK) {
7065 return;
7066 }
7067 }
7068
7069 /*
7070 * cinfo on a CLAIM_PREVIOUS is undefined, initialize to zero
7071 */
7072 cinfo->before = 0;
7073 cinfo->after = 0;
7074 cinfo->atomic = FALSE;
7075
7076 rfs4_do_open(cs, req, oo,
7077 NFS4_DELEG4TYPE2REQTYPE(args->open_claim4_u.delegate_type),
7078 args->share_access, args->share_deny, resp, 0);
7079 }
7080
7081 static void
7082 rfs4_do_opendelcur(struct compound_state *cs, struct svc_req *req,
7083 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7084 {
7085 int error;
7086 nfsstat4 status;
7087 stateid4 stateid =
7088 args->open_claim4_u.delegate_cur_info.delegate_stateid;
7089 rfs4_deleg_state_t *dsp;
7090
7091 /*
7092 * Find the state info from the stateid and confirm that the
7093 * file is delegated. If the state openowner is the same as
7094 * the supplied openowner we're done. If not, get the file
7095 * info from the found state info. Use that file info to
7096 * create the state for this lock owner. Note solaris doen't
7097 * really need the pathname to find the file. We may want to
7098 * lookup the pathname and make sure that the vp exist and
7099 * matches the vp in the file structure. However it is
7100 * possible that the pathname nolonger exists (local process
7101 * unlinks the file), so this may not be that useful.
7102 */
7103
7104 status = rfs4_get_deleg_state(&stateid, &dsp);
7105 if (status != NFS4_OK) {
7106 resp->status = status;
7107 return;
7108 }
7109
7110 ASSERT(dsp->rds_finfo->rf_dinfo.rd_dtype != OPEN_DELEGATE_NONE);
7111
7112 /*
7113 * New lock owner, create state. Since this was probably called
7114 * in response to a CB_RECALL we set deleg to DELEG_NONE
7115 */
7116
7117 ASSERT(cs->vp != NULL);
7118 VN_RELE(cs->vp);
7119 VN_HOLD(dsp->rds_finfo->rf_vp);
7120 cs->vp = dsp->rds_finfo->rf_vp;
7121
7122 if (error = makefh4(&cs->fh, cs->vp, cs->exi)) {
7123 rfs4_deleg_state_rele(dsp);
7124 *cs->statusp = resp->status = puterrno4(error);
7125 return;
7126 }
7127
7128 /* Mark progress for delegation returns */
7129 dsp->rds_finfo->rf_dinfo.rd_time_lastwrite = gethrestime_sec();
7130 rfs4_deleg_state_rele(dsp);
7131 rfs4_do_open(cs, req, oo, DELEG_NONE,
7132 args->share_access, args->share_deny, resp, 1);
7133 }
7134
7135 /*ARGSUSED*/
7136 static void
7137 rfs4_do_opendelprev(struct compound_state *cs, struct svc_req *req,
7138 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7139 {
7140 /*
7141 * Lookup the pathname, it must already exist since this file
7142 * was delegated.
7143 *
7144 * Find the file and state info for this vp and open owner pair.
7145 * check that they are in fact delegated.
7146 * check that the state access and deny modes are the same.
7147 *
7148 * Return the delgation possibly seting the recall flag.
7149 */
7150 rfs4_file_t *fp;
7151 rfs4_state_t *sp;
7152 bool_t create = FALSE;
7153 bool_t dcreate = FALSE;
7154 rfs4_deleg_state_t *dsp;
7155 nfsace4 *ace;
7156
7157 /* Note we ignore oflags */
7158 resp->status = rfs4_lookupfile(&args->open_claim4_u.file_delegate_prev,
7159 req, cs, args->share_access, &resp->cinfo);
7160
7161 if (resp->status != NFS4_OK) {
7162 return;
7163 }
7164
7165 /* get the file struct and hold a lock on it during initial open */
7166 fp = rfs4_findfile_withlock(cs->vp, NULL, &create);
7167 if (fp == NULL) {
7168 resp->status = NFS4ERR_RESOURCE;
7169 DTRACE_PROBE1(nfss__e__do_opendelprev1, nfsstat4, resp->status);
7170 return;
7171 }
7172
7173 sp = rfs4_findstate_by_owner_file(oo, fp, &create);
7174 if (sp == NULL) {
7175 resp->status = NFS4ERR_SERVERFAULT;
7176 DTRACE_PROBE1(nfss__e__do_opendelprev2, nfsstat4, resp->status);
7177 rw_exit(&fp->rf_file_rwlock);
7178 rfs4_file_rele(fp);
7179 return;
7180 }
7181
7182 rfs4_dbe_lock(sp->rs_dbe);
7183 rfs4_dbe_lock(fp->rf_dbe);
7184 if (args->share_access != sp->rs_share_access ||
7185 args->share_deny != sp->rs_share_deny ||
7186 sp->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
7187 NFS4_DEBUG(rfs4_debug,
7188 (CE_NOTE, "rfs4_do_opendelprev: state mixup"));
7189 rfs4_dbe_unlock(fp->rf_dbe);
7190 rfs4_dbe_unlock(sp->rs_dbe);
7191 rfs4_file_rele(fp);
7192 rfs4_state_rele(sp);
7193 resp->status = NFS4ERR_SERVERFAULT;
7194 return;
7195 }
7196 rfs4_dbe_unlock(fp->rf_dbe);
7197 rfs4_dbe_unlock(sp->rs_dbe);
7198
7199 dsp = rfs4_finddeleg(sp, &dcreate);
7200 if (dsp == NULL) {
7201 rfs4_state_rele(sp);
7202 rfs4_file_rele(fp);
7203 resp->status = NFS4ERR_SERVERFAULT;
7204 return;
7205 }
7206
7207 next_stateid(&sp->rs_stateid);
7208
7209 resp->stateid = sp->rs_stateid.stateid;
7210
7211 resp->delegation.delegation_type = dsp->rds_dtype;
7212
7213 if (dsp->rds_dtype == OPEN_DELEGATE_READ) {
7214 open_read_delegation4 *rv =
7215 &resp->delegation.open_delegation4_u.read;
7216
7217 rv->stateid = dsp->rds_delegid.stateid;
7218 rv->recall = FALSE; /* no policy in place to set to TRUE */
7219 ace = &rv->permissions;
7220 } else {
7221 open_write_delegation4 *rv =
7222 &resp->delegation.open_delegation4_u.write;
7223
7224 rv->stateid = dsp->rds_delegid.stateid;
7225 rv->recall = FALSE; /* no policy in place to set to TRUE */
7226 ace = &rv->permissions;
7227 rv->space_limit.limitby = NFS_LIMIT_SIZE;
7228 rv->space_limit.nfs_space_limit4_u.filesize = UINT64_MAX;
7229 }
7230
7231 /* XXX For now */
7232 ace->type = ACE4_ACCESS_ALLOWED_ACE_TYPE;
7233 ace->flag = 0;
7234 ace->access_mask = 0;
7235 ace->who.utf8string_len = 0;
7236 ace->who.utf8string_val = 0;
7237
7238 rfs4_deleg_state_rele(dsp);
7239 rfs4_state_rele(sp);
7240 rfs4_file_rele(fp);
7241 }
7242
/*
 * Outcome of a sequence id check, as returned by rfs4_check_seqid() and
 * its open-owner / lock-owner wrappers.
 */
typedef enum {
	NFS4_CHKSEQ_OKAY = 0,	/* request carries the next expected seqid */
	NFS4_CHKSEQ_REPLAY = 1,	/* same seqid and same op as the last reply */
	NFS4_CHKSEQ_BAD = 2	/* anything else */
} rfs4_chkseq_t;
7248
7249 /*
7250 * Generic function for sequence number checks.
7251 */
7252 static rfs4_chkseq_t
7253 rfs4_check_seqid(seqid4 seqid, nfs_resop4 *lastop,
7254 seqid4 rqst_seq, nfs_resop4 *resop, bool_t copyres)
7255 {
7256 /* Same sequence ids and matching operations? */
7257 if (seqid == rqst_seq && resop->resop == lastop->resop) {
7258 if (copyres == TRUE) {
7259 rfs4_free_reply(resop);
7260 rfs4_copy_reply(resop, lastop);
7261 }
7262 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
7263 "Replayed SEQID %d\n", seqid));
7264 return (NFS4_CHKSEQ_REPLAY);
7265 }
7266
7267 /* If the incoming sequence is not the next expected then it is bad */
7268 if (rqst_seq != seqid + 1) {
7269 if (rqst_seq == seqid) {
7270 NFS4_DEBUG(rfs4_debug,
7271 (CE_NOTE, "BAD SEQID: Replayed sequence id "
7272 "but last op was %d current op is %d\n",
7273 lastop->resop, resop->resop));
7274 return (NFS4_CHKSEQ_BAD);
7275 }
7276 NFS4_DEBUG(rfs4_debug,
7277 (CE_NOTE, "BAD SEQID: got %u expecting %u\n",
7278 rqst_seq, seqid));
7279 return (NFS4_CHKSEQ_BAD);
7280 }
7281
7282 /* Everything okay -- next expected */
7283 return (NFS4_CHKSEQ_OKAY);
7284 }
7285
7286
7287 static rfs4_chkseq_t
7288 rfs4_check_open_seqid(seqid4 seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7289 {
7290 rfs4_chkseq_t rc;
7291
7292 rfs4_dbe_lock(op->ro_dbe);
7293 rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply, seqid, resop,
7294 TRUE);
7295 rfs4_dbe_unlock(op->ro_dbe);
7296
7297 if (rc == NFS4_CHKSEQ_OKAY)
7298 rfs4_update_lease(op->ro_client);
7299
7300 return (rc);
7301 }
7302
7303 static rfs4_chkseq_t
7304 rfs4_check_olo_seqid(seqid4 olo_seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7305 {
7306 rfs4_chkseq_t rc;
7307
7308 rfs4_dbe_lock(op->ro_dbe);
7309 rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply,
7310 olo_seqid, resop, FALSE);
7311 rfs4_dbe_unlock(op->ro_dbe);
7312
7313 return (rc);
7314 }
7315
7316 static rfs4_chkseq_t
7317 rfs4_check_lock_seqid(seqid4 seqid, rfs4_lo_state_t *lsp, nfs_resop4 *resop)
7318 {
7319 rfs4_chkseq_t rc = NFS4_CHKSEQ_OKAY;
7320
7321 rfs4_dbe_lock(lsp->rls_dbe);
7322 if (!lsp->rls_skip_seqid_check)
7323 rc = rfs4_check_seqid(lsp->rls_seqid, &lsp->rls_reply, seqid,
7324 resop, TRUE);
7325 rfs4_dbe_unlock(lsp->rls_dbe);
7326
7327 return (rc);
7328 }
7329
/*
 * NFSv4 OPEN operation.
 *
 * Outline:
 *  1. Require a current filehandle, find the client named in the open
 *     owner and refuse the request if its lease has expired.
 *  2. Find (or create) the open owner and take its ro_sw for the rest
 *     of the operation.  For an owner that already existed, check the
 *     request seqid: a replay returns the saved reply, a bad seqid
 *     fails with NFS4ERR_BAD_SEQID, and an owner that is still
 *     unconfirmed has its opens freed and is looked up again (retry).
 *  3. Apply the grace-period rules, validate share_access/share_deny
 *     and dispatch on the claim type to the rfs4_do_open*() helpers.
 *  4. At "out", decide from resp->status whether the open owner's
 *     sequence id and saved reply are updated, and on a replay restore
 *     the current filehandle/vnode to the one originally returned.
 *
 * The final status is stored in both resp->status and *cs->statusp.
 *
 * Labels:
 *  out    - client reference is dropped, seqid bookkeeping follows
 *  finish - publish status, leave ro_sw, drop the open owner
 *  end    - fire the done probe; jumped to directly only by failures
 *           that occur before an open owner is held
 */
static void
rfs4_op_open(nfs_argop4 *argop, nfs_resop4 *resop,
    struct svc_req *req, struct compound_state *cs)
{
	OPEN4args *args = &argop->nfs_argop4_u.opopen;
	OPEN4res *resp = &resop->nfs_resop4_u.opopen;
	open_owner4 *owner = &args->owner;
	open_claim_type4 claim = args->claim;
	rfs4_client_t *cp;
	rfs4_openowner_t *oo;
	bool_t create;
	bool_t replay = FALSE;
	int can_reclaim;

	DTRACE_NFSV4_2(op__open__start, struct compound_state *, cs,
	    OPEN4args *, args);

	if (cs->vp == NULL) {
		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
		goto end;
	}

	/*
	 * Need to check clientid and lease expiration first based on
	 * error ordering and incrementing sequence id.
	 */
	cp = rfs4_findclient_by_id(owner->clientid, FALSE);
	if (cp == NULL) {
		*cs->statusp = resp->status =
		    rfs4_check_clientid(&owner->clientid, 0);
		goto end;
	}

	if (rfs4_lease_expired(cp)) {
		rfs4_client_close(cp);
		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
		goto end;
	}
	/* Sampled once here; consulted below for CLAIM_PREVIOUS in grace */
	can_reclaim = cp->rc_can_reclaim;

	/*
	 * Find the open_owner for use from this point forward.  Take
	 * care in updating the sequence id based on the type of error
	 * being returned.
	 */
retry:
	/* create is in/out: reset on every pass, FALSE afterwards if found */
	create = TRUE;
	oo = rfs4_findopenowner(owner, &create, args->seqid);
	if (oo == NULL) {
		*cs->statusp = resp->status = NFS4ERR_RESOURCE;
		rfs4_client_rele(cp);
		goto end;
	}

	/* Hold off access to the sequence space while the open is done */
	rfs4_sw_enter(&oo->ro_sw);

	/*
	 * If the open_owner existed before at the server, then check
	 * the sequence id.
	 */
	if (!create && !oo->ro_postpone_confirm) {
		switch (rfs4_check_open_seqid(args->seqid, oo, resop)) {
		case NFS4_CHKSEQ_BAD:
			/*
			 * An unconfirmed owner used with a seqid beyond the
			 * recorded one: free its opens and look the owner
			 * up again instead of failing the request.
			 */
			if ((args->seqid > oo->ro_open_seqid) &&
			    oo->ro_need_confirm) {
				rfs4_free_opens(oo, TRUE, FALSE);
				rfs4_sw_exit(&oo->ro_sw);
				rfs4_openowner_rele(oo);
				goto retry;
			}
			resp->status = NFS4ERR_BAD_SEQID;
			goto out;
		case NFS4_CHKSEQ_REPLAY: /* replay of previous request */
			/* resop already holds a copy of the saved reply */
			replay = TRUE;
			goto out;
		default:
			break;
		}

		/*
		 * Sequence was ok and open owner exists
		 * check to see if we have yet to see an
		 * open_confirm.
		 */
		if (oo->ro_need_confirm) {
			rfs4_free_opens(oo, TRUE, FALSE);
			rfs4_sw_exit(&oo->ro_sw);
			rfs4_openowner_rele(oo);
			goto retry;
		}
	}
	/* Grace only applies to regular-type OPENs */
	if (rfs4_clnt_in_grace(cp) &&
	    (claim == CLAIM_NULL || claim == CLAIM_DELEGATE_CUR)) {
		*cs->statusp = resp->status = NFS4ERR_GRACE;
		goto out;
	}

	/*
	 * If previous state at the server existed then can_reclaim
	 * will be set.  If not reply NFS4ERR_NO_GRACE to the
	 * client.
	 */
	if (rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS && !can_reclaim) {
		*cs->statusp = resp->status = NFS4ERR_NO_GRACE;
		goto out;
	}


	/*
	 * Reject the open if the client has missed the grace period
	 */
	if (!rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS) {
		*cs->statusp = resp->status = NFS4ERR_NO_GRACE;
		goto out;
	}

	/* Couple of up-front bookkeeping items */
	if (oo->ro_need_confirm) {
		/*
		 * If this is a reclaim OPEN then we should not ask
		 * for a confirmation of the open_owner per the
		 * protocol specification.
		 */
		if (claim == CLAIM_PREVIOUS)
			oo->ro_need_confirm = FALSE;
		else
			resp->rflags |= OPEN4_RESULT_CONFIRM;
	}
	resp->rflags |= OPEN4_RESULT_LOCKTYPE_POSIX;

	/*
	 * If there is an unshared filesystem mounted on this vnode,
	 * do not allow to open/create in this directory.
	 */
	if (vn_ismntpt(cs->vp)) {
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		goto out;
	}

	/*
	 * access must be READ, WRITE, or BOTH.  No access is invalid.
	 * deny can be READ, WRITE, BOTH, or NONE.
	 * bits not defined for access/deny are invalid.
	 */
	if (! (args->share_access & OPEN4_SHARE_ACCESS_BOTH) ||
	    (args->share_access & ~OPEN4_SHARE_ACCESS_BOTH) ||
	    (args->share_deny & ~OPEN4_SHARE_DENY_BOTH)) {
		*cs->statusp = resp->status = NFS4ERR_INVAL;
		goto out;
	}


	/*
	 * make sure attrset is zero before response is built.
	 */
	resp->attrset = 0;

	/* Each helper reports its result through resp->status */
	switch (claim) {
	case CLAIM_NULL:
		rfs4_do_opennull(cs, req, args, oo, resp);
		break;
	case CLAIM_PREVIOUS:
		rfs4_do_openprev(cs, req, args, oo, resp);
		break;
	case CLAIM_DELEGATE_CUR:
		rfs4_do_opendelcur(cs, req, args, oo, resp);
		break;
	case CLAIM_DELEGATE_PREV:
		rfs4_do_opendelprev(cs, req, args, oo, resp);
		break;
	default:
		resp->status = NFS4ERR_INVAL;
		break;
	}

out:
	/* cp is not used past this point; oo->ro_client is used instead */
	rfs4_client_rele(cp);

	/* Catch sequence id handling here to make it a little easier */
	switch (resp->status) {
	case NFS4ERR_BADXDR:
	case NFS4ERR_BAD_SEQID:
	case NFS4ERR_BAD_STATEID:
	case NFS4ERR_NOFILEHANDLE:
	case NFS4ERR_RESOURCE:
	case NFS4ERR_STALE_CLIENTID:
	case NFS4ERR_STALE_STATEID:
		/*
		 * The protocol states that if any of these errors are
		 * being returned, the sequence id should not be
		 * incremented.  Any other return requires an
		 * increment.
		 */
		break;
	default:
		/* Always update the lease in this case */
		rfs4_update_lease(oo->ro_client);

		/* Regular response - copy the result */
		if (!replay)
			rfs4_update_open_resp(oo, resop, &cs->fh);

		/*
		 * REPLAY case: Only if the previous response was OK
		 * do we copy the filehandle.  If not OK, no
		 * filehandle to copy.
		 */
		if (replay == TRUE &&
		    resp->status == NFS4_OK &&
		    oo->ro_reply_fh.nfs_fh4_val) {
			/*
			 * If this is a replay, we must restore the
			 * current filehandle/vp to that of what was
			 * returned originally.  Try our best to do
			 * it.
			 */
			nfs_fh4_fmt_t *fh_fmtp =
			    (nfs_fh4_fmt_t *)oo->ro_reply_fh.nfs_fh4_val;

			cs->exi = checkexport4(&fh_fmtp->fh4_fsid,
			    (fid_t *)&fh_fmtp->fh4_xlen, NULL);

			/*
			 * Both failure exits below jump straight to
			 * finish, skipping the remaining bookkeeping in
			 * this case.
			 */
			if (cs->exi == NULL) {
				resp->status = NFS4ERR_STALE;
				goto finish;
			}

			VN_RELE(cs->vp);

			/* On failure resp->status is set by nfs4_fhtovp() */
			cs->vp = nfs4_fhtovp(&oo->ro_reply_fh, cs->exi,
			    &resp->status);

			if (cs->vp == NULL)
				goto finish;

			nfs_fh4_copy(&oo->ro_reply_fh, &cs->fh);
		}

		/*
		 * If this was a replay, no need to update the
		 * sequence id.  If the open_owner was not created on
		 * this pass, then update.  The first use of an
		 * open_owner will not bump the sequence id.
		 */
		if (replay == FALSE && !create)
			rfs4_update_open_sequence(oo);
		/*
		 * If the client is receiving an error and the
		 * open_owner needs to be confirmed, there is no way
		 * to notify the client of this fact ignoring the fact
		 * that the server has no method of returning a
		 * stateid to confirm.  Therefore, the server needs to
		 * mark this open_owner in a way as to avoid the
		 * sequence id checking the next time the client uses
		 * this open_owner.
		 */
		if (resp->status != NFS4_OK && oo->ro_need_confirm)
			oo->ro_postpone_confirm = TRUE;
		/*
		 * If OK response then clear the postpone flag and
		 * reset the sequence id to keep in sync with the
		 * client.
		 */
		if (resp->status == NFS4_OK && oo->ro_postpone_confirm) {
			oo->ro_postpone_confirm = FALSE;
			oo->ro_open_seqid = args->seqid;
		}
		break;
	}

finish:
	/* Publish the final status for the compound */
	*cs->statusp = resp->status;

	rfs4_sw_exit(&oo->ro_sw);
	rfs4_openowner_rele(oo);

end:
	DTRACE_NFSV4_2(op__open__done, struct compound_state *, cs,
	    OPEN4res *, resp);
}
7612
7613 /*ARGSUSED*/
7614 void
7615 rfs4_op_open_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
7616 struct svc_req *req, struct compound_state *cs)
7617 {
7618 OPEN_CONFIRM4args *args = &argop->nfs_argop4_u.opopen_confirm;
7619 OPEN_CONFIRM4res *resp = &resop->nfs_resop4_u.opopen_confirm;
7620 rfs4_state_t *sp;
7621 nfsstat4 status;
7622
7623 DTRACE_NFSV4_2(op__open__confirm__start, struct compound_state *, cs,
7624 OPEN_CONFIRM4args *, args);
7625
7626 if (cs->vp == NULL) {
7627 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7628 goto out;
7629 }
7630
7631 if (cs->vp->v_type != VREG) {
7632 *cs->statusp = resp->status =
7633 cs->vp->v_type == VDIR ? NFS4ERR_ISDIR : NFS4ERR_INVAL;
7634 return;
7635 }
7636
7637 status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7638 if (status != NFS4_OK) {
7639 *cs->statusp = resp->status = status;
7640 goto out;
7641 }
7642
7643 /* Ensure specified filehandle matches */
7644 if (cs->vp != sp->rs_finfo->rf_vp) {
7645 rfs4_state_rele(sp);
7646 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7647 goto out;
7648 }
7649
7650 /* hold off other access to open_owner while we tinker */
7651 rfs4_sw_enter(&sp->rs_owner->ro_sw);
7652
7653 switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7654 case NFS4_CHECK_STATEID_OKAY:
7655 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7656 resop) != 0) {
7657 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7658 break;
7659 }
7660 /*
7661 * If it is the appropriate stateid and determined to
7662 * be "OKAY" then this means that the stateid does not
7663 * need to be confirmed and the client is in error for
7664 * sending an OPEN_CONFIRM.
7665 */
7666 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7667 break;
7668 case NFS4_CHECK_STATEID_OLD:
7669 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7670 break;
7671 case NFS4_CHECK_STATEID_BAD:
7672 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7673 break;
7674 case NFS4_CHECK_STATEID_EXPIRED:
7675 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7676 break;
7677 case NFS4_CHECK_STATEID_CLOSED:
7678 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7679 break;
7680 case NFS4_CHECK_STATEID_REPLAY:
7681 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7682 resop)) {
7683 case NFS4_CHKSEQ_OKAY:
7684 /*
7685 * This is replayed stateid; if seqid matches
7686 * next expected, then client is using wrong seqid.
7687 */
7688 /* fall through */
7689 case NFS4_CHKSEQ_BAD:
7690 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7691 break;
7692 case NFS4_CHKSEQ_REPLAY:
7693 /*
7694 * Note this case is the duplicate case so
7695 * resp->status is already set.
7696 */
7697 *cs->statusp = resp->status;
7698 rfs4_update_lease(sp->rs_owner->ro_client);
7699 break;
7700 }
7701 break;
7702 case NFS4_CHECK_STATEID_UNCONFIRMED:
7703 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7704 resop) != NFS4_CHKSEQ_OKAY) {
7705 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7706 break;
7707 }
7708 *cs->statusp = resp->status = NFS4_OK;
7709
7710 next_stateid(&sp->rs_stateid);
7711 resp->open_stateid = sp->rs_stateid.stateid;
7712 sp->rs_owner->ro_need_confirm = FALSE;
7713 rfs4_update_lease(sp->rs_owner->ro_client);
7714 rfs4_update_open_sequence(sp->rs_owner);
7715 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7716 break;
7717 default:
7718 ASSERT(FALSE);
7719 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7720 break;
7721 }
7722 rfs4_sw_exit(&sp->rs_owner->ro_sw);
7723 rfs4_state_rele(sp);
7724
7725 out:
7726 DTRACE_NFSV4_2(op__open__confirm__done, struct compound_state *, cs,
7727 OPEN_CONFIRM4res *, resp);
7728 }
7729
7730 /*ARGSUSED*/
7731 void
7732 rfs4_op_open_downgrade(nfs_argop4 *argop, nfs_resop4 *resop,
7733 struct svc_req *req, struct compound_state *cs)
7734 {
7735 OPEN_DOWNGRADE4args *args = &argop->nfs_argop4_u.opopen_downgrade;
7736 OPEN_DOWNGRADE4res *resp = &resop->nfs_resop4_u.opopen_downgrade;
7737 uint32_t access = args->share_access;
7738 uint32_t deny = args->share_deny;
7739 nfsstat4 status;
7740 rfs4_state_t *sp;
7741 rfs4_file_t *fp;
7742 int fflags = 0;
7743
7744 DTRACE_NFSV4_2(op__open__downgrade__start, struct compound_state *, cs,
7745 OPEN_DOWNGRADE4args *, args);
7746
7747 if (cs->vp == NULL) {
7748 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7749 goto out;
7750 }
7751
7752 if (cs->vp->v_type != VREG) {
7753 *cs->statusp = resp->status = NFS4ERR_INVAL;
7754 return;
7755 }
7756
7757 status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7758 if (status != NFS4_OK) {
7759 *cs->statusp = resp->status = status;
7760 goto out;
7761 }
7762
7763 /* Ensure specified filehandle matches */
7764 if (cs->vp != sp->rs_finfo->rf_vp) {
7765 rfs4_state_rele(sp);
7766 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7767 goto out;
7768 }
7769
7770 /* hold off other access to open_owner while we tinker */
7771 rfs4_sw_enter(&sp->rs_owner->ro_sw);
7772
7773 switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7774 case NFS4_CHECK_STATEID_OKAY:
7775 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7776 resop) != NFS4_CHKSEQ_OKAY) {
7777 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7778 goto end;
7779 }
7780 break;
7781 case NFS4_CHECK_STATEID_OLD:
7782 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7783 goto end;
7784 case NFS4_CHECK_STATEID_BAD:
7785 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7786 goto end;
7787 case NFS4_CHECK_STATEID_EXPIRED:
7788 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7789 goto end;
7790 case NFS4_CHECK_STATEID_CLOSED:
7791 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7792 goto end;
7793 case NFS4_CHECK_STATEID_UNCONFIRMED:
7794 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7795 goto end;
7796 case NFS4_CHECK_STATEID_REPLAY:
7797 /* Check the sequence id for the open owner */
7798 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7799 resop)) {
7800 case NFS4_CHKSEQ_OKAY:
7801 /*
7802 * This is replayed stateid; if seqid matches
7803 * next expected, then client is using wrong seqid.
7804 */
7805 /* fall through */
7806 case NFS4_CHKSEQ_BAD:
7807 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7808 goto end;
7809 case NFS4_CHKSEQ_REPLAY:
7810 /*
7811 * Note this case is the duplicate case so
7812 * resp->status is already set.
7813 */
7814 *cs->statusp = resp->status;
7815 rfs4_update_lease(sp->rs_owner->ro_client);
7816 goto end;
7817 }
7818 break;
7819 default:
7820 ASSERT(FALSE);
7821 break;
7822 }
7823
7824 rfs4_dbe_lock(sp->rs_dbe);
7825 /*
7826 * Check that the new access modes and deny modes are valid.
7827 * Check that no invalid bits are set.
7828 */
7829 if ((access & ~(OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) ||
7830 (deny & ~(OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE))) {
7831 *cs->statusp = resp->status = NFS4ERR_INVAL;
7832 rfs4_update_open_sequence(sp->rs_owner);
7833 rfs4_dbe_unlock(sp->rs_dbe);
7834 goto end;
7835 }
7836
7837 /*
7838 * The new modes must be a subset of the current modes and
7839 * the access must specify at least one mode. To test that
7840 * the new mode is a subset of the current modes we bitwise
7841 * AND them together and check that the result equals the new
7842 * mode. For example:
7843 * New mode, access == R and current mode, sp->rs_open_access == RW
7844 * access & sp->rs_open_access == R == access, so the new access mode
7845 * is valid. Consider access == RW, sp->rs_open_access = R
7846 * access & sp->rs_open_access == R != access, so the new access mode
7847 * is invalid.
7848 */
7849 if ((access & sp->rs_open_access) != access ||
7850 (deny & sp->rs_open_deny) != deny ||
7851 (access &
7852 (OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) == 0) {
7853 *cs->statusp = resp->status = NFS4ERR_INVAL;
7854 rfs4_update_open_sequence(sp->rs_owner);
7855 rfs4_dbe_unlock(sp->rs_dbe);
7856 goto end;
7857 }
7858
7859 /*
7860 * Release any share locks associated with this stateID.
7861 * Strictly speaking, this violates the spec because the
7862 * spec effectively requires that open downgrade be atomic.
7863 * At present, fs_shrlock does not have this capability.
7864 */
7865 (void) rfs4_unshare(sp);
7866
7867 status = rfs4_share(sp, access, deny);
7868 if (status != NFS4_OK) {
7869 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7870 rfs4_update_open_sequence(sp->rs_owner);
7871 rfs4_dbe_unlock(sp->rs_dbe);
7872 goto end;
7873 }
7874
7875 fp = sp->rs_finfo;
7876 rfs4_dbe_lock(fp->rf_dbe);
7877
7878 /*
7879 * If the current mode has deny read and the new mode
7880 * does not, decrement the number of deny read mode bits
7881 * and if it goes to zero turn off the deny read bit
7882 * on the file.
7883 */
7884 if ((sp->rs_open_deny & OPEN4_SHARE_DENY_READ) &&
7885 (deny & OPEN4_SHARE_DENY_READ) == 0) {
7886 fp->rf_deny_read--;
7887 if (fp->rf_deny_read == 0)
7888 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
7889 }
7890
7891 /*
7892 * If the current mode has deny write and the new mode
7893 * does not, decrement the number of deny write mode bits
7894 * and if it goes to zero turn off the deny write bit
7895 * on the file.
7896 */
7897 if ((sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) &&
7898 (deny & OPEN4_SHARE_DENY_WRITE) == 0) {
7899 fp->rf_deny_write--;
7900 if (fp->rf_deny_write == 0)
7901 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
7902 }
7903
7904 /*
7905 * If the current mode has access read and the new mode
7906 * does not, decrement the number of access read mode bits
7907 * and if it goes to zero turn off the access read bit
7908 * on the file. set fflags to FREAD for the call to
7909 * vn_open_downgrade().
7910 */
7911 if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) &&
7912 (access & OPEN4_SHARE_ACCESS_READ) == 0) {
7913 fp->rf_access_read--;
7914 if (fp->rf_access_read == 0)
7915 fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
7916 fflags |= FREAD;
7917 }
7918
7919 /*
7920 * If the current mode has access write and the new mode
7921 * does not, decrement the number of access write mode bits
7922 * and if it goes to zero turn off the access write bit
7923 * on the file. set fflags to FWRITE for the call to
7924 * vn_open_downgrade().
7925 */
7926 if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) &&
7927 (access & OPEN4_SHARE_ACCESS_WRITE) == 0) {
7928 fp->rf_access_write--;
7929 if (fp->rf_access_write == 0)
7930 fp->rf_share_deny &= ~OPEN4_SHARE_ACCESS_WRITE;
7931 fflags |= FWRITE;
7932 }
7933
7934 /* Check that the file is still accessible */
7935 ASSERT(fp->rf_share_access);
7936
7937 rfs4_dbe_unlock(fp->rf_dbe);
7938
7939 /* now set the new open access and deny modes */
7940 sp->rs_open_access = access;
7941 sp->rs_open_deny = deny;
7942
7943 /*
7944 * we successfully downgraded the share lock, now we need to downgrade
7945 * the open. it is possible that the downgrade was only for a deny
7946 * mode and we have nothing else to do.
7947 */
7948 if ((fflags & (FREAD|FWRITE)) != 0)
7949 vn_open_downgrade(cs->vp, fflags);
7950
7951 /* Update the stateid */
7952 next_stateid(&sp->rs_stateid);
7953 resp->open_stateid = sp->rs_stateid.stateid;
7954
7955 rfs4_dbe_unlock(sp->rs_dbe);
7956
7957 *cs->statusp = resp->status = NFS4_OK;
7958 /* Update the lease */
7959 rfs4_update_lease(sp->rs_owner->ro_client);
7960 /* And the sequence */
7961 rfs4_update_open_sequence(sp->rs_owner);
7962 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7963
7964 end:
7965 rfs4_sw_exit(&sp->rs_owner->ro_sw);
7966 rfs4_state_rele(sp);
7967 out:
7968 DTRACE_NFSV4_2(op__open__downgrade__done, struct compound_state *, cs,
7969 OPEN_DOWNGRADE4res *, resp);
7970 }
7971
/*
 * Locate the first occurrence of the NUL-terminated string s2 within the
 * first n bytes of the buffer s1.  The buffer itself does not have to be
 * NUL-terminated.
 *
 * Returns a pointer to the start of the match inside s1, or NULL when s2
 * does not occur within those n bytes.  An empty s2 matches at s1.
 */
static void *
memstr(const void *s1, const char *s2, size_t n)
{
	size_t patlen = strlen(s2);
	char *cur = (char *)s1;
	size_t remain;

	/* Slide the window forward while the pattern can still fit. */
	for (remain = n; remain >= patlen; remain--, cur++) {
		if (bcmp(cur, s2, patlen) == 0)
			return (cur);
	}

	return (NULL);
}
7987
7988 /*
7989 * The logic behind this function is detailed in the NFSv4 RFC in the
7990 * SETCLIENTID operation description under IMPLEMENTATION. Refer to
7991 * that section for explicit guidance to server behavior for
7992 * SETCLIENTID.
7993 */
7994 void
7995 rfs4_op_setclientid(nfs_argop4 *argop, nfs_resop4 *resop,
7996 struct svc_req *req, struct compound_state *cs)
7997 {
7998 SETCLIENTID4args *args = &argop->nfs_argop4_u.opsetclientid;
7999 SETCLIENTID4res *res = &resop->nfs_resop4_u.opsetclientid;
8000 rfs4_client_t *cp, *newcp, *cp_confirmed, *cp_unconfirmed;
8001 rfs4_clntip_t *ci;
8002 bool_t create;
8003 char *addr, *netid;
8004 int len;
8005
8006 DTRACE_NFSV4_2(op__setclientid__start, struct compound_state *, cs,
8007 SETCLIENTID4args *, args);
8008 retry:
8009 newcp = cp_confirmed = cp_unconfirmed = NULL;
8010
8011 /*
8012 * Save the caller's IP address
8013 */
8014 args->client.cl_addr =
8015 (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
8016
8017 /*
8018 * Record if it is a Solaris client that cannot handle referrals.
8019 */
8020 if (memstr(args->client.id_val, "Solaris", args->client.id_len) &&
8021 !memstr(args->client.id_val, "+referrals", args->client.id_len)) {
8022 /* Add a "yes, it's downrev" record */
8023 create = TRUE;
8024 ci = rfs4_find_clntip(args->client.cl_addr, &create);
8025 ASSERT(ci != NULL);
8026 rfs4_dbe_rele(ci->ri_dbe);
8027 } else {
8028 /* Remove any previous record */
8029 rfs4_invalidate_clntip(args->client.cl_addr);
8030 }
8031
8032 /*
8033 * In search of an EXISTING client matching the incoming
8034 * request to establish a new client identifier at the server
8035 */
8036 create = TRUE;
8037 cp = rfs4_findclient(&args->client, &create, NULL);
8038
8039 /* Should never happen */
8040 ASSERT(cp != NULL);
8041
8042 if (cp == NULL) {
8043 *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8044 goto out;
8045 }
8046
8047 /*
8048 * Easiest case. Client identifier is newly created and is
8049 * unconfirmed. Also note that for this case, no other
8050 * entries exist for the client identifier. Nothing else to
8051 * check. Just setup the response and respond.
8052 */
8053 if (create) {
8054 *cs->statusp = res->status = NFS4_OK;
8055 res->SETCLIENTID4res_u.resok4.clientid = cp->rc_clientid;
8056 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8057 cp->rc_confirm_verf;
8058 /* Setup callback information; CB_NULL confirmation later */
8059 rfs4_client_setcb(cp, &args->callback, args->callback_ident);
8060
8061 rfs4_client_rele(cp);
8062 goto out;
8063 }
8064
8065 /*
8066 * An existing, confirmed client may exist but it may not have
8067 * been active for at least one lease period. If so, then
8068 * "close" the client and create a new client identifier
8069 */
8070 if (rfs4_lease_expired(cp)) {
8071 rfs4_client_close(cp);
8072 goto retry;
8073 }
8074
8075 if (cp->rc_need_confirm == TRUE)
8076 cp_unconfirmed = cp;
8077 else
8078 cp_confirmed = cp;
8079
8080 cp = NULL;
8081
8082 /*
8083 * We have a confirmed client, now check for an
8084 * unconfimred entry
8085 */
8086 if (cp_confirmed) {
8087 /* If creds don't match then client identifier is inuse */
8088 if (!creds_ok(cp_confirmed->rc_cr_set, req, cs)) {
8089 rfs4_cbinfo_t *cbp;
8090 /*
8091 * Some one else has established this client
8092 * id. Try and say * who they are. We will use
8093 * the call back address supplied by * the
8094 * first client.
8095 */
8096 *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8097
8098 addr = netid = NULL;
8099
8100 cbp = &cp_confirmed->rc_cbinfo;
8101 if (cbp->cb_callback.cb_location.r_addr &&
8102 cbp->cb_callback.cb_location.r_netid) {
8103 cb_client4 *cbcp = &cbp->cb_callback;
8104
8105 len = strlen(cbcp->cb_location.r_addr)+1;
8106 addr = kmem_alloc(len, KM_SLEEP);
8107 bcopy(cbcp->cb_location.r_addr, addr, len);
8108 len = strlen(cbcp->cb_location.r_netid)+1;
8109 netid = kmem_alloc(len, KM_SLEEP);
8110 bcopy(cbcp->cb_location.r_netid, netid, len);
8111 }
8112
8113 res->SETCLIENTID4res_u.client_using.r_addr = addr;
8114 res->SETCLIENTID4res_u.client_using.r_netid = netid;
8115
8116 rfs4_client_rele(cp_confirmed);
8117 }
8118
8119 /*
8120 * Confirmed, creds match, and verifier matches; must
8121 * be an update of the callback info
8122 */
8123 if (cp_confirmed->rc_nfs_client.verifier ==
8124 args->client.verifier) {
8125 /* Setup callback information */
8126 rfs4_client_setcb(cp_confirmed, &args->callback,
8127 args->callback_ident);
8128
8129 /* everything okay -- move ahead */
8130 *cs->statusp = res->status = NFS4_OK;
8131 res->SETCLIENTID4res_u.resok4.clientid =
8132 cp_confirmed->rc_clientid;
8133
8134 /* update the confirm_verifier and return it */
8135 rfs4_client_scv_next(cp_confirmed);
8136 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8137 cp_confirmed->rc_confirm_verf;
8138
8139 rfs4_client_rele(cp_confirmed);
8140 goto out;
8141 }
8142
8143 /*
8144 * Creds match but the verifier doesn't. Must search
8145 * for an unconfirmed client that would be replaced by
8146 * this request.
8147 */
8148 create = FALSE;
8149 cp_unconfirmed = rfs4_findclient(&args->client, &create,
8150 cp_confirmed);
8151 }
8152
8153 /*
8154 * At this point, we have taken care of the brand new client
8155 * struct, INUSE case, update of an existing, and confirmed
8156 * client struct.
8157 */
8158
8159 /*
8160 * check to see if things have changed while we originally
8161 * picked up the client struct. If they have, then return and
8162 * retry the processing of this SETCLIENTID request.
8163 */
8164 if (cp_unconfirmed) {
8165 rfs4_dbe_lock(cp_unconfirmed->rc_dbe);
8166 if (!cp_unconfirmed->rc_need_confirm) {
8167 rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8168 rfs4_client_rele(cp_unconfirmed);
8169 if (cp_confirmed)
8170 rfs4_client_rele(cp_confirmed);
8171 goto retry;
8172 }
8173 /* do away with the old unconfirmed one */
8174 rfs4_dbe_invalidate(cp_unconfirmed->rc_dbe);
8175 rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8176 rfs4_client_rele(cp_unconfirmed);
8177 cp_unconfirmed = NULL;
8178 }
8179
8180 /*
8181 * This search will temporarily hide the confirmed client
8182 * struct while a new client struct is created as the
8183 * unconfirmed one.
8184 */
8185 create = TRUE;
8186 newcp = rfs4_findclient(&args->client, &create, cp_confirmed);
8187
8188 ASSERT(newcp != NULL);
8189
8190 if (newcp == NULL) {
8191 *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8192 rfs4_client_rele(cp_confirmed);
8193 goto out;
8194 }
8195
8196 /*
8197 * If one was not created, then a similar request must be in
8198 * process so release and start over with this one
8199 */
8200 if (create != TRUE) {
8201 rfs4_client_rele(newcp);
8202 if (cp_confirmed)
8203 rfs4_client_rele(cp_confirmed);
8204 goto retry;
8205 }
8206
8207 *cs->statusp = res->status = NFS4_OK;
8208 res->SETCLIENTID4res_u.resok4.clientid = newcp->rc_clientid;
8209 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8210 newcp->rc_confirm_verf;
8211 /* Setup callback information; CB_NULL confirmation later */
8212 rfs4_client_setcb(newcp, &args->callback, args->callback_ident);
8213
8214 newcp->rc_cp_confirmed = cp_confirmed;
8215
8216 rfs4_client_rele(newcp);
8217
8218 out:
8219 DTRACE_NFSV4_2(op__setclientid__done, struct compound_state *, cs,
8220 SETCLIENTID4res *, res);
8221 }
8222
8223 /*ARGSUSED*/
8224 void
8225 rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
8226 struct svc_req *req, struct compound_state *cs)
8227 {
8228 SETCLIENTID_CONFIRM4args *args =
8229 &argop->nfs_argop4_u.opsetclientid_confirm;
8230 SETCLIENTID_CONFIRM4res *res =
8231 &resop->nfs_resop4_u.opsetclientid_confirm;
8232 rfs4_client_t *cp, *cptoclose = NULL;
8233 nfs4_srv_t *nsrv4;
8234
8235 DTRACE_NFSV4_2(op__setclientid__confirm__start,
8236 struct compound_state *, cs,
8237 SETCLIENTID_CONFIRM4args *, args);
8238
8239 nsrv4 = nfs4_get_srv();
8240 *cs->statusp = res->status = NFS4_OK;
8241
8242 cp = rfs4_findclient_by_id(args->clientid, TRUE);
8243
8244 if (cp == NULL) {
8245 *cs->statusp = res->status =
8246 rfs4_check_clientid(&args->clientid, 1);
8247 goto out;
8248 }
8249
8250 if (!creds_ok(cp, req, cs)) {
8251 *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8252 rfs4_client_rele(cp);
8253 goto out;
8254 }
8255
8256 /* If the verifier doesn't match, the record doesn't match */
8257 if (cp->rc_confirm_verf != args->setclientid_confirm) {
8258 *cs->statusp = res->status = NFS4ERR_STALE_CLIENTID;
8259 rfs4_client_rele(cp);
8260 goto out;
8261 }
8262
8263 rfs4_dbe_lock(cp->rc_dbe);
8264 cp->rc_need_confirm = FALSE;
8265 if (cp->rc_cp_confirmed) {
8266 cptoclose = cp->rc_cp_confirmed;
8267 cptoclose->rc_ss_remove = 1;
8268 cp->rc_cp_confirmed = NULL;
8269 }
8270
8271 /*
8272 * Update the client's associated server instance, if it's changed
8273 * since the client was created.
8274 */
8275 if (rfs4_servinst(cp) != nsrv4->nfs4_cur_servinst)
8276 rfs4_servinst_assign(nsrv4, cp, nsrv4->nfs4_cur_servinst);
8277
8278 /*
8279 * Record clientid in stable storage.
8280 * Must be done after server instance has been assigned.
8281 */
8282 rfs4_ss_clid(nsrv4, cp);
8283
8284 rfs4_dbe_unlock(cp->rc_dbe);
8285
8286 if (cptoclose)
8287 /* don't need to rele, client_close does it */
8288 rfs4_client_close(cptoclose);
8289
8290 /* If needed, initiate CB_NULL call for callback path */
8291 rfs4_deleg_cb_check(cp);
8292 rfs4_update_lease(cp);
8293
8294 /*
8295 * Check to see if client can perform reclaims
8296 */
8297 rfs4_ss_chkclid(nsrv4, cp);
8298
8299 rfs4_client_rele(cp);
8300
8301 out:
8302 DTRACE_NFSV4_2(op__setclientid__confirm__done,
8303 struct compound_state *, cs,
8304 SETCLIENTID_CONFIRM4 *, res);
8305 }
8306
8307
8308 /*ARGSUSED*/
8309 void
8310 rfs4_op_close(nfs_argop4 *argop, nfs_resop4 *resop,
8311 struct svc_req *req, struct compound_state *cs)
8312 {
8313 CLOSE4args *args = &argop->nfs_argop4_u.opclose;
8314 CLOSE4res *resp = &resop->nfs_resop4_u.opclose;
8315 rfs4_state_t *sp;
8316 nfsstat4 status;
8317
8318 DTRACE_NFSV4_2(op__close__start, struct compound_state *, cs,
8319 CLOSE4args *, args);
8320
8321 if (cs->vp == NULL) {
8322 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8323 goto out;
8324 }
8325
8326 status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_INVALID);
8327 if (status != NFS4_OK) {
8328 *cs->statusp = resp->status = status;
8329 goto out;
8330 }
8331
8332 /* Ensure specified filehandle matches */
8333 if (cs->vp != sp->rs_finfo->rf_vp) {
8334 rfs4_state_rele(sp);
8335 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8336 goto out;
8337 }
8338
8339 /* hold off other access to open_owner while we tinker */
8340 rfs4_sw_enter(&sp->rs_owner->ro_sw);
8341
8342 switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
8343 case NFS4_CHECK_STATEID_OKAY:
8344 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8345 resop) != NFS4_CHKSEQ_OKAY) {
8346 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8347 goto end;
8348 }
8349 break;
8350 case NFS4_CHECK_STATEID_OLD:
8351 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8352 goto end;
8353 case NFS4_CHECK_STATEID_BAD:
8354 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8355 goto end;
8356 case NFS4_CHECK_STATEID_EXPIRED:
8357 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8358 goto end;
8359 case NFS4_CHECK_STATEID_CLOSED:
8360 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8361 goto end;
8362 case NFS4_CHECK_STATEID_UNCONFIRMED:
8363 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8364 goto end;
8365 case NFS4_CHECK_STATEID_REPLAY:
8366 /* Check the sequence id for the open owner */
8367 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8368 resop)) {
8369 case NFS4_CHKSEQ_OKAY:
8370 /*
8371 * This is replayed stateid; if seqid matches
8372 * next expected, then client is using wrong seqid.
8373 */
8374 /* FALL THROUGH */
8375 case NFS4_CHKSEQ_BAD:
8376 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8377 goto end;
8378 case NFS4_CHKSEQ_REPLAY:
8379 /*
8380 * Note this case is the duplicate case so
8381 * resp->status is already set.
8382 */
8383 *cs->statusp = resp->status;
8384 rfs4_update_lease(sp->rs_owner->ro_client);
8385 goto end;
8386 }
8387 break;
8388 default:
8389 ASSERT(FALSE);
8390 break;
8391 }
8392
8393 rfs4_dbe_lock(sp->rs_dbe);
8394
8395 /* Update the stateid. */
8396 next_stateid(&sp->rs_stateid);
8397 resp->open_stateid = sp->rs_stateid.stateid;
8398
8399 rfs4_dbe_unlock(sp->rs_dbe);
8400
8401 rfs4_update_lease(sp->rs_owner->ro_client);
8402 rfs4_update_open_sequence(sp->rs_owner);
8403 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8404
8405 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
8406
8407 *cs->statusp = resp->status = status;
8408
8409 end:
8410 rfs4_sw_exit(&sp->rs_owner->ro_sw);
8411 rfs4_state_rele(sp);
8412 out:
8413 DTRACE_NFSV4_2(op__close__done, struct compound_state *, cs,
8414 CLOSE4res *, resp);
8415 }
8416
8417 /*
8418 * Manage the counts on the file struct and close all file locks
8419 */
8420 /*ARGSUSED*/
8421 void
8422 rfs4_release_share_lock_state(rfs4_state_t *sp, cred_t *cr,
8423 bool_t close_of_client)
8424 {
8425 rfs4_file_t *fp = sp->rs_finfo;
8426 rfs4_lo_state_t *lsp;
8427 int fflags = 0;
8428
8429 /*
8430 * If this call is part of the larger closing down of client
8431 * state then it is just easier to release all locks
8432 * associated with this client instead of going through each
8433 * individual file and cleaning locks there.
8434 */
8435 if (close_of_client) {
8436 if (sp->rs_owner->ro_client->rc_unlksys_completed == FALSE &&
8437 !list_is_empty(&sp->rs_lostatelist) &&
8438 sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID) {
8439 /* Is the PxFS kernel module loaded? */
8440 if (lm_remove_file_locks != NULL) {
8441 int new_sysid;
8442
8443 /* Encode the cluster nodeid in new sysid */
8444 new_sysid = sp->rs_owner->ro_client->rc_sysidt;
8445 lm_set_nlmid_flk(&new_sysid);
8446
8447 /*
8448 * This PxFS routine removes file locks for a
8449 * client over all nodes of a cluster.
8450 */
8451 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
8452 "lm_remove_file_locks(sysid=0x%x)\n",
8453 new_sysid));
8454 (*lm_remove_file_locks)(new_sysid);
8455 } else {
8456 struct flock64 flk;
8457
8458 /* Release all locks for this client */
8459 flk.l_type = F_UNLKSYS;
8460 flk.l_whence = 0;
8461 flk.l_start = 0;
8462 flk.l_len = 0;
8463 flk.l_sysid =
8464 sp->rs_owner->ro_client->rc_sysidt;
8465 flk.l_pid = 0;
8466 (void) VOP_FRLOCK(sp->rs_finfo->rf_vp, F_SETLK,
8467 &flk, F_REMOTELOCK | FREAD | FWRITE,
8468 (u_offset_t)0, NULL, CRED(), NULL);
8469 }
8470
8471 sp->rs_owner->ro_client->rc_unlksys_completed = TRUE;
8472 }
8473 }
8474
8475 /*
8476 * Release all locks on this file by this lock owner or at
8477 * least mark the locks as having been released
8478 */
8479 for (lsp = list_head(&sp->rs_lostatelist); lsp != NULL;
8480 lsp = list_next(&sp->rs_lostatelist, lsp)) {
8481 lsp->rls_locks_cleaned = TRUE;
8482
8483 /* Was this already taken care of above? */
8484 if (!close_of_client &&
8485 sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8486 (void) cleanlocks(sp->rs_finfo->rf_vp,
8487 lsp->rls_locker->rl_pid,
8488 lsp->rls_locker->rl_client->rc_sysidt);
8489 }
8490
8491 /*
8492 * Release any shrlocks associated with this open state ID.
8493 * This must be done before the rfs4_state gets marked closed.
8494 */
8495 if (sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8496 (void) rfs4_unshare(sp);
8497
8498 if (sp->rs_open_access) {
8499 rfs4_dbe_lock(fp->rf_dbe);
8500
8501 /*
8502 * Decrement the count for each access and deny bit that this
8503 * state has contributed to the file.
8504 * If the file counts go to zero
8505 * clear the appropriate bit in the appropriate mask.
8506 */
8507 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) {
8508 fp->rf_access_read--;
8509 fflags |= FREAD;
8510 if (fp->rf_access_read == 0)
8511 fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
8512 }
8513 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) {
8514 fp->rf_access_write--;
8515 fflags |= FWRITE;
8516 if (fp->rf_access_write == 0)
8517 fp->rf_share_access &=
8518 ~OPEN4_SHARE_ACCESS_WRITE;
8519 }
8520 if (sp->rs_open_deny & OPEN4_SHARE_DENY_READ) {
8521 fp->rf_deny_read--;
8522 if (fp->rf_deny_read == 0)
8523 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
8524 }
8525 if (sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) {
8526 fp->rf_deny_write--;
8527 if (fp->rf_deny_write == 0)
8528 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
8529 }
8530
8531 (void) VOP_CLOSE(fp->rf_vp, fflags, 1, (offset_t)0, cr, NULL);
8532
8533 rfs4_dbe_unlock(fp->rf_dbe);
8534
8535 sp->rs_open_access = 0;
8536 sp->rs_open_deny = 0;
8537 }
8538 }
8539
8540 /*
8541 * lock_denied: Fill in a LOCK4deneid structure given an flock64 structure.
8542 */
8543 static nfsstat4
8544 lock_denied(LOCK4denied *dp, struct flock64 *flk)
8545 {
8546 rfs4_lockowner_t *lo;
8547 rfs4_client_t *cp;
8548 uint32_t len;
8549
8550 lo = rfs4_findlockowner_by_pid(flk->l_pid);
8551 if (lo != NULL) {
8552 cp = lo->rl_client;
8553 if (rfs4_lease_expired(cp)) {
8554 rfs4_lockowner_rele(lo);
8555 rfs4_dbe_hold(cp->rc_dbe);
8556 rfs4_client_close(cp);
8557 return (NFS4ERR_EXPIRED);
8558 }
8559 dp->owner.clientid = lo->rl_owner.clientid;
8560 len = lo->rl_owner.owner_len;
8561 dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8562 bcopy(lo->rl_owner.owner_val, dp->owner.owner_val, len);
8563 dp->owner.owner_len = len;
8564 rfs4_lockowner_rele(lo);
8565 goto finish;
8566 }
8567
8568 /*
8569 * Its not a NFS4 lock. We take advantage that the upper 32 bits
8570 * of the client id contain the boot time for a NFS4 lock. So we
8571 * fabricate and identity by setting clientid to the sysid, and
8572 * the lock owner to the pid.
8573 */
8574 dp->owner.clientid = flk->l_sysid;
8575 len = sizeof (pid_t);
8576 dp->owner.owner_len = len;
8577 dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8578 bcopy(&flk->l_pid, dp->owner.owner_val, len);
8579 finish:
8580 dp->offset = flk->l_start;
8581 dp->length = flk->l_len;
8582
8583 if (flk->l_type == F_RDLCK)
8584 dp->locktype = READ_LT;
8585 else if (flk->l_type == F_WRLCK)
8586 dp->locktype = WRITE_LT;
8587 else
8588 return (NFS4ERR_INVAL); /* no mapping from POSIX ltype to v4 */
8589
8590 return (NFS4_OK);
8591 }
8592
8593 /*
8594 * The NFSv4.0 LOCK operation does not support the blocking lock (at the
8595 * NFSv4.0 protocol level) so the client needs to resend the LOCK request in a
8596 * case the lock is denied by the NFSv4.0 server. NFSv4.0 clients are prepared
8597 * for that (obviously); they are sending the LOCK requests with some delays
8598 * between the attempts. See nfs4frlock() and nfs4_block_and_wait() for the
8599 * locking and delay implementation at the client side.
8600 *
8601 * To make the life of the clients easier, the NFSv4.0 server tries to do some
8602 * fast retries on its own (the for loop below) in a hope the lock will be
8603 * available soon. And if not, the client won't need to resend the LOCK
8604 * requests so fast to check the lock availability. This basically saves some
8605 * network traffic and tries to make sure the client gets the lock ASAP.
8606 */
8607 static int
8608 setlock(vnode_t *vp, struct flock64 *flock, int flag, cred_t *cred)
8609 {
8610 int error;
8611 struct flock64 flk;
8612 int i;
8613 clock_t delaytime;
8614 int cmd;
8615 int spin_cnt = 0;
8616
8617 cmd = nbl_need_check(vp) ? F_SETLK_NBMAND : F_SETLK;
8618 retry:
8619 delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
8620
8621 for (i = 0; i < rfs4_maxlock_tries; i++) {
8622 LOCK_PRINT(rfs4_debug, "setlock", cmd, flock);
8623 error = VOP_FRLOCK(vp, cmd,
8624 flock, flag, (u_offset_t)0, NULL, cred, NULL);
8625
8626 if (error != EAGAIN && error != EACCES)
8627 break;
8628
8629 if (i < rfs4_maxlock_tries - 1) {
8630 delay(delaytime);
8631 delaytime *= 2;
8632 }
8633 }
8634
8635 if (error == EAGAIN || error == EACCES) {
8636 /* Get the owner of the lock */
8637 flk = *flock;
8638 LOCK_PRINT(rfs4_debug, "setlock", F_GETLK, &flk);
8639 if (VOP_FRLOCK(vp, F_GETLK, &flk, flag, 0, NULL, cred,
8640 NULL) == 0) {
8641 /*
8642 * There's a race inherent in the current VOP_FRLOCK
8643 * design where:
8644 * a: "other guy" takes a lock that conflicts with a
8645 * lock we want
8646 * b: we attempt to take our lock (non-blocking) and
8647 * the attempt fails.
8648 * c: "other guy" releases the conflicting lock
8649 * d: we ask what lock conflicts with the lock we want,
8650 * getting F_UNLCK (no lock blocks us)
8651 *
8652 * If we retry the non-blocking lock attempt in this
8653 * case (restart at step 'b') there's some possibility
8654 * that many such attempts might fail. However a test
8655 * designed to actually provoke this race shows that
8656 * the vast majority of cases require no retry, and
8657 * only a few took as many as three retries. Here's
8658 * the test outcome:
8659 *
8660 * number of retries how many times we needed
8661 * that many retries
8662 * 0 79461
8663 * 1 862
8664 * 2 49
8665 * 3 5
8666 *
8667 * Given those empirical results, we arbitrarily limit
8668 * the retry count to ten.
8669 *
8670 * If we actually make to ten retries and give up,
8671 * nothing catastrophic happens, but we're unable to
8672 * return the information about the conflicting lock to
8673 * the NFS client. That's an acceptable trade off vs.
8674 * letting this retry loop run forever.
8675 */
8676 if (flk.l_type == F_UNLCK) {
8677 if (spin_cnt++ < 10) {
8678 /* No longer locked, retry */
8679 goto retry;
8680 }
8681 } else {
8682 *flock = flk;
8683 LOCK_PRINT(rfs4_debug, "setlock(blocking lock)",
8684 F_GETLK, &flk);
8685 }
8686 }
8687 }
8688
8689 return (error);
8690 }
8691
8692 /*ARGSUSED*/
8693 static nfsstat4
8694 rfs4_do_lock(rfs4_lo_state_t *lsp, nfs_lock_type4 locktype,
8695 offset4 offset, length4 length, cred_t *cred, nfs_resop4 *resop)
8696 {
8697 nfsstat4 status;
8698 rfs4_lockowner_t *lo = lsp->rls_locker;
8699 rfs4_state_t *sp = lsp->rls_state;
8700 struct flock64 flock;
8701 int16_t ltype;
8702 int flag;
8703 int error;
8704 sysid_t sysid;
8705 LOCK4res *lres;
8706 vnode_t *vp;
8707
8708 if (rfs4_lease_expired(lo->rl_client)) {
8709 return (NFS4ERR_EXPIRED);
8710 }
8711
8712 if ((status = rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
8713 return (status);
8714
8715 /* Check for zero length. To lock to end of file use all ones for V4 */
8716 if (length == 0)
8717 return (NFS4ERR_INVAL);
8718 else if (length == (length4)(~0))
8719 length = 0; /* Posix to end of file */
8720
8721 retry:
8722 rfs4_dbe_lock(sp->rs_dbe);
8723 if (sp->rs_closed == TRUE) {
8724 rfs4_dbe_unlock(sp->rs_dbe);
8725 return (NFS4ERR_OLD_STATEID);
8726 }
8727
8728 if (resop->resop != OP_LOCKU) {
8729 switch (locktype) {
8730 case READ_LT:
8731 case READW_LT:
8732 if ((sp->rs_share_access
8733 & OPEN4_SHARE_ACCESS_READ) == 0) {
8734 rfs4_dbe_unlock(sp->rs_dbe);
8735
8736 return (NFS4ERR_OPENMODE);
8737 }
8738 ltype = F_RDLCK;
8739 break;
8740 case WRITE_LT:
8741 case WRITEW_LT:
8742 if ((sp->rs_share_access
8743 & OPEN4_SHARE_ACCESS_WRITE) == 0) {
8744 rfs4_dbe_unlock(sp->rs_dbe);
8745
8746 return (NFS4ERR_OPENMODE);
8747 }
8748 ltype = F_WRLCK;
8749 break;
8750 }
8751 } else
8752 ltype = F_UNLCK;
8753
8754 flock.l_type = ltype;
8755 flock.l_whence = 0; /* SEEK_SET */
8756 flock.l_start = offset;
8757 flock.l_len = length;
8758 flock.l_sysid = sysid;
8759 flock.l_pid = lsp->rls_locker->rl_pid;
8760
8761 /* Note that length4 is uint64_t but l_len and l_start are off64_t */
8762 if (flock.l_len < 0 || flock.l_start < 0) {
8763 rfs4_dbe_unlock(sp->rs_dbe);
8764 return (NFS4ERR_INVAL);
8765 }
8766
8767 /*
8768 * N.B. FREAD has the same value as OPEN4_SHARE_ACCESS_READ and
8769 * FWRITE has the same value as OPEN4_SHARE_ACCESS_WRITE.
8770 */
8771 flag = (int)sp->rs_share_access | F_REMOTELOCK;
8772
8773 vp = sp->rs_finfo->rf_vp;
8774 VN_HOLD(vp);
8775
8776 /*
8777 * We need to unlock sp before we call the underlying filesystem to
8778 * acquire the file lock.
8779 */
8780 rfs4_dbe_unlock(sp->rs_dbe);
8781
8782 error = setlock(vp, &flock, flag, cred);
8783
8784 /*
8785 * Make sure the file is still open. In a case the file was closed in
8786 * the meantime, clean the lock we acquired using the setlock() call
8787 * above, and return the appropriate error.
8788 */
8789 rfs4_dbe_lock(sp->rs_dbe);
8790 if (sp->rs_closed == TRUE) {
8791 cleanlocks(vp, lsp->rls_locker->rl_pid, sysid);
8792 rfs4_dbe_unlock(sp->rs_dbe);
8793
8794 VN_RELE(vp);
8795
8796 return (NFS4ERR_OLD_STATEID);
8797 }
8798 rfs4_dbe_unlock(sp->rs_dbe);
8799
8800 VN_RELE(vp);
8801
8802 if (error == 0) {
8803 rfs4_dbe_lock(lsp->rls_dbe);
8804 next_stateid(&lsp->rls_lockid);
8805 rfs4_dbe_unlock(lsp->rls_dbe);
8806 }
8807
8808 /*
8809 * N.B. We map error values to nfsv4 errors. This is differrent
8810 * than puterrno4 routine.
8811 */
8812 switch (error) {
8813 case 0:
8814 status = NFS4_OK;
8815 break;
8816 case EAGAIN:
8817 case EACCES: /* Old value */
8818 /* Can only get here if op is OP_LOCK */
8819 ASSERT(resop->resop == OP_LOCK);
8820 lres = &resop->nfs_resop4_u.oplock;
8821 status = NFS4ERR_DENIED;
8822 if (lock_denied(&lres->LOCK4res_u.denied, &flock)
8823 == NFS4ERR_EXPIRED)
8824 goto retry;
8825 break;
8826 case ENOLCK:
8827 status = NFS4ERR_DELAY;
8828 break;
8829 case EOVERFLOW:
8830 status = NFS4ERR_INVAL;
8831 break;
8832 case EINVAL:
8833 status = NFS4ERR_NOTSUPP;
8834 break;
8835 default:
8836 status = NFS4ERR_SERVERFAULT;
8837 break;
8838 }
8839
8840 return (status);
8841 }
8842
8843 /*ARGSUSED*/
8844 void
8845 rfs4_op_lock(nfs_argop4 *argop, nfs_resop4 *resop,
8846 struct svc_req *req, struct compound_state *cs)
8847 {
8848 LOCK4args *args = &argop->nfs_argop4_u.oplock;
8849 LOCK4res *resp = &resop->nfs_resop4_u.oplock;
8850 nfsstat4 status;
8851 stateid4 *stateid;
8852 rfs4_lockowner_t *lo;
8853 rfs4_client_t *cp;
8854 rfs4_state_t *sp = NULL;
8855 rfs4_lo_state_t *lsp = NULL;
8856 bool_t ls_sw_held = FALSE;
8857 bool_t create = TRUE;
8858 bool_t lcreate = TRUE;
8859 bool_t dup_lock = FALSE;
8860 int rc;
8861
8862 DTRACE_NFSV4_2(op__lock__start, struct compound_state *, cs,
8863 LOCK4args *, args);
8864
8865 if (cs->vp == NULL) {
8866 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8867 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8868 cs, LOCK4res *, resp);
8869 return;
8870 }
8871
8872 if (args->locker.new_lock_owner) {
8873 /* Create a new lockowner for this instance */
8874 open_to_lock_owner4 *olo = &args->locker.locker4_u.open_owner;
8875
8876 NFS4_DEBUG(rfs4_debug, (CE_NOTE, "Creating new lock owner"));
8877
8878 stateid = &olo->open_stateid;
8879 status = rfs4_get_state(stateid, &sp, RFS4_DBS_VALID);
8880 if (status != NFS4_OK) {
8881 NFS4_DEBUG(rfs4_debug,
8882 (CE_NOTE, "Get state failed in lock %d", status));
8883 *cs->statusp = resp->status = status;
8884 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8885 cs, LOCK4res *, resp);
8886 return;
8887 }
8888
8889 /* Ensure specified filehandle matches */
8890 if (cs->vp != sp->rs_finfo->rf_vp) {
8891 rfs4_state_rele(sp);
8892 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8893 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8894 cs, LOCK4res *, resp);
8895 return;
8896 }
8897
8898 /* hold off other access to open_owner while we tinker */
8899 rfs4_sw_enter(&sp->rs_owner->ro_sw);
8900
8901 switch (rc = rfs4_check_stateid_seqid(sp, stateid)) {
8902 case NFS4_CHECK_STATEID_OLD:
8903 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8904 goto end;
8905 case NFS4_CHECK_STATEID_BAD:
8906 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8907 goto end;
8908 case NFS4_CHECK_STATEID_EXPIRED:
8909 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8910 goto end;
8911 case NFS4_CHECK_STATEID_UNCONFIRMED:
8912 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8913 goto end;
8914 case NFS4_CHECK_STATEID_CLOSED:
8915 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8916 goto end;
8917 case NFS4_CHECK_STATEID_OKAY:
8918 case NFS4_CHECK_STATEID_REPLAY:
8919 switch (rfs4_check_olo_seqid(olo->open_seqid,
8920 sp->rs_owner, resop)) {
8921 case NFS4_CHKSEQ_OKAY:
8922 if (rc == NFS4_CHECK_STATEID_OKAY)
8923 break;
8924 /*
8925 * This is replayed stateid; if seqid
8926 * matches next expected, then client
8927 * is using wrong seqid.
8928 */
8929 /* FALLTHROUGH */
8930 case NFS4_CHKSEQ_BAD:
8931 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8932 goto end;
8933 case NFS4_CHKSEQ_REPLAY:
8934 /* This is a duplicate LOCK request */
8935 dup_lock = TRUE;
8936
8937 /*
8938 * For a duplicate we do not want to
8939 * create a new lockowner as it should
8940 * already exist.
8941 * Turn off the lockowner create flag.
8942 */
8943 lcreate = FALSE;
8944 }
8945 break;
8946 }
8947
8948 lo = rfs4_findlockowner(&olo->lock_owner, &lcreate);
8949 if (lo == NULL) {
8950 NFS4_DEBUG(rfs4_debug,
8951 (CE_NOTE, "rfs4_op_lock: no lock owner"));
8952 *cs->statusp = resp->status = NFS4ERR_RESOURCE;
8953 goto end;
8954 }
8955
8956 lsp = rfs4_findlo_state_by_owner(lo, sp, &create);
8957 if (lsp == NULL) {
8958 rfs4_update_lease(sp->rs_owner->ro_client);
8959 /*
8960 * Only update theh open_seqid if this is not
8961 * a duplicate request
8962 */
8963 if (dup_lock == FALSE) {
8964 rfs4_update_open_sequence(sp->rs_owner);
8965 }
8966
8967 NFS4_DEBUG(rfs4_debug,
8968 (CE_NOTE, "rfs4_op_lock: no state"));
8969 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
8970 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8971 rfs4_lockowner_rele(lo);
8972 goto end;
8973 }
8974
8975 /*
8976 * This is the new_lock_owner branch and the client is
8977 * supposed to be associating a new lock_owner with
8978 * the open file at this point. If we find that a
8979 * lock_owner/state association already exists and a
8980 * successful LOCK request was returned to the client,
8981 * an error is returned to the client since this is
8982 * not appropriate. The client should be using the
8983 * existing lock_owner branch.
8984 */
8985 if (dup_lock == FALSE && create == FALSE) {
8986 if (lsp->rls_lock_completed == TRUE) {
8987 *cs->statusp =
8988 resp->status = NFS4ERR_BAD_SEQID;
8989 rfs4_lockowner_rele(lo);
8990 goto end;
8991 }
8992 }
8993
8994 rfs4_update_lease(sp->rs_owner->ro_client);
8995
8996 /*
8997 * Only update theh open_seqid if this is not
8998 * a duplicate request
8999 */
9000 if (dup_lock == FALSE) {
9001 rfs4_update_open_sequence(sp->rs_owner);
9002 }
9003
9004 /*
9005 * If this is a duplicate lock request, just copy the
9006 * previously saved reply and return.
9007 */
9008 if (dup_lock == TRUE) {
9009 /* verify that lock_seqid's match */
9010 if (lsp->rls_seqid != olo->lock_seqid) {
9011 NFS4_DEBUG(rfs4_debug,
9012 (CE_NOTE, "rfs4_op_lock: Dup-Lock seqid bad"
9013 "lsp->seqid=%d old->seqid=%d",
9014 lsp->rls_seqid, olo->lock_seqid));
9015 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9016 } else {
9017 rfs4_copy_reply(resop, &lsp->rls_reply);
9018 /*
9019 * Make sure to copy the just
9020 * retrieved reply status into the
9021 * overall compound status
9022 */
9023 *cs->statusp = resp->status;
9024 }
9025 rfs4_lockowner_rele(lo);
9026 goto end;
9027 }
9028
9029 rfs4_dbe_lock(lsp->rls_dbe);
9030
9031 /* Make sure to update the lock sequence id */
9032 lsp->rls_seqid = olo->lock_seqid;
9033
9034 NFS4_DEBUG(rfs4_debug,
9035 (CE_NOTE, "Lock seqid established as %d", lsp->rls_seqid));
9036
9037 /*
9038 * This is used to signify the newly created lockowner
9039 * stateid and its sequence number. The checks for
9040 * sequence number and increment don't occur on the
9041 * very first lock request for a lockowner.
9042 */
9043 lsp->rls_skip_seqid_check = TRUE;
9044
9045 /* hold off other access to lsp while we tinker */
9046 rfs4_sw_enter(&lsp->rls_sw);
9047 ls_sw_held = TRUE;
9048
9049 rfs4_dbe_unlock(lsp->rls_dbe);
9050
9051 rfs4_lockowner_rele(lo);
9052 } else {
9053 stateid = &args->locker.locker4_u.lock_owner.lock_stateid;
9054 /* get lsp and hold the lock on the underlying file struct */
9055 if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE))
9056 != NFS4_OK) {
9057 *cs->statusp = resp->status = status;
9058 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9059 cs, LOCK4res *, resp);
9060 return;
9061 }
9062 create = FALSE; /* We didn't create lsp */
9063
9064 /* Ensure specified filehandle matches */
9065 if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
9066 rfs4_lo_state_rele(lsp, TRUE);
9067 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9068 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9069 cs, LOCK4res *, resp);
9070 return;
9071 }
9072
9073 /* hold off other access to lsp while we tinker */
9074 rfs4_sw_enter(&lsp->rls_sw);
9075 ls_sw_held = TRUE;
9076
9077 switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
9078 /*
9079 * The stateid looks like it was okay (expected to be
9080 * the next one)
9081 */
9082 case NFS4_CHECK_STATEID_OKAY:
9083 /*
9084 * The sequence id is now checked. Determine
9085 * if this is a replay or if it is in the
9086 * expected (next) sequence. In the case of a
9087 * replay, there are two replay conditions
9088 * that may occur. The first is the normal
9089 * condition where a LOCK is done with a
9090 * NFS4_OK response and the stateid is
9091 * updated. That case is handled below when
9092 * the stateid is identified as a REPLAY. The
9093 * second is the case where an error is
9094 * returned, like NFS4ERR_DENIED, and the
9095 * sequence number is updated but the stateid
9096 * is not updated. This second case is dealt
9097 * with here. So it may seem odd that the
9098 * stateid is okay but the sequence id is a
9099 * replay but it is okay.
9100 */
9101 switch (rfs4_check_lock_seqid(
9102 args->locker.locker4_u.lock_owner.lock_seqid,
9103 lsp, resop)) {
9104 case NFS4_CHKSEQ_REPLAY:
9105 if (resp->status != NFS4_OK) {
9106 /*
9107 * Here is our replay and need
9108 * to verify that the last
9109 * response was an error.
9110 */
9111 *cs->statusp = resp->status;
9112 goto end;
9113 }
9114 /*
9115 * This is done since the sequence id
9116 * looked like a replay but it didn't
9117 * pass our check so a BAD_SEQID is
9118 * returned as a result.
9119 */
9120 /*FALLTHROUGH*/
9121 case NFS4_CHKSEQ_BAD:
9122 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9123 goto end;
9124 case NFS4_CHKSEQ_OKAY:
9125 /* Everything looks okay move ahead */
9126 break;
9127 }
9128 break;
9129 case NFS4_CHECK_STATEID_OLD:
9130 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9131 goto end;
9132 case NFS4_CHECK_STATEID_BAD:
9133 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9134 goto end;
9135 case NFS4_CHECK_STATEID_EXPIRED:
9136 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9137 goto end;
9138 case NFS4_CHECK_STATEID_CLOSED:
9139 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9140 goto end;
9141 case NFS4_CHECK_STATEID_REPLAY:
9142 switch (rfs4_check_lock_seqid(
9143 args->locker.locker4_u.lock_owner.lock_seqid,
9144 lsp, resop)) {
9145 case NFS4_CHKSEQ_OKAY:
9146 /*
9147 * This is a replayed stateid; if
9148 * seqid matches the next expected,
9149 * then client is using wrong seqid.
9150 */
9151 case NFS4_CHKSEQ_BAD:
9152 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9153 goto end;
9154 case NFS4_CHKSEQ_REPLAY:
9155 rfs4_update_lease(lsp->rls_locker->rl_client);
9156 *cs->statusp = status = resp->status;
9157 goto end;
9158 }
9159 break;
9160 default:
9161 ASSERT(FALSE);
9162 break;
9163 }
9164
9165 rfs4_update_lock_sequence(lsp);
9166 rfs4_update_lease(lsp->rls_locker->rl_client);
9167 }
9168
9169 /*
9170 * NFS4 only allows locking on regular files, so
9171 * verify type of object.
9172 */
9173 if (cs->vp->v_type != VREG) {
9174 if (cs->vp->v_type == VDIR)
9175 status = NFS4ERR_ISDIR;
9176 else
9177 status = NFS4ERR_INVAL;
9178 goto out;
9179 }
9180
9181 cp = lsp->rls_state->rs_owner->ro_client;
9182
9183 if (rfs4_clnt_in_grace(cp) && !args->reclaim) {
9184 status = NFS4ERR_GRACE;
9185 goto out;
9186 }
9187
9188 if (rfs4_clnt_in_grace(cp) && args->reclaim && !cp->rc_can_reclaim) {
9189 status = NFS4ERR_NO_GRACE;
9190 goto out;
9191 }
9192
9193 if (!rfs4_clnt_in_grace(cp) && args->reclaim) {
9194 status = NFS4ERR_NO_GRACE;
9195 goto out;
9196 }
9197
9198 if (lsp->rls_state->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE)
9199 cs->deleg = TRUE;
9200
9201 status = rfs4_do_lock(lsp, args->locktype,
9202 args->offset, args->length, cs->cr, resop);
9203
9204 out:
9205 lsp->rls_skip_seqid_check = FALSE;
9206
9207 *cs->statusp = resp->status = status;
9208
9209 if (status == NFS4_OK) {
9210 resp->LOCK4res_u.lock_stateid = lsp->rls_lockid.stateid;
9211 lsp->rls_lock_completed = TRUE;
9212 }
9213 /*
9214 * Only update the "OPEN" response here if this was a new
9215 * lock_owner
9216 */
9217 if (sp)
9218 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
9219
9220 rfs4_update_lock_resp(lsp, resop);
9221
9222 end:
9223 if (lsp) {
9224 if (ls_sw_held)
9225 rfs4_sw_exit(&lsp->rls_sw);
9226 /*
9227 * If an sp obtained, then the lsp does not represent
9228 * a lock on the file struct.
9229 */
9230 if (sp != NULL)
9231 rfs4_lo_state_rele(lsp, FALSE);
9232 else
9233 rfs4_lo_state_rele(lsp, TRUE);
9234 }
9235 if (sp) {
9236 rfs4_sw_exit(&sp->rs_owner->ro_sw);
9237 rfs4_state_rele(sp);
9238 }
9239
9240 DTRACE_NFSV4_2(op__lock__done, struct compound_state *, cs,
9241 LOCK4res *, resp);
9242 }
9243
9244 /* free function for LOCK/LOCKT */
9245 static void
9246 lock_denied_free(nfs_resop4 *resop)
9247 {
9248 LOCK4denied *dp = NULL;
9249
9250 switch (resop->resop) {
9251 case OP_LOCK:
9252 if (resop->nfs_resop4_u.oplock.status == NFS4ERR_DENIED)
9253 dp = &resop->nfs_resop4_u.oplock.LOCK4res_u.denied;
9254 break;
9255 case OP_LOCKT:
9256 if (resop->nfs_resop4_u.oplockt.status == NFS4ERR_DENIED)
9257 dp = &resop->nfs_resop4_u.oplockt.denied;
9258 break;
9259 default:
9260 break;
9261 }
9262
9263 if (dp)
9264 kmem_free(dp->owner.owner_val, dp->owner.owner_len);
9265 }
9266
/*
 * LOCKU operation handler.
 *
 * Validates the current filehandle and the lock stateid/seqid carried in
 * the request and then hands the request to rfs4_do_lock().  The outcome
 * is stored in the per-op result (resp->status) and mirrored into the
 * compound status (*cs->statusp).
 *
 * Exit paths:
 *   - early returns, taken before lsp->rls_sw is entered, for a missing
 *     filehandle, a failed lock-state lookup, or a filehandle that does
 *     not match the file recorded in the lock state;
 *   - "end", for stateid/seqid failures and replays: the status has
 *     already been set, so only rls_sw is exited and lsp is released;
 *   - "out", once the sequence checks have passed: the status and (on
 *     NFS4_OK) the lock stateid are stored in the result, the reply is
 *     recorded with rfs4_update_lock_resp(), and control falls into "end".
 */
/*ARGSUSED*/
void
rfs4_op_locku(nfs_argop4 *argop, nfs_resop4 *resop,
    struct svc_req *req, struct compound_state *cs)
{
	LOCKU4args *args = &argop->nfs_argop4_u.oplocku;
	LOCKU4res *resp = &resop->nfs_resop4_u.oplocku;
	nfsstat4 status;
	stateid4 *stateid = &args->lock_stateid;
	rfs4_lo_state_t *lsp;

	DTRACE_NFSV4_2(op__locku__start, struct compound_state *, cs,
	    LOCKU4args *, args);

	/* LOCKU operates on the current filehandle; one must be set */
	if (cs->vp == NULL) {
		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
		DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
		    LOCKU4res *, resp);
		return;
	}

	/* look up the lock state named by the stateid */
	if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE)) != NFS4_OK) {
		*cs->statusp = resp->status = status;
		DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
		    LOCKU4res *, resp);
		return;
	}

	/* Ensure specified filehandle matches */
	if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
		rfs4_lo_state_rele(lsp, TRUE);
		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
		DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
		    LOCKU4res *, resp);
		return;
	}

	/* hold off other access to lsp while we tinker */
	rfs4_sw_enter(&lsp->rls_sw);

	switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
	case NFS4_CHECK_STATEID_OKAY:
		/* a current stateid must be paired with the expected seqid */
		if (rfs4_check_lock_seqid(args->seqid, lsp, resop)
		    != NFS4_CHKSEQ_OKAY) {
			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
			goto end;
		}
		break;
	case NFS4_CHECK_STATEID_OLD:
		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
		goto end;
	case NFS4_CHECK_STATEID_BAD:
		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
		goto end;
	case NFS4_CHECK_STATEID_EXPIRED:
		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
		goto end;
	case NFS4_CHECK_STATEID_CLOSED:
		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
		goto end;
	case NFS4_CHECK_STATEID_REPLAY:
		switch (rfs4_check_lock_seqid(args->seqid, lsp, resop)) {
		case NFS4_CHKSEQ_OKAY:
				/*
				 * This is a replayed stateid; if
				 * seqid matches the next expected,
				 * then client is using wrong seqid.
				 */
		case NFS4_CHKSEQ_BAD:
			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
			goto end;
		case NFS4_CHKSEQ_REPLAY:
			/*
			 * Replayed request: resp->status is returned as it
			 * stands.  Presumably rfs4_check_lock_seqid() has
			 * already placed the saved reply in resop -- confirm
			 * against its definition.
			 */
			rfs4_update_lease(lsp->rls_locker->rl_client);
			*cs->statusp = status = resp->status;
			goto end;
		}
		break;
	default:
		ASSERT(FALSE);
		break;
	}

	/* sequence checks passed: advance the lock seqid and renew the lease */
	rfs4_update_lock_sequence(lsp);
	rfs4_update_lease(lsp->rls_locker->rl_client);

	/*
	 * NFS4 only allows locking on regular files, so
	 * verify type of object.
	 */
	if (cs->vp->v_type != VREG) {
		if (cs->vp->v_type == VDIR)
			status = NFS4ERR_ISDIR;
		else
			status = NFS4ERR_INVAL;
		goto out;
	}

	/* refuse the request while the owning client is in its grace period */
	if (rfs4_clnt_in_grace(lsp->rls_state->rs_owner->ro_client)) {
		status = NFS4ERR_GRACE;
		goto out;
	}

	status = rfs4_do_lock(lsp, args->locktype,
	    args->offset, args->length, cs->cr, resop);

out:
	*cs->statusp = resp->status = status;

	/* the lock stateid is only returned on success */
	if (status == NFS4_OK)
		resp->lock_stateid = lsp->rls_lockid.stateid;

	rfs4_update_lock_resp(lsp, resop);

end:
	rfs4_sw_exit(&lsp->rls_sw);
	rfs4_lo_state_rele(lsp, TRUE);

	DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
	    LOCKU4res *, resp);
}
9387
9388 /*
9389 * LOCKT is a best effort routine, the client can not be guaranteed that
9390 * the status return is still in effect by the time the reply is received.
9391 * They are numerous race conditions in this routine, but we are not required
9392 * and can not be accurate.
9393 */
9394 /*ARGSUSED*/
9395 void
9396 rfs4_op_lockt(nfs_argop4 *argop, nfs_resop4 *resop,
9397 struct svc_req *req, struct compound_state *cs)
9398 {
9399 LOCKT4args *args = &argop->nfs_argop4_u.oplockt;
9400 LOCKT4res *resp = &resop->nfs_resop4_u.oplockt;
9401 rfs4_lockowner_t *lo;
9402 rfs4_client_t *cp;
9403 bool_t create = FALSE;
9404 struct flock64 flk;
9405 int error;
9406 int flag = FREAD | FWRITE;
9407 int ltype;
9408 length4 posix_length;
9409 sysid_t sysid;
9410 pid_t pid;
9411
9412 DTRACE_NFSV4_2(op__lockt__start, struct compound_state *, cs,
9413 LOCKT4args *, args);
9414
9415 if (cs->vp == NULL) {
9416 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9417 goto out;
9418 }
9419
9420 /*
9421 * NFS4 only allows locking on regular files, so
9422 * verify type of object.
9423 */
9424 if (cs->vp->v_type != VREG) {
9425 if (cs->vp->v_type == VDIR)
9426 *cs->statusp = resp->status = NFS4ERR_ISDIR;
9427 else
9428 *cs->statusp = resp->status = NFS4ERR_INVAL;
9429 goto out;
9430 }
9431
9432 /*
9433 * Check out the clientid to ensure the server knows about it
9434 * so that we correctly inform the client of a server reboot.
9435 */
9436 if ((cp = rfs4_findclient_by_id(args->owner.clientid, FALSE))
9437 == NULL) {
9438 *cs->statusp = resp->status =
9439 rfs4_check_clientid(&args->owner.clientid, 0);
9440 goto out;
9441 }
9442 if (rfs4_lease_expired(cp)) {
9443 rfs4_client_close(cp);
9444 /*
9445 * Protocol doesn't allow returning NFS4ERR_STALE as
9446 * other operations do on this check so STALE_CLIENTID
9447 * is returned instead
9448 */
9449 *cs->statusp = resp->status = NFS4ERR_STALE_CLIENTID;
9450 goto out;
9451 }
9452
9453 if (rfs4_clnt_in_grace(cp) && !(cp->rc_can_reclaim)) {
9454 *cs->statusp = resp->status = NFS4ERR_GRACE;
9455 rfs4_client_rele(cp);
9456 goto out;
9457 }
9458 rfs4_client_rele(cp);
9459
9460 resp->status = NFS4_OK;
9461
9462 switch (args->locktype) {
9463 case READ_LT:
9464 case READW_LT:
9465 ltype = F_RDLCK;
9466 break;
9467 case WRITE_LT:
9468 case WRITEW_LT:
9469 ltype = F_WRLCK;
9470 break;
9471 }
9472
9473 posix_length = args->length;
9474 /* Check for zero length. To lock to end of file use all ones for V4 */
9475 if (posix_length == 0) {
9476 *cs->statusp = resp->status = NFS4ERR_INVAL;
9477 goto out;
9478 } else if (posix_length == (length4)(~0)) {
9479 posix_length = 0; /* Posix to end of file */
9480 }
9481
9482 /* Find or create a lockowner */
9483 lo = rfs4_findlockowner(&args->owner, &create);
9484
9485 if (lo) {
9486 pid = lo->rl_pid;
9487 if ((resp->status =
9488 rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
9489 goto err;
9490 } else {
9491 pid = 0;
9492 sysid = lockt_sysid;
9493 }
9494 retry:
9495 flk.l_type = ltype;
9496 flk.l_whence = 0; /* SEEK_SET */
9497 flk.l_start = args->offset;
9498 flk.l_len = posix_length;
9499 flk.l_sysid = sysid;
9500 flk.l_pid = pid;
9501 flag |= F_REMOTELOCK;
9502
9503 LOCK_PRINT(rfs4_debug, "rfs4_op_lockt", F_GETLK, &flk);
9504
9505 /* Note that length4 is uint64_t but l_len and l_start are off64_t */
9506 if (flk.l_len < 0 || flk.l_start < 0) {
9507 resp->status = NFS4ERR_INVAL;
9508 goto err;
9509 }
9510 error = VOP_FRLOCK(cs->vp, F_GETLK, &flk, flag, (u_offset_t)0,
9511 NULL, cs->cr, NULL);
9512
9513 /*
9514 * N.B. We map error values to nfsv4 errors. This is differrent
9515 * than puterrno4 routine.
9516 */
9517 switch (error) {
9518 case 0:
9519 if (flk.l_type == F_UNLCK)
9520 resp->status = NFS4_OK;
9521 else {
9522 if (lock_denied(&resp->denied, &flk) == NFS4ERR_EXPIRED)
9523 goto retry;
9524 resp->status = NFS4ERR_DENIED;
9525 }
9526 break;
9527 case EOVERFLOW:
9528 resp->status = NFS4ERR_INVAL;
9529 break;
9530 case EINVAL:
9531 resp->status = NFS4ERR_NOTSUPP;
9532 break;
9533 default:
9534 cmn_err(CE_WARN, "rfs4_op_lockt: unexpected errno (%d)",
9535 error);
9536 resp->status = NFS4ERR_SERVERFAULT;
9537 break;
9538 }
9539
9540 err:
9541 if (lo)
9542 rfs4_lockowner_rele(lo);
9543 *cs->statusp = resp->status;
9544 out:
9545 DTRACE_NFSV4_2(op__lockt__done, struct compound_state *, cs,
9546 LOCKT4res *, resp);
9547 }
9548
9549 int
9550 rfs4_share(rfs4_state_t *sp, uint32_t access, uint32_t deny)
9551 {
9552 int err;
9553 int cmd;
9554 vnode_t *vp;
9555 struct shrlock shr;
9556 struct shr_locowner shr_loco;
9557 int fflags = 0;
9558
9559 ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9560 ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9561
9562 if (sp->rs_closed)
9563 return (NFS4ERR_OLD_STATEID);
9564
9565 vp = sp->rs_finfo->rf_vp;
9566 ASSERT(vp);
9567
9568 shr.s_access = shr.s_deny = 0;
9569
9570 if (access & OPEN4_SHARE_ACCESS_READ) {
9571 fflags |= FREAD;
9572 shr.s_access |= F_RDACC;
9573 }
9574 if (access & OPEN4_SHARE_ACCESS_WRITE) {
9575 fflags |= FWRITE;
9576 shr.s_access |= F_WRACC;
9577 }
9578 ASSERT(shr.s_access);
9579
9580 if (deny & OPEN4_SHARE_DENY_READ)
9581 shr.s_deny |= F_RDDNY;
9582 if (deny & OPEN4_SHARE_DENY_WRITE)
9583 shr.s_deny |= F_WRDNY;
9584
9585 shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9586 shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9587 shr_loco.sl_pid = shr.s_pid;
9588 shr_loco.sl_id = shr.s_sysid;
9589 shr.s_owner = (caddr_t)&shr_loco;
9590 shr.s_own_len = sizeof (shr_loco);
9591
9592 cmd = nbl_need_check(vp) ? F_SHARE_NBMAND : F_SHARE;
9593
9594 err = VOP_SHRLOCK(vp, cmd, &shr, fflags, CRED(), NULL);
9595 if (err != 0) {
9596 if (err == EAGAIN)
9597 err = NFS4ERR_SHARE_DENIED;
9598 else
9599 err = puterrno4(err);
9600 return (err);
9601 }
9602
9603 sp->rs_share_access |= access;
9604 sp->rs_share_deny |= deny;
9605
9606 return (0);
9607 }
9608
9609 int
9610 rfs4_unshare(rfs4_state_t *sp)
9611 {
9612 int err;
9613 struct shrlock shr;
9614 struct shr_locowner shr_loco;
9615
9616 ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9617
9618 if (sp->rs_closed || sp->rs_share_access == 0)
9619 return (0);
9620
9621 ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9622 ASSERT(sp->rs_finfo->rf_vp);
9623
9624 shr.s_access = shr.s_deny = 0;
9625 shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9626 shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9627 shr_loco.sl_pid = shr.s_pid;
9628 shr_loco.sl_id = shr.s_sysid;
9629 shr.s_owner = (caddr_t)&shr_loco;
9630 shr.s_own_len = sizeof (shr_loco);
9631
9632 err = VOP_SHRLOCK(sp->rs_finfo->rf_vp, F_UNSHARE, &shr, 0, CRED(),
9633 NULL);
9634 if (err != 0) {
9635 err = puterrno4(err);
9636 return (err);
9637 }
9638
9639 sp->rs_share_access = 0;
9640 sp->rs_share_deny = 0;
9641
9642 return (0);
9643
9644 }
9645
9646 static int
9647 rdma_setup_read_data4(READ4args *args, READ4res *rok)
9648 {
9649 struct clist *wcl;
9650 count4 count = rok->data_len;
9651 int wlist_len;
9652
9653 wcl = args->wlist;
9654 if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
9655 return (FALSE);
9656 }
9657 wcl = args->wlist;
9658 rok->wlist_len = wlist_len;
9659 rok->wlist = wcl;
9660 return (TRUE);
9661 }
9662
/*
 * Tunable to disable server referrals.  While it is non-zero,
 * vn_is_nfs_reparse() returns B_FALSE without examining the vnode.
 */
int rfs4_no_referrals = 0;
9665
9666 /*
9667 * Find an NFS record in reparse point data.
9668 * Returns 0 for success and <0 or an errno value on failure.
9669 */
9670 int
9671 vn_find_nfs_record(vnode_t *vp, nvlist_t **nvlp, char **svcp, char **datap)
9672 {
9673 int err;
9674 char *stype, *val;
9675 nvlist_t *nvl;
9676 nvpair_t *curr;
9677
9678 if ((nvl = reparse_init()) == NULL)
9679 return (-1);
9680
9681 if ((err = reparse_vnode_parse(vp, nvl)) != 0) {
9682 reparse_free(nvl);
9683 return (err);
9684 }
9685
9686 curr = NULL;
9687 while ((curr = nvlist_next_nvpair(nvl, curr)) != NULL) {
9688 if ((stype = nvpair_name(curr)) == NULL) {
9689 reparse_free(nvl);
9690 return (-2);
9691 }
9692 if (strncasecmp(stype, "NFS", 3) == 0)
9693 break;
9694 }
9695
9696 if ((curr == NULL) ||
9697 (nvpair_value_string(curr, &val))) {
9698 reparse_free(nvl);
9699 return (-3);
9700 }
9701 *nvlp = nvl;
9702 *svcp = stype;
9703 *datap = val;
9704 return (0);
9705 }
9706
9707 int
9708 vn_is_nfs_reparse(vnode_t *vp, cred_t *cr)
9709 {
9710 nvlist_t *nvl;
9711 char *s, *d;
9712
9713 if (rfs4_no_referrals != 0)
9714 return (B_FALSE);
9715
9716 if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9717 return (B_FALSE);
9718
9719 if (vn_find_nfs_record(vp, &nvl, &s, &d) != 0)
9720 return (B_FALSE);
9721
9722 reparse_free(nvl);
9723
9724 return (B_TRUE);
9725 }
9726
9727 /*
9728 * There is a user-level copy of this routine in ref_subr.c.
9729 * Changes should be kept in sync.
9730 */
9731 static int
9732 nfs4_create_components(char *path, component4 *comp4)
9733 {
9734 int slen, plen, ncomp;
9735 char *ori_path, *nxtc, buf[MAXNAMELEN];
9736
9737 if (path == NULL)
9738 return (0);
9739
9740 plen = strlen(path) + 1; /* include the terminator */
9741 ori_path = path;
9742 ncomp = 0;
9743
9744 /* count number of components in the path */
9745 for (nxtc = path; nxtc < ori_path + plen; nxtc++) {
9746 if (*nxtc == '/' || *nxtc == '\0' || *nxtc == '\n') {
9747 if ((slen = nxtc - path) == 0) {
9748 path = nxtc + 1;
9749 continue;
9750 }
9751
9752 if (comp4 != NULL) {
9753 bcopy(path, buf, slen);
9754 buf[slen] = '\0';
9755 (void) str_to_utf8(buf, &comp4[ncomp]);
9756 }
9757
9758 ncomp++; /* 1 valid component */
9759 path = nxtc + 1;
9760 }
9761 if (*nxtc == '\0' || *nxtc == '\n')
9762 break;
9763 }
9764
9765 return (ncomp);
9766 }
9767
9768 /*
9769 * There is a user-level copy of this routine in ref_subr.c.
9770 * Changes should be kept in sync.
9771 */
9772 static int
9773 make_pathname4(char *path, pathname4 *pathname)
9774 {
9775 int ncomp;
9776 component4 *comp4;
9777
9778 if (pathname == NULL)
9779 return (0);
9780
9781 if (path == NULL) {
9782 pathname->pathname4_val = NULL;
9783 pathname->pathname4_len = 0;
9784 return (0);
9785 }
9786
9787 /* count number of components to alloc buffer */
9788 if ((ncomp = nfs4_create_components(path, NULL)) == 0) {
9789 pathname->pathname4_val = NULL;
9790 pathname->pathname4_len = 0;
9791 return (0);
9792 }
9793 comp4 = kmem_zalloc(ncomp * sizeof (component4), KM_SLEEP);
9794
9795 /* copy components into allocated buffer */
9796 ncomp = nfs4_create_components(path, comp4);
9797
9798 pathname->pathname4_val = comp4;
9799 pathname->pathname4_len = ncomp;
9800
9801 return (ncomp);
9802 }
9803
9804 #define xdr_fs_locations4 xdr_fattr4_fs_locations
9805
9806 fs_locations4 *
9807 fetch_referral(vnode_t *vp, cred_t *cr)
9808 {
9809 nvlist_t *nvl;
9810 char *stype, *sdata;
9811 fs_locations4 *result;
9812 char buf[1024];
9813 size_t bufsize;
9814 XDR xdr;
9815 int err;
9816
9817 /*
9818 * Check attrs to ensure it's a reparse point
9819 */
9820 if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9821 return (NULL);
9822
9823 /*
9824 * Look for an NFS record and get the type and data
9825 */
9826 if (vn_find_nfs_record(vp, &nvl, &stype, &sdata) != 0)
9827 return (NULL);
9828
9829 /*
9830 * With the type and data, upcall to get the referral
9831 */
9832 bufsize = sizeof (buf);
9833 bzero(buf, sizeof (buf));
9834 err = reparse_kderef((const char *)stype, (const char *)sdata,
9835 buf, &bufsize);
9836 reparse_free(nvl);
9837
9838 DTRACE_PROBE4(nfs4serv__func__referral__upcall,
9839 char *, stype, char *, sdata, char *, buf, int, err);
9840 if (err) {
9841 cmn_err(CE_NOTE,
9842 "reparsed daemon not running: unable to get referral (%d)",
9843 err);
9844 return (NULL);
9845 }
9846
9847 /*
9848 * We get an XDR'ed record back from the kderef call
9849 */
9850 xdrmem_create(&xdr, buf, bufsize, XDR_DECODE);
9851 result = kmem_alloc(sizeof (fs_locations4), KM_SLEEP);
9852 err = xdr_fs_locations4(&xdr, result);
9853 XDR_DESTROY(&xdr);
9854 if (err != TRUE) {
9855 DTRACE_PROBE1(nfs4serv__func__referral__upcall__xdrfail,
9856 int, err);
9857 return (NULL);
9858 }
9859
9860 /*
9861 * Look at path to recover fs_root, ignoring the leading '/'
9862 */
9863 (void) make_pathname4(vp->v_path, &result->fs_root);
9864
9865 return (result);
9866 }
9867
9868 char *
9869 build_symlink(vnode_t *vp, cred_t *cr, size_t *strsz)
9870 {
9871 fs_locations4 *fsl;
9872 fs_location4 *fs;
9873 char *server, *path, *symbuf;
9874 static char *prefix = "/net/";
9875 int i, size, npaths;
9876 uint_t len;
9877
9878 /* Get the referral */
9879 if ((fsl = fetch_referral(vp, cr)) == NULL)
9880 return (NULL);
9881
9882 /* Deal with only the first location and first server */
9883 fs = &fsl->locations_val[0];
9884 server = utf8_to_str(&fs->server_val[0], &len, NULL);
9885 if (server == NULL) {
9886 rfs4_free_fs_locations4(fsl);
9887 kmem_free(fsl, sizeof (fs_locations4));
9888 return (NULL);
9889 }
9890
9891 /* Figure out size for "/net/" + host + /path/path/path + NULL */
9892 size = strlen(prefix) + len;
9893 for (i = 0; i < fs->rootpath.pathname4_len; i++)
9894 size += fs->rootpath.pathname4_val[i].utf8string_len + 1;
9895
9896 /* Allocate the symlink buffer and fill it */
9897 symbuf = kmem_zalloc(size, KM_SLEEP);
9898 (void) strcat(symbuf, prefix);
9899 (void) strcat(symbuf, server);
9900 kmem_free(server, len);
9901
9902 npaths = 0;
9903 for (i = 0; i < fs->rootpath.pathname4_len; i++) {
9904 path = utf8_to_str(&fs->rootpath.pathname4_val[i], &len, NULL);
9905 if (path == NULL)
9906 continue;
9907 (void) strcat(symbuf, "/");
9908 (void) strcat(symbuf, path);
9909 npaths++;
9910 kmem_free(path, len);
9911 }
9912
9913 rfs4_free_fs_locations4(fsl);
9914 kmem_free(fsl, sizeof (fs_locations4));
9915
9916 if (strsz != NULL)
9917 *strsz = size;
9918 return (symbuf);
9919 }
9920
9921 /*
9922 * Check to see if we have a downrev Solaris client, so that we
9923 * can send it a symlink instead of a referral.
9924 */
9925 int
9926 client_is_downrev(struct svc_req *req)
9927 {
9928 struct sockaddr *ca;
9929 rfs4_clntip_t *ci;
9930 bool_t create = FALSE;
9931 int is_downrev;
9932
9933 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
9934 ASSERT(ca);
9935 ci = rfs4_find_clntip(ca, &create);
9936 if (ci == NULL)
9937 return (0);
9938 is_downrev = ci->ri_no_referrals;
9939 rfs4_dbe_rele(ci->ri_dbe);
9940 return (is_downrev);
9941 }
9942
/*
 * Do the main work of handling HA-NFSv4 Resource Group failover on
 * Sun Cluster.
 * We need to detect whether any RG admin paths have been added or removed,
 * and adjust resources accordingly.
 * Currently we're using a very inefficient algorithm, ~ 2 * O(n**2). In
 * order to scale, the list and array of paths need to be held in more
 * suitable data structures.
 *
 * Inputs are the circular nsrv4->dss_pathlist (paths currently served)
 * and the rfs4_dss_newpaths[] / rfs4_dss_numnewpaths pair passed in from
 * nfsd.  Removed paths are unlinked from the list and freed; added paths
 * are collected into a temporary array and, if there are any, used to
 * create a new server instance, read its stable storage state, and
 * reset all grace periods.
 */
static void
hanfsv4_failover(nfs4_srv_t *nsrv4)
{
	int i, start_grace, numadded_paths = 0;
	char **added_paths = NULL;
	rfs4_dss_path_t *dss_path;

	/*
	 * Note: currently, dss_pathlist cannot be NULL, since
	 * it will always include an entry for NFS4_DSS_VAR_DIR. If we
	 * make the latter dynamically specified too, the following will
	 * need to be adjusted.
	 */

	/*
	 * First, look for removed paths: RGs that have been failed-over
	 * away from this node.
	 * Walk the "currently-serving" dss_pathlist and, for each
	 * path, check if it is on the "passed-in" rfs4_dss_newpaths array
	 * from nfsd. If not, that RG path has been removed.
	 *
	 * Note that nfsd has sorted rfs4_dss_newpaths for us, and removed
	 * any duplicates.
	 */
	dss_path = nsrv4->dss_pathlist;
	do {
		int found = 0;
		char *path = dss_path->path;

		/* used only for non-HA so may not be removed */
		if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
			dss_path = dss_path->next;
			continue;
		}

		for (i = 0; i < rfs4_dss_numnewpaths; i++) {
			int cmpret;
			char *newpath = rfs4_dss_newpaths[i];

			/*
			 * Since nfsd has sorted rfs4_dss_newpaths for us,
			 * once the return from strcmp is negative we know
			 * we've passed the point where "path" should be,
			 * and can stop searching: "path" has been removed.
			 */
			cmpret = strcmp(path, newpath);
			if (cmpret < 0)
				break;
			if (cmpret == 0) {
				found = 1;
				break;
			}
		}

		if (found == 0) {
			unsigned index = dss_path->index;
			rfs4_servinst_t *sip = dss_path->sip;
			/* save the successor before dss_path is freed */
			rfs4_dss_path_t *path_next = dss_path->next;

			/*
			 * This path has been removed.
			 * We must clear out the servinst reference to
			 * it, since it's now owned by another
			 * node: we should not attempt to touch it.
			 */
			ASSERT(dss_path == sip->dss_paths[index]);
			sip->dss_paths[index] = NULL;

			/* remove from "currently-serving" list, and destroy */
			remque(dss_path);
			/* allow for NUL */
			kmem_free(dss_path->path, strlen(dss_path->path) + 1);
			kmem_free(dss_path, sizeof (rfs4_dss_path_t));

			dss_path = path_next;
		} else {
			/* path was found; not removed */
			dss_path = dss_path->next;
		}
	} while (dss_path != nsrv4->dss_pathlist);

	/*
	 * Now, look for added paths: RGs that have been failed-over
	 * to this node.
	 * Walk the "passed-in" rfs4_dss_newpaths array from nfsd and,
	 * for each path, check if it is on the "currently-serving"
	 * dss_pathlist. If not, that RG path has been added.
	 *
	 * Note: we don't do duplicate detection here; nfsd does that for us.
	 *
	 * Note: numadded_paths <= rfs4_dss_numnewpaths, which gives us
	 * an upper bound for the size needed for added_paths[numadded_paths].
	 */

	/* probably more space than we need, but guaranteed to be enough */
	if (rfs4_dss_numnewpaths > 0) {
		size_t sz = rfs4_dss_numnewpaths * sizeof (char *);
		added_paths = kmem_zalloc(sz, KM_SLEEP);
	}

	/* walk the "passed-in" rfs4_dss_newpaths array from nfsd */
	for (i = 0; i < rfs4_dss_numnewpaths; i++) {
		int found = 0;
		char *newpath = rfs4_dss_newpaths[i];

		dss_path = nsrv4->dss_pathlist;
		do {
			char *path = dss_path->path;

			/* used only for non-HA */
			if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
				dss_path = dss_path->next;
				continue;
			}

			/*
			 * NOTE(review): this is a prefix match bounded by
			 * the length of the served path, whereas the removal
			 * pass above uses an exact strcmp().  A new path
			 * that merely begins with a served path is therefore
			 * treated as already served -- confirm that this is
			 * intended.
			 */
			if (strncmp(path, newpath, strlen(path)) == 0) {
				found = 1;
				break;
			}

			dss_path = dss_path->next;
		} while (dss_path != nsrv4->dss_pathlist);

		if (found == 0) {
			/* records the pointer only; the string is not copied */
			added_paths[numadded_paths] = newpath;
			numadded_paths++;
		}
	}

	/* did we find any added paths? */
	if (numadded_paths > 0) {

		/* create a new server instance, and start its grace period */
		start_grace = 1;
		/* CSTYLED */
		rfs4_servinst_create(nsrv4, start_grace, numadded_paths, added_paths);

		/* read in the stable storage state from these paths */
		rfs4_dss_readstate(nsrv4, numadded_paths, added_paths);

		/*
		 * Multiple failovers during a grace period will cause
		 * clients of the same resource group to be partitioned
		 * into different server instances, with different
		 * grace periods.  Since clients of the same resource
		 * group must be subject to the same grace period,
		 * we need to reset all currently active grace periods.
		 */
		rfs4_grace_reset_all(nsrv4);
	}

	/* release the temporary array; sized by the upper bound used above */
	if (rfs4_dss_numnewpaths > 0)
		kmem_free(added_paths, rfs4_dss_numnewpaths * sizeof (char *));
}