1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26 /*
27 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
28 * All Rights Reserved
29 */
30
31 /*
32 * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
33 * Copyright 2019 Nexenta Systems, Inc.
34 * Copyright 2019 Nexenta by DDN, Inc.
35 */
36
37 #include <sys/param.h>
38 #include <sys/types.h>
39 #include <sys/systm.h>
40 #include <sys/cred.h>
41 #include <sys/buf.h>
42 #include <sys/vfs.h>
43 #include <sys/vfs_opreg.h>
44 #include <sys/vnode.h>
45 #include <sys/uio.h>
46 #include <sys/errno.h>
47 #include <sys/sysmacros.h>
48 #include <sys/statvfs.h>
49 #include <sys/kmem.h>
50 #include <sys/dirent.h>
51 #include <sys/cmn_err.h>
52 #include <sys/debug.h>
53 #include <sys/systeminfo.h>
54 #include <sys/flock.h>
55 #include <sys/pathname.h>
56 #include <sys/nbmlock.h>
57 #include <sys/share.h>
58 #include <sys/atomic.h>
59 #include <sys/policy.h>
60 #include <sys/fem.h>
61 #include <sys/sdt.h>
62 #include <sys/ddi.h>
63 #include <sys/zone.h>
64
65 #include <fs/fs_reparse.h>
66
67 #include <rpc/types.h>
68 #include <rpc/auth.h>
69 #include <rpc/rpcsec_gss.h>
70 #include <rpc/svc.h>
71
72 #include <nfs/nfs.h>
73 #include <nfs/nfssys.h>
74 #include <nfs/export.h>
75 #include <nfs/nfs_cmd.h>
76 #include <nfs/lm.h>
77 #include <nfs/nfs4.h>
78 #include <nfs/nfs4_drc.h>
79
80 #include <sys/strsubr.h>
81 #include <sys/strsun.h>
82
83 #include <inet/common.h>
84 #include <inet/ip.h>
85 #include <inet/ip6.h>
86
87 #include <sys/tsol/label.h>
88 #include <sys/tsol/tndb.h>
89
/*
 * Lock retry tunables: the number of attempts made to obtain a lock and
 * a delay value in milliseconds (presumably applied between attempts;
 * the consumers are not in this part of the file).
 */
#define	RFS4_MAXLOCK_TRIES 4	/* Try to get the lock this many times */
static int rfs4_maxlock_tries = RFS4_MAXLOCK_TRIES;
#define	RFS4_LOCK_DELAY 10	/* Milliseconds */
static clock_t rfs4_lock_delay = RFS4_LOCK_DELAY;
/* Both of these are defined outside this file. */
extern struct svc_ops rdma_svc_ops;
extern int nfs_loaned_buffers;
/* End of Tunables */

/* Forward declaration; the definition is not in this part of the file. */
static int rdma_setup_read_data4(READ4args *, READ4res *);
99
/*
 * Used to bump the stateid4.seqid value and show changes in the stateid.
 * Pre-increments the bits.chgseq member of the object sp points to and
 * evaluates to the incremented value.
 */
#define	next_stateid(sp) (++(sp)->bits.chgseq)
104
105 /*
106 * RFS4_MINLEN_ENTRY4: XDR-encoded size of smallest possible dirent.
107 * This is used to return NFS4ERR_TOOSMALL when clients specify
108 * maxcount that isn't large enough to hold the smallest possible
109 * XDR encoded dirent.
110 *
111 * sizeof cookie (8 bytes) +
112 * sizeof name_len (4 bytes) +
113 * sizeof smallest (padded) name (4 bytes) +
114 * sizeof bitmap4_len (12 bytes) + NOTE: we always encode len=2 bm4
115 * sizeof attrlist4_len (4 bytes) +
116 * sizeof next boolean (4 bytes)
117 *
118 * RFS4_MINLEN_RDDIR4: XDR-encoded size of READDIR op reply containing
119 * the smallest possible entry4 (assumes no attrs requested).
120 * sizeof nfsstat4 (4 bytes) +
121 * sizeof verifier4 (8 bytes) +
122 * sizeof entry4list bool (4 bytes) +
123 * sizeof entry4 (36 bytes) +
124 * sizeof eof bool (4 bytes)
125 *
126 * RFS4_MINLEN_RDDIR_BUF: minimum length of buffer server will provide to
127 * VOP_READDIR. Its value is the size of the maximum possible dirent
128 * for solaris. The DIRENT64_RECLEN macro returns the size of dirent
129 * required for a given name length. MAXNAMELEN is the maximum
130 * filename length allowed in Solaris. The first two DIRENT64_RECLEN()
131 * macros are to allow for . and .. entries -- just a minor tweak to try
132 * and guarantee that buffer we give to VOP_READDIR will be large enough
133 * to hold ., .., and the largest possible solaris dirent64.
134 */
/* 8 + 4 + 4 + 12 + 4 + 4 bytes, per the itemized list above. */
#define	RFS4_MINLEN_ENTRY4 36
/* status + verifier + entry4list bool + smallest entry4 + eof bool */
#define	RFS4_MINLEN_RDDIR4 (4 + NFS4_VERIFIER_SIZE + 4 + RFS4_MINLEN_ENTRY4 + 4)
/* room for ".", ".." and one dirent64 with a MAXNAMELEN name */
#define	RFS4_MINLEN_RDDIR_BUF \
	(DIRENT64_RECLEN(1) + DIRENT64_RECLEN(2) + DIRENT64_RECLEN(MAXNAMELEN))
139
/*
 * It would be better to pad to 4 bytes since that's what XDR would do,
 * but the dirents UFS gives us are already padded to 8, so just take
 * what we're given.  Dircount is only a hint anyway.  Currently the
 * solaris kernel is ASCII only, so there's no point in calling the
 * UTF8 functions.
 *
 * dirent64: name padded to provide 8 byte struct alignment
 *	d_ino(8) + d_off(8) + d_reclen(2) + d_name(namelen + null(1) + pad)
 *
 * cookie: uint64_t   +  utf8namelen: uint_t  +   utf8name padded to 8 bytes
 *
 * The macro therefore charges three XDR units (the 8-byte cookie plus the
 * 4-byte name length) plus the name length derived from d_reclen.
 */
#define	DIRENT64_TO_DIRCOUNT(dp) \
	(3 * BYTES_PER_XDR_UNIT + DIRENT64_NAMELEN((dp)->d_reclen))
155
156
/* Allocated by lm_alloc_sysidt() in rfs4_srvrinit(). */
static sysid_t lockt_sysid;		/* dummy sysid for all LOCKT calls */

/* Assigned from fs_new_caller_id() in rfs4_srvrinit(). */
u_longlong_t nfs4_srv_caller_id;
/* Key passed to vsd_create() in rfs4_srvrinit(). */
uint_t nfs4_srv_vkey = 0;

void rfs4_init_compound_state(struct compound_state *);

/*
 * Forward declarations of the per-operation handlers and their
 * result-free routines.  Most of them are referenced by the
 * rfsv4disptab[] dispatch table later in this file.
 */
static void nullfree(caddr_t);
static void rfs4_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    struct compound_state *);
static void rfs4_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    struct compound_state *);
static void rfs4_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    struct compound_state *);
static void rfs4_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    struct compound_state *);
static void rfs4_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    struct compound_state *);
static void rfs4_op_create_free(nfs_resop4 *resop);
static void rfs4_op_delegreturn(nfs_argop4 *, nfs_resop4 *,
    struct svc_req *, struct compound_state *);
static void rfs4_op_delegpurge(nfs_argop4 *, nfs_resop4 *,
    struct svc_req *, struct compound_state *);
static void rfs4_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    struct compound_state *);
static void rfs4_op_getattr_free(nfs_resop4 *);
static void rfs4_op_getfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    struct compound_state *);
static void rfs4_op_getfh_free(nfs_resop4 *);
static void rfs4_op_illegal(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    struct compound_state *);
static void rfs4_op_link(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    struct compound_state *);
static void rfs4_op_lock(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    struct compound_state *);
static void lock_denied_free(nfs_resop4 *);
static void rfs4_op_locku(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    struct compound_state *);
static void rfs4_op_lockt(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    struct compound_state *);
static void rfs4_op_lookup(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    struct compound_state *);
static void rfs4_op_lookupp(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    struct compound_state *);
static void rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop,
    struct svc_req *req, struct compound_state *cs);
static void rfs4_op_nverify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    struct compound_state *);
static void rfs4_op_open(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    struct compound_state *);
static void rfs4_op_open_confirm(nfs_argop4 *, nfs_resop4 *,
    struct svc_req *, struct compound_state *);
static void rfs4_op_open_downgrade(nfs_argop4 *, nfs_resop4 *,
    struct svc_req *, struct compound_state *);
static void rfs4_op_putfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    struct compound_state *);
static void rfs4_op_putpubfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    struct compound_state *);
static void rfs4_op_putrootfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    struct compound_state *);
static void rfs4_op_read(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    struct compound_state *);
static void rfs4_op_read_free(nfs_resop4 *);
static void rfs4_op_readdir_free(nfs_resop4 *resop);
static void rfs4_op_readlink(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    struct compound_state *);
static void rfs4_op_readlink_free(nfs_resop4 *);
static void rfs4_op_release_lockowner(nfs_argop4 *, nfs_resop4 *,
    struct svc_req *, struct compound_state *);
static void rfs4_op_remove(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    struct compound_state *);
static void rfs4_op_rename(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    struct compound_state *);
static void rfs4_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    struct compound_state *);
static void rfs4_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    struct compound_state *);
static void rfs4_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    struct compound_state *);
static void rfs4_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    struct compound_state *);
static void rfs4_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    struct compound_state *);
static void rfs4_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    struct compound_state *);
static void rfs4_op_setclientid(nfs_argop4 *, nfs_resop4 *,
    struct svc_req *, struct compound_state *);
static void rfs4_op_setclientid_confirm(nfs_argop4 *, nfs_resop4 *,
    struct svc_req *req, struct compound_state *);
static void rfs4_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    struct compound_state *);
static void rfs4_op_secinfo_free(nfs_resop4 *);

/* Helpers whose definitions are not in this part of the file. */
static nfsstat4 check_open_access(uint32_t, struct compound_state *,
    struct svc_req *);
nfsstat4 rfs4_client_sysid(rfs4_client_t *, sysid_t *);
void rfs4_ss_clid(nfs4_srv_t *, rfs4_client_t *);
254
255
/*
 * translation table for attrs
 *
 * NOTE(review): the field descriptions below are inferred from the names
 * and types only; no use of this structure is visible in this part of the
 * file -- confirm against nfs4_ntov_table_init() and its callers.
 */
struct nfs4_ntov_table {
	union nfs4_attr_u *na;		/* presumably per-attribute values */
	uint8_t amap[NFS4_MAXNUM_ATTRS]; /* presumably an attribute index map */
	int attrcnt;			/* presumably number of entries in use */
	bool_t vfsstat;			/* presumably "vfs stats needed" flag */
};
265
/* Lifecycle helpers for struct nfs4_ntov_table. */
static void nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp);
static void nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
    struct nfs4_svgetit_arg *sargp);

static nfsstat4 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp,
    struct compound_state *cs, struct nfs4_svgetit_arg *sargp,
    struct nfs4_ntov_table *ntovp, nfs4_attr_cmd_t cmd);

/* Invoked from rfs4_do_server_start() on a warm start when clustered. */
static void hanfsv4_failover(nfs4_srv_t *);

/*
 * File event monitors for read and write delegations.  Created by
 * fem_create() in rfs4_srvrinit() from the nfs4_rd_deleg_tmpl and
 * nfs4_wr_deleg_tmpl templates, and released in rfs4_srvrfini().
 */
fem_t *deleg_rdops;
fem_t *deleg_wrops;
278
/*
 * NFS4 op dispatch table
 *
 * One entry per operation number: the handler, the routine that releases
 * whatever the handler allocated for its result, and a set of RPC_* flags.
 * The function pointers are declared without prototypes, so handlers and
 * free routines with differing argument lists can share the same slots.
 */

struct rfsv4disp {
	void	(*dis_proc)();		/* proc to call */
	void	(*dis_resfree)();	/* frees space allocated by proc */
	int	dis_flags;		/* RPC_IDEMPOTENT, etc... */
};
288
/*
 * Dispatch table indexed by NFSv4 operation number (0 through 39).
 * Slots 0-2 are not valid operations and map to rfs4_op_illegal.
 */
static struct rfsv4disp rfsv4disptab[] = {
	/*
	 * NFS VERSION 4
	 */

	/* RFS_NULL = 0 */
	{rfs4_op_illegal, nullfree, 0},

	/* UNUSED = 1 */
	{rfs4_op_illegal, nullfree, 0},

	/* UNUSED = 2 */
	{rfs4_op_illegal, nullfree, 0},

	/* OP_ACCESS = 3 */
	{rfs4_op_access, nullfree, RPC_IDEMPOTENT},

	/* OP_CLOSE = 4 */
	{rfs4_op_close, nullfree, 0},

	/* OP_COMMIT = 5 */
	{rfs4_op_commit, nullfree, RPC_IDEMPOTENT},

	/* OP_CREATE = 6 */
	{rfs4_op_create, nullfree, 0},

	/* OP_DELEGPURGE = 7 */
	{rfs4_op_delegpurge, nullfree, 0},

	/* OP_DELEGRETURN = 8 */
	{rfs4_op_delegreturn, nullfree, 0},

	/* OP_GETATTR = 9 */
	{rfs4_op_getattr, rfs4_op_getattr_free, RPC_IDEMPOTENT},

	/* OP_GETFH = 10 */
	{rfs4_op_getfh, rfs4_op_getfh_free, RPC_ALL},

	/* OP_LINK = 11 */
	{rfs4_op_link, nullfree, 0},

	/* OP_LOCK = 12 */
	{rfs4_op_lock, lock_denied_free, 0},

	/* OP_LOCKT = 13 */
	{rfs4_op_lockt, lock_denied_free, 0},

	/* OP_LOCKU = 14 */
	{rfs4_op_locku, nullfree, 0},

	/* OP_LOOKUP = 15 */
	{rfs4_op_lookup, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},

	/* OP_LOOKUPP = 16 */
	{rfs4_op_lookupp, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},

	/* OP_NVERIFY = 17 */
	{rfs4_op_nverify, nullfree, RPC_IDEMPOTENT},

	/* OP_OPEN = 18 */
	{rfs4_op_open, rfs4_free_reply, 0},

	/* OP_OPENATTR = 19 */
	{rfs4_op_openattr, nullfree, 0},

	/* OP_OPEN_CONFIRM = 20 */
	{rfs4_op_open_confirm, nullfree, 0},

	/* OP_OPEN_DOWNGRADE = 21 */
	{rfs4_op_open_downgrade, nullfree, 0},

	/* OP_PUTFH = 22 */
	{rfs4_op_putfh, nullfree, RPC_ALL},

	/* OP_PUTPUBFH = 23 */
	{rfs4_op_putpubfh, nullfree, RPC_ALL},

	/* OP_PUTROOTFH = 24 */
	{rfs4_op_putrootfh, nullfree, RPC_ALL},

	/* OP_READ = 25 */
	{rfs4_op_read, rfs4_op_read_free, RPC_IDEMPOTENT},

	/* OP_READDIR = 26 */
	{rfs4_op_readdir, rfs4_op_readdir_free, RPC_IDEMPOTENT},

	/* OP_READLINK = 27 */
	{rfs4_op_readlink, rfs4_op_readlink_free, RPC_IDEMPOTENT},

	/* OP_REMOVE = 28 */
	{rfs4_op_remove, nullfree, 0},

	/* OP_RENAME = 29 */
	{rfs4_op_rename, nullfree, 0},

	/* OP_RENEW = 30 */
	{rfs4_op_renew, nullfree, 0},

	/* OP_RESTOREFH = 31 */
	{rfs4_op_restorefh, nullfree, RPC_ALL},

	/* OP_SAVEFH = 32 */
	{rfs4_op_savefh, nullfree, RPC_ALL},

	/* OP_SECINFO = 33 */
	{rfs4_op_secinfo, rfs4_op_secinfo_free, 0},

	/* OP_SETATTR = 34 */
	{rfs4_op_setattr, nullfree, 0},

	/* OP_SETCLIENTID = 35 */
	{rfs4_op_setclientid, nullfree, 0},

	/* OP_SETCLIENTID_CONFIRM = 36 */
	{rfs4_op_setclientid_confirm, nullfree, 0},

	/* OP_VERIFY = 37 */
	{rfs4_op_verify, nullfree, RPC_IDEMPOTENT},

	/* OP_WRITE = 38 */
	{rfs4_op_write, nullfree, 0},

	/* OP_RELEASE_LOCKOWNER = 39 */
	{rfs4_op_release_lockowner, nullfree, 0},
};
414
/* Number of entries in rfsv4disptab[]. */
static uint_t rfsv4disp_cnt = sizeof (rfsv4disptab) / sizeof (rfsv4disptab[0]);

/*
 * Index one past the last dispatch table entry.  In rfs4_op_string[] that
 * position holds the "rfs4_op_illegal" name.
 */
#define	OP_ILLEGAL_IDX (rfsv4disp_cnt)
418
#ifdef DEBUG

/*
 * DEBUG-only knobs.  Their consumers are not in this part of the file.
 */
int		rfs4_fillone_debug = 0;
int		rfs4_no_stub_access = 1;
int		rfs4_rddir_debug = 0;

/*
 * Printable names for the operations, in the same order as rfsv4disptab[],
 * followed by one extra entry (index OP_ILLEGAL_IDX) for illegal ops.
 */
static char    *rfs4_op_string[] = {
	"rfs4_op_null",
	"rfs4_op_1 unused",
	"rfs4_op_2 unused",
	"rfs4_op_access",
	"rfs4_op_close",
	"rfs4_op_commit",
	"rfs4_op_create",
	"rfs4_op_delegpurge",
	"rfs4_op_delegreturn",
	"rfs4_op_getattr",
	"rfs4_op_getfh",
	"rfs4_op_link",
	"rfs4_op_lock",
	"rfs4_op_lockt",
	"rfs4_op_locku",
	"rfs4_op_lookup",
	"rfs4_op_lookupp",
	"rfs4_op_nverify",
	"rfs4_op_open",
	"rfs4_op_openattr",
	"rfs4_op_open_confirm",
	"rfs4_op_open_downgrade",
	"rfs4_op_putfh",
	"rfs4_op_putpubfh",
	"rfs4_op_putrootfh",
	"rfs4_op_read",
	"rfs4_op_readdir",
	"rfs4_op_readlink",
	"rfs4_op_remove",
	"rfs4_op_rename",
	"rfs4_op_renew",
	"rfs4_op_restorefh",
	"rfs4_op_savefh",
	"rfs4_op_secinfo",
	"rfs4_op_setattr",
	"rfs4_op_setclientid",
	"rfs4_op_setclient_confirm",
	"rfs4_op_verify",
	"rfs4_op_write",
	"rfs4_op_release_lockowner",
	"rfs4_op_illegal"
};
#endif
469
/* Declarations of routines defined outside this part of the file. */
void	rfs4_ss_chkclid(nfs4_srv_t *, rfs4_client_t *);

extern size_t   strlcpy(char *dst, const char *src, size_t dstsize);

extern void	rfs4_free_fs_locations4(fs_locations4 *);

/*
 * Step to the next dirent64 in a buffer by advancing d_reclen bytes from
 * dp.  Any earlier definition of nextdp is discarded first.
 */
#ifdef	nextdp
#undef nextdp
#endif
#define	nextdp(dp)	((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
480
/*
 * Vnode operations intercepted while a read delegation is in place.
 * Passed to fem_create() in rfs4_srvrinit() to build deleg_rdops.
 */
static const fs_operation_def_t nfs4_rd_deleg_tmpl[] = {
	VOPNAME_OPEN,		{ .femop_open = deleg_rd_open },
	VOPNAME_WRITE,		{ .femop_write = deleg_rd_write },
	VOPNAME_SETATTR,	{ .femop_setattr = deleg_rd_setattr },
	VOPNAME_RWLOCK,		{ .femop_rwlock = deleg_rd_rwlock },
	VOPNAME_SPACE,		{ .femop_space = deleg_rd_space },
	VOPNAME_SETSECATTR,	{ .femop_setsecattr = deleg_rd_setsecattr },
	VOPNAME_VNEVENT,	{ .femop_vnevent = deleg_rd_vnevent },
	NULL,			NULL
};
/*
 * Vnode operations intercepted while a write delegation is in place; unlike
 * the read template this one also hooks VOPNAME_READ.  Passed to
 * fem_create() in rfs4_srvrinit() to build deleg_wrops.
 */
static const fs_operation_def_t nfs4_wr_deleg_tmpl[] = {
	VOPNAME_OPEN,		{ .femop_open = deleg_wr_open },
	VOPNAME_READ,		{ .femop_read = deleg_wr_read },
	VOPNAME_WRITE,		{ .femop_write = deleg_wr_write },
	VOPNAME_SETATTR,	{ .femop_setattr = deleg_wr_setattr },
	VOPNAME_RWLOCK,		{ .femop_rwlock = deleg_wr_rwlock },
	VOPNAME_SPACE,		{ .femop_space = deleg_wr_space },
	VOPNAME_SETSECATTR,	{ .femop_setsecattr = deleg_wr_setsecattr },
	VOPNAME_VNEVENT,	{ .femop_vnevent = deleg_wr_vnevent },
	NULL,			NULL
};
502
503 nfs4_srv_t *
504 nfs4_get_srv(void)
505 {
506 nfs_globals_t *ng = zone_getspecific(nfssrv_zone_key, curzone);
507 nfs4_srv_t *srv = ng->nfs4_srv;
508 ASSERT(srv != NULL);
509 return (srv);
510 }
511
512 void
513 rfs4_srv_zone_init(nfs_globals_t *ng)
514 {
515 nfs4_srv_t *nsrv4;
516 timespec32_t verf;
517
518 nsrv4 = kmem_zalloc(sizeof (*nsrv4), KM_SLEEP);
519
520 /*
521 * The following algorithm attempts to find a unique verifier
522 * to be used as the write verifier returned from the server
523 * to the client. It is important that this verifier change
524 * whenever the server reboots. Of secondary importance, it
525 * is important for the verifier to be unique between two
526 * different servers.
527 *
528 * Thus, an attempt is made to use the system hostid and the
529 * current time in seconds when the nfssrv kernel module is
530 * loaded. It is assumed that an NFS server will not be able
531 * to boot and then to reboot in less than a second. If the
532 * hostid has not been set, then the current high resolution
533 * time is used. This will ensure different verifiers each
534 * time the server reboots and minimize the chances that two
535 * different servers will have the same verifier.
536 * XXX - this is broken on LP64 kernels.
537 */
538 verf.tv_sec = (time_t)zone_get_hostid(NULL);
539 if (verf.tv_sec != 0) {
540 verf.tv_nsec = gethrestime_sec();
541 } else {
542 timespec_t tverf;
543
544 gethrestime(&tverf);
545 verf.tv_sec = (time_t)tverf.tv_sec;
546 verf.tv_nsec = tverf.tv_nsec;
547 }
548 nsrv4->write4verf = *(uint64_t *)&verf;
549
550 /* Used to manage create/destroy of server state */
551 nsrv4->nfs4_server_state = NULL;
552 nsrv4->nfs4_cur_servinst = NULL;
553 nsrv4->nfs4_deleg_policy = SRV_NEVER_DELEGATE;
554 mutex_init(&nsrv4->deleg_lock, NULL, MUTEX_DEFAULT, NULL);
555 mutex_init(&nsrv4->state_lock, NULL, MUTEX_DEFAULT, NULL);
556 mutex_init(&nsrv4->servinst_lock, NULL, MUTEX_DEFAULT, NULL);
557 rw_init(&nsrv4->deleg_policy_lock, NULL, RW_DEFAULT, NULL);
558
559 ng->nfs4_srv = nsrv4;
560 }
561
562 void
563 rfs4_srv_zone_fini(nfs_globals_t *ng)
564 {
565 nfs4_srv_t *nsrv4 = ng->nfs4_srv;
566
567 ng->nfs4_srv = NULL;
568
569 mutex_destroy(&nsrv4->deleg_lock);
570 mutex_destroy(&nsrv4->state_lock);
571 mutex_destroy(&nsrv4->servinst_lock);
572 rw_destroy(&nsrv4->deleg_policy_lock);
573
574 kmem_free(nsrv4, sizeof (*nsrv4));
575 }
576
577 void
578 rfs4_srvrinit(void)
579 {
580 extern void rfs4_attr_init();
581
582 rfs4_attr_init();
583
584 if (fem_create("deleg_rdops", nfs4_rd_deleg_tmpl, &deleg_rdops) != 0) {
585 rfs4_disable_delegation();
586 } else if (fem_create("deleg_wrops", nfs4_wr_deleg_tmpl,
587 &deleg_wrops) != 0) {
588 rfs4_disable_delegation();
589 fem_free(deleg_rdops);
590 }
591
592 nfs4_srv_caller_id = fs_new_caller_id();
593 lockt_sysid = lm_alloc_sysidt();
594 vsd_create(&nfs4_srv_vkey, NULL);
595 rfs4_state_g_init();
596 }
597
598 void
599 rfs4_srvrfini(void)
600 {
601 if (lockt_sysid != LM_NOSYSID) {
602 lm_free_sysidt(lockt_sysid);
603 lockt_sysid = LM_NOSYSID;
604 }
605
606 rfs4_state_g_fini();
607
608 fem_free(deleg_rdops);
609 fem_free(deleg_wrops);
610 }
611
612 void
613 rfs4_do_server_start(int server_upordown,
614 int srv_delegation, int cluster_booted)
615 {
616 nfs4_srv_t *nsrv4 = nfs4_get_srv();
617
618 /* Is this a warm start? */
619 if (server_upordown == NFS_SERVER_QUIESCED) {
620 cmn_err(CE_NOTE, "nfs4_srv: "
621 "server was previously quiesced; "
622 "existing NFSv4 state will be re-used");
623
624 /*
625 * HA-NFSv4: this is also the signal
626 * that a Resource Group failover has
627 * occurred.
628 */
629 if (cluster_booted)
630 hanfsv4_failover(nsrv4);
631 } else {
632 /* Cold start */
633 nsrv4->rfs4_start_time = 0;
634 rfs4_state_zone_init(nsrv4);
635 nsrv4->nfs4_drc = rfs4_init_drc(nfs4_drc_max,
636 nfs4_drc_hash);
637
638 /*
639 * The nfsd service was started with the -s option
640 * we need to pull in any state from the paths indicated.
641 */
642 if (curzone == global_zone && rfs4_dss_numnewpaths > 0) {
643 /* read in the stable storage state from these paths */
644 rfs4_dss_readstate(nsrv4, rfs4_dss_numnewpaths,
645 rfs4_dss_newpaths);
646 }
647 }
648
649 /* Check if delegation is to be enabled */
650 if (srv_delegation != FALSE)
651 rfs4_set_deleg_policy(nsrv4, SRV_NORMAL_DELEGATE);
652 }
653
654 void
655 rfs4_init_compound_state(struct compound_state *cs)
656 {
657 bzero(cs, sizeof (*cs));
658 cs->cont = TRUE;
659 cs->access = CS_ACCESS_DENIED;
660 cs->deleg = FALSE;
661 cs->mandlock = FALSE;
662 cs->fh.nfs_fh4_val = cs->fhbuf;
663 }
664
665 void
666 rfs4_grace_start(rfs4_servinst_t *sip)
667 {
668 rw_enter(&sip->rwlock, RW_WRITER);
669 sip->start_time = (time_t)TICK_TO_SEC(ddi_get_lbolt());
670 sip->grace_period = rfs4_grace_period;
671 rw_exit(&sip->rwlock);
672 }
673
674 /*
675 * returns true if the instance's grace period has never been started
676 */
677 int
678 rfs4_servinst_grace_new(rfs4_servinst_t *sip)
679 {
680 time_t start_time;
681
682 rw_enter(&sip->rwlock, RW_READER);
683 start_time = sip->start_time;
684 rw_exit(&sip->rwlock);
685
686 return (start_time == 0);
687 }
688
689 /*
690 * Indicates if server instance is within the
691 * grace period.
692 */
693 int
694 rfs4_servinst_in_grace(rfs4_servinst_t *sip)
695 {
696 time_t grace_expiry;
697
698 rw_enter(&sip->rwlock, RW_READER);
699 grace_expiry = sip->start_time + sip->grace_period;
700 rw_exit(&sip->rwlock);
701
702 return (((time_t)TICK_TO_SEC(ddi_get_lbolt())) < grace_expiry);
703 }
704
705 int
706 rfs4_clnt_in_grace(rfs4_client_t *cp)
707 {
708 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
709
710 return (rfs4_servinst_in_grace(cp->rc_server_instance));
711 }
712
713 /*
714 * reset all currently active grace periods
715 */
716 void
717 rfs4_grace_reset_all(nfs4_srv_t *nsrv4)
718 {
719 rfs4_servinst_t *sip;
720
721 mutex_enter(&nsrv4->servinst_lock);
722 for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev)
723 if (rfs4_servinst_in_grace(sip))
724 rfs4_grace_start(sip);
725 mutex_exit(&nsrv4->servinst_lock);
726 }
727
728 /*
729 * start any new instances' grace periods
730 */
731 void
732 rfs4_grace_start_new(nfs4_srv_t *nsrv4)
733 {
734 rfs4_servinst_t *sip;
735
736 mutex_enter(&nsrv4->servinst_lock);
737 for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev)
738 if (rfs4_servinst_grace_new(sip))
739 rfs4_grace_start(sip);
740 mutex_exit(&nsrv4->servinst_lock);
741 }
742
743 static rfs4_dss_path_t *
744 rfs4_dss_newpath(nfs4_srv_t *nsrv4, rfs4_servinst_t *sip,
745 char *path, unsigned index)
746 {
747 size_t len;
748 rfs4_dss_path_t *dss_path;
749
750 dss_path = kmem_alloc(sizeof (rfs4_dss_path_t), KM_SLEEP);
751
752 /*
753 * Take a copy of the string, since the original may be overwritten.
754 * Sadly, no strdup() in the kernel.
755 */
756 /* allow for NUL */
757 len = strlen(path) + 1;
758 dss_path->path = kmem_alloc(len, KM_SLEEP);
759 (void) strlcpy(dss_path->path, path, len);
760
761 /* associate with servinst */
762 dss_path->sip = sip;
763 dss_path->index = index;
764
765 /*
766 * Add to list of served paths.
767 * No locking required, as we're only ever called at startup.
768 */
769 if (nsrv4->dss_pathlist == NULL) {
770 /* this is the first dss_path_t */
771
772 /* needed for insque/remque */
773 dss_path->next = dss_path->prev = dss_path;
774
775 nsrv4->dss_pathlist = dss_path;
776 } else {
777 insque(dss_path, nsrv4->dss_pathlist);
778 }
779
780 return (dss_path);
781 }
782
783 /*
784 * Create a new server instance, and make it the currently active instance.
785 * Note that starting the grace period too early will reduce the clients'
786 * recovery window.
787 */
788 void
789 rfs4_servinst_create(nfs4_srv_t *nsrv4, int start_grace,
790 int dss_npaths, char **dss_paths)
791 {
792 unsigned i;
793 rfs4_servinst_t *sip;
794 rfs4_oldstate_t *oldstate;
795
796 sip = kmem_alloc(sizeof (rfs4_servinst_t), KM_SLEEP);
797 rw_init(&sip->rwlock, NULL, RW_DEFAULT, NULL);
798
799 sip->start_time = (time_t)0;
800 sip->grace_period = (time_t)0;
801 sip->next = NULL;
802 sip->prev = NULL;
803
804 rw_init(&sip->oldstate_lock, NULL, RW_DEFAULT, NULL);
805 /*
806 * This initial dummy entry is required to setup for insque/remque.
807 * It must be skipped over whenever the list is traversed.
808 */
809 oldstate = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP);
810 /* insque/remque require initial list entry to be self-terminated */
811 oldstate->next = oldstate;
812 oldstate->prev = oldstate;
813 sip->oldstate = oldstate;
814
815
816 sip->dss_npaths = dss_npaths;
817 sip->dss_paths = kmem_alloc(dss_npaths *
818 sizeof (rfs4_dss_path_t *), KM_SLEEP);
819
820 for (i = 0; i < dss_npaths; i++) {
821 /* CSTYLED */
822 sip->dss_paths[i] = rfs4_dss_newpath(nsrv4, sip, dss_paths[i], i);
823 }
824
825 mutex_enter(&nsrv4->servinst_lock);
826 if (nsrv4->nfs4_cur_servinst != NULL) {
827 /* add to linked list */
828 sip->prev = nsrv4->nfs4_cur_servinst;
829 nsrv4->nfs4_cur_servinst->next = sip;
830 }
831 if (start_grace)
832 rfs4_grace_start(sip);
833 /* make the new instance "current" */
834 nsrv4->nfs4_cur_servinst = sip;
835
836 mutex_exit(&nsrv4->servinst_lock);
837 }
838
839 /*
840 * In future, we might add a rfs4_servinst_destroy(sip) but, for now, destroy
841 * all instances directly.
842 */
843 void
844 rfs4_servinst_destroy_all(nfs4_srv_t *nsrv4)
845 {
846 rfs4_servinst_t *sip, *prev, *current;
847 #ifdef DEBUG
848 int n = 0;
849 #endif
850
851 mutex_enter(&nsrv4->servinst_lock);
852 ASSERT(nsrv4->nfs4_cur_servinst != NULL);
853 current = nsrv4->nfs4_cur_servinst;
854 nsrv4->nfs4_cur_servinst = NULL;
855 for (sip = current; sip != NULL; sip = prev) {
856 prev = sip->prev;
857 rw_destroy(&sip->rwlock);
858 if (sip->oldstate)
859 kmem_free(sip->oldstate, sizeof (rfs4_oldstate_t));
860 if (sip->dss_paths)
861 kmem_free(sip->dss_paths,
862 sip->dss_npaths * sizeof (rfs4_dss_path_t *));
863 kmem_free(sip, sizeof (rfs4_servinst_t));
864 #ifdef DEBUG
865 n++;
866 #endif
867 }
868 mutex_exit(&nsrv4->servinst_lock);
869 }
870
871 /*
872 * Assign the current server instance to a client_t.
873 * Should be called with cp->rc_dbe held.
874 */
875 void
876 rfs4_servinst_assign(nfs4_srv_t *nsrv4, rfs4_client_t *cp,
877 rfs4_servinst_t *sip)
878 {
879 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
880
881 /*
882 * The lock ensures that if the current instance is in the process
883 * of changing, we will see the new one.
884 */
885 mutex_enter(&nsrv4->servinst_lock);
886 cp->rc_server_instance = sip;
887 mutex_exit(&nsrv4->servinst_lock);
888 }
889
890 rfs4_servinst_t *
891 rfs4_servinst(rfs4_client_t *cp)
892 {
893 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
894
895 return (cp->rc_server_instance);
896 }
897
/*
 * Result-free routine for operations that have nothing to release.  It is
 * the dis_resfree entry for most slots of rfsv4disptab[]; the body is
 * intentionally empty.
 */
/* ARGSUSED */
static void
nullfree(caddr_t resop)
{
}
903
904 /*
905 * This is a fall-through for invalid or not implemented (yet) ops
906 */
907 /* ARGSUSED */
908 static void
909 rfs4_op_inval(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
910 struct compound_state *cs)
911 {
912 *cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_INVAL;
913 }
914
915 /*
916 * Check if the security flavor, nfsnum, is in the flavor_list.
917 */
918 bool_t
919 in_flavor_list(int nfsnum, int *flavor_list, int count)
920 {
921 int i;
922
923 for (i = 0; i < count; i++) {
924 if (nfsnum == flavor_list[i])
925 return (TRUE);
926 }
927 return (FALSE);
928 }
929
930 /*
931 * Used by rfs4_op_secinfo to get the security information from the
932 * export structure associated with the component.
933 */
934 /* ARGSUSED */
935 static nfsstat4
936 do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
937 {
938 int error, different_export = 0;
939 vnode_t *dvp, *vp;
940 struct exportinfo *exi = NULL;
941 fid_t fid;
942 uint_t count, i;
943 secinfo4 *resok_val;
944 struct secinfo *secp;
945 seconfig_t *si;
946 bool_t did_traverse = FALSE;
947 int dotdot, walk;
948 nfs_export_t *ne = nfs_get_export();
949
950 dvp = cs->vp;
951 dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
952
953 /*
954 * If dotdotting, then need to check whether it's above the
955 * root of a filesystem, or above an export point.
956 */
957 if (dotdot) {
958
959 /*
960 * If dotdotting at the root of a filesystem, then
961 * need to traverse back to the mounted-on filesystem
962 * and do the dotdot lookup there.
963 */
964 if (cs->vp->v_flag & VROOT) {
965
966 /*
967 * If at the system root, then can
968 * go up no further.
969 */
970 if (VN_CMP(dvp, ZONE_ROOTVP()))
971 return (puterrno4(ENOENT));
972
973 /*
974 * Traverse back to the mounted-on filesystem
975 */
976 dvp = untraverse(cs->vp);
977
978 /*
979 * Set the different_export flag so we remember
980 * to pick up a new exportinfo entry for
981 * this new filesystem.
982 */
983 different_export = 1;
984 } else {
985
986 /*
987 * If dotdotting above an export point then set
988 * the different_export to get new export info.
989 */
990 different_export = nfs_exported(cs->exi, cs->vp);
991 }
992 }
993
994 /*
995 * Get the vnode for the component "nm".
996 */
997 error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cs->cr,
998 NULL, NULL, NULL);
999 if (error)
1000 return (puterrno4(error));
1001
1002 /*
1003 * If the vnode is in a pseudo filesystem, or if the security flavor
1004 * used in the request is valid but not an explicitly shared flavor,
1005 * or the access bit indicates that this is a limited access,
1006 * check whether this vnode is visible.
1007 */
1008 if (!different_export &&
1009 (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
1010 cs->access & CS_ACCESS_LIMITED)) {
1011 if (! nfs_visible(cs->exi, vp, &different_export)) {
1012 VN_RELE(vp);
1013 return (puterrno4(ENOENT));
1014 }
1015 }
1016
1017 /*
1018 * If it's a mountpoint, then traverse it.
1019 */
1020 if (vn_ismntpt(vp)) {
1021 if ((error = traverse(&vp)) != 0) {
1022 VN_RELE(vp);
1023 return (puterrno4(error));
1024 }
1025 /* remember that we had to traverse mountpoint */
1026 did_traverse = TRUE;
1027 different_export = 1;
1028 } else if (vp->v_vfsp != dvp->v_vfsp) {
1029 /*
1030 * If vp isn't a mountpoint and the vfs ptrs aren't the same,
1031 * then vp is probably an LOFS object. We don't need the
1032 * realvp, we just need to know that we might have crossed
1033 * a server fs boundary and need to call checkexport4.
1034 * (LOFS lookup hides server fs mountpoints, and actually calls
1035 * traverse)
1036 */
1037 different_export = 1;
1038 }
1039
1040 /*
1041 * Get the export information for it.
1042 */
1043 if (different_export) {
1044
1045 bzero(&fid, sizeof (fid));
1046 fid.fid_len = MAXFIDSZ;
1047 error = vop_fid_pseudo(vp, &fid);
1048 if (error) {
1049 VN_RELE(vp);
1050 return (puterrno4(error));
1051 }
1052
1053 if (dotdot)
1054 exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
1055 else
1056 exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
1057
1058 if (exi == NULL) {
1059 if (did_traverse == TRUE) {
1060 /*
1061 * If this vnode is a mounted-on vnode,
1062 * but the mounted-on file system is not
1063 * exported, send back the secinfo for
1064 * the exported node that the mounted-on
1065 * vnode lives in.
1066 */
1067 exi = cs->exi;
1068 } else {
1069 VN_RELE(vp);
1070 return (puterrno4(EACCES));
1071 }
1072 }
1073 } else {
1074 exi = cs->exi;
1075 }
1076 ASSERT(exi != NULL);
1077
1078
1079 /*
1080 * Create the secinfo result based on the security information
1081 * from the exportinfo structure (exi).
1082 *
1083 * Return all flavors for a pseudo node.
1084 * For a real export node, return the flavor that the client
1085 * has access with.
1086 */
1087 ASSERT(RW_LOCK_HELD(&ne->exported_lock));
1088 if (PSEUDO(exi)) {
1089 count = exi->exi_export.ex_seccnt; /* total sec count */
1090 resok_val = kmem_alloc(count * sizeof (secinfo4), KM_SLEEP);
1091 secp = exi->exi_export.ex_secinfo;
1092
1093 for (i = 0; i < count; i++) {
1094 si = &secp[i].s_secinfo;
1095 resok_val[i].flavor = si->sc_rpcnum;
1096 if (resok_val[i].flavor == RPCSEC_GSS) {
1097 rpcsec_gss_info *info;
1098
1099 info = &resok_val[i].flavor_info;
1100 info->qop = si->sc_qop;
1101 info->service = (rpc_gss_svc_t)si->sc_service;
1102
1103 /* get oid opaque data */
1104 info->oid.sec_oid4_len =
1105 si->sc_gss_mech_type->length;
1106 info->oid.sec_oid4_val = kmem_alloc(
1107 si->sc_gss_mech_type->length, KM_SLEEP);
1108 bcopy(
1109 si->sc_gss_mech_type->elements,
1110 info->oid.sec_oid4_val,
1111 info->oid.sec_oid4_len);
1112 }
1113 }
1114 resp->SECINFO4resok_len = count;
1115 resp->SECINFO4resok_val = resok_val;
1116 } else {
1117 int ret_cnt = 0, k = 0;
1118 int *flavor_list;
1119
1120 count = exi->exi_export.ex_seccnt; /* total sec count */
1121 secp = exi->exi_export.ex_secinfo;
1122
1123 flavor_list = kmem_alloc(count * sizeof (int), KM_SLEEP);
1124 /* find out which flavors to return */
1125 for (i = 0; i < count; i ++) {
1126 int access, flavor, perm;
1127
1128 flavor = secp[i].s_secinfo.sc_nfsnum;
1129 perm = secp[i].s_flags;
1130
1131 access = nfsauth4_secinfo_access(exi, cs->req,
1132 flavor, perm, cs->basecr);
1133
1134 if (! (access & NFSAUTH_DENIED) &&
1135 ! (access & NFSAUTH_WRONGSEC)) {
1136 flavor_list[ret_cnt] = flavor;
1137 ret_cnt++;
1138 }
1139 }
1140
1141 /* Create the returning SECINFO value */
1142 resok_val = kmem_alloc(ret_cnt * sizeof (secinfo4), KM_SLEEP);
1143
1144 for (i = 0; i < count; i++) {
1145 /*
1146 * If the flavor is in the flavor list,
1147 * fill in resok_val.
1148 */
1149 si = &secp[i].s_secinfo;
1150 if (in_flavor_list(si->sc_nfsnum,
1151 flavor_list, ret_cnt)) {
1152 resok_val[k].flavor = si->sc_rpcnum;
1153 if (resok_val[k].flavor == RPCSEC_GSS) {
1154 rpcsec_gss_info *info;
1155
1156 info = &resok_val[k].flavor_info;
1157 info->qop = si->sc_qop;
1158 info->service = (rpc_gss_svc_t)
1159 si->sc_service;
1160
1161 /* get oid opaque data */
1162 info->oid.sec_oid4_len =
1163 si->sc_gss_mech_type->length;
1164 info->oid.sec_oid4_val = kmem_alloc(
1165 si->sc_gss_mech_type->length,
1166 KM_SLEEP);
1167 bcopy(si->sc_gss_mech_type->elements,
1168 info->oid.sec_oid4_val,
1169 info->oid.sec_oid4_len);
1170 }
1171 k++;
1172 }
1173 if (k >= ret_cnt)
1174 break;
1175 }
1176 resp->SECINFO4resok_len = ret_cnt;
1177 resp->SECINFO4resok_val = resok_val;
1178 kmem_free(flavor_list, count * sizeof (int));
1179 }
1180
1181 VN_RELE(vp);
1182 return (NFS4_OK);
1183 }
1184
1185 /*
1186 * SECINFO (Operation 33): Obtain required security information on
1187 * the component name in the format of (security-mechanism-oid, qop, service)
1188 * triplets.
1189 */
1190 /* ARGSUSED */
1191 static void
1192 rfs4_op_secinfo(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1193 struct compound_state *cs)
1194 {
1195 SECINFO4args *args = &argop->nfs_argop4_u.opsecinfo;
1196 SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1197 utf8string *utfnm = &args->name;
1198 uint_t len;
1199 char *nm;
1200 struct sockaddr *ca;
1201 char *name = NULL;
1202 nfsstat4 status = NFS4_OK;
1203
1204 DTRACE_NFSV4_2(op__secinfo__start, struct compound_state *, cs,
1205 SECINFO4args *, args);
1206
1207 /*
1208 * Current file handle (cfh) should have been set before getting
1209 * into this function. If not, return error.
1210 */
1211 if (cs->vp == NULL) {
1212 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1213 goto out;
1214 }
1215
1216 if (cs->vp->v_type != VDIR) {
1217 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1218 goto out;
1219 }
1220
1221 /*
1222 * Verify the component name. If failed, error out, but
1223 * do not error out if the component name is a "..".
1224 * SECINFO will return its parents secinfo data for SECINFO "..".
1225 */
1226 status = utf8_dir_verify(utfnm);
1227 if (status != NFS4_OK) {
1228 if (utfnm->utf8string_len != 2 ||
1229 utfnm->utf8string_val[0] != '.' ||
1230 utfnm->utf8string_val[1] != '.') {
1231 *cs->statusp = resp->status = status;
1232 goto out;
1233 }
1234 }
1235
1236 nm = utf8_to_str(utfnm, &len, NULL);
1237 if (nm == NULL) {
1238 *cs->statusp = resp->status = NFS4ERR_INVAL;
1239 goto out;
1240 }
1241
1242 if (len > MAXNAMELEN) {
1243 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1244 kmem_free(nm, len);
1245 goto out;
1246 }
1247
1248 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1249 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1250 MAXPATHLEN + 1);
1251
1252 if (name == NULL) {
1253 *cs->statusp = resp->status = NFS4ERR_INVAL;
1254 kmem_free(nm, len);
1255 goto out;
1256 }
1257
1258
1259 *cs->statusp = resp->status = do_rfs4_op_secinfo(cs, name, resp);
1260
1261 if (name != nm)
1262 kmem_free(name, MAXPATHLEN + 1);
1263 kmem_free(nm, len);
1264
1265 out:
1266 DTRACE_NFSV4_2(op__secinfo__done, struct compound_state *, cs,
1267 SECINFO4res *, resp);
1268 }
1269
1270 /*
1271 * Free SECINFO result.
1272 */
1273 /* ARGSUSED */
1274 static void
1275 rfs4_op_secinfo_free(nfs_resop4 *resop)
1276 {
1277 SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1278 int count, i;
1279 secinfo4 *resok_val;
1280
1281 /* If this is not an Ok result, nothing to free. */
1282 if (resp->status != NFS4_OK) {
1283 return;
1284 }
1285
1286 count = resp->SECINFO4resok_len;
1287 resok_val = resp->SECINFO4resok_val;
1288
1289 for (i = 0; i < count; i++) {
1290 if (resok_val[i].flavor == RPCSEC_GSS) {
1291 rpcsec_gss_info *info;
1292
1293 info = &resok_val[i].flavor_info;
1294 kmem_free(info->oid.sec_oid4_val,
1295 info->oid.sec_oid4_len);
1296 }
1297 }
1298 kmem_free(resok_val, count * sizeof (secinfo4));
1299 resp->SECINFO4resok_len = 0;
1300 resp->SECINFO4resok_val = NULL;
1301 }
1302
1303 /* ARGSUSED */
1304 static void
1305 rfs4_op_access(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1306 struct compound_state *cs)
1307 {
1308 ACCESS4args *args = &argop->nfs_argop4_u.opaccess;
1309 ACCESS4res *resp = &resop->nfs_resop4_u.opaccess;
1310 int error;
1311 vnode_t *vp;
1312 struct vattr va;
1313 int checkwriteperm;
1314 cred_t *cr = cs->cr;
1315 bslabel_t *clabel, *slabel;
1316 ts_label_t *tslabel;
1317 boolean_t admin_low_client;
1318
1319 DTRACE_NFSV4_2(op__access__start, struct compound_state *, cs,
1320 ACCESS4args *, args);
1321
1322 #if 0 /* XXX allow access even if !cs->access. Eventually only pseudo fs */
1323 if (cs->access == CS_ACCESS_DENIED) {
1324 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1325 goto out;
1326 }
1327 #endif
1328 if (cs->vp == NULL) {
1329 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1330 goto out;
1331 }
1332
1333 ASSERT(cr != NULL);
1334
1335 vp = cs->vp;
1336
1337 /*
1338 * If the file system is exported read only, it is not appropriate
1339 * to check write permissions for regular files and directories.
1340 * Special files are interpreted by the client, so the underlying
1341 * permissions are sent back to the client for interpretation.
1342 */
1343 if (rdonly4(req, cs) &&
1344 (vp->v_type == VREG || vp->v_type == VDIR))
1345 checkwriteperm = 0;
1346 else
1347 checkwriteperm = 1;
1348
1349 /*
1350 * XXX
1351 * We need the mode so that we can correctly determine access
1352 * permissions relative to a mandatory lock file. Access to
1353 * mandatory lock files is denied on the server, so it might
1354 * as well be reflected to the server during the open.
1355 */
1356 va.va_mask = AT_MODE;
1357 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1358 if (error) {
1359 *cs->statusp = resp->status = puterrno4(error);
1360 goto out;
1361 }
1362 resp->access = 0;
1363 resp->supported = 0;
1364
1365 if (is_system_labeled()) {
1366 ASSERT(req->rq_label != NULL);
1367 clabel = req->rq_label;
1368 DTRACE_PROBE2(tx__rfs4__log__info__opaccess__clabel, char *,
1369 "got client label from request(1)",
1370 struct svc_req *, req);
1371 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1372 if ((tslabel = nfs_getflabel(vp, cs->exi)) == NULL) {
1373 *cs->statusp = resp->status = puterrno4(EACCES);
1374 goto out;
1375 }
1376 slabel = label2bslabel(tslabel);
1377 DTRACE_PROBE3(tx__rfs4__log__info__opaccess__slabel,
1378 char *, "got server label(1) for vp(2)",
1379 bslabel_t *, slabel, vnode_t *, vp);
1380
1381 admin_low_client = B_FALSE;
1382 } else
1383 admin_low_client = B_TRUE;
1384 }
1385
1386 if (args->access & ACCESS4_READ) {
1387 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
1388 if (!error && !MANDLOCK(vp, va.va_mode) &&
1389 (!is_system_labeled() || admin_low_client ||
1390 bldominates(clabel, slabel)))
1391 resp->access |= ACCESS4_READ;
1392 resp->supported |= ACCESS4_READ;
1393 }
1394 if ((args->access & ACCESS4_LOOKUP) && vp->v_type == VDIR) {
1395 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1396 if (!error && (!is_system_labeled() || admin_low_client ||
1397 bldominates(clabel, slabel)))
1398 resp->access |= ACCESS4_LOOKUP;
1399 resp->supported |= ACCESS4_LOOKUP;
1400 }
1401 if (checkwriteperm &&
1402 (args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND))) {
1403 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1404 if (!error && !MANDLOCK(vp, va.va_mode) &&
1405 (!is_system_labeled() || admin_low_client ||
1406 blequal(clabel, slabel)))
1407 resp->access |=
1408 (args->access & (ACCESS4_MODIFY | ACCESS4_EXTEND));
1409 resp->supported |=
1410 resp->access & (ACCESS4_MODIFY | ACCESS4_EXTEND);
1411 }
1412
1413 if (checkwriteperm &&
1414 (args->access & ACCESS4_DELETE) && vp->v_type == VDIR) {
1415 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1416 if (!error && (!is_system_labeled() || admin_low_client ||
1417 blequal(clabel, slabel)))
1418 resp->access |= ACCESS4_DELETE;
1419 resp->supported |= ACCESS4_DELETE;
1420 }
1421 if (args->access & ACCESS4_EXECUTE && vp->v_type != VDIR) {
1422 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1423 if (!error && !MANDLOCK(vp, va.va_mode) &&
1424 (!is_system_labeled() || admin_low_client ||
1425 bldominates(clabel, slabel)))
1426 resp->access |= ACCESS4_EXECUTE;
1427 resp->supported |= ACCESS4_EXECUTE;
1428 }
1429
1430 if (is_system_labeled() && !admin_low_client)
1431 label_rele(tslabel);
1432
1433 *cs->statusp = resp->status = NFS4_OK;
1434 out:
1435 DTRACE_NFSV4_2(op__access__done, struct compound_state *, cs,
1436 ACCESS4res *, resp);
1437 }
1438
1439 /* ARGSUSED */
1440 static void
1441 rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1442 struct compound_state *cs)
1443 {
1444 COMMIT4args *args = &argop->nfs_argop4_u.opcommit;
1445 COMMIT4res *resp = &resop->nfs_resop4_u.opcommit;
1446 int error;
1447 vnode_t *vp = cs->vp;
1448 cred_t *cr = cs->cr;
1449 vattr_t va;
1450 nfs4_srv_t *nsrv4;
1451
1452 DTRACE_NFSV4_2(op__commit__start, struct compound_state *, cs,
1453 COMMIT4args *, args);
1454
1455 if (vp == NULL) {
1456 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1457 goto out;
1458 }
1459 if (cs->access == CS_ACCESS_DENIED) {
1460 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1461 goto out;
1462 }
1463
1464 if (args->offset + args->count < args->offset) {
1465 *cs->statusp = resp->status = NFS4ERR_INVAL;
1466 goto out;
1467 }
1468
1469 va.va_mask = AT_UID;
1470 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1471
1472 /*
1473 * If we can't get the attributes, then we can't do the
1474 * right access checking. So, we'll fail the request.
1475 */
1476 if (error) {
1477 *cs->statusp = resp->status = puterrno4(error);
1478 goto out;
1479 }
1480 if (rdonly4(req, cs)) {
1481 *cs->statusp = resp->status = NFS4ERR_ROFS;
1482 goto out;
1483 }
1484
1485 if (vp->v_type != VREG) {
1486 if (vp->v_type == VDIR)
1487 resp->status = NFS4ERR_ISDIR;
1488 else
1489 resp->status = NFS4ERR_INVAL;
1490 *cs->statusp = resp->status;
1491 goto out;
1492 }
1493
1494 if (crgetuid(cr) != va.va_uid &&
1495 (error = VOP_ACCESS(vp, VWRITE, 0, cs->cr, NULL))) {
1496 *cs->statusp = resp->status = puterrno4(error);
1497 goto out;
1498 }
1499
1500 error = VOP_FSYNC(vp, FSYNC, cr, NULL);
1501
1502 if (error) {
1503 *cs->statusp = resp->status = puterrno4(error);
1504 goto out;
1505 }
1506
1507 nsrv4 = nfs4_get_srv();
1508 *cs->statusp = resp->status = NFS4_OK;
1509 resp->writeverf = nsrv4->write4verf;
1510 out:
1511 DTRACE_NFSV4_2(op__commit__done, struct compound_state *, cs,
1512 COMMIT4res *, resp);
1513 }
1514
1515 /*
1516 * rfs4_op_mknod is called from rfs4_op_create after all initial verification
1517 * was completed. It does the nfsv4 create for special files.
1518 */
1519 /* ARGSUSED */
1520 static vnode_t *
1521 do_rfs4_op_mknod(CREATE4args *args, CREATE4res *resp, struct svc_req *req,
1522 struct compound_state *cs, vattr_t *vap, char *nm)
1523 {
1524 int error;
1525 cred_t *cr = cs->cr;
1526 vnode_t *dvp = cs->vp;
1527 vnode_t *vp = NULL;
1528 int mode;
1529 enum vcexcl excl;
1530
1531 switch (args->type) {
1532 case NF4CHR:
1533 case NF4BLK:
1534 if (secpolicy_sys_devices(cr) != 0) {
1535 *cs->statusp = resp->status = NFS4ERR_PERM;
1536 return (NULL);
1537 }
1538 if (args->type == NF4CHR)
1539 vap->va_type = VCHR;
1540 else
1541 vap->va_type = VBLK;
1542 vap->va_rdev = makedevice(args->ftype4_u.devdata.specdata1,
1543 args->ftype4_u.devdata.specdata2);
1544 vap->va_mask |= AT_RDEV;
1545 break;
1546 case NF4SOCK:
1547 vap->va_type = VSOCK;
1548 break;
1549 case NF4FIFO:
1550 vap->va_type = VFIFO;
1551 break;
1552 default:
1553 *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1554 return (NULL);
1555 }
1556
1557 /*
1558 * Must specify the mode.
1559 */
1560 if (!(vap->va_mask & AT_MODE)) {
1561 *cs->statusp = resp->status = NFS4ERR_INVAL;
1562 return (NULL);
1563 }
1564
1565 excl = EXCL;
1566
1567 mode = 0;
1568
1569 error = VOP_CREATE(dvp, nm, vap, excl, mode, &vp, cr, 0, NULL, NULL);
1570 if (error) {
1571 *cs->statusp = resp->status = puterrno4(error);
1572 return (NULL);
1573 }
1574 return (vp);
1575 }
1576
/*
 * nfsv4 create is used to create non-regular files. For regular files,
 * use nfsv4 open.
 *
 * Accepted object types are NF4LNK, NF4BLK, NF4CHR, NF4SOCK, NF4FIFO and
 * NF4DIR; anything else yields NFS4ERR_BADTYPE.  Directories are made with
 * VOP_MKDIR(), symbolic links with VOP_SYMLINK() followed by a VOP_LOOKUP()
 * of the new name, and the remaining types through do_rfs4_op_mknod().
 *
 * On success the current filehandle (cs->fh) and cs->vp are switched to
 * the new object, resp->cinfo describes the change to the parent directory
 * and resp->attrset lists the requested createattrs that were applied.
 * On failure resp->status and *cs->statusp carry the error and
 * resp->attrset is cleared.
 */
/* ARGSUSED */
static void
rfs4_op_create(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
    struct compound_state *cs)
{
	CREATE4args *args = &argop->nfs_argop4_u.opcreate;
	CREATE4res *resp = &resop->nfs_resop4_u.opcreate;
	int error;
	/*
	 * Parent directory attributes sampled at different times:
	 * bva - before the create, iva - right after the create,
	 * iva2 - after the symlink lookup, ava - after the directory fsync.
	 */
	struct vattr bva, iva, iva2, ava, *vap;
	cred_t *cr = cs->cr;
	vnode_t *dvp = cs->vp;
	vnode_t *vp = NULL;
	vnode_t *realvp;
	char *nm, *lnm;
	uint_t len, llen;
	int syncval = 0;
	struct nfs4_svgetit_arg sarg;
	struct nfs4_ntov_table ntov;
	struct statvfs64 sb;
	nfsstat4 status;
	struct sockaddr *ca;
	char *name = NULL;
	char *lname = NULL;

	DTRACE_NFSV4_2(op__create__start, struct compound_state *, cs,
	    CREATE4args *, args);

	resp->attrset = 0;

	if (dvp == NULL) {
		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
		goto out;
	}

	/*
	 * If there is an unshared filesystem mounted on this vnode,
	 * do not allow to create an object in this directory.
	 */
	if (vn_ismntpt(dvp)) {
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		goto out;
	}

	/* Verify that type is correct */
	switch (args->type) {
	case NF4LNK:
	case NF4BLK:
	case NF4CHR:
	case NF4SOCK:
	case NF4FIFO:
	case NF4DIR:
		break;
	default:
		*cs->statusp = resp->status = NFS4ERR_BADTYPE;
		goto out;
	};

	if (cs->access == CS_ACCESS_DENIED) {
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		goto out;
	}
	if (dvp->v_type != VDIR) {
		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
		goto out;
	}
	status = utf8_dir_verify(&args->objname);
	if (status != NFS4_OK) {
		*cs->statusp = resp->status = status;
		goto out;
	}

	if (rdonly4(req, cs)) {
		*cs->statusp = resp->status = NFS4ERR_ROFS;
		goto out;
	}

	/*
	 * Name of newly created object
	 */
	nm = utf8_to_fn(&args->objname, &len, NULL);
	if (nm == NULL) {
		*cs->statusp = resp->status = NFS4ERR_INVAL;
		goto out;
	}

	if (len > MAXNAMELEN) {
		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
		kmem_free(nm, len);
		goto out;
	}

	/*
	 * Convert the name for this client/export.  The cleanup paths below
	 * free "name" (MAXPATHLEN + 1 bytes) only when it differs from "nm".
	 */
	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
	name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
	    MAXPATHLEN + 1);

	if (name == NULL) {
		*cs->statusp = resp->status = NFS4ERR_INVAL;
		kmem_free(nm, len);
		goto out;
	}

	resp->attrset = 0;

	/*
	 * Decode the client's createattrs into sarg.vap.  From here on every
	 * exit path must release the ntov table.
	 */
	sarg.sbp = &sb;
	sarg.is_referral = B_FALSE;
	nfs4_ntov_table_init(&ntov);

	status = do_rfs4_set_attrs(&resp->attrset,
	    &args->createattrs, cs, &sarg, &ntov, NFS4ATTR_SETIT);

	/* A create whose attributes produce an empty vattr mask is invalid. */
	if (sarg.vap->va_mask == 0 && status == NFS4_OK)
		status = NFS4ERR_INVAL;

	if (status != NFS4_OK) {
		*cs->statusp = resp->status = status;
		if (name != nm)
			kmem_free(name, MAXPATHLEN + 1);
		kmem_free(nm, len);
		nfs4_ntov_table_free(&ntov, &sarg);
		resp->attrset = 0;
		goto out;
	}

	/* Get "before" change value */
	bva.va_mask = AT_CTIME|AT_SEQ|AT_MODE;
	error = VOP_GETATTR(dvp, &bva, 0, cr, NULL);
	if (error) {
		*cs->statusp = resp->status = puterrno4(error);
		if (name != nm)
			kmem_free(name, MAXPATHLEN + 1);
		kmem_free(nm, len);
		nfs4_ntov_table_free(&ntov, &sarg);
		resp->attrset = 0;
		goto out;
	}
	NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bva.va_ctime)

	vap = sarg.vap;

	/*
	 * Set the default initial values for attributes when the parent
	 * directory does not have the VSUID/VSGID bit set and they have
	 * not been specified in createattrs.
	 */
	if (!(bva.va_mode & VSUID) && (vap->va_mask & AT_UID) == 0) {
		vap->va_uid = crgetuid(cr);
		vap->va_mask |= AT_UID;
	}
	if (!(bva.va_mode & VSGID) && (vap->va_mask & AT_GID) == 0) {
		vap->va_gid = crgetgid(cr);
		vap->va_mask |= AT_GID;
	}

	/*
	 * Create the object.  "error" is 0 on entry to the switch (the
	 * VOP_GETATTR() above succeeded); a case that fails a VOP call
	 * leaves the errno in "error" and breaks out, and it is converted
	 * to an NFSv4 status after the switch.
	 */
	vap->va_mask |= AT_TYPE;
	switch (args->type) {
	case NF4DIR:
		vap->va_type = VDIR;
		if ((vap->va_mask & AT_MODE) == 0) {
			vap->va_mode = 0700;	/* default: owner rwx only */
			vap->va_mask |= AT_MODE;
		}
		error = VOP_MKDIR(dvp, name, vap, &vp, cr, NULL, 0, NULL);
		if (error)
			break;

		/*
		 * Get the initial "after" sequence number, if it fails,
		 * set to zero
		 */
		iva.va_mask = AT_SEQ;
		if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
			iva.va_seq = 0;
		break;
	case NF4LNK:
		vap->va_type = VLNK;
		if ((vap->va_mask & AT_MODE) == 0) {
			vap->va_mode = 0700;	/* default: owner rwx only */
			vap->va_mask |= AT_MODE;
		}

		/*
		 * symlink names must be treated as data
		 */
		lnm = utf8_to_str((utf8string *)&args->ftype4_u.linkdata,
		    &llen, NULL);

		if (lnm == NULL) {
			*cs->statusp = resp->status = NFS4ERR_INVAL;
			if (name != nm)
				kmem_free(name, MAXPATHLEN + 1);
			kmem_free(nm, len);
			nfs4_ntov_table_free(&ntov, &sarg);
			resp->attrset = 0;
			goto out;
		}

		if (llen > MAXPATHLEN) {
			*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
			if (name != nm)
				kmem_free(name, MAXPATHLEN + 1);
			kmem_free(nm, len);
			kmem_free(lnm, llen);
			nfs4_ntov_table_free(&ntov, &sarg);
			resp->attrset = 0;
			goto out;
		}

		/* The link target gets the same name conversion as "name". */
		lname = nfscmd_convname(ca, cs->exi, lnm,
		    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);

		if (lname == NULL) {
			*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
			if (name != nm)
				kmem_free(name, MAXPATHLEN + 1);
			kmem_free(nm, len);
			kmem_free(lnm, llen);
			nfs4_ntov_table_free(&ntov, &sarg);
			resp->attrset = 0;
			goto out;
		}

		error = VOP_SYMLINK(dvp, name, vap, lname, cr, NULL, 0);
		/* The link target buffers are not needed past this point. */
		if (lname != lnm)
			kmem_free(lname, MAXPATHLEN + 1);
		kmem_free(lnm, llen);
		if (error)
			break;

		/*
		 * Get the initial "after" sequence number, if it fails,
		 * set to zero
		 */
		iva.va_mask = AT_SEQ;
		if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
			iva.va_seq = 0;

		/*
		 * VOP_SYMLINK() is not given a vnode pointer to fill in, so
		 * look the new link up to obtain vp.
		 */
		error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
		    NULL, NULL, NULL);
		if (error)
			break;

		/*
		 * va_seq is not safe over VOP calls, check it again
		 * if it has changed zero out iva to force atomic = FALSE.
		 */
		iva2.va_mask = AT_SEQ;
		if (VOP_GETATTR(dvp, &iva2, 0, cs->cr, NULL) ||
		    iva2.va_seq != iva.va_seq)
			iva.va_seq = 0;
		break;
	default:
		/*
		 * probably a special file.
		 */
		if ((vap->va_mask & AT_MODE) == 0) {
			vap->va_mode = 0600;	/* default: owner rw only */
			vap->va_mask |= AT_MODE;
		}
		syncval = FNODSYNC;
		/*
		 * We know this will only generate one VOP call
		 */
		vp = do_rfs4_op_mknod(args, resp, req, cs, vap, name);

		/* do_rfs4_op_mknod() has already recorded the error status. */
		if (vp == NULL) {
			if (name != nm)
				kmem_free(name, MAXPATHLEN + 1);
			kmem_free(nm, len);
			nfs4_ntov_table_free(&ntov, &sarg);
			resp->attrset = 0;
			goto out;
		}

		/*
		 * Get the initial "after" sequence number, if it fails,
		 * set to zero
		 */
		iva.va_mask = AT_SEQ;
		if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
			iva.va_seq = 0;

		break;
	}
	if (name != nm)
		kmem_free(name, MAXPATHLEN + 1);
	kmem_free(nm, len);

	if (error) {
		*cs->statusp = resp->status = puterrno4(error);
	}

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(dvp, 0, cr, NULL);

	/*
	 * NOTE(review): resp->status is only written on error paths up to
	 * here, so this test presumably relies on the caller handing in a
	 * zeroed (NFS4_OK) result structure - confirm.
	 */
	if (resp->status != NFS4_OK) {
		if (vp != NULL)
			VN_RELE(vp);
		nfs4_ntov_table_free(&ntov, &sarg);
		resp->attrset = 0;
		goto out;
	}

	/*
	 * Finish setup of cinfo response, "before" value already set.
	 * Get "after" change value, if it fails, simply return the
	 * before value.
	 */
	ava.va_mask = AT_CTIME|AT_SEQ;
	if (VOP_GETATTR(dvp, &ava, 0, cr, NULL)) {
		ava.va_ctime = bva.va_ctime;
		ava.va_seq = 0;
	}
	NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, ava.va_ctime);

	/*
	 * True verification that object was created with correct
	 * attrs is impossible.  The attrs could have been changed
	 * immediately after object creation.  If attributes did
	 * not verify, the only recourse for the server is to
	 * destroy the object.  Maybe if some attrs (like gid)
	 * are set incorrectly, the object should be destroyed;
	 * however, seems bad as a default policy.  Do we really
	 * want to destroy an object over one of the times not
	 * verifying correctly?  For these reasons, the server
	 * currently sets bits in attrset for createattrs
	 * that were set; however, no verification is done.
	 *
	 * vmask_to_nmask accounts for vattr bits set on create
	 *	[do_rfs4_set_attrs() only sets resp bits for
	 *	 non-vattr/vfs bits.]
	 * Mask off any bits set by default so as not to return
	 * more attrset bits than were requested in createattrs
	 */
	nfs4_vmask_to_nmask(sarg.vap->va_mask, &resp->attrset);
	resp->attrset &= args->createattrs.attrmask;
	nfs4_ntov_table_free(&ntov, &sarg);

	/* Make the new object's filehandle the current filehandle. */
	error = makefh4(&cs->fh, vp, cs->exi);
	if (error) {
		*cs->statusp = resp->status = puterrno4(error);
	}

	/*
	 * The cinfo.atomic = TRUE only if we got no errors, we have
	 * non-zero va_seq's, and it has incremented by exactly one
	 * during the creation and it didn't change during the VOP_LOOKUP
	 * or VOP_FSYNC.
	 */
	if (!error && bva.va_seq && iva.va_seq && ava.va_seq &&
	    iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
		resp->cinfo.atomic = TRUE;
	else
		resp->cinfo.atomic = FALSE;

	/*
	 * Force modified metadata out to stable storage.
	 *
	 * if a underlying vp exists, pass it to VOP_FSYNC
	 */
	if (VOP_REALVP(vp, &realvp, NULL) == 0)
		(void) VOP_FSYNC(realvp, syncval, cr, NULL);
	else
		(void) VOP_FSYNC(vp, syncval, cr, NULL);

	/* A makefh4() failure is acted upon only after the fsync above. */
	if (resp->status != NFS4_OK) {
		VN_RELE(vp);
		goto out;
	}
	/* Swap the compound's current vnode: drop the parent, keep the child. */
	if (cs->vp)
		VN_RELE(cs->vp);

	cs->vp = vp;
	*cs->statusp = resp->status = NFS4_OK;
out:
	DTRACE_NFSV4_2(op__create__done, struct compound_state *, cs,
	    CREATE4res *, resp);
}
1960
1961 /*ARGSUSED*/
1962 static void
1963 rfs4_op_delegpurge(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1964 struct compound_state *cs)
1965 {
1966 DTRACE_NFSV4_2(op__delegpurge__start, struct compound_state *, cs,
1967 DELEGPURGE4args *, &argop->nfs_argop4_u.opdelegpurge);
1968
1969 rfs4_op_inval(argop, resop, req, cs);
1970
1971 DTRACE_NFSV4_2(op__delegpurge__done, struct compound_state *, cs,
1972 DELEGPURGE4res *, &resop->nfs_resop4_u.opdelegpurge);
1973 }
1974
1975 /*ARGSUSED*/
1976 static void
1977 rfs4_op_delegreturn(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1978 struct compound_state *cs)
1979 {
1980 DELEGRETURN4args *args = &argop->nfs_argop4_u.opdelegreturn;
1981 DELEGRETURN4res *resp = &resop->nfs_resop4_u.opdelegreturn;
1982 rfs4_deleg_state_t *dsp;
1983 nfsstat4 status;
1984
1985 DTRACE_NFSV4_2(op__delegreturn__start, struct compound_state *, cs,
1986 DELEGRETURN4args *, args);
1987
1988 status = rfs4_get_deleg_state(&args->deleg_stateid, &dsp);
1989 resp->status = *cs->statusp = status;
1990 if (status != NFS4_OK)
1991 goto out;
1992
1993 /* Ensure specified filehandle matches */
1994 if (cs->vp != dsp->rds_finfo->rf_vp) {
1995 resp->status = *cs->statusp = NFS4ERR_BAD_STATEID;
1996 } else
1997 rfs4_return_deleg(dsp, FALSE);
1998
1999 rfs4_update_lease(dsp->rds_client);
2000
2001 rfs4_deleg_state_rele(dsp);
2002 out:
2003 DTRACE_NFSV4_2(op__delegreturn__done, struct compound_state *, cs,
2004 DELEGRETURN4res *, resp);
2005 }
2006
2007 /*
2008 * Check to see if a given "flavor" is an explicitly shared flavor.
2009 * The assumption of this routine is the "flavor" is already a valid
2010 * flavor in the secinfo list of "exi".
2011 *
2012 * e.g.
2013 * # share -o sec=flavor1 /export
2014 * # share -o sec=flavor2 /export/home
2015 *
2016 * flavor2 is not an explicitly shared flavor for /export,
2017 * however it is in the secinfo list for /export thru the
2018 * server namespace setup.
2019 */
2020 int
2021 is_exported_sec(int flavor, struct exportinfo *exi)
2022 {
2023 int i;
2024 struct secinfo *sp;
2025
2026 sp = exi->exi_export.ex_secinfo;
2027 for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
2028 if (flavor == sp[i].s_secinfo.sc_nfsnum ||
2029 sp[i].s_secinfo.sc_nfsnum == AUTH_NONE) {
2030 return (SEC_REF_EXPORTED(&sp[i]));
2031 }
2032 }
2033
2034 /* Should not reach this point based on the assumption */
2035 return (0);
2036 }
2037
2038 /*
2039 * Check if the security flavor used in the request matches what is
2040 * required at the export point or at the root pseudo node (exi_root).
2041 *
2042 * returns 1 if there's a match or if exported with AUTH_NONE; 0 otherwise.
2043 *
2044 */
2045 static int
2046 secinfo_match_or_authnone(struct compound_state *cs)
2047 {
2048 int i;
2049 struct secinfo *sp;
2050
2051 /*
2052 * Check cs->nfsflavor (from the request) against
2053 * the current export data in cs->exi.
2054 */
2055 sp = cs->exi->exi_export.ex_secinfo;
2056 for (i = 0; i < cs->exi->exi_export.ex_seccnt; i++) {
2057 if (cs->nfsflavor == sp[i].s_secinfo.sc_nfsnum ||
2058 sp[i].s_secinfo.sc_nfsnum == AUTH_NONE)
2059 return (1);
2060 }
2061
2062 return (0);
2063 }
2064
2065 /*
2066 * Check the access authority for the client and return the correct error.
2067 */
2068 nfsstat4
2069 call_checkauth4(struct compound_state *cs, struct svc_req *req)
2070 {
2071 int authres;
2072
2073 /*
2074 * First, check if the security flavor used in the request
2075 * are among the flavors set in the server namespace.
2076 */
2077 if (!secinfo_match_or_authnone(cs)) {
2078 *cs->statusp = NFS4ERR_WRONGSEC;
2079 return (*cs->statusp);
2080 }
2081
2082 authres = checkauth4(cs, req);
2083
2084 if (authres > 0) {
2085 *cs->statusp = NFS4_OK;
2086 if (! (cs->access & CS_ACCESS_LIMITED))
2087 cs->access = CS_ACCESS_OK;
2088 } else if (authres == 0) {
2089 *cs->statusp = NFS4ERR_ACCESS;
2090 } else if (authres == -2) {
2091 *cs->statusp = NFS4ERR_WRONGSEC;
2092 } else {
2093 *cs->statusp = NFS4ERR_DELAY;
2094 }
2095 return (*cs->statusp);
2096 }
2097
2098 /*
2099 * bitmap4_to_attrmask is called by getattr and readdir.
2100 * It sets up the vattr mask and determines whether vfsstat call is needed
2101 * based on the input bitmap.
2102 * Returns nfsv4 status.
2103 */
2104 static nfsstat4
2105 bitmap4_to_attrmask(bitmap4 breq, struct nfs4_svgetit_arg *sargp)
2106 {
2107 int i;
2108 uint_t va_mask;
2109 struct statvfs64 *sbp = sargp->sbp;
2110
2111 sargp->sbp = NULL;
2112 sargp->flag = 0;
2113 sargp->rdattr_error = NFS4_OK;
2114 sargp->mntdfid_set = FALSE;
2115 if (sargp->cs->vp)
2116 sargp->xattr = get_fh4_flag(&sargp->cs->fh,
2117 FH4_ATTRDIR | FH4_NAMEDATTR);
2118 else
2119 sargp->xattr = 0;
2120
2121 /*
2122 * Set rdattr_error_req to true if return error per
2123 * failed entry rather than fail the readdir.
2124 */
2125 if (breq & FATTR4_RDATTR_ERROR_MASK)
2126 sargp->rdattr_error_req = 1;
2127 else
2128 sargp->rdattr_error_req = 0;
2129
2130 /*
2131 * generate the va_mask
2132 * Handle the easy cases first
2133 */
2134 switch (breq) {
2135 case NFS4_NTOV_ATTR_MASK:
2136 sargp->vap->va_mask = NFS4_NTOV_ATTR_AT_MASK;
2137 return (NFS4_OK);
2138
2139 case NFS4_FS_ATTR_MASK:
2140 sargp->vap->va_mask = NFS4_FS_ATTR_AT_MASK;
2141 sargp->sbp = sbp;
2142 return (NFS4_OK);
2143
2144 case NFS4_NTOV_ATTR_CACHE_MASK:
2145 sargp->vap->va_mask = NFS4_NTOV_ATTR_CACHE_AT_MASK;
2146 return (NFS4_OK);
2147
2148 case FATTR4_LEASE_TIME_MASK:
2149 sargp->vap->va_mask = 0;
2150 return (NFS4_OK);
2151
2152 default:
2153 va_mask = 0;
2154 for (i = 0; i < nfs4_ntov_map_size; i++) {
2155 if ((breq & nfs4_ntov_map[i].fbit) &&
2156 nfs4_ntov_map[i].vbit)
2157 va_mask |= nfs4_ntov_map[i].vbit;
2158 }
2159
2160 /*
2161 * Check is vfsstat is needed
2162 */
2163 if (breq & NFS4_FS_ATTR_MASK)
2164 sargp->sbp = sbp;
2165
2166 sargp->vap->va_mask = va_mask;
2167 return (NFS4_OK);
2168 }
2169 /* NOTREACHED */
2170 }
2171
2172 /*
2173 * bitmap4_get_sysattrs is called by getattr and readdir.
2174 * It calls both VOP_GETATTR and VFS_STATVFS calls to get the attrs.
2175 * Returns nfsv4 status.
2176 */
2177 static nfsstat4
2178 bitmap4_get_sysattrs(struct nfs4_svgetit_arg *sargp)
2179 {
2180 int error;
2181 struct compound_state *cs = sargp->cs;
2182 vnode_t *vp = cs->vp;
2183
2184 if (sargp->sbp != NULL) {
2185 if (error = VFS_STATVFS(vp->v_vfsp, sargp->sbp)) {
2186 sargp->sbp = NULL; /* to identify error */
2187 return (puterrno4(error));
2188 }
2189 }
2190
2191 return (rfs4_vop_getattr(vp, sargp->vap, 0, cs->cr));
2192 }
2193
2194 static void
2195 nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp)
2196 {
2197 ntovp->na = kmem_zalloc(sizeof (union nfs4_attr_u) * nfs4_ntov_map_size,
2198 KM_SLEEP);
2199 ntovp->attrcnt = 0;
2200 ntovp->vfsstat = FALSE;
2201 }
2202
2203 static void
2204 nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
2205 struct nfs4_svgetit_arg *sargp)
2206 {
2207 int i;
2208 union nfs4_attr_u *na;
2209 uint8_t *amap;
2210
2211 /*
2212 * XXX Should do the same checks for whether the bit is set
2213 */
2214 for (i = 0, na = ntovp->na, amap = ntovp->amap;
2215 i < ntovp->attrcnt; i++, na++, amap++) {
2216 (void) (*nfs4_ntov_map[*amap].sv_getit)(
2217 NFS4ATTR_FREEIT, sargp, na);
2218 }
2219 if ((sargp->op == NFS4ATTR_SETIT) || (sargp->op == NFS4ATTR_VERIT)) {
2220 /*
2221 * xdr_free for getattr will be done later
2222 */
2223 for (i = 0, na = ntovp->na, amap = ntovp->amap;
2224 i < ntovp->attrcnt; i++, na++, amap++) {
2225 xdr_free(nfs4_ntov_map[*amap].xfunc, (caddr_t)na);
2226 }
2227 }
2228 kmem_free(ntovp->na, sizeof (union nfs4_attr_u) * nfs4_ntov_map_size);
2229 }
2230
2231 /*
2232 * do_rfs4_op_getattr gets the system attrs and converts into fattr4.
2233 */
2234 static nfsstat4
2235 do_rfs4_op_getattr(bitmap4 breq, fattr4 *fattrp,
2236 struct nfs4_svgetit_arg *sargp)
2237 {
2238 int error = 0;
2239 int i, k;
2240 struct nfs4_ntov_table ntov;
2241 XDR xdr;
2242 ulong_t xdr_size;
2243 char *xdr_attrs;
2244 nfsstat4 status = NFS4_OK;
2245 nfsstat4 prev_rdattr_error = sargp->rdattr_error;
2246 union nfs4_attr_u *na;
2247 uint8_t *amap;
2248
2249 sargp->op = NFS4ATTR_GETIT;
2250 sargp->flag = 0;
2251
2252 fattrp->attrmask = 0;
2253 /* if no bits requested, then return empty fattr4 */
2254 if (breq == 0) {
2255 fattrp->attrlist4_len = 0;
2256 fattrp->attrlist4 = NULL;
2257 return (NFS4_OK);
2258 }
2259
2260 /*
2261 * return NFS4ERR_INVAL when client requests write-only attrs
2262 */
2263 if (breq & (FATTR4_TIME_ACCESS_SET_MASK | FATTR4_TIME_MODIFY_SET_MASK))
2264 return (NFS4ERR_INVAL);
2265
2266 nfs4_ntov_table_init(&ntov);
2267 na = ntov.na;
2268 amap = ntov.amap;
2269
2270 /*
2271 * Now loop to get or verify the attrs
2272 */
2273 for (i = 0; i < nfs4_ntov_map_size; i++) {
2274 if (breq & nfs4_ntov_map[i].fbit) {
2275 if ((*nfs4_ntov_map[i].sv_getit)(
2276 NFS4ATTR_SUPPORTED, sargp, NULL) == 0) {
2277
2278 error = (*nfs4_ntov_map[i].sv_getit)(
2279 NFS4ATTR_GETIT, sargp, na);
2280
2281 /*
2282 * Possible error values:
2283 * >0 if sv_getit failed to
2284 * get the attr; 0 if succeeded;
2285 * <0 if rdattr_error and the
2286 * attribute cannot be returned.
2287 */
2288 if (error && !(sargp->rdattr_error_req))
2289 goto done;
2290 /*
2291 * If error then just for entry
2292 */
2293 if (error == 0) {
2294 fattrp->attrmask |=
2295 nfs4_ntov_map[i].fbit;
2296 *amap++ =
2297 (uint8_t)nfs4_ntov_map[i].nval;
2298 na++;
2299 (ntov.attrcnt)++;
2300 } else if ((error > 0) &&
2301 (sargp->rdattr_error == NFS4_OK)) {
2302 sargp->rdattr_error = puterrno4(error);
2303 }
2304 error = 0;
2305 }
2306 }
2307 }
2308
2309 /*
2310 * If rdattr_error was set after the return value for it was assigned,
2311 * update it.
2312 */
2313 if (prev_rdattr_error != sargp->rdattr_error) {
2314 na = ntov.na;
2315 amap = ntov.amap;
2316 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2317 k = *amap;
2318 if (k < FATTR4_RDATTR_ERROR) {
2319 continue;
2320 }
2321 if ((k == FATTR4_RDATTR_ERROR) &&
2322 ((*nfs4_ntov_map[k].sv_getit)(
2323 NFS4ATTR_SUPPORTED, sargp, NULL) == 0)) {
2324
2325 (void) (*nfs4_ntov_map[k].sv_getit)(
2326 NFS4ATTR_GETIT, sargp, na);
2327 }
2328 break;
2329 }
2330 }
2331
2332 xdr_size = 0;
2333 na = ntov.na;
2334 amap = ntov.amap;
2335 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2336 xdr_size += xdr_sizeof(nfs4_ntov_map[*amap].xfunc, na);
2337 }
2338
2339 fattrp->attrlist4_len = xdr_size;
2340 if (xdr_size) {
2341 /* freed by rfs4_op_getattr_free() */
2342 fattrp->attrlist4 = xdr_attrs = kmem_zalloc(xdr_size, KM_SLEEP);
2343
2344 xdrmem_create(&xdr, xdr_attrs, xdr_size, XDR_ENCODE);
2345
2346 na = ntov.na;
2347 amap = ntov.amap;
2348 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2349 if (!(*nfs4_ntov_map[*amap].xfunc)(&xdr, na)) {
2350 DTRACE_PROBE1(nfss__e__getattr4_encfail,
2351 int, *amap);
2352 status = NFS4ERR_SERVERFAULT;
2353 break;
2354 }
2355 }
2356 /* xdrmem_destroy(&xdrs); */ /* NO-OP */
2357 } else {
2358 fattrp->attrlist4 = NULL;
2359 }
2360 done:
2361
2362 nfs4_ntov_table_free(&ntov, sargp);
2363
2364 if (error != 0)
2365 status = puterrno4(error);
2366
2367 return (status);
2368 }
2369
2370 /* ARGSUSED */
2371 static void
2372 rfs4_op_getattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2373 struct compound_state *cs)
2374 {
2375 GETATTR4args *args = &argop->nfs_argop4_u.opgetattr;
2376 GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2377 struct nfs4_svgetit_arg sarg;
2378 struct statvfs64 sb;
2379 nfsstat4 status;
2380
2381 DTRACE_NFSV4_2(op__getattr__start, struct compound_state *, cs,
2382 GETATTR4args *, args);
2383
2384 if (cs->vp == NULL) {
2385 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2386 goto out;
2387 }
2388
2389 if (cs->access == CS_ACCESS_DENIED) {
2390 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2391 goto out;
2392 }
2393
2394 sarg.sbp = &sb;
2395 sarg.cs = cs;
2396 sarg.is_referral = B_FALSE;
2397
2398 status = bitmap4_to_attrmask(args->attr_request, &sarg);
2399 if (status == NFS4_OK) {
2400
2401 status = bitmap4_get_sysattrs(&sarg);
2402 if (status == NFS4_OK) {
2403
2404 /* Is this a referral? */
2405 if (vn_is_nfs_reparse(cs->vp, cs->cr)) {
2406 /* Older V4 Solaris client sees a link */
2407 if (client_is_downrev(req))
2408 sarg.vap->va_type = VLNK;
2409 else
2410 sarg.is_referral = B_TRUE;
2411 }
2412
2413 status = do_rfs4_op_getattr(args->attr_request,
2414 &resp->obj_attributes, &sarg);
2415 }
2416 }
2417 *cs->statusp = resp->status = status;
2418 out:
2419 DTRACE_NFSV4_2(op__getattr__done, struct compound_state *, cs,
2420 GETATTR4res *, resp);
2421 }
2422
2423 static void
2424 rfs4_op_getattr_free(nfs_resop4 *resop)
2425 {
2426 GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2427
2428 nfs4_fattr4_free(&resp->obj_attributes);
2429 }
2430
2431 /* ARGSUSED */
2432 static void
2433 rfs4_op_getfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2434 struct compound_state *cs)
2435 {
2436 GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2437
2438 DTRACE_NFSV4_1(op__getfh__start, struct compound_state *, cs);
2439
2440 if (cs->vp == NULL) {
2441 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2442 goto out;
2443 }
2444 if (cs->access == CS_ACCESS_DENIED) {
2445 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2446 goto out;
2447 }
2448
2449 /* check for reparse point at the share point */
2450 if (cs->exi->exi_moved || vn_is_nfs_reparse(cs->exi->exi_vp, cs->cr)) {
2451 /* it's all bad */
2452 cs->exi->exi_moved = 1;
2453 *cs->statusp = resp->status = NFS4ERR_MOVED;
2454 DTRACE_PROBE2(nfs4serv__func__referral__shared__moved,
2455 vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2456 return;
2457 }
2458
2459 /* check for reparse point at vp */
2460 if (vn_is_nfs_reparse(cs->vp, cs->cr) && !client_is_downrev(req)) {
2461 /* it's not all bad */
2462 *cs->statusp = resp->status = NFS4ERR_MOVED;
2463 DTRACE_PROBE2(nfs4serv__func__referral__moved,
2464 vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2465 return;
2466 }
2467
2468 resp->object.nfs_fh4_val =
2469 kmem_alloc(cs->fh.nfs_fh4_len, KM_SLEEP);
2470 nfs_fh4_copy(&cs->fh, &resp->object);
2471 *cs->statusp = resp->status = NFS4_OK;
2472 out:
2473 DTRACE_NFSV4_2(op__getfh__done, struct compound_state *, cs,
2474 GETFH4res *, resp);
2475 }
2476
2477 static void
2478 rfs4_op_getfh_free(nfs_resop4 *resop)
2479 {
2480 GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2481
2482 if (resp->status == NFS4_OK &&
2483 resp->object.nfs_fh4_val != NULL) {
2484 kmem_free(resp->object.nfs_fh4_val, resp->object.nfs_fh4_len);
2485 resp->object.nfs_fh4_val = NULL;
2486 resp->object.nfs_fh4_len = 0;
2487 }
2488 }
2489
2490 /*
2491 * illegal: args: void
2492 * res : status (NFS4ERR_OP_ILLEGAL)
2493 */
2494 /* ARGSUSED */
2495 static void
2496 rfs4_op_illegal(nfs_argop4 *argop, nfs_resop4 *resop,
2497 struct svc_req *req, struct compound_state *cs)
2498 {
2499 ILLEGAL4res *resp = &resop->nfs_resop4_u.opillegal;
2500
2501 resop->resop = OP_ILLEGAL;
2502 *cs->statusp = resp->status = NFS4ERR_OP_ILLEGAL;
2503 }
2504
2505 /*
2506 * link: args: SAVED_FH: file, CURRENT_FH: target directory
2507 * res: status. If success - CURRENT_FH unchanged, return change_info
2508 */
2509 /* ARGSUSED */
2510 static void
2511 rfs4_op_link(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2512 struct compound_state *cs)
2513 {
2514 LINK4args *args = &argop->nfs_argop4_u.oplink;
2515 LINK4res *resp = &resop->nfs_resop4_u.oplink;
2516 int error;
2517 vnode_t *vp;
2518 vnode_t *dvp;
2519 struct vattr bdva, idva, adva;
2520 char *nm;
2521 uint_t len;
2522 struct sockaddr *ca;
2523 char *name = NULL;
2524 nfsstat4 status;
2525
2526 DTRACE_NFSV4_2(op__link__start, struct compound_state *, cs,
2527 LINK4args *, args);
2528
2529 /* SAVED_FH: source object */
2530 vp = cs->saved_vp;
2531 if (vp == NULL) {
2532 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2533 goto out;
2534 }
2535
2536 /* CURRENT_FH: target directory */
2537 dvp = cs->vp;
2538 if (dvp == NULL) {
2539 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2540 goto out;
2541 }
2542
2543 /*
2544 * If there is a non-shared filesystem mounted on this vnode,
2545 * do not allow to link any file in this directory.
2546 */
2547 if (vn_ismntpt(dvp)) {
2548 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2549 goto out;
2550 }
2551
2552 if (cs->access == CS_ACCESS_DENIED) {
2553 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2554 goto out;
2555 }
2556
2557 /* Check source object's type validity */
2558 if (vp->v_type == VDIR) {
2559 *cs->statusp = resp->status = NFS4ERR_ISDIR;
2560 goto out;
2561 }
2562
2563 /* Check target directory's type */
2564 if (dvp->v_type != VDIR) {
2565 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2566 goto out;
2567 }
2568
2569 if (cs->saved_exi != cs->exi) {
2570 *cs->statusp = resp->status = NFS4ERR_XDEV;
2571 goto out;
2572 }
2573
2574 status = utf8_dir_verify(&args->newname);
2575 if (status != NFS4_OK) {
2576 *cs->statusp = resp->status = status;
2577 goto out;
2578 }
2579
2580 nm = utf8_to_fn(&args->newname, &len, NULL);
2581 if (nm == NULL) {
2582 *cs->statusp = resp->status = NFS4ERR_INVAL;
2583 goto out;
2584 }
2585
2586 if (len > MAXNAMELEN) {
2587 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2588 kmem_free(nm, len);
2589 goto out;
2590 }
2591
2592 if (rdonly4(req, cs)) {
2593 *cs->statusp = resp->status = NFS4ERR_ROFS;
2594 kmem_free(nm, len);
2595 goto out;
2596 }
2597
2598 /* Get "before" change value */
2599 bdva.va_mask = AT_CTIME|AT_SEQ;
2600 error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
2601 if (error) {
2602 *cs->statusp = resp->status = puterrno4(error);
2603 kmem_free(nm, len);
2604 goto out;
2605 }
2606
2607 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2608 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
2609 MAXPATHLEN + 1);
2610
2611 if (name == NULL) {
2612 *cs->statusp = resp->status = NFS4ERR_INVAL;
2613 kmem_free(nm, len);
2614 goto out;
2615 }
2616
2617 NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
2618
2619 error = VOP_LINK(dvp, vp, name, cs->cr, NULL, 0);
2620
2621 if (nm != name)
2622 kmem_free(name, MAXPATHLEN + 1);
2623 kmem_free(nm, len);
2624
2625 /*
2626 * Get the initial "after" sequence number, if it fails, set to zero
2627 */
2628 idva.va_mask = AT_SEQ;
2629 if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
2630 idva.va_seq = 0;
2631
2632 /*
2633 * Force modified data and metadata out to stable storage.
2634 */
2635 (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
2636 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
2637
2638 if (error) {
2639 *cs->statusp = resp->status = puterrno4(error);
2640 goto out;
2641 }
2642
2643 /*
2644 * Get "after" change value, if it fails, simply return the
2645 * before value.
2646 */
2647 adva.va_mask = AT_CTIME|AT_SEQ;
2648 if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
2649 adva.va_ctime = bdva.va_ctime;
2650 adva.va_seq = 0;
2651 }
2652
2653 NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
2654
2655 /*
2656 * The cinfo.atomic = TRUE only if we have
2657 * non-zero va_seq's, and it has incremented by exactly one
2658 * during the VOP_LINK and it didn't change during the VOP_FSYNC.
2659 */
2660 if (bdva.va_seq && idva.va_seq && adva.va_seq &&
2661 idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
2662 resp->cinfo.atomic = TRUE;
2663 else
2664 resp->cinfo.atomic = FALSE;
2665
2666 *cs->statusp = resp->status = NFS4_OK;
2667 out:
2668 DTRACE_NFSV4_2(op__link__done, struct compound_state *, cs,
2669 LINK4res *, resp);
2670 }
2671
2672 /*
2673 * Used by rfs4_op_lookup and rfs4_op_lookupp to do the actual work.
2674 */
2675
2676 /* ARGSUSED */
2677 static nfsstat4
2678 do_rfs4_op_lookup(char *nm, struct svc_req *req, struct compound_state *cs)
2679 {
2680 int error;
2681 int different_export = 0;
2682 vnode_t *vp, *pre_tvp = NULL, *oldvp = NULL;
2683 struct exportinfo *exi = NULL, *pre_exi = NULL;
2684 nfsstat4 stat;
2685 fid_t fid;
2686 int attrdir, dotdot, walk;
2687 bool_t is_newvp = FALSE;
2688
2689 if (cs->vp->v_flag & V_XATTRDIR) {
2690 attrdir = 1;
2691 ASSERT(get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2692 } else {
2693 attrdir = 0;
2694 ASSERT(! get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2695 }
2696
2697 dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
2698
2699 /*
2700 * If dotdotting, then need to check whether it's
2701 * above the root of a filesystem, or above an
2702 * export point.
2703 */
2704 if (dotdot) {
2705
2706 /*
2707 * If dotdotting at the root of a filesystem, then
2708 * need to traverse back to the mounted-on filesystem
2709 * and do the dotdot lookup there.
2710 */
2711 if (cs->vp->v_flag & VROOT) {
2712
2713 /*
2714 * If at the system root, then can
2715 * go up no further.
2716 */
2717 if (VN_CMP(cs->vp, ZONE_ROOTVP()))
2718 return (puterrno4(ENOENT));
2719
2720 /*
2721 * Traverse back to the mounted-on filesystem
2722 */
2723 cs->vp = untraverse(cs->vp);
2724
2725 /*
2726 * Set the different_export flag so we remember
2727 * to pick up a new exportinfo entry for
2728 * this new filesystem.
2729 */
2730 different_export = 1;
2731 } else {
2732
2733 /*
2734 * If dotdotting above an export point then set
2735 * the different_export to get new export info.
2736 */
2737 different_export = nfs_exported(cs->exi, cs->vp);
2738 }
2739 }
2740
2741 error = VOP_LOOKUP(cs->vp, nm, &vp, NULL, 0, NULL, cs->cr,
2742 NULL, NULL, NULL);
2743 if (error)
2744 return (puterrno4(error));
2745
2746 /*
2747 * If the vnode is in a pseudo filesystem, check whether it is visible.
2748 *
2749 * XXX if the vnode is a symlink and it is not visible in
2750 * a pseudo filesystem, return ENOENT (not following symlink).
2751 * V4 client can not mount such symlink. This is a regression
2752 * from V2/V3.
2753 *
2754 * In the same exported filesystem, if the security flavor used
2755 * is not an explicitly shared flavor, limit the view to the visible
2756 * list entries only. This is not a WRONGSEC case because it's already
2757 * checked via PUTROOTFH/PUTPUBFH or PUTFH.
2758 */
2759 if (!different_export &&
2760 (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
2761 cs->access & CS_ACCESS_LIMITED)) {
2762 if (! nfs_visible(cs->exi, vp, &different_export)) {
2763 VN_RELE(vp);
2764 return (puterrno4(ENOENT));
2765 }
2766 }
2767
2768 /*
2769 * If it's a mountpoint, then traverse it.
2770 */
2771 if (vn_ismntpt(vp)) {
2772 pre_exi = cs->exi; /* save pre-traversed exportinfo */
2773 pre_tvp = vp; /* save pre-traversed vnode */
2774
2775 /*
2776 * hold pre_tvp to counteract rele by traverse. We will
2777 * need pre_tvp below if checkexport4 fails
2778 */
2779 VN_HOLD(pre_tvp);
2780 if ((error = traverse(&vp)) != 0) {
2781 VN_RELE(vp);
2782 VN_RELE(pre_tvp);
2783 return (puterrno4(error));
2784 }
2785 different_export = 1;
2786 } else if (vp->v_vfsp != cs->vp->v_vfsp) {
2787 /*
2788 * The vfsp comparison is to handle the case where
2789 * a LOFS mount is shared. lo_lookup traverses mount points,
2790 * and NFS is unaware of local fs transistions because
2791 * v_vfsmountedhere isn't set. For this special LOFS case,
2792 * the dir and the obj returned by lookup will have different
2793 * vfs ptrs.
2794 */
2795 different_export = 1;
2796 }
2797
2798 if (different_export) {
2799
2800 bzero(&fid, sizeof (fid));
2801 fid.fid_len = MAXFIDSZ;
2802 error = vop_fid_pseudo(vp, &fid);
2803 if (error) {
2804 VN_RELE(vp);
2805 if (pre_tvp)
2806 VN_RELE(pre_tvp);
2807 return (puterrno4(error));
2808 }
2809
2810 if (dotdot)
2811 exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
2812 else
2813 exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
2814
2815 if (exi == NULL) {
2816 if (pre_tvp) {
2817 /*
2818 * If this vnode is a mounted-on vnode,
2819 * but the mounted-on file system is not
2820 * exported, send back the filehandle for
2821 * the mounted-on vnode, not the root of
2822 * the mounted-on file system.
2823 */
2824 VN_RELE(vp);
2825 vp = pre_tvp;
2826 exi = pre_exi;
2827 } else {
2828 VN_RELE(vp);
2829 return (puterrno4(EACCES));
2830 }
2831 } else if (pre_tvp) {
2832 /* we're done with pre_tvp now. release extra hold */
2833 VN_RELE(pre_tvp);
2834 }
2835
2836 cs->exi = exi;
2837
2838 /*
2839 * Now we do a checkauth4. The reason is that
2840 * this client/user may not have access to the new
2841 * exported file system, and if they do,
2842 * the client/user may be mapped to a different uid.
2843 *
2844 * We start with a new cr, because the checkauth4 done
2845 * in the PUT*FH operation over wrote the cred's uid,
2846 * gid, etc, and we want the real thing before calling
2847 * checkauth4()
2848 */
2849 crfree(cs->cr);
2850 cs->cr = crdup(cs->basecr);
2851
2852 oldvp = cs->vp;
2853 cs->vp = vp;
2854 is_newvp = TRUE;
2855
2856 stat = call_checkauth4(cs, req);
2857 if (stat != NFS4_OK) {
2858 VN_RELE(cs->vp);
2859 cs->vp = oldvp;
2860 return (stat);
2861 }
2862 }
2863
2864 /*
2865 * After various NFS checks, do a label check on the path
2866 * component. The label on this path should either be the
2867 * global zone's label or a zone's label. We are only
2868 * interested in the zone's label because exported files
2869 * in global zone is accessible (though read-only) to
2870 * clients. The exportability/visibility check is already
2871 * done before reaching this code.
2872 */
2873 if (is_system_labeled()) {
2874 bslabel_t *clabel;
2875
2876 ASSERT(req->rq_label != NULL);
2877 clabel = req->rq_label;
2878 DTRACE_PROBE2(tx__rfs4__log__info__oplookup__clabel, char *,
2879 "got client label from request(1)", struct svc_req *, req);
2880
2881 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2882 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
2883 cs->exi)) {
2884 error = EACCES;
2885 goto err_out;
2886 }
2887 } else {
2888 /*
2889 * We grant access to admin_low label clients
2890 * only if the client is trusted, i.e. also
2891 * running Solaris Trusted Extension.
2892 */
2893 struct sockaddr *ca;
2894 int addr_type;
2895 void *ipaddr;
2896 tsol_tpc_t *tp;
2897
2898 ca = (struct sockaddr *)svc_getrpccaller(
2899 req->rq_xprt)->buf;
2900 if (ca->sa_family == AF_INET) {
2901 addr_type = IPV4_VERSION;
2902 ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
2903 } else if (ca->sa_family == AF_INET6) {
2904 addr_type = IPV6_VERSION;
2905 ipaddr = &((struct sockaddr_in6 *)
2906 ca)->sin6_addr;
2907 }
2908 tp = find_tpc(ipaddr, addr_type, B_FALSE);
2909 if (tp == NULL || tp->tpc_tp.tp_doi !=
2910 l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
2911 SUN_CIPSO) {
2912 if (tp != NULL)
2913 TPC_RELE(tp);
2914 error = EACCES;
2915 goto err_out;
2916 }
2917 TPC_RELE(tp);
2918 }
2919 }
2920
2921 error = makefh4(&cs->fh, vp, cs->exi);
2922
2923 err_out:
2924 if (error) {
2925 if (is_newvp) {
2926 VN_RELE(cs->vp);
2927 cs->vp = oldvp;
2928 } else
2929 VN_RELE(vp);
2930 return (puterrno4(error));
2931 }
2932
2933 if (!is_newvp) {
2934 if (cs->vp)
2935 VN_RELE(cs->vp);
2936 cs->vp = vp;
2937 } else if (oldvp)
2938 VN_RELE(oldvp);
2939
2940 /*
2941 * if did lookup on attrdir and didn't lookup .., set named
2942 * attr fh flag
2943 */
2944 if (attrdir && ! dotdot)
2945 set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
2946
2947 /* Assume false for now, open proc will set this */
2948 cs->mandlock = FALSE;
2949
2950 return (NFS4_OK);
2951 }
2952
2953 /* ARGSUSED */
2954 static void
2955 rfs4_op_lookup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2956 struct compound_state *cs)
2957 {
2958 LOOKUP4args *args = &argop->nfs_argop4_u.oplookup;
2959 LOOKUP4res *resp = &resop->nfs_resop4_u.oplookup;
2960 char *nm;
2961 uint_t len;
2962 struct sockaddr *ca;
2963 char *name = NULL;
2964 nfsstat4 status;
2965
2966 DTRACE_NFSV4_2(op__lookup__start, struct compound_state *, cs,
2967 LOOKUP4args *, args);
2968
2969 if (cs->vp == NULL) {
2970 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2971 goto out;
2972 }
2973
2974 if (cs->vp->v_type == VLNK) {
2975 *cs->statusp = resp->status = NFS4ERR_SYMLINK;
2976 goto out;
2977 }
2978
2979 if (cs->vp->v_type != VDIR) {
2980 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2981 goto out;
2982 }
2983
2984 status = utf8_dir_verify(&args->objname);
2985 if (status != NFS4_OK) {
2986 *cs->statusp = resp->status = status;
2987 goto out;
2988 }
2989
2990 nm = utf8_to_str(&args->objname, &len, NULL);
2991 if (nm == NULL) {
2992 *cs->statusp = resp->status = NFS4ERR_INVAL;
2993 goto out;
2994 }
2995
2996 if (len > MAXNAMELEN) {
2997 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2998 kmem_free(nm, len);
2999 goto out;
3000 }
3001
3002 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3003 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
3004 MAXPATHLEN + 1);
3005
3006 if (name == NULL) {
3007 *cs->statusp = resp->status = NFS4ERR_INVAL;
3008 kmem_free(nm, len);
3009 goto out;
3010 }
3011
3012 *cs->statusp = resp->status = do_rfs4_op_lookup(name, req, cs);
3013
3014 if (name != nm)
3015 kmem_free(name, MAXPATHLEN + 1);
3016 kmem_free(nm, len);
3017
3018 out:
3019 DTRACE_NFSV4_2(op__lookup__done, struct compound_state *, cs,
3020 LOOKUP4res *, resp);
3021 }
3022
3023 /* ARGSUSED */
3024 static void
3025 rfs4_op_lookupp(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3026 struct compound_state *cs)
3027 {
3028 LOOKUPP4res *resp = &resop->nfs_resop4_u.oplookupp;
3029
3030 DTRACE_NFSV4_1(op__lookupp__start, struct compound_state *, cs);
3031
3032 if (cs->vp == NULL) {
3033 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3034 goto out;
3035 }
3036
3037 if (cs->vp->v_type != VDIR) {
3038 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
3039 goto out;
3040 }
3041
3042 *cs->statusp = resp->status = do_rfs4_op_lookup("..", req, cs);
3043
3044 /*
3045 * From NFSV4 Specification, LOOKUPP should not check for
3046 * NFS4ERR_WRONGSEC. Retrun NFS4_OK instead.
3047 */
3048 if (resp->status == NFS4ERR_WRONGSEC) {
3049 *cs->statusp = resp->status = NFS4_OK;
3050 }
3051
3052 out:
3053 DTRACE_NFSV4_2(op__lookupp__done, struct compound_state *, cs,
3054 LOOKUPP4res *, resp);
3055 }
3056
3057
3058 /*ARGSUSED2*/
3059 static void
3060 rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3061 struct compound_state *cs)
3062 {
3063 OPENATTR4args *args = &argop->nfs_argop4_u.opopenattr;
3064 OPENATTR4res *resp = &resop->nfs_resop4_u.opopenattr;
3065 vnode_t *avp = NULL;
3066 int lookup_flags = LOOKUP_XATTR, error;
3067 int exp_ro = 0;
3068
3069 DTRACE_NFSV4_2(op__openattr__start, struct compound_state *, cs,
3070 OPENATTR4args *, args);
3071
3072 if (cs->vp == NULL) {
3073 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3074 goto out;
3075 }
3076
3077 if ((cs->vp->v_vfsp->vfs_flag & VFS_XATTR) == 0 &&
3078 !vfs_has_feature(cs->vp->v_vfsp, VFSFT_SYSATTR_VIEWS)) {
3079 *cs->statusp = resp->status = puterrno4(ENOTSUP);
3080 goto out;
3081 }
3082
3083 /*
3084 * If file system supports passing ACE mask to VOP_ACCESS then
3085 * check for ACE_READ_NAMED_ATTRS, otherwise do legacy checks
3086 */
3087
3088 if (vfs_has_feature(cs->vp->v_vfsp, VFSFT_ACEMASKONACCESS))
3089 error = VOP_ACCESS(cs->vp, ACE_READ_NAMED_ATTRS,
3090 V_ACE_MASK, cs->cr, NULL);
3091 else
3092 error = ((VOP_ACCESS(cs->vp, VREAD, 0, cs->cr, NULL) != 0) &&
3093 (VOP_ACCESS(cs->vp, VWRITE, 0, cs->cr, NULL) != 0) &&
3094 (VOP_ACCESS(cs->vp, VEXEC, 0, cs->cr, NULL) != 0));
3095
3096 if (error) {
3097 *cs->statusp = resp->status = puterrno4(EACCES);
3098 goto out;
3099 }
3100
3101 /*
3102 * The CREATE_XATTR_DIR VOP flag cannot be specified if
3103 * the file system is exported read-only -- regardless of
3104 * createdir flag. Otherwise the attrdir would be created
3105 * (assuming server fs isn't mounted readonly locally). If
3106 * VOP_LOOKUP returns ENOENT in this case, the error will
3107 * be translated into EROFS. ENOSYS is mapped to ENOTSUP
3108 * because specfs has no VOP_LOOKUP op, so the macro would
3109 * return ENOSYS. EINVAL is returned by all (current)
3110 * Solaris file system implementations when any of their
3111 * restrictions are violated (xattr(dir) can't have xattrdir).
3112 * Returning NOTSUPP is more appropriate in this case
3113 * because the object will never be able to have an attrdir.
3114 */
3115 if (args->createdir && ! (exp_ro = rdonly4(req, cs)))
3116 lookup_flags |= CREATE_XATTR_DIR;
3117
3118 error = VOP_LOOKUP(cs->vp, "", &avp, NULL, lookup_flags, NULL, cs->cr,
3119 NULL, NULL, NULL);
3120
3121 if (error) {
3122 if (error == ENOENT && args->createdir && exp_ro)
3123 *cs->statusp = resp->status = puterrno4(EROFS);
3124 else if (error == EINVAL || error == ENOSYS)
3125 *cs->statusp = resp->status = puterrno4(ENOTSUP);
3126 else
3127 *cs->statusp = resp->status = puterrno4(error);
3128 goto out;
3129 }
3130
3131 ASSERT(avp->v_flag & V_XATTRDIR);
3132
3133 error = makefh4(&cs->fh, avp, cs->exi);
3134
3135 if (error) {
3136 VN_RELE(avp);
3137 *cs->statusp = resp->status = puterrno4(error);
3138 goto out;
3139 }
3140
3141 VN_RELE(cs->vp);
3142 cs->vp = avp;
3143
3144 /*
3145 * There is no requirement for an attrdir fh flag
3146 * because the attrdir has a vnode flag to distinguish
3147 * it from regular (non-xattr) directories. The
3148 * FH4_ATTRDIR flag is set for future sanity checks.
3149 */
3150 set_fh4_flag(&cs->fh, FH4_ATTRDIR);
3151 *cs->statusp = resp->status = NFS4_OK;
3152
3153 out:
3154 DTRACE_NFSV4_2(op__openattr__done, struct compound_state *, cs,
3155 OPENATTR4res *, resp);
3156 }
3157
3158 static int
3159 do_io(int direction, vnode_t *vp, struct uio *uio, int ioflag, cred_t *cred,
3160 caller_context_t *ct)
3161 {
3162 int error;
3163 int i;
3164 clock_t delaytime;
3165
3166 delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
3167
3168 /*
3169 * Don't block on mandatory locks. If this routine returns
3170 * EAGAIN, the caller should return NFS4ERR_LOCKED.
3171 */
3172 uio->uio_fmode = FNONBLOCK;
3173
3174 for (i = 0; i < rfs4_maxlock_tries; i++) {
3175
3176
3177 if (direction == FREAD) {
3178 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, ct);
3179 error = VOP_READ(vp, uio, ioflag, cred, ct);
3180 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, ct);
3181 } else {
3182 (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, ct);
3183 error = VOP_WRITE(vp, uio, ioflag, cred, ct);
3184 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, ct);
3185 }
3186
3187 if (error != EAGAIN)
3188 break;
3189
3190 if (i < rfs4_maxlock_tries - 1) {
3191 delay(delaytime);
3192 delaytime *= 2;
3193 }
3194 }
3195
3196 return (error);
3197 }
3198
3199 /* ARGSUSED */
3200 static void
3201 rfs4_op_read(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3202 struct compound_state *cs)
3203 {
3204 READ4args *args = &argop->nfs_argop4_u.opread;
3205 READ4res *resp = &resop->nfs_resop4_u.opread;
3206 int error;
3207 int verror;
3208 vnode_t *vp;
3209 struct vattr va;
3210 struct iovec iov, *iovp = NULL;
3211 int iovcnt;
3212 struct uio uio;
3213 u_offset_t offset;
3214 bool_t *deleg = &cs->deleg;
3215 nfsstat4 stat;
3216 int in_crit = 0;
3217 mblk_t *mp = NULL;
3218 int alloc_err = 0;
3219 int rdma_used = 0;
3220 int loaned_buffers;
3221 caller_context_t ct;
3222 struct uio *uiop;
3223
3224 DTRACE_NFSV4_2(op__read__start, struct compound_state *, cs,
3225 READ4args, args);
3226
3227 vp = cs->vp;
3228 if (vp == NULL) {
3229 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3230 goto out;
3231 }
3232 if (cs->access == CS_ACCESS_DENIED) {
3233 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3234 goto out;
3235 }
3236
3237 if ((stat = rfs4_check_stateid(FREAD, vp, &args->stateid, FALSE,
3238 deleg, TRUE, &ct)) != NFS4_OK) {
3239 *cs->statusp = resp->status = stat;
3240 goto out;
3241 }
3242
3243 /*
3244 * Enter the critical region before calling VOP_RWLOCK
3245 * to avoid a deadlock with write requests.
3246 */
3247 if (nbl_need_check(vp)) {
3248 nbl_start_crit(vp, RW_READER);
3249 in_crit = 1;
3250 if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
3251 &ct)) {
3252 *cs->statusp = resp->status = NFS4ERR_LOCKED;
3253 goto out;
3254 }
3255 }
3256
3257 if (args->wlist) {
3258 if (args->count > clist_len(args->wlist)) {
3259 *cs->statusp = resp->status = NFS4ERR_INVAL;
3260 goto out;
3261 }
3262 rdma_used = 1;
3263 }
3264
3265 /* use loaned buffers for TCP */
3266 loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;
3267
3268 va.va_mask = AT_MODE|AT_SIZE|AT_UID;
3269 verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3270
3271 /*
3272 * If we can't get the attributes, then we can't do the
3273 * right access checking. So, we'll fail the request.
3274 */
3275 if (verror) {
3276 *cs->statusp = resp->status = puterrno4(verror);
3277 goto out;
3278 }
3279
3280 if (vp->v_type != VREG) {
3281 *cs->statusp = resp->status =
3282 ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
3283 goto out;
3284 }
3285
3286 if (crgetuid(cs->cr) != va.va_uid &&
3287 (error = VOP_ACCESS(vp, VREAD, 0, cs->cr, &ct)) &&
3288 (error = VOP_ACCESS(vp, VEXEC, 0, cs->cr, &ct))) {
3289 *cs->statusp = resp->status = puterrno4(error);
3290 goto out;
3291 }
3292
3293 if (MANDLOCK(vp, va.va_mode)) { /* XXX - V4 supports mand locking */
3294 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3295 goto out;
3296 }
3297
3298 offset = args->offset;
3299 if (offset >= va.va_size) {
3300 *cs->statusp = resp->status = NFS4_OK;
3301 resp->eof = TRUE;
3302 resp->data_len = 0;
3303 resp->data_val = NULL;
3304 resp->mblk = NULL;
3305 /* RDMA */
3306 resp->wlist = args->wlist;
3307 resp->wlist_len = resp->data_len;
3308 *cs->statusp = resp->status = NFS4_OK;
3309 if (resp->wlist)
3310 clist_zero_len(resp->wlist);
3311 goto out;
3312 }
3313
3314 if (args->count == 0) {
3315 *cs->statusp = resp->status = NFS4_OK;
3316 resp->eof = FALSE;
3317 resp->data_len = 0;
3318 resp->data_val = NULL;
3319 resp->mblk = NULL;
3320 /* RDMA */
3321 resp->wlist = args->wlist;
3322 resp->wlist_len = resp->data_len;
3323 if (resp->wlist)
3324 clist_zero_len(resp->wlist);
3325 goto out;
3326 }
3327
3328 /*
3329 * Do not allocate memory more than maximum allowed
3330 * transfer size
3331 */
3332 if (args->count > rfs4_tsize(req))
3333 args->count = rfs4_tsize(req);
3334
3335 if (loaned_buffers) {
3336 uiop = (uio_t *)rfs_setup_xuio(vp);
3337 ASSERT(uiop != NULL);
3338 uiop->uio_segflg = UIO_SYSSPACE;
3339 uiop->uio_loffset = args->offset;
3340 uiop->uio_resid = args->count;
3341
3342 /* Jump to do the read if successful */
3343 if (!VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cs->cr, &ct)) {
3344 /*
3345 * Need to hold the vnode until after VOP_RETZCBUF()
3346 * is called.
3347 */
3348 VN_HOLD(vp);
3349 goto doio_read;
3350 }
3351
3352 DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
3353 uiop->uio_loffset, int, uiop->uio_resid);
3354
3355 uiop->uio_extflg = 0;
3356
3357 /* failure to setup for zero copy */
3358 rfs_free_xuio((void *)uiop);
3359 loaned_buffers = 0;
3360 }
3361
3362 /*
3363 * If returning data via RDMA Write, then grab the chunk list. If we
3364 * aren't returning READ data w/RDMA_WRITE, then grab a mblk.
3365 */
3366 if (rdma_used) {
3367 mp = NULL;
3368 (void) rdma_get_wchunk(req, &iov, args->wlist);
3369 uio.uio_iov = &iov;
3370 uio.uio_iovcnt = 1;
3371 } else {
3372 /*
3373 * mp will contain the data to be sent out in the read reply.
3374 * It will be freed after the reply has been sent.
3375 */
3376 mp = rfs_read_alloc(args->count, &iovp, &iovcnt);
3377 ASSERT(mp != NULL);
3378 ASSERT(alloc_err == 0);
3379 uio.uio_iov = iovp;
3380 uio.uio_iovcnt = iovcnt;
3381 }
3382
3383 uio.uio_segflg = UIO_SYSSPACE;
3384 uio.uio_extflg = UIO_COPY_CACHED;
3385 uio.uio_loffset = args->offset;
3386 uio.uio_resid = args->count;
3387 uiop = &uio;
3388
3389 doio_read:
3390 error = do_io(FREAD, vp, uiop, 0, cs->cr, &ct);
3391
3392 va.va_mask = AT_SIZE;
3393 verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3394
3395 if (error) {
3396 if (mp)
3397 freemsg(mp);
3398 *cs->statusp = resp->status = puterrno4(error);
3399 goto out;
3400 }
3401
3402 /* make mblk using zc buffers */
3403 if (loaned_buffers) {
3404 mp = uio_to_mblk(uiop);
3405 ASSERT(mp != NULL);
3406 }
3407
3408 *cs->statusp = resp->status = NFS4_OK;
3409
3410 ASSERT(uiop->uio_resid >= 0);
3411 resp->data_len = args->count - uiop->uio_resid;
3412 if (mp) {
3413 resp->data_val = (char *)mp->b_datap->db_base;
3414 rfs_rndup_mblks(mp, resp->data_len, loaned_buffers);
3415 } else {
3416 resp->data_val = (caddr_t)iov.iov_base;
3417 }
3418
3419 resp->mblk = mp;
3420
3421 if (!verror && offset + resp->data_len == va.va_size)
3422 resp->eof = TRUE;
3423 else
3424 resp->eof = FALSE;
3425
3426 if (rdma_used) {
3427 if (!rdma_setup_read_data4(args, resp)) {
3428 *cs->statusp = resp->status = NFS4ERR_INVAL;
3429 }
3430 } else {
3431 resp->wlist = NULL;
3432 }
3433
3434 out:
3435 if (in_crit)
3436 nbl_end_crit(vp);
3437
3438 if (iovp != NULL)
3439 kmem_free(iovp, iovcnt * sizeof (struct iovec));
3440
3441 DTRACE_NFSV4_2(op__read__done, struct compound_state *, cs,
3442 READ4res *, resp);
3443 }
3444
3445 static void
3446 rfs4_op_read_free(nfs_resop4 *resop)
3447 {
3448 READ4res *resp = &resop->nfs_resop4_u.opread;
3449
3450 if (resp->status == NFS4_OK && resp->mblk != NULL) {
3451 freemsg(resp->mblk);
3452 resp->mblk = NULL;
3453 resp->data_val = NULL;
3454 resp->data_len = 0;
3455 }
3456 }
3457
3458 static void
3459 rfs4_op_readdir_free(nfs_resop4 * resop)
3460 {
3461 READDIR4res *resp = &resop->nfs_resop4_u.opreaddir;
3462
3463 if (resp->status == NFS4_OK && resp->mblk != NULL) {
3464 freeb(resp->mblk);
3465 resp->mblk = NULL;
3466 resp->data_len = 0;
3467 }
3468 }
3469
3470
3471 /* ARGSUSED */
3472 static void
3473 rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3474 struct compound_state *cs)
3475 {
3476 PUTPUBFH4res *resp = &resop->nfs_resop4_u.opputpubfh;
3477 int error;
3478 vnode_t *vp;
3479 struct exportinfo *exi, *sav_exi;
3480 nfs_fh4_fmt_t *fh_fmtp;
3481 nfs_export_t *ne = nfs_get_export();
3482
3483 DTRACE_NFSV4_1(op__putpubfh__start, struct compound_state *, cs);
3484
3485 if (cs->vp) {
3486 VN_RELE(cs->vp);
3487 cs->vp = NULL;
3488 }
3489
3490 if (cs->cr)
3491 crfree(cs->cr);
3492
3493 cs->cr = crdup(cs->basecr);
3494
3495 vp = ne->exi_public->exi_vp;
3496 if (vp == NULL) {
3497 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3498 goto out;
3499 }
3500
3501 error = makefh4(&cs->fh, vp, ne->exi_public);
3502 if (error != 0) {
3503 *cs->statusp = resp->status = puterrno4(error);
3504 goto out;
3505 }
3506 sav_exi = cs->exi;
3507 if (ne->exi_public == ne->exi_root) {
3508 /*
3509 * No filesystem is actually shared public, so we default
3510 * to exi_root. In this case, we must check whether root
3511 * is exported.
3512 */
3513 fh_fmtp = (nfs_fh4_fmt_t *)cs->fh.nfs_fh4_val;
3514
3515 /*
3516 * if root filesystem is exported, the exportinfo struct that we
3517 * should use is what checkexport4 returns, because root_exi is
3518 * actually a mostly empty struct.
3519 */
3520 exi = checkexport4(&fh_fmtp->fh4_fsid,
3521 (fid_t *)&fh_fmtp->fh4_xlen, NULL);
3522 cs->exi = ((exi != NULL) ? exi : ne->exi_public);
3523 } else {
3524 /*
3525 * it's a properly shared filesystem
3526 */
3527 cs->exi = ne->exi_public;
3528 }
3529
3530 if (is_system_labeled()) {
3531 bslabel_t *clabel;
3532
3533 ASSERT(req->rq_label != NULL);
3534 clabel = req->rq_label;
3535 DTRACE_PROBE2(tx__rfs4__log__info__opputpubfh__clabel, char *,
3536 "got client label from request(1)",
3537 struct svc_req *, req);
3538 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3539 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3540 cs->exi)) {
3541 *cs->statusp = resp->status =
3542 NFS4ERR_SERVERFAULT;
3543 goto out;
3544 }
3545 }
3546 }
3547
3548 VN_HOLD(vp);
3549 cs->vp = vp;
3550
3551 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3552 VN_RELE(cs->vp);
3553 cs->vp = NULL;
3554 cs->exi = sav_exi;
3555 goto out;
3556 }
3557
3558 *cs->statusp = resp->status = NFS4_OK;
3559 out:
3560 DTRACE_NFSV4_2(op__putpubfh__done, struct compound_state *, cs,
3561 PUTPUBFH4res *, resp);
3562 }
3563
3564 /*
3565 * XXX - issue with put*fh operations. Suppose /export/home is exported.
3566 * Suppose an NFS client goes to mount /export/home/joe. If /export, home,
3567 * or joe have restrictive search permissions, then we shouldn't let
3568 * the client get a file handle. This is easy to enforce. However, we
3569 * don't know what security flavor should be used until we resolve the
3570 * path name. Another complication is uid mapping. If root is
3571 * the user, then it will be mapped to the anonymous user by default,
3572 * but we won't know that till we've resolved the path name. And we won't
3573 * know what the anonymous user is.
3574 * Luckily, SECINFO is specified to take a full filename.
3575 * So what we will have to in rfs4_op_lookup is check that flavor of
3576 * the target object matches that of the request, and if root was the
3577 * caller, check for the root= and anon= options, and if necessary,
3578 * repeat the lookup using the right cred_t. But that's not done yet.
3579 */
3580 /* ARGSUSED */
3581 static void
3582 rfs4_op_putfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3583 struct compound_state *cs)
3584 {
3585 PUTFH4args *args = &argop->nfs_argop4_u.opputfh;
3586 PUTFH4res *resp = &resop->nfs_resop4_u.opputfh;
3587 nfs_fh4_fmt_t *fh_fmtp;
3588
3589 DTRACE_NFSV4_2(op__putfh__start, struct compound_state *, cs,
3590 PUTFH4args *, args);
3591
3592 if (cs->vp) {
3593 VN_RELE(cs->vp);
3594 cs->vp = NULL;
3595 }
3596
3597 if (cs->cr) {
3598 crfree(cs->cr);
3599 cs->cr = NULL;
3600 }
3601
3602
3603 if (args->object.nfs_fh4_len < NFS_FH4_LEN) {
3604 *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
3605 goto out;
3606 }
3607
3608 fh_fmtp = (nfs_fh4_fmt_t *)args->object.nfs_fh4_val;
3609 cs->exi = checkexport4(&fh_fmtp->fh4_fsid, (fid_t *)&fh_fmtp->fh4_xlen,
3610 NULL);
3611
3612 if (cs->exi == NULL) {
3613 *cs->statusp = resp->status = NFS4ERR_STALE;
3614 goto out;
3615 }
3616
3617 cs->cr = crdup(cs->basecr);
3618
3619 ASSERT(cs->cr != NULL);
3620
3621 if (! (cs->vp = nfs4_fhtovp(&args->object, cs->exi, &resp->status))) {
3622 *cs->statusp = resp->status;
3623 goto out;
3624 }
3625
3626 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3627 VN_RELE(cs->vp);
3628 cs->vp = NULL;
3629 goto out;
3630 }
3631
3632 nfs_fh4_copy(&args->object, &cs->fh);
3633 *cs->statusp = resp->status = NFS4_OK;
3634 cs->deleg = FALSE;
3635
3636 out:
3637 DTRACE_NFSV4_2(op__putfh__done, struct compound_state *, cs,
3638 PUTFH4res *, resp);
3639 }
3640
3641 /* ARGSUSED */
3642 static void
3643 rfs4_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3644 struct compound_state *cs)
3645 {
3646 PUTROOTFH4res *resp = &resop->nfs_resop4_u.opputrootfh;
3647 int error;
3648 fid_t fid;
3649 struct exportinfo *exi, *sav_exi;
3650
3651 DTRACE_NFSV4_1(op__putrootfh__start, struct compound_state *, cs);
3652
3653 if (cs->vp) {
3654 VN_RELE(cs->vp);
3655 cs->vp = NULL;
3656 }
3657
3658 if (cs->cr)
3659 crfree(cs->cr);
3660
3661 cs->cr = crdup(cs->basecr);
3662
3663 /*
3664 * Using rootdir, the system root vnode,
3665 * get its fid.
3666 */
3667 bzero(&fid, sizeof (fid));
3668 fid.fid_len = MAXFIDSZ;
3669 error = vop_fid_pseudo(ZONE_ROOTVP(), &fid);
3670 if (error != 0) {
3671 *cs->statusp = resp->status = puterrno4(error);
3672 goto out;
3673 }
3674
3675 /*
3676 * Then use the root fsid & fid it to find out if it's exported
3677 *
3678 * If the server root isn't exported directly, then
3679 * it should at least be a pseudo export based on
3680 * one or more exports further down in the server's
3681 * file tree.
3682 */
3683 exi = checkexport4(&ZONE_ROOTVP()->v_vfsp->vfs_fsid, &fid, NULL);
3684 if (exi == NULL || exi->exi_export.ex_flags & EX_PUBLIC) {
3685 NFS4_DEBUG(rfs4_debug,
3686 (CE_WARN, "rfs4_op_putrootfh: export check failure"));
3687 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3688 goto out;
3689 }
3690
3691 /*
3692 * Now make a filehandle based on the root
3693 * export and root vnode.
3694 */
3695 error = makefh4(&cs->fh, ZONE_ROOTVP(), exi);
3696 if (error != 0) {
3697 *cs->statusp = resp->status = puterrno4(error);
3698 goto out;
3699 }
3700
3701 sav_exi = cs->exi;
3702 cs->exi = exi;
3703
3704 VN_HOLD(ZONE_ROOTVP());
3705 cs->vp = ZONE_ROOTVP();
3706
3707 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3708 VN_RELE(cs->vp);
3709 cs->vp = NULL;
3710 cs->exi = sav_exi;
3711 goto out;
3712 }
3713
3714 *cs->statusp = resp->status = NFS4_OK;
3715 cs->deleg = FALSE;
3716 out:
3717 DTRACE_NFSV4_2(op__putrootfh__done, struct compound_state *, cs,
3718 PUTROOTFH4res *, resp);
3719 }
3720
3721 /*
3722 * readlink: args: CURRENT_FH.
3723 * res: status. If success - CURRENT_FH unchanged, return linktext.
3724 */
3725
3726 /* ARGSUSED */
3727 static void
3728 rfs4_op_readlink(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3729 struct compound_state *cs)
3730 {
3731 READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3732 int error;
3733 vnode_t *vp;
3734 struct iovec iov;
3735 struct vattr va;
3736 struct uio uio;
3737 char *data;
3738 struct sockaddr *ca;
3739 char *name = NULL;
3740 int is_referral;
3741
3742 DTRACE_NFSV4_1(op__readlink__start, struct compound_state *, cs);
3743
3744 /* CURRENT_FH: directory */
3745 vp = cs->vp;
3746 if (vp == NULL) {
3747 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3748 goto out;
3749 }
3750
3751 if (cs->access == CS_ACCESS_DENIED) {
3752 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3753 goto out;
3754 }
3755
3756 /* Is it a referral? */
3757 if (vn_is_nfs_reparse(vp, cs->cr) && client_is_downrev(req)) {
3758
3759 is_referral = 1;
3760
3761 } else {
3762
3763 is_referral = 0;
3764
3765 if (vp->v_type == VDIR) {
3766 *cs->statusp = resp->status = NFS4ERR_ISDIR;
3767 goto out;
3768 }
3769
3770 if (vp->v_type != VLNK) {
3771 *cs->statusp = resp->status = NFS4ERR_INVAL;
3772 goto out;
3773 }
3774
3775 }
3776
3777 va.va_mask = AT_MODE;
3778 error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
3779 if (error) {
3780 *cs->statusp = resp->status = puterrno4(error);
3781 goto out;
3782 }
3783
3784 if (MANDLOCK(vp, va.va_mode)) {
3785 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3786 goto out;
3787 }
3788
3789 data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
3790
3791 if (is_referral) {
3792 char *s;
3793 size_t strsz;
3794
3795 /* Get an artificial symlink based on a referral */
3796 s = build_symlink(vp, cs->cr, &strsz);
3797 global_svstat_ptr[4][NFS_REFERLINKS].value.ui64++;
3798 DTRACE_PROBE2(nfs4serv__func__referral__reflink,
3799 vnode_t *, vp, char *, s);
3800 if (s == NULL)
3801 error = EINVAL;
3802 else {
3803 error = 0;
3804 (void) strlcpy(data, s, MAXPATHLEN + 1);
3805 kmem_free(s, strsz);
3806 }
3807
3808 } else {
3809
3810 iov.iov_base = data;
3811 iov.iov_len = MAXPATHLEN;
3812 uio.uio_iov = &iov;
3813 uio.uio_iovcnt = 1;
3814 uio.uio_segflg = UIO_SYSSPACE;
3815 uio.uio_extflg = UIO_COPY_CACHED;
3816 uio.uio_loffset = 0;
3817 uio.uio_resid = MAXPATHLEN;
3818
3819 error = VOP_READLINK(vp, &uio, cs->cr, NULL);
3820
3821 if (!error)
3822 *(data + MAXPATHLEN - uio.uio_resid) = '\0';
3823 }
3824
3825 if (error) {
3826 kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3827 *cs->statusp = resp->status = puterrno4(error);
3828 goto out;
3829 }
3830
3831 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3832 name = nfscmd_convname(ca, cs->exi, data, NFSCMD_CONV_OUTBOUND,
3833 MAXPATHLEN + 1);
3834
3835 if (name == NULL) {
3836 /*
3837 * Even though the conversion failed, we return
3838 * something. We just don't translate it.
3839 */
3840 name = data;
3841 }
3842
3843 /*
3844 * treat link name as data
3845 */
3846 (void) str_to_utf8(name, (utf8string *)&resp->link);
3847
3848 if (name != data)
3849 kmem_free(name, MAXPATHLEN + 1);
3850 kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3851 *cs->statusp = resp->status = NFS4_OK;
3852
3853 out:
3854 DTRACE_NFSV4_2(op__readlink__done, struct compound_state *, cs,
3855 READLINK4res *, resp);
3856 }
3857
3858 static void
3859 rfs4_op_readlink_free(nfs_resop4 *resop)
3860 {
3861 READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3862 utf8string *symlink = (utf8string *)&resp->link;
3863
3864 if (symlink->utf8string_val) {
3865 UTF8STRING_FREE(*symlink)
3866 }
3867 }
3868
3869 /*
3870 * release_lockowner:
3871 * Release any state associated with the supplied
3872 * lockowner. Note if any lo_state is holding locks we will not
3873 * rele that lo_state and thus the lockowner will not be destroyed.
3874 * A client using lock after the lock owner stateid has been released
3875 * will suffer the consequence of NFS4ERR_BAD_STATEID and would have
3876 * to reissue the lock with new_lock_owner set to TRUE.
3877 * args: lock_owner
3878 * res: status
3879 */
3880 /* ARGSUSED */
3881 static void
3882 rfs4_op_release_lockowner(nfs_argop4 *argop, nfs_resop4 *resop,
3883 struct svc_req *req, struct compound_state *cs)
3884 {
3885 RELEASE_LOCKOWNER4args *ap = &argop->nfs_argop4_u.oprelease_lockowner;
3886 RELEASE_LOCKOWNER4res *resp = &resop->nfs_resop4_u.oprelease_lockowner;
3887 rfs4_lockowner_t *lo;
3888 rfs4_openowner_t *oo;
3889 rfs4_state_t *sp;
3890 rfs4_lo_state_t *lsp;
3891 rfs4_client_t *cp;
3892 bool_t create = FALSE;
3893 locklist_t *llist;
3894 sysid_t sysid;
3895
3896 DTRACE_NFSV4_2(op__release__lockowner__start, struct compound_state *,
3897 cs, RELEASE_LOCKOWNER4args *, ap);
3898
3899 /* Make sure there is a clientid around for this request */
3900 cp = rfs4_findclient_by_id(ap->lock_owner.clientid, FALSE);
3901
3902 if (cp == NULL) {
3903 *cs->statusp = resp->status =
3904 rfs4_check_clientid(&ap->lock_owner.clientid, 0);
3905 goto out;
3906 }
3907 rfs4_client_rele(cp);
3908
3909 lo = rfs4_findlockowner(&ap->lock_owner, &create);
3910 if (lo == NULL) {
3911 *cs->statusp = resp->status = NFS4_OK;
3912 goto out;
3913 }
3914 ASSERT(lo->rl_client != NULL);
3915
3916 /*
3917 * Check for EXPIRED client. If so will reap state with in a lease
3918 * period or on next set_clientid_confirm step
3919 */
3920 if (rfs4_lease_expired(lo->rl_client)) {
3921 rfs4_lockowner_rele(lo);
3922 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
3923 goto out;
3924 }
3925
3926 /*
3927 * If no sysid has been assigned, then no locks exist; just return.
3928 */
3929 rfs4_dbe_lock(lo->rl_client->rc_dbe);
3930 if (lo->rl_client->rc_sysidt == LM_NOSYSID) {
3931 rfs4_lockowner_rele(lo);
3932 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3933 goto out;
3934 }
3935
3936 sysid = lo->rl_client->rc_sysidt;
3937 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3938
3939 /*
3940 * Mark the lockowner invalid.
3941 */
3942 rfs4_dbe_hide(lo->rl_dbe);
3943
3944 /*
3945 * sysid-pid pair should now not be used since the lockowner is
3946 * invalid. If the client were to instantiate the lockowner again
3947 * it would be assigned a new pid. Thus we can get the list of
3948 * current locks.
3949 */
3950
3951 llist = flk_get_active_locks(sysid, lo->rl_pid);
3952 /* If we are still holding locks fail */
3953 if (llist != NULL) {
3954
3955 *cs->statusp = resp->status = NFS4ERR_LOCKS_HELD;
3956
3957 flk_free_locklist(llist);
3958 /*
3959 * We need to unhide the lockowner so the client can
3960 * try it again. The bad thing here is if the client
3961 * has a logic error that took it here in the first place
3962 * they probably have lost accounting of the locks that it
3963 * is holding. So we may have dangling state until the
3964 * open owner state is reaped via close. One scenario
3965 * that could possibly occur is that the client has
3966 * sent the unlock request(s) in separate threads
3967 * and has not waited for the replies before sending the
3968 * RELEASE_LOCKOWNER request. Presumably, it would expect
3969 * and deal appropriately with NFS4ERR_LOCKS_HELD, by
3970 * reissuing the request.
3971 */
3972 rfs4_dbe_unhide(lo->rl_dbe);
3973 rfs4_lockowner_rele(lo);
3974 goto out;
3975 }
3976
3977 /*
3978 * For the corresponding client we need to check each open
3979 * owner for any opens that have lockowner state associated
3980 * with this lockowner.
3981 */
3982
3983 rfs4_dbe_lock(lo->rl_client->rc_dbe);
3984 for (oo = list_head(&lo->rl_client->rc_openownerlist); oo != NULL;
3985 oo = list_next(&lo->rl_client->rc_openownerlist, oo)) {
3986
3987 rfs4_dbe_lock(oo->ro_dbe);
3988 for (sp = list_head(&oo->ro_statelist); sp != NULL;
3989 sp = list_next(&oo->ro_statelist, sp)) {
3990
3991 rfs4_dbe_lock(sp->rs_dbe);
3992 for (lsp = list_head(&sp->rs_lostatelist);
3993 lsp != NULL;
3994 lsp = list_next(&sp->rs_lostatelist, lsp)) {
3995 if (lsp->rls_locker == lo) {
3996 rfs4_dbe_lock(lsp->rls_dbe);
3997 rfs4_dbe_invalidate(lsp->rls_dbe);
3998 rfs4_dbe_unlock(lsp->rls_dbe);
3999 }
4000 }
4001 rfs4_dbe_unlock(sp->rs_dbe);
4002 }
4003 rfs4_dbe_unlock(oo->ro_dbe);
4004 }
4005 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
4006
4007 rfs4_lockowner_rele(lo);
4008
4009 *cs->statusp = resp->status = NFS4_OK;
4010
4011 out:
4012 DTRACE_NFSV4_2(op__release__lockowner__done, struct compound_state *,
4013 cs, RELEASE_LOCKOWNER4res *, resp);
4014 }
4015
4016 /*
4017 * short utility function to lookup a file and recall the delegation
4018 */
4019 static rfs4_file_t *
4020 rfs4_lookup_and_findfile(vnode_t *dvp, char *nm, vnode_t **vpp,
4021 int *lkup_error, cred_t *cr)
4022 {
4023 vnode_t *vp;
4024 rfs4_file_t *fp = NULL;
4025 bool_t fcreate = FALSE;
4026 int error;
4027
4028 if (vpp)
4029 *vpp = NULL;
4030
4031 if ((error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cr, NULL, NULL,
4032 NULL)) == 0) {
4033 if (vp->v_type == VREG)
4034 fp = rfs4_findfile(vp, NULL, &fcreate);
4035 if (vpp)
4036 *vpp = vp;
4037 else
4038 VN_RELE(vp);
4039 }
4040
4041 if (lkup_error)
4042 *lkup_error = error;
4043
4044 return (fp);
4045 }
4046
4047 /*
4048 * remove: args: CURRENT_FH: directory; name.
4049 * res: status. If success - CURRENT_FH unchanged, return change_info
4050 * for directory.
4051 */
4052 /* ARGSUSED */
4053 static void
4054 rfs4_op_remove(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4055 struct compound_state *cs)
4056 {
4057 REMOVE4args *args = &argop->nfs_argop4_u.opremove;
4058 REMOVE4res *resp = &resop->nfs_resop4_u.opremove;
4059 int error;
4060 vnode_t *dvp, *vp;
4061 struct vattr bdva, idva, adva;
4062 char *nm;
4063 uint_t len;
4064 rfs4_file_t *fp;
4065 int in_crit = 0;
4066 bslabel_t *clabel;
4067 struct sockaddr *ca;
4068 char *name = NULL;
4069 nfsstat4 status;
4070
4071 DTRACE_NFSV4_2(op__remove__start, struct compound_state *, cs,
4072 REMOVE4args *, args);
4073
4074 /* CURRENT_FH: directory */
4075 dvp = cs->vp;
4076 if (dvp == NULL) {
4077 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4078 goto out;
4079 }
4080
4081 if (cs->access == CS_ACCESS_DENIED) {
4082 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4083 goto out;
4084 }
4085
4086 /*
4087 * If there is an unshared filesystem mounted on this vnode,
4088 * Do not allow to remove anything in this directory.
4089 */
4090 if (vn_ismntpt(dvp)) {
4091 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4092 goto out;
4093 }
4094
4095 if (dvp->v_type != VDIR) {
4096 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4097 goto out;
4098 }
4099
4100 status = utf8_dir_verify(&args->target);
4101 if (status != NFS4_OK) {
4102 *cs->statusp = resp->status = status;
4103 goto out;
4104 }
4105
4106 /*
4107 * Lookup the file so that we can check if it's a directory
4108 */
4109 nm = utf8_to_fn(&args->target, &len, NULL);
4110 if (nm == NULL) {
4111 *cs->statusp = resp->status = NFS4ERR_INVAL;
4112 goto out;
4113 }
4114
4115 if (len > MAXNAMELEN) {
4116 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4117 kmem_free(nm, len);
4118 goto out;
4119 }
4120
4121 if (rdonly4(req, cs)) {
4122 *cs->statusp = resp->status = NFS4ERR_ROFS;
4123 kmem_free(nm, len);
4124 goto out;
4125 }
4126
4127 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4128 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
4129 MAXPATHLEN + 1);
4130
4131 if (name == NULL) {
4132 *cs->statusp = resp->status = NFS4ERR_INVAL;
4133 kmem_free(nm, len);
4134 goto out;
4135 }
4136
4137 /*
4138 * Lookup the file to determine type and while we are see if
4139 * there is a file struct around and check for delegation.
4140 * We don't need to acquire va_seq before this lookup, if
4141 * it causes an update, cinfo.before will not match, which will
4142 * trigger a cache flush even if atomic is TRUE.
4143 */
4144 if (fp = rfs4_lookup_and_findfile(dvp, name, &vp, &error, cs->cr)) {
4145 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4146 NULL)) {
4147 VN_RELE(vp);
4148 rfs4_file_rele(fp);
4149 *cs->statusp = resp->status = NFS4ERR_DELAY;
4150 if (nm != name)
4151 kmem_free(name, MAXPATHLEN + 1);
4152 kmem_free(nm, len);
4153 goto out;
4154 }
4155 }
4156
4157 /* Didn't find anything to remove */
4158 if (vp == NULL) {
4159 *cs->statusp = resp->status = error;
4160 if (nm != name)
4161 kmem_free(name, MAXPATHLEN + 1);
4162 kmem_free(nm, len);
4163 goto out;
4164 }
4165
4166 if (nbl_need_check(vp)) {
4167 nbl_start_crit(vp, RW_READER);
4168 in_crit = 1;
4169 if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0, NULL)) {
4170 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4171 if (nm != name)
4172 kmem_free(name, MAXPATHLEN + 1);
4173 kmem_free(nm, len);
4174 nbl_end_crit(vp);
4175 VN_RELE(vp);
4176 if (fp) {
4177 rfs4_clear_dont_grant(fp);
4178 rfs4_file_rele(fp);
4179 }
4180 goto out;
4181 }
4182 }
4183
4184 /* check label before allowing removal */
4185 if (is_system_labeled()) {
4186 ASSERT(req->rq_label != NULL);
4187 clabel = req->rq_label;
4188 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
4189 "got client label from request(1)",
4190 struct svc_req *, req);
4191 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4192 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4193 cs->exi)) {
4194 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4195 if (name != nm)
4196 kmem_free(name, MAXPATHLEN + 1);
4197 kmem_free(nm, len);
4198 if (in_crit)
4199 nbl_end_crit(vp);
4200 VN_RELE(vp);
4201 if (fp) {
4202 rfs4_clear_dont_grant(fp);
4203 rfs4_file_rele(fp);
4204 }
4205 goto out;
4206 }
4207 }
4208 }
4209
4210 /* Get dir "before" change value */
4211 bdva.va_mask = AT_CTIME|AT_SEQ;
4212 error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
4213 if (error) {
4214 *cs->statusp = resp->status = puterrno4(error);
4215 if (nm != name)
4216 kmem_free(name, MAXPATHLEN + 1);
4217 kmem_free(nm, len);
4218 if (in_crit)
4219 nbl_end_crit(vp);
4220 VN_RELE(vp);
4221 if (fp) {
4222 rfs4_clear_dont_grant(fp);
4223 rfs4_file_rele(fp);
4224 }
4225 goto out;
4226 }
4227 NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
4228
4229 /* Actually do the REMOVE operation */
4230 if (vp->v_type == VDIR) {
4231 /*
4232 * Can't remove a directory that has a mounted-on filesystem.
4233 */
4234 if (vn_ismntpt(vp)) {
4235 error = EACCES;
4236 } else {
4237 /*
4238 * System V defines rmdir to return EEXIST,
4239 * not ENOTEMPTY, if the directory is not
4240 * empty. A System V NFS server needs to map
4241 * NFS4ERR_EXIST to NFS4ERR_NOTEMPTY to
4242 * transmit over the wire.
4243 */
4244 if ((error = VOP_RMDIR(dvp, name, ZONE_ROOTVP(), cs->cr,
4245 NULL, 0)) == EEXIST)
4246 error = ENOTEMPTY;
4247 }
4248 } else {
4249 if ((error = VOP_REMOVE(dvp, name, cs->cr, NULL, 0)) == 0 &&
4250 fp != NULL) {
4251 struct vattr va;
4252 vnode_t *tvp;
4253
4254 rfs4_dbe_lock(fp->rf_dbe);
4255 tvp = fp->rf_vp;
4256 if (tvp)
4257 VN_HOLD(tvp);
4258 rfs4_dbe_unlock(fp->rf_dbe);
4259
4260 if (tvp) {
4261 /*
4262 * This is va_seq safe because we are not
4263 * manipulating dvp.
4264 */
4265 va.va_mask = AT_NLINK;
4266 if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4267 va.va_nlink == 0) {
4268 /* Remove state on file remove */
4269 if (in_crit) {
4270 nbl_end_crit(vp);
4271 in_crit = 0;
4272 }
4273 rfs4_close_all_state(fp);
4274 }
4275 VN_RELE(tvp);
4276 }
4277 }
4278 }
4279
4280 if (in_crit)
4281 nbl_end_crit(vp);
4282 VN_RELE(vp);
4283
4284 if (fp) {
4285 rfs4_clear_dont_grant(fp);
4286 rfs4_file_rele(fp);
4287 }
4288 if (nm != name)
4289 kmem_free(name, MAXPATHLEN + 1);
4290 kmem_free(nm, len);
4291
4292 if (error) {
4293 *cs->statusp = resp->status = puterrno4(error);
4294 goto out;
4295 }
4296
4297 /*
4298 * Get the initial "after" sequence number, if it fails, set to zero
4299 */
4300 idva.va_mask = AT_SEQ;
4301 if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
4302 idva.va_seq = 0;
4303
4304 /*
4305 * Force modified data and metadata out to stable storage.
4306 */
4307 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
4308
4309 /*
4310 * Get "after" change value, if it fails, simply return the
4311 * before value.
4312 */
4313 adva.va_mask = AT_CTIME|AT_SEQ;
4314 if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
4315 adva.va_ctime = bdva.va_ctime;
4316 adva.va_seq = 0;
4317 }
4318
4319 NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
4320
4321 /*
4322 * The cinfo.atomic = TRUE only if we have
4323 * non-zero va_seq's, and it has incremented by exactly one
4324 * during the VOP_REMOVE/RMDIR and it didn't change during
4325 * the VOP_FSYNC.
4326 */
4327 if (bdva.va_seq && idva.va_seq && adva.va_seq &&
4328 idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
4329 resp->cinfo.atomic = TRUE;
4330 else
4331 resp->cinfo.atomic = FALSE;
4332
4333 *cs->statusp = resp->status = NFS4_OK;
4334
4335 out:
4336 DTRACE_NFSV4_2(op__remove__done, struct compound_state *, cs,
4337 REMOVE4res *, resp);
4338 }
4339
4340 /*
4341 * rename: args: SAVED_FH: from directory, CURRENT_FH: target directory,
4342 * oldname and newname.
4343 * res: status. If success - CURRENT_FH unchanged, return change_info
4344 * for both from and target directories.
4345 */
4346 /* ARGSUSED */
4347 static void
4348 rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4349 struct compound_state *cs)
4350 {
4351 RENAME4args *args = &argop->nfs_argop4_u.oprename;
4352 RENAME4res *resp = &resop->nfs_resop4_u.oprename;
4353 int error;
4354 vnode_t *odvp;
4355 vnode_t *ndvp;
4356 vnode_t *srcvp, *targvp, *tvp;
4357 struct vattr obdva, oidva, oadva;
4358 struct vattr nbdva, nidva, nadva;
4359 char *onm, *nnm;
4360 uint_t olen, nlen;
4361 rfs4_file_t *fp, *sfp;
4362 int in_crit_src, in_crit_targ;
4363 int fp_rele_grant_hold, sfp_rele_grant_hold;
4364 int unlinked;
4365 bslabel_t *clabel;
4366 struct sockaddr *ca;
4367 char *converted_onm = NULL;
4368 char *converted_nnm = NULL;
4369 nfsstat4 status;
4370
4371 DTRACE_NFSV4_2(op__rename__start, struct compound_state *, cs,
4372 RENAME4args *, args);
4373
4374 fp = sfp = NULL;
4375 srcvp = targvp = tvp = NULL;
4376 in_crit_src = in_crit_targ = 0;
4377 fp_rele_grant_hold = sfp_rele_grant_hold = 0;
4378 unlinked = 0;
4379
4380 /* CURRENT_FH: target directory */
4381 ndvp = cs->vp;
4382 if (ndvp == NULL) {
4383 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4384 goto out;
4385 }
4386
4387 /* SAVED_FH: from directory */
4388 odvp = cs->saved_vp;
4389 if (odvp == NULL) {
4390 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4391 goto out;
4392 }
4393
4394 if (cs->access == CS_ACCESS_DENIED) {
4395 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4396 goto out;
4397 }
4398
4399 /*
4400 * If there is an unshared filesystem mounted on this vnode,
4401 * do not allow to rename objects in this directory.
4402 */
4403 if (vn_ismntpt(odvp)) {
4404 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4405 goto out;
4406 }
4407
4408 /*
4409 * If there is an unshared filesystem mounted on this vnode,
4410 * do not allow to rename to this directory.
4411 */
4412 if (vn_ismntpt(ndvp)) {
4413 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4414 goto out;
4415 }
4416
4417 if (odvp->v_type != VDIR || ndvp->v_type != VDIR) {
4418 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4419 goto out;
4420 }
4421
4422 if (cs->saved_exi != cs->exi) {
4423 *cs->statusp = resp->status = NFS4ERR_XDEV;
4424 goto out;
4425 }
4426
4427 status = utf8_dir_verify(&args->oldname);
4428 if (status != NFS4_OK) {
4429 *cs->statusp = resp->status = status;
4430 goto out;
4431 }
4432
4433 status = utf8_dir_verify(&args->newname);
4434 if (status != NFS4_OK) {
4435 *cs->statusp = resp->status = status;
4436 goto out;
4437 }
4438
4439 onm = utf8_to_fn(&args->oldname, &olen, NULL);
4440 if (onm == NULL) {
4441 *cs->statusp = resp->status = NFS4ERR_INVAL;
4442 goto out;
4443 }
4444 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4445 nlen = MAXPATHLEN + 1;
4446 converted_onm = nfscmd_convname(ca, cs->exi, onm, NFSCMD_CONV_INBOUND,
4447 nlen);
4448
4449 if (converted_onm == NULL) {
4450 *cs->statusp = resp->status = NFS4ERR_INVAL;
4451 kmem_free(onm, olen);
4452 goto out;
4453 }
4454
4455 nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4456 if (nnm == NULL) {
4457 *cs->statusp = resp->status = NFS4ERR_INVAL;
4458 if (onm != converted_onm)
4459 kmem_free(converted_onm, MAXPATHLEN + 1);
4460 kmem_free(onm, olen);
4461 goto out;
4462 }
4463 converted_nnm = nfscmd_convname(ca, cs->exi, nnm, NFSCMD_CONV_INBOUND,
4464 MAXPATHLEN + 1);
4465
4466 if (converted_nnm == NULL) {
4467 *cs->statusp = resp->status = NFS4ERR_INVAL;
4468 kmem_free(nnm, nlen);
4469 nnm = NULL;
4470 if (onm != converted_onm)
4471 kmem_free(converted_onm, MAXPATHLEN + 1);
4472 kmem_free(onm, olen);
4473 goto out;
4474 }
4475
4476
4477 if (olen > MAXNAMELEN || nlen > MAXNAMELEN) {
4478 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4479 kmem_free(onm, olen);
4480 kmem_free(nnm, nlen);
4481 goto out;
4482 }
4483
4484
4485 if (rdonly4(req, cs)) {
4486 *cs->statusp = resp->status = NFS4ERR_ROFS;
4487 if (onm != converted_onm)
4488 kmem_free(converted_onm, MAXPATHLEN + 1);
4489 kmem_free(onm, olen);
4490 if (nnm != converted_nnm)
4491 kmem_free(converted_nnm, MAXPATHLEN + 1);
4492 kmem_free(nnm, nlen);
4493 goto out;
4494 }
4495
4496 /* check label of the target dir */
4497 if (is_system_labeled()) {
4498 ASSERT(req->rq_label != NULL);
4499 clabel = req->rq_label;
4500 DTRACE_PROBE2(tx__rfs4__log__info__oprename__clabel, char *,
4501 "got client label from request(1)",
4502 struct svc_req *, req);
4503 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4504 if (!do_rfs_label_check(clabel, ndvp,
4505 EQUALITY_CHECK, cs->exi)) {
4506 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4507 goto err_out;
4508 }
4509 }
4510 }
4511
4512 /*
4513 * Is the source a file and have a delegation?
4514 * We don't need to acquire va_seq before these lookups, if
4515 * it causes an update, cinfo.before will not match, which will
4516 * trigger a cache flush even if atomic is TRUE.
4517 */
4518 if (sfp = rfs4_lookup_and_findfile(odvp, converted_onm, &srcvp,
4519 &error, cs->cr)) {
4520 if (rfs4_check_delegated_byfp(FWRITE, sfp, TRUE, TRUE, TRUE,
4521 NULL)) {
4522 *cs->statusp = resp->status = NFS4ERR_DELAY;
4523 goto err_out;
4524 }
4525 }
4526
4527 if (srcvp == NULL) {
4528 *cs->statusp = resp->status = puterrno4(error);
4529 if (onm != converted_onm)
4530 kmem_free(converted_onm, MAXPATHLEN + 1);
4531 kmem_free(onm, olen);
4532 if (nnm != converted_nnm)
4533 kmem_free(converted_nnm, MAXPATHLEN + 1);
4534 kmem_free(nnm, nlen);
4535 goto out;
4536 }
4537
4538 sfp_rele_grant_hold = 1;
4539
4540 /* Does the destination exist and a file and have a delegation? */
4541 if (fp = rfs4_lookup_and_findfile(ndvp, converted_nnm, &targvp,
4542 NULL, cs->cr)) {
4543 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4544 NULL)) {
4545 *cs->statusp = resp->status = NFS4ERR_DELAY;
4546 goto err_out;
4547 }
4548 }
4549 fp_rele_grant_hold = 1;
4550
4551 /* Check for NBMAND lock on both source and target */
4552 if (nbl_need_check(srcvp)) {
4553 nbl_start_crit(srcvp, RW_READER);
4554 in_crit_src = 1;
4555 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
4556 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4557 goto err_out;
4558 }
4559 }
4560
4561 if (targvp && nbl_need_check(targvp)) {
4562 nbl_start_crit(targvp, RW_READER);
4563 in_crit_targ = 1;
4564 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
4565 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4566 goto err_out;
4567 }
4568 }
4569
4570 /* Get source "before" change value */
4571 obdva.va_mask = AT_CTIME|AT_SEQ;
4572 error = VOP_GETATTR(odvp, &obdva, 0, cs->cr, NULL);
4573 if (!error) {
4574 nbdva.va_mask = AT_CTIME|AT_SEQ;
4575 error = VOP_GETATTR(ndvp, &nbdva, 0, cs->cr, NULL);
4576 }
4577 if (error) {
4578 *cs->statusp = resp->status = puterrno4(error);
4579 goto err_out;
4580 }
4581
4582 NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.before, obdva.va_ctime)
4583 NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.before, nbdva.va_ctime)
4584
4585 error = VOP_RENAME(odvp, converted_onm, ndvp, converted_nnm, cs->cr,
4586 NULL, 0);
4587
4588 /*
4589 * If target existed and was unlinked by VOP_RENAME, state will need
4590 * closed. To avoid deadlock, rfs4_close_all_state will be done after
4591 * any necessary nbl_end_crit on srcvp and tgtvp.
4592 */
4593 if (error == 0 && fp != NULL) {
4594 rfs4_dbe_lock(fp->rf_dbe);
4595 tvp = fp->rf_vp;
4596 if (tvp)
4597 VN_HOLD(tvp);
4598 rfs4_dbe_unlock(fp->rf_dbe);
4599
4600 if (tvp) {
4601 struct vattr va;
4602 va.va_mask = AT_NLINK;
4603
4604 if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4605 va.va_nlink == 0) {
4606 unlinked = 1;
4607
4608 /* DEBUG data */
4609 if ((srcvp == targvp) || (tvp != targvp)) {
4610 cmn_err(CE_WARN, "rfs4_op_rename: "
4611 "srcvp %p, targvp: %p, tvp: %p",
4612 (void *)srcvp, (void *)targvp,
4613 (void *)tvp);
4614 }
4615 } else {
4616 VN_RELE(tvp);
4617 }
4618 }
4619 }
4620 if (error == 0)
4621 vn_renamepath(ndvp, srcvp, nnm, nlen - 1);
4622
4623 if (in_crit_src)
4624 nbl_end_crit(srcvp);
4625 if (srcvp)
4626 VN_RELE(srcvp);
4627 if (in_crit_targ)
4628 nbl_end_crit(targvp);
4629 if (targvp)
4630 VN_RELE(targvp);
4631
4632 if (unlinked) {
4633 ASSERT(fp != NULL);
4634 ASSERT(tvp != NULL);
4635
4636 /* DEBUG data */
4637 if (RW_READ_HELD(&tvp->v_nbllock)) {
4638 cmn_err(CE_WARN, "rfs4_op_rename: "
4639 "RW_READ_HELD(%p)", (void *)tvp);
4640 }
4641
4642 /* The file is gone and so should the state */
4643 rfs4_close_all_state(fp);
4644 VN_RELE(tvp);
4645 }
4646
4647 if (sfp) {
4648 rfs4_clear_dont_grant(sfp);
4649 rfs4_file_rele(sfp);
4650 }
4651 if (fp) {
4652 rfs4_clear_dont_grant(fp);
4653 rfs4_file_rele(fp);
4654 }
4655
4656 if (converted_onm != onm)
4657 kmem_free(converted_onm, MAXPATHLEN + 1);
4658 kmem_free(onm, olen);
4659 if (converted_nnm != nnm)
4660 kmem_free(converted_nnm, MAXPATHLEN + 1);
4661 kmem_free(nnm, nlen);
4662
4663 /*
4664 * Get the initial "after" sequence number, if it fails, set to zero
4665 */
4666 oidva.va_mask = AT_SEQ;
4667 if (VOP_GETATTR(odvp, &oidva, 0, cs->cr, NULL))
4668 oidva.va_seq = 0;
4669
4670 nidva.va_mask = AT_SEQ;
4671 if (VOP_GETATTR(ndvp, &nidva, 0, cs->cr, NULL))
4672 nidva.va_seq = 0;
4673
4674 /*
4675 * Force modified data and metadata out to stable storage.
4676 */
4677 (void) VOP_FSYNC(odvp, 0, cs->cr, NULL);
4678 (void) VOP_FSYNC(ndvp, 0, cs->cr, NULL);
4679
4680 if (error) {
4681 *cs->statusp = resp->status = puterrno4(error);
4682 goto out;
4683 }
4684
4685 /*
4686 * Get "after" change values, if it fails, simply return the
4687 * before value.
4688 */
4689 oadva.va_mask = AT_CTIME|AT_SEQ;
4690 if (VOP_GETATTR(odvp, &oadva, 0, cs->cr, NULL)) {
4691 oadva.va_ctime = obdva.va_ctime;
4692 oadva.va_seq = 0;
4693 }
4694
4695 nadva.va_mask = AT_CTIME|AT_SEQ;
4696 if (VOP_GETATTR(odvp, &nadva, 0, cs->cr, NULL)) {
4697 nadva.va_ctime = nbdva.va_ctime;
4698 nadva.va_seq = 0;
4699 }
4700
4701 NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.after, oadva.va_ctime)
4702 NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.after, nadva.va_ctime)
4703
4704 /*
4705 * The cinfo.atomic = TRUE only if we have
4706 * non-zero va_seq's, and it has incremented by exactly one
4707 * during the VOP_RENAME and it didn't change during the VOP_FSYNC.
4708 */
4709 if (obdva.va_seq && oidva.va_seq && oadva.va_seq &&
4710 oidva.va_seq == (obdva.va_seq + 1) && oidva.va_seq == oadva.va_seq)
4711 resp->source_cinfo.atomic = TRUE;
4712 else
4713 resp->source_cinfo.atomic = FALSE;
4714
4715 if (nbdva.va_seq && nidva.va_seq && nadva.va_seq &&
4716 nidva.va_seq == (nbdva.va_seq + 1) && nidva.va_seq == nadva.va_seq)
4717 resp->target_cinfo.atomic = TRUE;
4718 else
4719 resp->target_cinfo.atomic = FALSE;
4720
4721 #ifdef VOLATILE_FH_TEST
4722 {
4723 extern void add_volrnm_fh(struct exportinfo *, vnode_t *);
4724
4725 /*
4726 * Add the renamed file handle to the volatile rename list
4727 */
4728 if (cs->exi->exi_export.ex_flags & EX_VOLRNM) {
4729 /* file handles may expire on rename */
4730 vnode_t *vp;
4731
4732 nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4733 /*
4734 * Already know that nnm will be a valid string
4735 */
4736 error = VOP_LOOKUP(ndvp, nnm, &vp, NULL, 0, NULL, cs->cr,
4737 NULL, NULL, NULL);
4738 kmem_free(nnm, nlen);
4739 if (!error) {
4740 add_volrnm_fh(cs->exi, vp);
4741 VN_RELE(vp);
4742 }
4743 }
4744 }
4745 #endif /* VOLATILE_FH_TEST */
4746
4747 *cs->statusp = resp->status = NFS4_OK;
4748 out:
4749 DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4750 RENAME4res *, resp);
4751 return;
4752
4753 err_out:
4754 if (onm != converted_onm)
4755 kmem_free(converted_onm, MAXPATHLEN + 1);
4756 if (onm != NULL)
4757 kmem_free(onm, olen);
4758 if (nnm != converted_nnm)
4759 kmem_free(converted_nnm, MAXPATHLEN + 1);
4760 if (nnm != NULL)
4761 kmem_free(nnm, nlen);
4762
4763 if (in_crit_src) nbl_end_crit(srcvp);
4764 if (in_crit_targ) nbl_end_crit(targvp);
4765 if (targvp) VN_RELE(targvp);
4766 if (srcvp) VN_RELE(srcvp);
4767 if (sfp) {
4768 if (sfp_rele_grant_hold) rfs4_clear_dont_grant(sfp);
4769 rfs4_file_rele(sfp);
4770 }
4771 if (fp) {
4772 if (fp_rele_grant_hold) rfs4_clear_dont_grant(fp);
4773 rfs4_file_rele(fp);
4774 }
4775
4776 DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4777 RENAME4res *, resp);
4778 }
4779
4780 /* ARGSUSED */
4781 static void
4782 rfs4_op_renew(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4783 struct compound_state *cs)
4784 {
4785 RENEW4args *args = &argop->nfs_argop4_u.oprenew;
4786 RENEW4res *resp = &resop->nfs_resop4_u.oprenew;
4787 rfs4_client_t *cp;
4788
4789 DTRACE_NFSV4_2(op__renew__start, struct compound_state *, cs,
4790 RENEW4args *, args);
4791
4792 if ((cp = rfs4_findclient_by_id(args->clientid, FALSE)) == NULL) {
4793 *cs->statusp = resp->status =
4794 rfs4_check_clientid(&args->clientid, 0);
4795 goto out;
4796 }
4797
4798 if (rfs4_lease_expired(cp)) {
4799 rfs4_client_rele(cp);
4800 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
4801 goto out;
4802 }
4803
4804 rfs4_update_lease(cp);
4805
4806 mutex_enter(cp->rc_cbinfo.cb_lock);
4807 if (cp->rc_cbinfo.cb_notified_of_cb_path_down == FALSE) {
4808 cp->rc_cbinfo.cb_notified_of_cb_path_down = TRUE;
4809 *cs->statusp = resp->status = NFS4ERR_CB_PATH_DOWN;
4810 } else {
4811 *cs->statusp = resp->status = NFS4_OK;
4812 }
4813 mutex_exit(cp->rc_cbinfo.cb_lock);
4814
4815 rfs4_client_rele(cp);
4816
4817 out:
4818 DTRACE_NFSV4_2(op__renew__done, struct compound_state *, cs,
4819 RENEW4res *, resp);
4820 }
4821
4822 /* ARGSUSED */
4823 static void
4824 rfs4_op_restorefh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
4825 struct compound_state *cs)
4826 {
4827 RESTOREFH4res *resp = &resop->nfs_resop4_u.oprestorefh;
4828
4829 DTRACE_NFSV4_1(op__restorefh__start, struct compound_state *, cs);
4830
4831 /* No need to check cs->access - we are not accessing any object */
4832 if ((cs->saved_vp == NULL) || (cs->saved_fh.nfs_fh4_val == NULL)) {
4833 *cs->statusp = resp->status = NFS4ERR_RESTOREFH;
4834 goto out;
4835 }
4836 if (cs->vp != NULL) {
4837 VN_RELE(cs->vp);
4838 }
4839 cs->vp = cs->saved_vp;
4840 cs->saved_vp = NULL;
4841 cs->exi = cs->saved_exi;
4842 nfs_fh4_copy(&cs->saved_fh, &cs->fh);
4843 *cs->statusp = resp->status = NFS4_OK;
4844 cs->deleg = FALSE;
4845
4846 out:
4847 DTRACE_NFSV4_2(op__restorefh__done, struct compound_state *, cs,
4848 RESTOREFH4res *, resp);
4849 }
4850
4851 /* ARGSUSED */
4852 static void
4853 rfs4_op_savefh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4854 struct compound_state *cs)
4855 {
4856 SAVEFH4res *resp = &resop->nfs_resop4_u.opsavefh;
4857
4858 DTRACE_NFSV4_1(op__savefh__start, struct compound_state *, cs);
4859
4860 /* No need to check cs->access - we are not accessing any object */
4861 if (cs->vp == NULL) {
4862 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4863 goto out;
4864 }
4865 if (cs->saved_vp != NULL) {
4866 VN_RELE(cs->saved_vp);
4867 }
4868 cs->saved_vp = cs->vp;
4869 VN_HOLD(cs->saved_vp);
4870 cs->saved_exi = cs->exi;
4871 /*
4872 * since SAVEFH is fairly rare, don't alloc space for its fh
4873 * unless necessary.
4874 */
4875 if (cs->saved_fh.nfs_fh4_val == NULL) {
4876 cs->saved_fh.nfs_fh4_val = kmem_alloc(NFS4_FHSIZE, KM_SLEEP);
4877 }
4878 nfs_fh4_copy(&cs->fh, &cs->saved_fh);
4879 *cs->statusp = resp->status = NFS4_OK;
4880
4881 out:
4882 DTRACE_NFSV4_2(op__savefh__done, struct compound_state *, cs,
4883 SAVEFH4res *, resp);
4884 }
4885
4886 /*
4887 * rfs4_verify_attr is called when nfsv4 Setattr failed, but we wish to
4888 * return the bitmap of attrs that were set successfully. It is also
4889 * called by Verify/Nverify to test the vattr/vfsstat attrs. It should
4890 * always be called only after rfs4_do_set_attrs().
4891 *
4892 * Verify that the attributes are same as the expected ones. sargp->vap
4893 * and sargp->sbp contain the input attributes as translated from fattr4.
4894 *
4895 * This function verifies only the attrs that correspond to a vattr or
4896 * vfsstat struct. That is because of the extra step needed to get the
4897 * corresponding system structs. Other attributes have already been set or
4898 * verified by do_rfs4_set_attrs.
4899 *
4900 * Return 0 if all attrs match, -1 if some don't, error if error processing.
4901 */
4902 static int
4903 rfs4_verify_attr(struct nfs4_svgetit_arg *sargp,
4904 bitmap4 *resp, struct nfs4_ntov_table *ntovp)
4905 {
4906 int error, ret_error = 0;
4907 int i, k;
4908 uint_t sva_mask = sargp->vap->va_mask;
4909 uint_t vbit;
4910 union nfs4_attr_u *na;
4911 uint8_t *amap;
4912 bool_t getsb = ntovp->vfsstat;
4913
4914 if (sva_mask != 0) {
4915 /*
4916 * Okay to overwrite sargp->vap because we verify based
4917 * on the incoming values.
4918 */
4919 ret_error = VOP_GETATTR(sargp->cs->vp, sargp->vap, 0,
4920 sargp->cs->cr, NULL);
4921 if (ret_error) {
4922 if (resp == NULL)
4923 return (ret_error);
4924 /*
4925 * Must return bitmap of successful attrs
4926 */
4927 sva_mask = 0; /* to prevent checking vap later */
4928 } else {
4929 /*
4930 * Some file systems clobber va_mask. it is probably
4931 * wrong of them to do so, nonethless we practice
4932 * defensive coding.
4933 * See bug id 4276830.
4934 */
4935 sargp->vap->va_mask = sva_mask;
4936 }
4937 }
4938
4939 if (getsb) {
4940 /*
4941 * Now get the superblock and loop on the bitmap, as there is
4942 * no simple way of translating from superblock to bitmap4.
4943 */
4944 ret_error = VFS_STATVFS(sargp->cs->vp->v_vfsp, sargp->sbp);
4945 if (ret_error) {
4946 if (resp == NULL)
4947 goto errout;
4948 getsb = FALSE;
4949 }
4950 }
4951
4952 /*
4953 * Now loop and verify each attribute which getattr returned
4954 * whether it's the same as the input.
4955 */
4956 if (resp == NULL && !getsb && (sva_mask == 0))
4957 goto errout;
4958
4959 na = ntovp->na;
4960 amap = ntovp->amap;
4961 k = 0;
4962 for (i = 0; i < ntovp->attrcnt; i++, na++, amap++) {
4963 k = *amap;
4964 ASSERT(nfs4_ntov_map[k].nval == k);
4965 vbit = nfs4_ntov_map[k].vbit;
4966
4967 /*
4968 * If vattr attribute but VOP_GETATTR failed, or it's
4969 * superblock attribute but VFS_STATVFS failed, skip
4970 */
4971 if (vbit) {
4972 if ((vbit & sva_mask) == 0)
4973 continue;
4974 } else if (!(getsb && nfs4_ntov_map[k].vfsstat)) {
4975 continue;
4976 }
4977 error = (*nfs4_ntov_map[k].sv_getit)(NFS4ATTR_VERIT, sargp, na);
4978 if (resp != NULL) {
4979 if (error)
4980 ret_error = -1; /* not all match */
4981 else /* update response bitmap */
4982 *resp |= nfs4_ntov_map[k].fbit;
4983 continue;
4984 }
4985 if (error) {
4986 ret_error = -1; /* not all match */
4987 break;
4988 }
4989 }
4990 errout:
4991 return (ret_error);
4992 }
4993
4994 /*
4995 * Decode the attribute to be set/verified. If the attr requires a sys op
4996 * (VOP_GETATTR, VFS_VFSSTAT), and the request is to verify, then don't
4997 * call the sv_getit function for it, because the sys op hasn't yet been done.
4998 * Return 0 for success, error code if failed.
4999 *
5000 * Note: the decoded arg is not freed here but in nfs4_ntov_table_free.
5001 */
5002 static int
5003 decode_fattr4_attr(nfs4_attr_cmd_t cmd, struct nfs4_svgetit_arg *sargp,
5004 int k, XDR *xdrp, bitmap4 *resp_bval, union nfs4_attr_u *nap)
5005 {
5006 int error = 0;
5007 bool_t set_later;
5008
5009 sargp->vap->va_mask |= nfs4_ntov_map[k].vbit;
5010
5011 if ((*nfs4_ntov_map[k].xfunc)(xdrp, nap)) {
5012 set_later = nfs4_ntov_map[k].vbit || nfs4_ntov_map[k].vfsstat;
5013 /*
5014 * don't verify yet if a vattr or sb dependent attr,
5015 * because we don't have their sys values yet.
5016 * Will be done later.
5017 */
5018 if (! (set_later && (cmd == NFS4ATTR_VERIT))) {
5019 /*
5020 * ACLs are a special case, since setting the MODE
5021 * conflicts with setting the ACL. We delay setting
5022 * the ACL until all other attributes have been set.
5023 * The ACL gets set in do_rfs4_op_setattr().
5024 */
5025 if (nfs4_ntov_map[k].fbit != FATTR4_ACL_MASK) {
5026 error = (*nfs4_ntov_map[k].sv_getit)(cmd,
5027 sargp, nap);
5028 if (error) {
5029 xdr_free(nfs4_ntov_map[k].xfunc,
5030 (caddr_t)nap);
5031 }
5032 }
5033 }
5034 } else {
5035 #ifdef DEBUG
5036 cmn_err(CE_NOTE, "decode_fattr4_attr: error "
5037 "decoding attribute %d\n", k);
5038 #endif
5039 error = EINVAL;
5040 }
5041 if (!error && resp_bval && !set_later) {
5042 *resp_bval |= nfs4_ntov_map[k].fbit;
5043 }
5044
5045 return (error);
5046 }
5047
5048 /*
5049 * Set vattr based on incoming fattr4 attrs - used by setattr.
5050 * Set response mask. Ignore any values that are not writable vattr attrs.
5051 */
5052 static nfsstat4
5053 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5054 struct nfs4_svgetit_arg *sargp, struct nfs4_ntov_table *ntovp,
5055 nfs4_attr_cmd_t cmd)
5056 {
5057 int error = 0;
5058 int i;
5059 char *attrs = fattrp->attrlist4;
5060 uint32_t attrslen = fattrp->attrlist4_len;
5061 XDR xdr;
5062 nfsstat4 status = NFS4_OK;
5063 vnode_t *vp = cs->vp;
5064 union nfs4_attr_u *na;
5065 uint8_t *amap;
5066
5067 #ifndef lint
5068 /*
5069 * Make sure that maximum attribute number can be expressed as an
5070 * 8 bit quantity.
5071 */
5072 ASSERT(NFS4_MAXNUM_ATTRS <= (UINT8_MAX + 1));
5073 #endif
5074
5075 if (vp == NULL) {
5076 if (resp)
5077 *resp = 0;
5078 return (NFS4ERR_NOFILEHANDLE);
5079 }
5080 if (cs->access == CS_ACCESS_DENIED) {
5081 if (resp)
5082 *resp = 0;
5083 return (NFS4ERR_ACCESS);
5084 }
5085
5086 sargp->op = cmd;
5087 sargp->cs = cs;
5088 sargp->flag = 0; /* may be set later */
5089 sargp->vap->va_mask = 0;
5090 sargp->rdattr_error = NFS4_OK;
5091 sargp->rdattr_error_req = FALSE;
5092 /* sargp->sbp is set by the caller */
5093
5094 xdrmem_create(&xdr, attrs, attrslen, XDR_DECODE);
5095
5096 na = ntovp->na;
5097 amap = ntovp->amap;
5098
5099 /*
5100 * The following loop iterates on the nfs4_ntov_map checking
5101 * if the fbit is set in the requested bitmap.
5102 * If set then we process the arguments using the
5103 * rfs4_fattr4 conversion functions to populate the setattr
5104 * vattr and va_mask. Any settable attrs that are not using vattr
5105 * will be set in this loop.
5106 */
5107 for (i = 0; i < nfs4_ntov_map_size; i++) {
5108 if (!(fattrp->attrmask & nfs4_ntov_map[i].fbit)) {
5109 continue;
5110 }
5111 /*
5112 * If setattr, must be a writable attr.
5113 * If verify/nverify, must be a readable attr.
5114 */
5115 if ((error = (*nfs4_ntov_map[i].sv_getit)(
5116 NFS4ATTR_SUPPORTED, sargp, NULL)) != 0) {
5117 /*
5118 * Client tries to set/verify an
5119 * unsupported attribute, tries to set
5120 * a read only attr or verify a write
5121 * only one - error!
5122 */
5123 break;
5124 }
5125 /*
5126 * Decode the attribute to set/verify
5127 */
5128 error = decode_fattr4_attr(cmd, sargp, nfs4_ntov_map[i].nval,
5129 &xdr, resp ? resp : NULL, na);
5130 if (error)
5131 break;
5132 *amap++ = (uint8_t)nfs4_ntov_map[i].nval;
5133 na++;
5134 (ntovp->attrcnt)++;
5135 if (nfs4_ntov_map[i].vfsstat)
5136 ntovp->vfsstat = TRUE;
5137 }
5138
5139 if (error != 0)
5140 status = (error == ENOTSUP ? NFS4ERR_ATTRNOTSUPP :
5141 puterrno4(error));
5142 /* xdrmem_destroy(&xdrs); */ /* NO-OP */
5143 return (status);
5144 }
5145
5146 static nfsstat4
5147 do_rfs4_op_setattr(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5148 stateid4 *stateid)
5149 {
5150 int error = 0;
5151 struct nfs4_svgetit_arg sarg;
5152 bool_t trunc;
5153
5154 nfsstat4 status = NFS4_OK;
5155 cred_t *cr = cs->cr;
5156 vnode_t *vp = cs->vp;
5157 struct nfs4_ntov_table ntov;
5158 struct statvfs64 sb;
5159 struct vattr bva;
5160 struct flock64 bf;
5161 int in_crit = 0;
5162 uint_t saved_mask = 0;
5163 caller_context_t ct;
5164
5165 *resp = 0;
5166 sarg.sbp = &sb;
5167 sarg.is_referral = B_FALSE;
5168 nfs4_ntov_table_init(&ntov);
5169 status = do_rfs4_set_attrs(resp, fattrp, cs, &sarg, &ntov,
5170 NFS4ATTR_SETIT);
5171 if (status != NFS4_OK) {
5172 /*
5173 * failed set attrs
5174 */
5175 goto done;
5176 }
5177 if ((sarg.vap->va_mask == 0) &&
5178 (! (fattrp->attrmask & FATTR4_ACL_MASK))) {
5179 /*
5180 * no further work to be done
5181 */
5182 goto done;
5183 }
5184
5185 /*
5186 * If we got a request to set the ACL and the MODE, only
5187 * allow changing VSUID, VSGID, and VSVTX. Attempting
5188 * to change any other bits, along with setting an ACL,
5189 * gives NFS4ERR_INVAL.
5190 */
5191 if ((fattrp->attrmask & FATTR4_ACL_MASK) &&
5192 (fattrp->attrmask & FATTR4_MODE_MASK)) {
5193 vattr_t va;
5194
5195 va.va_mask = AT_MODE;
5196 error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
5197 if (error) {
5198 status = puterrno4(error);
5199 goto done;
5200 }
5201 if ((sarg.vap->va_mode ^ va.va_mode) &
5202 ~(VSUID | VSGID | VSVTX)) {
5203 status = NFS4ERR_INVAL;
5204 goto done;
5205 }
5206 }
5207
5208 /* Check stateid only if size has been set */
5209 if (sarg.vap->va_mask & AT_SIZE) {
5210 trunc = (sarg.vap->va_size == 0);
5211 status = rfs4_check_stateid(FWRITE, cs->vp, stateid,
5212 trunc, &cs->deleg, sarg.vap->va_mask & AT_SIZE, &ct);
5213 if (status != NFS4_OK)
5214 goto done;
5215 } else {
5216 ct.cc_sysid = 0;
5217 ct.cc_pid = 0;
5218 ct.cc_caller_id = nfs4_srv_caller_id;
5219 ct.cc_flags = CC_DONTBLOCK;
5220 }
5221
5222 /* XXX start of possible race with delegations */
5223
5224 /*
5225 * We need to specially handle size changes because it is
5226 * possible for the client to create a file with read-only
5227 * modes, but with the file opened for writing. If the client
5228 * then tries to set the file size, e.g. ftruncate(3C),
5229 * fcntl(F_FREESP), the normal access checking done in
5230 * VOP_SETATTR would prevent the client from doing it even though
5231 * it should be allowed to do so. To get around this, we do the
5232 * access checking for ourselves and use VOP_SPACE which doesn't
5233 * do the access checking.
5234 * Also the client should not be allowed to change the file
5235 * size if there is a conflicting non-blocking mandatory lock in
5236 * the region of the change.
5237 */
5238 if (vp->v_type == VREG && (sarg.vap->va_mask & AT_SIZE)) {
5239 u_offset_t offset;
5240 ssize_t length;
5241
5242 /*
5243 * ufs_setattr clears AT_SIZE from vap->va_mask, but
5244 * before returning, sarg.vap->va_mask is used to
5245 * generate the setattr reply bitmap. We also clear
5246 * AT_SIZE below before calling VOP_SPACE. For both
5247 * of these cases, the va_mask needs to be saved here
5248 * and restored after calling VOP_SETATTR.
5249 */
5250 saved_mask = sarg.vap->va_mask;
5251
5252 /*
5253 * Check any possible conflict due to NBMAND locks.
5254 * Get into critical region before VOP_GETATTR, so the
5255 * size attribute is valid when checking conflicts.
5256 */
5257 if (nbl_need_check(vp)) {
5258 nbl_start_crit(vp, RW_READER);
5259 in_crit = 1;
5260 }
5261
5262 bva.va_mask = AT_UID|AT_SIZE;
5263 if (error = VOP_GETATTR(vp, &bva, 0, cr, &ct)) {
5264 status = puterrno4(error);
5265 goto done;
5266 }
5267
5268 if (in_crit) {
5269 if (sarg.vap->va_size < bva.va_size) {
5270 offset = sarg.vap->va_size;
5271 length = bva.va_size - sarg.vap->va_size;
5272 } else {
5273 offset = bva.va_size;
5274 length = sarg.vap->va_size - bva.va_size;
5275 }
5276 if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
5277 &ct)) {
5278 status = NFS4ERR_LOCKED;
5279 goto done;
5280 }
5281 }
5282
5283 if (crgetuid(cr) == bva.va_uid) {
5284 sarg.vap->va_mask &= ~AT_SIZE;
5285 bf.l_type = F_WRLCK;
5286 bf.l_whence = 0;
5287 bf.l_start = (off64_t)sarg.vap->va_size;
5288 bf.l_len = 0;
5289 bf.l_sysid = 0;
5290 bf.l_pid = 0;
5291 error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
5292 (offset_t)sarg.vap->va_size, cr, &ct);
5293 }
5294 }
5295
5296 if (!error && sarg.vap->va_mask != 0)
5297 error = VOP_SETATTR(vp, sarg.vap, sarg.flag, cr, &ct);
5298
5299 /* restore va_mask -- ufs_setattr clears AT_SIZE */
5300 if (saved_mask & AT_SIZE)
5301 sarg.vap->va_mask |= AT_SIZE;
5302
5303 /*
5304 * If an ACL was being set, it has been delayed until now,
5305 * in order to set the mode (via the VOP_SETATTR() above) first.
5306 */
5307 if ((! error) && (fattrp->attrmask & FATTR4_ACL_MASK)) {
5308 int i;
5309
5310 for (i = 0; i < NFS4_MAXNUM_ATTRS; i++)
5311 if (ntov.amap[i] == FATTR4_ACL)
5312 break;
5313 if (i < NFS4_MAXNUM_ATTRS) {
5314 error = (*nfs4_ntov_map[FATTR4_ACL].sv_getit)(
5315 NFS4ATTR_SETIT, &sarg, &ntov.na[i]);
5316 if (error == 0) {
5317 *resp |= FATTR4_ACL_MASK;
5318 } else if (error == ENOTSUP) {
5319 (void) rfs4_verify_attr(&sarg, resp, &ntov);
5320 status = NFS4ERR_ATTRNOTSUPP;
5321 goto done;
5322 }
5323 } else {
5324 NFS4_DEBUG(rfs4_debug,
5325 (CE_NOTE, "do_rfs4_op_setattr: "
5326 "unable to find ACL in fattr4"));
5327 error = EINVAL;
5328 }
5329 }
5330
5331 if (error) {
5332 /* check if a monitor detected a delegation conflict */
5333 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
5334 status = NFS4ERR_DELAY;
5335 else
5336 status = puterrno4(error);
5337
5338 /*
5339 * Set the response bitmap when setattr failed.
5340 * If VOP_SETATTR partially succeeded, test by doing a
5341 * VOP_GETATTR on the object and comparing the data
5342 * to the setattr arguments.
5343 */
5344 (void) rfs4_verify_attr(&sarg, resp, &ntov);
5345 } else {
5346 /*
5347 * Force modified metadata out to stable storage.
5348 */
5349 (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
5350 /*
5351 * Set response bitmap
5352 */
5353 nfs4_vmask_to_nmask_set(sarg.vap->va_mask, resp);
5354 }
5355
5356 /* Return early and already have a NFSv4 error */
5357 done:
5358 /*
5359 * Except for nfs4_vmask_to_nmask_set(), vattr --> fattr
5360 * conversion sets both readable and writeable NFS4 attrs
5361 * for AT_MTIME and AT_ATIME. The line below masks out
5362 * unrequested attrs from the setattr result bitmap. This
5363 * is placed after the done: label to catch the ATTRNOTSUP
5364 * case.
5365 */
5366 *resp &= fattrp->attrmask;
5367
5368 if (in_crit)
5369 nbl_end_crit(vp);
5370
5371 nfs4_ntov_table_free(&ntov, &sarg);
5372
5373 return (status);
5374 }
5375
5376 /* ARGSUSED */
5377 static void
5378 rfs4_op_setattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5379 struct compound_state *cs)
5380 {
5381 SETATTR4args *args = &argop->nfs_argop4_u.opsetattr;
5382 SETATTR4res *resp = &resop->nfs_resop4_u.opsetattr;
5383 bslabel_t *clabel;
5384
5385 DTRACE_NFSV4_2(op__setattr__start, struct compound_state *, cs,
5386 SETATTR4args *, args);
5387
5388 if (cs->vp == NULL) {
5389 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5390 goto out;
5391 }
5392
5393 /*
5394 * If there is an unshared filesystem mounted on this vnode,
5395 * do not allow to setattr on this vnode.
5396 */
5397 if (vn_ismntpt(cs->vp)) {
5398 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5399 goto out;
5400 }
5401
5402 resp->attrsset = 0;
5403
5404 if (rdonly4(req, cs)) {
5405 *cs->statusp = resp->status = NFS4ERR_ROFS;
5406 goto out;
5407 }
5408
5409 /* check label before setting attributes */
5410 if (is_system_labeled()) {
5411 ASSERT(req->rq_label != NULL);
5412 clabel = req->rq_label;
5413 DTRACE_PROBE2(tx__rfs4__log__info__opsetattr__clabel, char *,
5414 "got client label from request(1)",
5415 struct svc_req *, req);
5416 if (!blequal(&l_admin_low->tsl_label, clabel)) {
5417 if (!do_rfs_label_check(clabel, cs->vp,
5418 EQUALITY_CHECK, cs->exi)) {
5419 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5420 goto out;
5421 }
5422 }
5423 }
5424
5425 *cs->statusp = resp->status =
5426 do_rfs4_op_setattr(&resp->attrsset, &args->obj_attributes, cs,
5427 &args->stateid);
5428
5429 out:
5430 DTRACE_NFSV4_2(op__setattr__done, struct compound_state *, cs,
5431 SETATTR4res *, resp);
5432 }
5433
5434 /* ARGSUSED */
5435 static void
5436 rfs4_op_verify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5437 struct compound_state *cs)
5438 {
5439 /*
5440 * verify and nverify are exactly the same, except that nverify
5441 * succeeds when some argument changed, and verify succeeds when
5442 * when none changed.
5443 */
5444
5445 VERIFY4args *args = &argop->nfs_argop4_u.opverify;
5446 VERIFY4res *resp = &resop->nfs_resop4_u.opverify;
5447
5448 int error;
5449 struct nfs4_svgetit_arg sarg;
5450 struct statvfs64 sb;
5451 struct nfs4_ntov_table ntov;
5452
5453 DTRACE_NFSV4_2(op__verify__start, struct compound_state *, cs,
5454 VERIFY4args *, args);
5455
5456 if (cs->vp == NULL) {
5457 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5458 goto out;
5459 }
5460
5461 sarg.sbp = &sb;
5462 sarg.is_referral = B_FALSE;
5463 nfs4_ntov_table_init(&ntov);
5464 resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5465 &sarg, &ntov, NFS4ATTR_VERIT);
5466 if (resp->status != NFS4_OK) {
5467 /*
5468 * do_rfs4_set_attrs will try to verify systemwide attrs,
5469 * so could return -1 for "no match".
5470 */
5471 if (resp->status == -1)
5472 resp->status = NFS4ERR_NOT_SAME;
5473 goto done;
5474 }
5475 error = rfs4_verify_attr(&sarg, NULL, &ntov);
5476 switch (error) {
5477 case 0:
5478 resp->status = NFS4_OK;
5479 break;
5480 case -1:
5481 resp->status = NFS4ERR_NOT_SAME;
5482 break;
5483 default:
5484 resp->status = puterrno4(error);
5485 break;
5486 }
5487 done:
5488 *cs->statusp = resp->status;
5489 nfs4_ntov_table_free(&ntov, &sarg);
5490 out:
5491 DTRACE_NFSV4_2(op__verify__done, struct compound_state *, cs,
5492 VERIFY4res *, resp);
5493 }
5494
5495 /* ARGSUSED */
5496 static void
5497 rfs4_op_nverify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5498 struct compound_state *cs)
5499 {
5500 /*
5501 * verify and nverify are exactly the same, except that nverify
5502 * succeeds when some argument changed, and verify succeeds when
5503 * when none changed.
5504 */
5505
5506 NVERIFY4args *args = &argop->nfs_argop4_u.opnverify;
5507 NVERIFY4res *resp = &resop->nfs_resop4_u.opnverify;
5508
5509 int error;
5510 struct nfs4_svgetit_arg sarg;
5511 struct statvfs64 sb;
5512 struct nfs4_ntov_table ntov;
5513
5514 DTRACE_NFSV4_2(op__nverify__start, struct compound_state *, cs,
5515 NVERIFY4args *, args);
5516
5517 if (cs->vp == NULL) {
5518 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5519 DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5520 NVERIFY4res *, resp);
5521 return;
5522 }
5523 sarg.sbp = &sb;
5524 sarg.is_referral = B_FALSE;
5525 nfs4_ntov_table_init(&ntov);
5526 resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5527 &sarg, &ntov, NFS4ATTR_VERIT);
5528 if (resp->status != NFS4_OK) {
5529 /*
5530 * do_rfs4_set_attrs will try to verify systemwide attrs,
5531 * so could return -1 for "no match".
5532 */
5533 if (resp->status == -1)
5534 resp->status = NFS4_OK;
5535 goto done;
5536 }
5537 error = rfs4_verify_attr(&sarg, NULL, &ntov);
5538 switch (error) {
5539 case 0:
5540 resp->status = NFS4ERR_SAME;
5541 break;
5542 case -1:
5543 resp->status = NFS4_OK;
5544 break;
5545 default:
5546 resp->status = puterrno4(error);
5547 break;
5548 }
5549 done:
5550 *cs->statusp = resp->status;
5551 nfs4_ntov_table_free(&ntov, &sarg);
5552
5553 DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5554 NVERIFY4res *, resp);
5555 }
5556
/*
 * Number of entries in the on-stack iovec array used by rfs4_op_write().
 * When an incoming mblk chain needs more iovecs than this, rfs4_op_write()
 * allocates the array with kmem_alloc() instead of using the stack copy.
 *
 * XXX - This should live in an NFS header file.
 */
#define MAX_IOVECS 12
5561
5562 /* ARGSUSED */
5563 static void
5564 rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5565 struct compound_state *cs)
5566 {
5567 WRITE4args *args = &argop->nfs_argop4_u.opwrite;
5568 WRITE4res *resp = &resop->nfs_resop4_u.opwrite;
5569 int error;
5570 vnode_t *vp;
5571 struct vattr bva;
5572 u_offset_t rlimit;
5573 struct uio uio;
5574 struct iovec iov[MAX_IOVECS];
5575 struct iovec *iovp;
5576 int iovcnt;
5577 int ioflag;
5578 cred_t *savecred, *cr;
5579 bool_t *deleg = &cs->deleg;
5580 nfsstat4 stat;
5581 int in_crit = 0;
5582 caller_context_t ct;
5583 nfs4_srv_t *nsrv4;
5584
5585 DTRACE_NFSV4_2(op__write__start, struct compound_state *, cs,
5586 WRITE4args *, args);
5587
5588 vp = cs->vp;
5589 if (vp == NULL) {
5590 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5591 goto out;
5592 }
5593 if (cs->access == CS_ACCESS_DENIED) {
5594 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5595 goto out;
5596 }
5597
5598 cr = cs->cr;
5599
5600 if ((stat = rfs4_check_stateid(FWRITE, vp, &args->stateid, FALSE,
5601 deleg, TRUE, &ct)) != NFS4_OK) {
5602 *cs->statusp = resp->status = stat;
5603 goto out;
5604 }
5605
5606 /*
5607 * We have to enter the critical region before calling VOP_RWLOCK
5608 * to avoid a deadlock with ufs.
5609 */
5610 if (nbl_need_check(vp)) {
5611 nbl_start_crit(vp, RW_READER);
5612 in_crit = 1;
5613 if (nbl_conflict(vp, NBL_WRITE,
5614 args->offset, args->data_len, 0, &ct)) {
5615 *cs->statusp = resp->status = NFS4ERR_LOCKED;
5616 goto out;
5617 }
5618 }
5619
5620 bva.va_mask = AT_MODE | AT_UID;
5621 error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
5622
5623 /*
5624 * If we can't get the attributes, then we can't do the
5625 * right access checking. So, we'll fail the request.
5626 */
5627 if (error) {
5628 *cs->statusp = resp->status = puterrno4(error);
5629 goto out;
5630 }
5631
5632 if (rdonly4(req, cs)) {
5633 *cs->statusp = resp->status = NFS4ERR_ROFS;
5634 goto out;
5635 }
5636
5637 if (vp->v_type != VREG) {
5638 *cs->statusp = resp->status =
5639 ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
5640 goto out;
5641 }
5642
5643 if (crgetuid(cr) != bva.va_uid &&
5644 (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct))) {
5645 *cs->statusp = resp->status = puterrno4(error);
5646 goto out;
5647 }
5648
5649 if (MANDLOCK(vp, bva.va_mode)) {
5650 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5651 goto out;
5652 }
5653
5654 nsrv4 = nfs4_get_srv();
5655 if (args->data_len == 0) {
5656 *cs->statusp = resp->status = NFS4_OK;
5657 resp->count = 0;
5658 resp->committed = args->stable;
5659 resp->writeverf = nsrv4->write4verf;
5660 goto out;
5661 }
5662
5663 if (args->mblk != NULL) {
5664 mblk_t *m;
5665 uint_t bytes, round_len;
5666
5667 iovcnt = 0;
5668 bytes = 0;
5669 round_len = roundup(args->data_len, BYTES_PER_XDR_UNIT);
5670 for (m = args->mblk;
5671 m != NULL && bytes < round_len;
5672 m = m->b_cont) {
5673 iovcnt++;
5674 bytes += MBLKL(m);
5675 }
5676 #ifdef DEBUG
5677 /* should have ended on an mblk boundary */
5678 if (bytes != round_len) {
5679 printf("bytes=0x%x, round_len=0x%x, req len=0x%x\n",
5680 bytes, round_len, args->data_len);
5681 printf("args=%p, args->mblk=%p, m=%p", (void *)args,
5682 (void *)args->mblk, (void *)m);
5683 ASSERT(bytes == round_len);
5684 }
5685 #endif
5686 if (iovcnt <= MAX_IOVECS) {
5687 iovp = iov;
5688 } else {
5689 iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
5690 }
5691 mblk_to_iov(args->mblk, iovcnt, iovp);
5692 } else if (args->rlist != NULL) {
5693 iovcnt = 1;
5694 iovp = iov;
5695 iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
5696 iovp->iov_len = args->data_len;
5697 } else {
5698 iovcnt = 1;
5699 iovp = iov;
5700 iovp->iov_base = args->data_val;
5701 iovp->iov_len = args->data_len;
5702 }
5703
5704 uio.uio_iov = iovp;
5705 uio.uio_iovcnt = iovcnt;
5706
5707 uio.uio_segflg = UIO_SYSSPACE;
5708 uio.uio_extflg = UIO_COPY_DEFAULT;
5709 uio.uio_loffset = args->offset;
5710 uio.uio_resid = args->data_len;
5711 uio.uio_llimit = curproc->p_fsz_ctl;
5712 rlimit = uio.uio_llimit - args->offset;
5713 if (rlimit < (u_offset_t)uio.uio_resid)
5714 uio.uio_resid = (int)rlimit;
5715
5716 if (args->stable == UNSTABLE4)
5717 ioflag = 0;
5718 else if (args->stable == FILE_SYNC4)
5719 ioflag = FSYNC;
5720 else if (args->stable == DATA_SYNC4)
5721 ioflag = FDSYNC;
5722 else {
5723 if (iovp != iov)
5724 kmem_free(iovp, sizeof (*iovp) * iovcnt);
5725 *cs->statusp = resp->status = NFS4ERR_INVAL;
5726 goto out;
5727 }
5728
5729 /*
5730 * We're changing creds because VM may fault and we need
5731 * the cred of the current thread to be used if quota
5732 * checking is enabled.
5733 */
5734 savecred = curthread->t_cred;
5735 curthread->t_cred = cr;
5736 error = do_io(FWRITE, vp, &uio, ioflag, cr, &ct);
5737 curthread->t_cred = savecred;
5738
5739 if (iovp != iov)
5740 kmem_free(iovp, sizeof (*iovp) * iovcnt);
5741
5742 if (error) {
5743 *cs->statusp = resp->status = puterrno4(error);
5744 goto out;
5745 }
5746
5747 *cs->statusp = resp->status = NFS4_OK;
5748 resp->count = args->data_len - uio.uio_resid;
5749
5750 if (ioflag == 0)
5751 resp->committed = UNSTABLE4;
5752 else
5753 resp->committed = FILE_SYNC4;
5754
5755 resp->writeverf = nsrv4->write4verf;
5756
5757 out:
5758 if (in_crit)
5759 nbl_end_crit(vp);
5760
5761 DTRACE_NFSV4_2(op__write__done, struct compound_state *, cs,
5762 WRITE4res *, resp);
5763 }
5764
5765
/*
 * XXX put in a header file
 *
 * Prototype for the RPC credential lookup used by rfs4_compound() below.
 * That caller passes the request, a freshly allocated cred, and the
 * addresses of the compound state's principal and nfsflavor fields, and
 * treats a return value of 0 as a failure to obtain the credential.
 */
extern int sec_svc_getcred(struct svc_req *, cred_t *, caddr_t *, int *);
5768
/*
 * Execute one NFSv4 COMPOUND request and build its reply.
 *
 * args	- decoded COMPOUND arguments (tag, minorversion, op array)
 * resp	- reply to fill in; the tag copy and the result array are
 *	  allocated here
 * exi	- asserted to be NULL once the request is known to carry ops
 * req	- RPC request handle; its rq_label is freed before returning from
 *	  the normal path
 * cr	- asserted to be NULL; a new cred is obtained with crget()
 * rv	- optional; cleared on entry and set to 1 only when
 *	  sec_svc_getcred() fails and svcerr_badcred() has been sent
 *
 * Ops are dispatched in order through rfsv4disptab[] and processing stops
 * at the first op that leaves a status other than NFS4_OK in resp->status.
 */
void
rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi,
    struct svc_req *req, cred_t *cr, int *rv)
{
	uint_t i;
	struct compound_state cs;
	nfs4_srv_t *nsrv4;
	nfs_export_t *ne = nfs_get_export();

	if (rv != NULL)
		*rv = 0;
	rfs4_init_compound_state(&cs);
	/*
	 * Form a reply tag by copying over the request tag.
	 */
	resp->tag.utf8string_val =
	    kmem_alloc(args->tag.utf8string_len, KM_SLEEP);
	resp->tag.utf8string_len = args->tag.utf8string_len;
	bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
	    resp->tag.utf8string_len);

	cs.statusp = &resp->status;
	cs.req = req;
	resp->array = NULL;
	resp->array_len = 0;

	/*
	 * XXX for now, minorversion should be zero
	 */
	if (args->minorversion != NFS4_MINORVERSION) {
		DTRACE_NFSV4_2(compound__start, struct compound_state *,
		    &cs, COMPOUND4args *, args);
		resp->status = NFS4ERR_MINOR_VERS_MISMATCH;
		DTRACE_NFSV4_2(compound__done, struct compound_state *,
		    &cs, COMPOUND4res *, resp);
		return;
	}

	/*
	 * An empty compound succeeds trivially.  Note that neither the
	 * compound__start nor the compound__done probe fires on this path.
	 */
	if (args->array_len == 0) {
		resp->status = NFS4_OK;
		return;
	}

	ASSERT(exi == NULL);
	ASSERT(cr == NULL);

	cr = crget();
	ASSERT(cr != NULL);

	/* A zero return means the caller's credential could not be obtained */
	if (sec_svc_getcred(req, cr, &cs.principal, &cs.nfsflavor) == 0) {
		DTRACE_NFSV4_2(compound__start, struct compound_state *,
		    &cs, COMPOUND4args *, args);
		crfree(cr);
		DTRACE_NFSV4_2(compound__done, struct compound_state *,
		    &cs, COMPOUND4res *, resp);
		svcerr_badcred(req->rq_xprt);
		if (rv != NULL)
			*rv = 1;
		return;
	}
	/* One zeroed result slot per requested op */
	resp->array_len = args->array_len;
	resp->array = kmem_zalloc(args->array_len * sizeof (nfs_resop4),
	    KM_SLEEP);

	cs.basecr = cr;
	nsrv4 = nfs4_get_srv();

	DTRACE_NFSV4_2(compound__start, struct compound_state *, &cs,
	    COMPOUND4args *, args);

	/*
	 * For now, NFS4 compound processing must be protected by
	 * exported_lock because it can access more than one exportinfo
	 * per compound and share/unshare can now change multiple
	 * exinfo structs.  The NFS2/3 code only refs 1 exportinfo
	 * per proc (excluding public exinfo), and exi_count design
	 * is sufficient to protect concurrent execution of NFS2/3
	 * ops along with unexport.  This lock will be removed as
	 * part of the NFSv4 phase 2 namespace redesign work.
	 */
	rw_enter(&ne->exported_lock, RW_READER);

	/*
	 * If this is the first compound we've seen, we need to start all
	 * new instances' grace periods.
	 */
	if (nsrv4->seen_first_compound == 0) {
		rfs4_grace_start_new(nsrv4);
		/*
		 * This must be set after rfs4_grace_start_new(), otherwise
		 * another thread could proceed past here before the former
		 * is finished.
		 */
		nsrv4->seen_first_compound = 1;
	}

	/* Run the ops in order until one fails or the array is exhausted */
	for (i = 0; i < args->array_len && cs.cont; i++) {
		nfs_argop4 *argop;
		nfs_resop4 *resop;
		uint_t op;

		argop = &args->array[i];
		resop = &resp->array[i];
		resop->resop = argop->argop;
		op = (uint_t)resop->resop;

		if (op < rfsv4disp_cnt) {
			/*
			 * Count the individual ops here; NULL and COMPOUND
			 * are counted in common_dispatch()
			 */
			rfsproccnt_v4_ptr[op].value.ui64++;

			NFS4_DEBUG(rfs4_debug > 1,
			    (CE_NOTE, "Executing %s", rfs4_op_string[op]));
			(*rfsv4disptab[op].dis_proc)(argop, resop, req, &cs);
			NFS4_DEBUG(rfs4_debug > 1, (CE_NOTE, "%s returned %d",
			    rfs4_op_string[op], *cs.statusp));
			if (*cs.statusp != NFS4_OK)
				cs.cont = FALSE;
		} else {
			/*
			 * This is effectively dead code since XDR code
			 * will have already returned BADXDR if op doesn't
			 * decode to legal value.  This only done for a
			 * day when XDR code doesn't verify v4 opcodes.
			 */
			op = OP_ILLEGAL;
			rfsproccnt_v4_ptr[OP_ILLEGAL_IDX].value.ui64++;

			rfs4_op_illegal(argop, resop, req, &cs);
			cs.cont = FALSE;
		}

		/*
		 * If not at last op, and if we are to stop, then
		 * compact the results array.
		 */
		if ((i + 1) < args->array_len && !cs.cont) {
			/* Keep only the i + 1 results produced so far */
			nfs_resop4 *new_res = kmem_alloc(
			    (i+1) * sizeof (nfs_resop4), KM_SLEEP);
			bcopy(resp->array,
			    new_res, (i+1) * sizeof (nfs_resop4));
			kmem_free(resp->array,
			    args->array_len * sizeof (nfs_resop4));

			resp->array_len =  i + 1;
			resp->array = new_res;
		}
	}

	rw_exit(&ne->exported_lock);

	/*
	 * clear exportinfo and vnode fields from compound_state before dtrace
	 * probe, to avoid tracing residual values for path and share path.
	 */
	if (cs.vp)
		VN_RELE(cs.vp);
	if (cs.saved_vp)
		VN_RELE(cs.saved_vp);
	cs.exi = cs.saved_exi = NULL;
	cs.vp = cs.saved_vp = NULL;

	DTRACE_NFSV4_2(compound__done, struct compound_state *, &cs,
	    COMPOUND4res *, resp);

	/* The saved filehandle buffer, when present, is NFS4_FHSIZE bytes */
	if (cs.saved_fh.nfs_fh4_val)
		kmem_free(cs.saved_fh.nfs_fh4_val, NFS4_FHSIZE);

	/* basecr is the cred obtained above; cs.cr may have been set by ops */
	if (cs.basecr)
		crfree(cs.basecr);
	if (cs.cr)
		crfree(cs.cr);
	/*
	 * done with this compound request, free the label
	 */

	if (req->rq_label != NULL) {
		kmem_free(req->rq_label, sizeof (bslabel_t));
		req->rq_label = NULL;
	}
}
5952
5953 /*
5954 * XXX because of what appears to be duplicate calls to rfs4_compound_free
5955 * XXX zero out the tag and array values. Need to investigate why the
5956 * XXX calls occur, but at least prevent the panic for now.
5957 */
5958 void
5959 rfs4_compound_free(COMPOUND4res *resp)
5960 {
5961 uint_t i;
5962
5963 if (resp->tag.utf8string_val) {
5964 UTF8STRING_FREE(resp->tag)
5965 }
5966
5967 for (i = 0; i < resp->array_len; i++) {
5968 nfs_resop4 *resop;
5969 uint_t op;
5970
5971 resop = &resp->array[i];
5972 op = (uint_t)resop->resop;
5973 if (op < rfsv4disp_cnt) {
5974 (*rfsv4disptab[op].dis_resfree)(resop);
5975 }
5976 }
5977 if (resp->array != NULL) {
5978 kmem_free(resp->array, resp->array_len * sizeof (nfs_resop4));
5979 }
5980 }
5981
5982 /*
5983 * Process the value of the compound request rpc flags, as a bit-AND
5984 * of the individual per-op flags (idempotent, allowork, publicfh_ok)
5985 */
5986 void
5987 rfs4_compound_flagproc(COMPOUND4args *args, int *flagp)
5988 {
5989 int i;
5990 int flag = RPC_ALL;
5991
5992 for (i = 0; flag && i < args->array_len; i++) {
5993 uint_t op;
5994
5995 op = (uint_t)args->array[i].argop;
5996
5997 if (op < rfsv4disp_cnt)
5998 flag &= rfsv4disptab[op].dis_flags;
5999 else
6000 flag = 0;
6001 }
6002 *flagp = flag;
6003 }
6004
6005 nfsstat4
6006 rfs4_client_sysid(rfs4_client_t *cp, sysid_t *sp)
6007 {
6008 nfsstat4 e;
6009
6010 rfs4_dbe_lock(cp->rc_dbe);
6011
6012 if (cp->rc_sysidt != LM_NOSYSID) {
6013 *sp = cp->rc_sysidt;
6014 e = NFS4_OK;
6015
6016 } else if ((cp->rc_sysidt = lm_alloc_sysidt()) != LM_NOSYSID) {
6017 *sp = cp->rc_sysidt;
6018 e = NFS4_OK;
6019
6020 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
6021 "rfs4_client_sysid: allocated 0x%x\n", *sp));
6022 } else
6023 e = NFS4ERR_DELAY;
6024
6025 rfs4_dbe_unlock(cp->rc_dbe);
6026 return (e);
6027 }
6028
#if defined(DEBUG) && ! defined(lint)
/*
 * Debug-only helper that logs one record-lock request through cmn_err():
 * the caller's label, the fcntl operation, the lock type, the start
 * offset, the length (a zero l_len is printed as all ones) and the pid.
 * Operations and lock types that are not recognised print as "F_UNKNOWN".
 */
static void lock_print(char *str, int operation, struct flock64 *flk)
{
	char *opname;
	char *typename;
	longlong_t length;

	if (operation == F_GETLK)
		opname = "F_GETLK";
	else if (operation == F_SETLK)
		opname = "F_SETLK";
	else if (operation == F_SETLK_NBMAND)
		opname = "F_SETLK_NBMAND";
	else
		opname = "F_UNKNOWN";

	if (flk->l_type == F_UNLCK)
		typename = "F_UNLCK";
	else if (flk->l_type == F_RDLCK)
		typename = "F_RDLCK";
	else if (flk->l_type == F_WRLCK)
		typename = "F_WRLCK";
	else
		typename = "F_UNKNOWN";

	ASSERT(flk->l_whence == 0);

	length = flk->l_len ? (longlong_t)flk->l_len : ~0LL;
	cmn_err(CE_NOTE, "%s: %s, type = %s, off = %llx len = %llx pid = %d",
	    str, opname, typename, (longlong_t)flk->l_start,
	    length, flk->l_pid);
}

#define	LOCK_PRINT(d, s, t, f) if (d) lock_print(s, t, f)
#else
#define	LOCK_PRINT(d, s, t, f)
#endif
6065
/*
 * Credential check hook.  None of the arguments is examined; the function
 * unconditionally reports success by returning TRUE.
 */
/*ARGSUSED*/
static bool_t
creds_ok(cred_set_t cr_set, struct svc_req *req, struct compound_state *cs)
{
	return (TRUE);
}
6072
6073 /*
6074 * Look up the pathname using the vp in cs as the directory vnode.
6075 * cs->vp will be the vnode for the file on success
6076 */
6077
6078 static nfsstat4
6079 rfs4_lookup(component4 *component, struct svc_req *req,
6080 struct compound_state *cs)
6081 {
6082 char *nm;
6083 uint32_t len;
6084 nfsstat4 status;
6085 struct sockaddr *ca;
6086 char *name;
6087
6088 if (cs->vp == NULL) {
6089 return (NFS4ERR_NOFILEHANDLE);
6090 }
6091 if (cs->vp->v_type != VDIR) {
6092 return (NFS4ERR_NOTDIR);
6093 }
6094
6095 status = utf8_dir_verify(component);
6096 if (status != NFS4_OK)
6097 return (status);
6098
6099 nm = utf8_to_fn(component, &len, NULL);
6100 if (nm == NULL) {
6101 return (NFS4ERR_INVAL);
6102 }
6103
6104 if (len > MAXNAMELEN) {
6105 kmem_free(nm, len);
6106 return (NFS4ERR_NAMETOOLONG);
6107 }
6108
6109 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6110 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6111 MAXPATHLEN + 1);
6112
6113 if (name == NULL) {
6114 kmem_free(nm, len);
6115 return (NFS4ERR_INVAL);
6116 }
6117
6118 status = do_rfs4_op_lookup(name, req, cs);
6119
6120 if (name != nm)
6121 kmem_free(name, MAXPATHLEN + 1);
6122
6123 kmem_free(nm, len);
6124
6125 return (status);
6126 }
6127
6128 static nfsstat4
6129 rfs4_lookupfile(component4 *component, struct svc_req *req,
6130 struct compound_state *cs, uint32_t access, change_info4 *cinfo)
6131 {
6132 nfsstat4 status;
6133 vnode_t *dvp = cs->vp;
6134 vattr_t bva, ava, fva;
6135 int error;
6136
6137 /* Get "before" change value */
6138 bva.va_mask = AT_CTIME|AT_SEQ;
6139 error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6140 if (error)
6141 return (puterrno4(error));
6142
6143 /* rfs4_lookup may VN_RELE directory */
6144 VN_HOLD(dvp);
6145
6146 status = rfs4_lookup(component, req, cs);
6147 if (status != NFS4_OK) {
6148 VN_RELE(dvp);
6149 return (status);
6150 }
6151
6152 /*
6153 * Get "after" change value, if it fails, simply return the
6154 * before value.
6155 */
6156 ava.va_mask = AT_CTIME|AT_SEQ;
6157 if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6158 ava.va_ctime = bva.va_ctime;
6159 ava.va_seq = 0;
6160 }
6161 VN_RELE(dvp);
6162
6163 /*
6164 * Validate the file is a file
6165 */
6166 fva.va_mask = AT_TYPE|AT_MODE;
6167 error = VOP_GETATTR(cs->vp, &fva, 0, cs->cr, NULL);
6168 if (error)
6169 return (puterrno4(error));
6170
6171 if (fva.va_type != VREG) {
6172 if (fva.va_type == VDIR)
6173 return (NFS4ERR_ISDIR);
6174 if (fva.va_type == VLNK)
6175 return (NFS4ERR_SYMLINK);
6176 return (NFS4ERR_INVAL);
6177 }
6178
6179 NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime);
6180 NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6181
6182 /*
6183 * It is undefined if VOP_LOOKUP will change va_seq, so
6184 * cinfo.atomic = TRUE only if we have
6185 * non-zero va_seq's, and they have not changed.
6186 */
6187 if (bva.va_seq && ava.va_seq && ava.va_seq == bva.va_seq)
6188 cinfo->atomic = TRUE;
6189 else
6190 cinfo->atomic = FALSE;
6191
6192 /* Check for mandatory locking */
6193 cs->mandlock = MANDLOCK(cs->vp, fva.va_mode);
6194 return (check_open_access(access, cs, req));
6195 }
6196
6197 static nfsstat4
6198 create_vnode(vnode_t *dvp, char *nm, vattr_t *vap, createmode4 mode,
6199 cred_t *cr, vnode_t **vpp, bool_t *created)
6200 {
6201 int error;
6202 nfsstat4 status = NFS4_OK;
6203 vattr_t va;
6204
6205 tryagain:
6206
6207 /*
6208 * The file open mode used is VWRITE. If the client needs
6209 * some other semantic, then it should do the access checking
6210 * itself. It would have been nice to have the file open mode
6211 * passed as part of the arguments.
6212 */
6213
6214 *created = TRUE;
6215 error = VOP_CREATE(dvp, nm, vap, EXCL, VWRITE, vpp, cr, 0, NULL, NULL);
6216
6217 if (error) {
6218 *created = FALSE;
6219
6220 /*
6221 * If we got something other than file already exists
6222 * then just return this error. Otherwise, we got
6223 * EEXIST. If we were doing a GUARDED create, then
6224 * just return this error. Otherwise, we need to
6225 * make sure that this wasn't a duplicate of an
6226 * exclusive create request.
6227 *
6228 * The assumption is made that a non-exclusive create
6229 * request will never return EEXIST.
6230 */
6231
6232 if (error != EEXIST || mode == GUARDED4) {
6233 status = puterrno4(error);
6234 return (status);
6235 }
6236 error = VOP_LOOKUP(dvp, nm, vpp, NULL, 0, NULL, cr,
6237 NULL, NULL, NULL);
6238
6239 if (error) {
6240 /*
6241 * We couldn't find the file that we thought that
6242 * we just created. So, we'll just try creating
6243 * it again.
6244 */
6245 if (error == ENOENT)
6246 goto tryagain;
6247
6248 status = puterrno4(error);
6249 return (status);
6250 }
6251
6252 if (mode == UNCHECKED4) {
6253 /* existing object must be regular file */
6254 if ((*vpp)->v_type != VREG) {
6255 if ((*vpp)->v_type == VDIR)
6256 status = NFS4ERR_ISDIR;
6257 else if ((*vpp)->v_type == VLNK)
6258 status = NFS4ERR_SYMLINK;
6259 else
6260 status = NFS4ERR_INVAL;
6261 VN_RELE(*vpp);
6262 return (status);
6263 }
6264
6265 return (NFS4_OK);
6266 }
6267
6268 /* Check for duplicate request */
6269 va.va_mask = AT_MTIME;
6270 error = VOP_GETATTR(*vpp, &va, 0, cr, NULL);
6271 if (!error) {
6272 /* We found the file */
6273 const timestruc_t *mtime = &vap->va_mtime;
6274
6275 if (va.va_mtime.tv_sec != mtime->tv_sec ||
6276 va.va_mtime.tv_nsec != mtime->tv_nsec) {
6277 /* but its not our creation */
6278 VN_RELE(*vpp);
6279 return (NFS4ERR_EXIST);
6280 }
6281 *created = TRUE; /* retrans of create == created */
6282 return (NFS4_OK);
6283 }
6284 VN_RELE(*vpp);
6285 return (NFS4ERR_EXIST);
6286 }
6287
6288 return (NFS4_OK);
6289 }
6290
6291 static nfsstat4
6292 check_open_access(uint32_t access, struct compound_state *cs,
6293 struct svc_req *req)
6294 {
6295 int error;
6296 vnode_t *vp;
6297 bool_t readonly;
6298 cred_t *cr = cs->cr;
6299
6300 /* For now we don't allow mandatory locking as per V2/V3 */
6301 if (cs->access == CS_ACCESS_DENIED || cs->mandlock) {
6302 return (NFS4ERR_ACCESS);
6303 }
6304
6305 vp = cs->vp;
6306 ASSERT(cr != NULL && vp->v_type == VREG);
6307
6308 /*
6309 * If the file system is exported read only and we are trying
6310 * to open for write, then return NFS4ERR_ROFS
6311 */
6312
6313 readonly = rdonly4(req, cs);
6314
6315 if ((access & OPEN4_SHARE_ACCESS_WRITE) && readonly)
6316 return (NFS4ERR_ROFS);
6317
6318 if (access & OPEN4_SHARE_ACCESS_READ) {
6319 if ((VOP_ACCESS(vp, VREAD, 0, cr, NULL) != 0) &&
6320 (VOP_ACCESS(vp, VEXEC, 0, cr, NULL) != 0)) {
6321 return (NFS4ERR_ACCESS);
6322 }
6323 }
6324
6325 if (access & OPEN4_SHARE_ACCESS_WRITE) {
6326 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
6327 if (error)
6328 return (NFS4ERR_ACCESS);
6329 }
6330
6331 return (NFS4_OK);
6332 }
6333
6334 static nfsstat4
6335 rfs4_createfile(OPEN4args *args, struct svc_req *req, struct compound_state *cs,
6336 change_info4 *cinfo, bitmap4 *attrset, clientid4 clientid)
6337 {
6338 struct nfs4_svgetit_arg sarg;
6339 struct nfs4_ntov_table ntov;
6340
6341 bool_t ntov_table_init = FALSE;
6342 struct statvfs64 sb;
6343 nfsstat4 status;
6344 vnode_t *vp;
6345 vattr_t bva, ava, iva, cva, *vap;
6346 vnode_t *dvp;
6347 timespec32_t *mtime;
6348 char *nm = NULL;
6349 uint_t buflen;
6350 bool_t created;
6351 bool_t setsize = FALSE;
6352 len_t reqsize;
6353 int error;
6354 bool_t trunc;
6355 caller_context_t ct;
6356 component4 *component;
6357 bslabel_t *clabel;
6358 struct sockaddr *ca;
6359 char *name = NULL;
6360
6361 sarg.sbp = &sb;
6362 sarg.is_referral = B_FALSE;
6363
6364 dvp = cs->vp;
6365
6366 /* Check if the file system is read only */
6367 if (rdonly4(req, cs))
6368 return (NFS4ERR_ROFS);
6369
6370 /* check the label of including directory */
6371 if (is_system_labeled()) {
6372 ASSERT(req->rq_label != NULL);
6373 clabel = req->rq_label;
6374 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
6375 "got client label from request(1)",
6376 struct svc_req *, req);
6377 if (!blequal(&l_admin_low->tsl_label, clabel)) {
6378 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
6379 cs->exi)) {
6380 return (NFS4ERR_ACCESS);
6381 }
6382 }
6383 }
6384
6385 /*
6386 * Get the last component of path name in nm. cs will reference
6387 * the including directory on success.
6388 */
6389 component = &args->open_claim4_u.file;
6390 status = utf8_dir_verify(component);
6391 if (status != NFS4_OK)
6392 return (status);
6393
6394 nm = utf8_to_fn(component, &buflen, NULL);
6395
6396 if (nm == NULL)
6397 return (NFS4ERR_RESOURCE);
6398
6399 if (buflen > MAXNAMELEN) {
6400 kmem_free(nm, buflen);
6401 return (NFS4ERR_NAMETOOLONG);
6402 }
6403
6404 bva.va_mask = AT_TYPE|AT_CTIME|AT_SEQ;
6405 error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6406 if (error) {
6407 kmem_free(nm, buflen);
6408 return (puterrno4(error));
6409 }
6410
6411 if (bva.va_type != VDIR) {
6412 kmem_free(nm, buflen);
6413 return (NFS4ERR_NOTDIR);
6414 }
6415
6416 NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime)
6417
6418 switch (args->mode) {
6419 case GUARDED4:
6420 /*FALLTHROUGH*/
6421 case UNCHECKED4:
6422 nfs4_ntov_table_init(&ntov);
6423 ntov_table_init = TRUE;
6424
6425 *attrset = 0;
6426 status = do_rfs4_set_attrs(attrset,
6427 &args->createhow4_u.createattrs,
6428 cs, &sarg, &ntov, NFS4ATTR_SETIT);
6429
6430 if (status == NFS4_OK && (sarg.vap->va_mask & AT_TYPE) &&
6431 sarg.vap->va_type != VREG) {
6432 if (sarg.vap->va_type == VDIR)
6433 status = NFS4ERR_ISDIR;
6434 else if (sarg.vap->va_type == VLNK)
6435 status = NFS4ERR_SYMLINK;
6436 else
6437 status = NFS4ERR_INVAL;
6438 }
6439
6440 if (status != NFS4_OK) {
6441 kmem_free(nm, buflen);
6442 nfs4_ntov_table_free(&ntov, &sarg);
6443 *attrset = 0;
6444 return (status);
6445 }
6446
6447 vap = sarg.vap;
6448 vap->va_type = VREG;
6449 vap->va_mask |= AT_TYPE;
6450
6451 if ((vap->va_mask & AT_MODE) == 0) {
6452 vap->va_mask |= AT_MODE;
6453 vap->va_mode = (mode_t)0600;
6454 }
6455
6456 if (vap->va_mask & AT_SIZE) {
6457
6458 /* Disallow create with a non-zero size */
6459
6460 if ((reqsize = sarg.vap->va_size) != 0) {
6461 kmem_free(nm, buflen);
6462 nfs4_ntov_table_free(&ntov, &sarg);
6463 *attrset = 0;
6464 return (NFS4ERR_INVAL);
6465 }
6466 setsize = TRUE;
6467 }
6468 break;
6469
6470 case EXCLUSIVE4:
6471 /* prohibit EXCL create of named attributes */
6472 if (dvp->v_flag & V_XATTRDIR) {
6473 kmem_free(nm, buflen);
6474 *attrset = 0;
6475 return (NFS4ERR_INVAL);
6476 }
6477
6478 cva.va_mask = AT_TYPE | AT_MTIME | AT_MODE;
6479 cva.va_type = VREG;
6480 /*
6481 * Ensure no time overflows. Assumes underlying
6482 * filesystem supports at least 32 bits.
6483 * Truncate nsec to usec resolution to allow valid
6484 * compares even if the underlying filesystem truncates.
6485 */
6486 mtime = (timespec32_t *)&args->createhow4_u.createverf;
6487 cva.va_mtime.tv_sec = mtime->tv_sec % TIME32_MAX;
6488 cva.va_mtime.tv_nsec = (mtime->tv_nsec / 1000) * 1000;
6489 cva.va_mode = (mode_t)0;
6490 vap = &cva;
6491
6492 /*
6493 * For EXCL create, attrset is set to the server attr
6494 * used to cache the client's verifier.
6495 */
6496 *attrset = FATTR4_TIME_MODIFY_MASK;
6497 break;
6498 }
6499
6500 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6501 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6502 MAXPATHLEN + 1);
6503
6504 if (name == NULL) {
6505 kmem_free(nm, buflen);
6506 return (NFS4ERR_SERVERFAULT);
6507 }
6508
6509 status = create_vnode(dvp, name, vap, args->mode,
6510 cs->cr, &vp, &created);
6511 if (nm != name)
6512 kmem_free(name, MAXPATHLEN + 1);
6513 kmem_free(nm, buflen);
6514
6515 if (status != NFS4_OK) {
6516 if (ntov_table_init)
6517 nfs4_ntov_table_free(&ntov, &sarg);
6518 *attrset = 0;
6519 return (status);
6520 }
6521
6522 trunc = (setsize && !created);
6523
6524 if (args->mode != EXCLUSIVE4) {
6525 bitmap4 createmask = args->createhow4_u.createattrs.attrmask;
6526
6527 /*
6528 * True verification that object was created with correct
6529 * attrs is impossible. The attrs could have been changed
6530 * immediately after object creation. If attributes did
6531 * not verify, the only recourse for the server is to
6532 * destroy the object. Maybe if some attrs (like gid)
6533 * are set incorrectly, the object should be destroyed;
6534 * however, seems bad as a default policy. Do we really
6535 * want to destroy an object over one of the times not
6536 * verifying correctly? For these reasons, the server
6537 * currently sets bits in attrset for createattrs
6538 * that were set; however, no verification is done.
6539 *
6540 * vmask_to_nmask accounts for vattr bits set on create
6541 * [do_rfs4_set_attrs() only sets resp bits for
6542 * non-vattr/vfs bits.]
6543 * Mask off any bits we set by default so as not to return
6544 * more attrset bits than were requested in createattrs
6545 */
6546 if (created) {
6547 nfs4_vmask_to_nmask(sarg.vap->va_mask, attrset);
6548 *attrset &= createmask;
6549 } else {
6550 /*
6551 * We did not create the vnode (we tried but it
6552 * already existed). In this case, the only createattr
6553 * that the spec allows the server to set is size,
6554 * and even then, it can only be set if it is 0.
6555 */
6556 *attrset = 0;
6557 if (trunc)
6558 *attrset = FATTR4_SIZE_MASK;
6559 }
6560 }
6561 if (ntov_table_init)
6562 nfs4_ntov_table_free(&ntov, &sarg);
6563
6564 /*
6565 * Get the initial "after" sequence number, if it fails,
6566 * set to zero, time to before.
6567 */
6568 iva.va_mask = AT_CTIME|AT_SEQ;
6569 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL)) {
6570 iva.va_seq = 0;
6571 iva.va_ctime = bva.va_ctime;
6572 }
6573
6574 /*
6575 * create_vnode attempts to create the file exclusive,
6576 * if it already exists the VOP_CREATE will fail and
6577 * may not increase va_seq. It is atomic if
6578 * we haven't changed the directory, but if it has changed
6579 * we don't know what changed it.
6580 */
6581 if (!created) {
6582 if (bva.va_seq && iva.va_seq &&
6583 bva.va_seq == iva.va_seq)
6584 cinfo->atomic = TRUE;
6585 else
6586 cinfo->atomic = FALSE;
6587 NFS4_SET_FATTR4_CHANGE(cinfo->after, iva.va_ctime);
6588 } else {
6589 /*
6590 * The entry was created, we need to sync the
6591 * directory metadata.
6592 */
6593 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
6594
6595 /*
6596 * Get "after" change value, if it fails, simply return the
6597 * before value.
6598 */
6599 ava.va_mask = AT_CTIME|AT_SEQ;
6600 if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6601 ava.va_ctime = bva.va_ctime;
6602 ava.va_seq = 0;
6603 }
6604
6605 NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6606
6607 /*
6608 * The cinfo->atomic = TRUE only if we have
6609 * non-zero va_seq's, and it has incremented by exactly one
6610 * during the create_vnode and it didn't
6611 * change during the VOP_FSYNC.
6612 */
6613 if (bva.va_seq && iva.va_seq && ava.va_seq &&
6614 iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
6615 cinfo->atomic = TRUE;
6616 else
6617 cinfo->atomic = FALSE;
6618 }
6619
6620 /* Check for mandatory locking and that the size gets set. */
6621 cva.va_mask = AT_MODE;
6622 if (setsize)
6623 cva.va_mask |= AT_SIZE;
6624
6625 /* Assume the worst */
6626 cs->mandlock = TRUE;
6627
6628 if (VOP_GETATTR(vp, &cva, 0, cs->cr, NULL) == 0) {
6629 cs->mandlock = MANDLOCK(cs->vp, cva.va_mode);
6630
6631 /*
6632 * Truncate the file if necessary; this would be
6633 * the case for create over an existing file.
6634 */
6635
6636 if (trunc) {
6637 int in_crit = 0;
6638 rfs4_file_t *fp;
6639 nfs4_srv_t *nsrv4;
6640 bool_t create = FALSE;
6641
6642 /*
6643 * We are writing over an existing file.
6644 * Check to see if we need to recall a delegation.
6645 */
6646 nsrv4 = nfs4_get_srv();
6647 rfs4_hold_deleg_policy(nsrv4);
6648 if ((fp = rfs4_findfile(vp, NULL, &create)) != NULL) {
6649 if (rfs4_check_delegated_byfp(FWRITE, fp,
6650 (reqsize == 0), FALSE, FALSE, &clientid)) {
6651 rfs4_file_rele(fp);
6652 rfs4_rele_deleg_policy(nsrv4);
6653 VN_RELE(vp);
6654 *attrset = 0;
6655 return (NFS4ERR_DELAY);
6656 }
6657 rfs4_file_rele(fp);
6658 }
6659 rfs4_rele_deleg_policy(nsrv4);
6660
6661 if (nbl_need_check(vp)) {
6662 in_crit = 1;
6663
6664 ASSERT(reqsize == 0);
6665
6666 nbl_start_crit(vp, RW_READER);
6667 if (nbl_conflict(vp, NBL_WRITE, 0,
6668 cva.va_size, 0, NULL)) {
6669 in_crit = 0;
6670 nbl_end_crit(vp);
6671 VN_RELE(vp);
6672 *attrset = 0;
6673 return (NFS4ERR_ACCESS);
6674 }
6675 }
6676 ct.cc_sysid = 0;
6677 ct.cc_pid = 0;
6678 ct.cc_caller_id = nfs4_srv_caller_id;
6679 ct.cc_flags = CC_DONTBLOCK;
6680
6681 cva.va_mask = AT_SIZE;
6682 cva.va_size = reqsize;
6683 (void) VOP_SETATTR(vp, &cva, 0, cs->cr, &ct);
6684 if (in_crit)
6685 nbl_end_crit(vp);
6686 }
6687 }
6688
6689 error = makefh4(&cs->fh, vp, cs->exi);
6690
6691 /*
6692 * Force modified data and metadata out to stable storage.
6693 */
6694 (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
6695
6696 if (error) {
6697 VN_RELE(vp);
6698 *attrset = 0;
6699 return (puterrno4(error));
6700 }
6701
6702 /* if parent dir is attrdir, set namedattr fh flag */
6703 if (dvp->v_flag & V_XATTRDIR)
6704 set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
6705
6706 if (cs->vp)
6707 VN_RELE(cs->vp);
6708
6709 cs->vp = vp;
6710
6711 /*
6712 * if we did not create the file, we will need to check
6713 * the access bits on the file
6714 */
6715
6716 if (!created) {
6717 if (setsize)
6718 args->share_access |= OPEN4_SHARE_ACCESS_WRITE;
6719 status = check_open_access(args->share_access, cs, req);
6720 if (status != NFS4_OK)
6721 *attrset = 0;
6722 }
6723 return (status);
6724 }
6725
6726 /*ARGSUSED*/
6727 static void
6728 rfs4_do_open(struct compound_state *cs, struct svc_req *req,
6729 rfs4_openowner_t *oo, delegreq_t deleg,
6730 uint32_t access, uint32_t deny,
6731 OPEN4res *resp, int deleg_cur)
6732 {
6733 /* XXX Currently not using req */
6734 rfs4_state_t *sp;
6735 rfs4_file_t *fp;
6736 bool_t screate = TRUE;
6737 bool_t fcreate = TRUE;
6738 uint32_t open_a, share_a;
6739 uint32_t open_d, share_d;
6740 rfs4_deleg_state_t *dsp;
6741 sysid_t sysid;
6742 nfsstat4 status;
6743 caller_context_t ct;
6744 int fflags = 0;
6745 int recall = 0;
6746 int err;
6747 int first_open;
6748
6749 /* get the file struct and hold a lock on it during initial open */
6750 fp = rfs4_findfile_withlock(cs->vp, &cs->fh, &fcreate);
6751 if (fp == NULL) {
6752 resp->status = NFS4ERR_RESOURCE;
6753 DTRACE_PROBE1(nfss__e__do__open1, nfsstat4, resp->status);
6754 return;
6755 }
6756
6757 sp = rfs4_findstate_by_owner_file(oo, fp, &screate);
6758 if (sp == NULL) {
6759 resp->status = NFS4ERR_RESOURCE;
6760 DTRACE_PROBE1(nfss__e__do__open2, nfsstat4, resp->status);
6761 /* No need to keep any reference */
6762 rw_exit(&fp->rf_file_rwlock);
6763 rfs4_file_rele(fp);
6764 return;
6765 }
6766
6767 /* try to get the sysid before continuing */
6768 if ((status = rfs4_client_sysid(oo->ro_client, &sysid)) != NFS4_OK) {
6769 resp->status = status;
6770 rfs4_file_rele(fp);
6771 /* Not a fully formed open; "close" it */
6772 if (screate == TRUE)
6773 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6774 rfs4_state_rele(sp);
6775 return;
6776 }
6777
6778 /* Calculate the fflags for this OPEN. */
6779 if (access & OPEN4_SHARE_ACCESS_READ)
6780 fflags |= FREAD;
6781 if (access & OPEN4_SHARE_ACCESS_WRITE)
6782 fflags |= FWRITE;
6783
6784 rfs4_dbe_lock(sp->rs_dbe);
6785
6786 /*
6787 * Calculate the new deny and access mode that this open is adding to
6788 * the file for this open owner;
6789 */
6790 open_d = (deny & ~sp->rs_open_deny);
6791 open_a = (access & ~sp->rs_open_access);
6792
6793 /*
6794 * Calculate the new share access and share deny modes that this open
6795 * is adding to the file for this open owner;
6796 */
6797 share_a = (access & ~sp->rs_share_access);
6798 share_d = (deny & ~sp->rs_share_deny);
6799
6800 first_open = (sp->rs_open_access & OPEN4_SHARE_ACCESS_BOTH) == 0;
6801
6802 /*
6803 * Check to see the client has already sent an open for this
6804 * open owner on this file with the same share/deny modes.
6805 * If so, we don't need to check for a conflict and we don't
6806 * need to add another shrlock. If not, then we need to
6807 * check for conflicts in deny and access before checking for
6808 * conflicts in delegation. We don't want to recall a
6809 * delegation based on an open that will eventually fail based
6810 * on shares modes.
6811 */
6812
6813 if (share_a || share_d) {
6814 if ((err = rfs4_share(sp, access, deny)) != 0) {
6815 rfs4_dbe_unlock(sp->rs_dbe);
6816 resp->status = err;
6817
6818 rfs4_file_rele(fp);
6819 /* Not a fully formed open; "close" it */
6820 if (screate == TRUE)
6821 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6822 rfs4_state_rele(sp);
6823 return;
6824 }
6825 }
6826
6827 rfs4_dbe_lock(fp->rf_dbe);
6828
6829 /*
6830 * Check to see if this file is delegated and if so, if a
6831 * recall needs to be done.
6832 */
6833 if (rfs4_check_recall(sp, access)) {
6834 rfs4_dbe_unlock(fp->rf_dbe);
6835 rfs4_dbe_unlock(sp->rs_dbe);
6836 rfs4_recall_deleg(fp, FALSE, sp->rs_owner->ro_client);
6837 delay(NFS4_DELEGATION_CONFLICT_DELAY);
6838 rfs4_dbe_lock(sp->rs_dbe);
6839
6840 /* if state closed while lock was dropped */
6841 if (sp->rs_closed) {
6842 if (share_a || share_d)
6843 (void) rfs4_unshare(sp);
6844 rfs4_dbe_unlock(sp->rs_dbe);
6845 rfs4_file_rele(fp);
6846 /* Not a fully formed open; "close" it */
6847 if (screate == TRUE)
6848 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6849 rfs4_state_rele(sp);
6850 resp->status = NFS4ERR_OLD_STATEID;
6851 return;
6852 }
6853
6854 rfs4_dbe_lock(fp->rf_dbe);
6855 /* Let's see if the delegation was returned */
6856 if (rfs4_check_recall(sp, access)) {
6857 rfs4_dbe_unlock(fp->rf_dbe);
6858 if (share_a || share_d)
6859 (void) rfs4_unshare(sp);
6860 rfs4_dbe_unlock(sp->rs_dbe);
6861 rfs4_file_rele(fp);
6862 rfs4_update_lease(sp->rs_owner->ro_client);
6863
6864 /* Not a fully formed open; "close" it */
6865 if (screate == TRUE)
6866 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6867 rfs4_state_rele(sp);
6868 resp->status = NFS4ERR_DELAY;
6869 return;
6870 }
6871 }
6872 /*
6873 * the share check passed and any delegation conflict has been
6874 * taken care of, now call vop_open.
6875 * if this is the first open then call vop_open with fflags.
6876 * if not, call vn_open_upgrade with just the upgrade flags.
6877 *
6878 * if the file has been opened already, it will have the current
6879 * access mode in the state struct. if it has no share access, then
6880 * this is a new open.
6881 *
6882 * However, if this is open with CLAIM_DLEGATE_CUR, then don't
6883 * call VOP_OPEN(), just do the open upgrade.
6884 */
6885 if (first_open && !deleg_cur) {
6886 ct.cc_sysid = sysid;
6887 ct.cc_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
6888 ct.cc_caller_id = nfs4_srv_caller_id;
6889 ct.cc_flags = CC_DONTBLOCK;
6890 err = VOP_OPEN(&cs->vp, fflags, cs->cr, &ct);
6891 if (err) {
6892 rfs4_dbe_unlock(fp->rf_dbe);
6893 if (share_a || share_d)
6894 (void) rfs4_unshare(sp);
6895 rfs4_dbe_unlock(sp->rs_dbe);
6896 rfs4_file_rele(fp);
6897
6898 /* Not a fully formed open; "close" it */
6899 if (screate == TRUE)
6900 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6901 rfs4_state_rele(sp);
6902 /* check if a monitor detected a delegation conflict */
6903 if (err == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
6904 resp->status = NFS4ERR_DELAY;
6905 else
6906 resp->status = NFS4ERR_SERVERFAULT;
6907 return;
6908 }
6909 } else { /* open upgrade */
6910 /*
6911 * calculate the fflags for the new mode that is being added
6912 * by this upgrade.
6913 */
6914 fflags = 0;
6915 if (open_a & OPEN4_SHARE_ACCESS_READ)
6916 fflags |= FREAD;
6917 if (open_a & OPEN4_SHARE_ACCESS_WRITE)
6918 fflags |= FWRITE;
6919 vn_open_upgrade(cs->vp, fflags);
6920 }
6921 sp->rs_open_access |= access;
6922 sp->rs_open_deny |= deny;
6923
6924 if (open_d & OPEN4_SHARE_DENY_READ)
6925 fp->rf_deny_read++;
6926 if (open_d & OPEN4_SHARE_DENY_WRITE)
6927 fp->rf_deny_write++;
6928 fp->rf_share_deny |= deny;
6929
6930 if (open_a & OPEN4_SHARE_ACCESS_READ)
6931 fp->rf_access_read++;
6932 if (open_a & OPEN4_SHARE_ACCESS_WRITE)
6933 fp->rf_access_write++;
6934 fp->rf_share_access |= access;
6935
6936 /*
6937 * Check for delegation here. if the deleg argument is not
6938 * DELEG_ANY, then this is a reclaim from a client and
6939 * we must honor the delegation requested. If necessary we can
6940 * set the recall flag.
6941 */
6942
6943 dsp = rfs4_grant_delegation(deleg, sp, &recall);
6944
6945 cs->deleg = (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE);
6946
6947 next_stateid(&sp->rs_stateid);
6948
6949 resp->stateid = sp->rs_stateid.stateid;
6950
6951 rfs4_dbe_unlock(fp->rf_dbe);
6952 rfs4_dbe_unlock(sp->rs_dbe);
6953
6954 if (dsp) {
6955 rfs4_set_deleg_response(dsp, &resp->delegation, NULL, recall);
6956 rfs4_deleg_state_rele(dsp);
6957 }
6958
6959 rfs4_file_rele(fp);
6960 rfs4_state_rele(sp);
6961
6962 resp->status = NFS4_OK;
6963 }
6964
6965 /*ARGSUSED*/
6966 static void
6967 rfs4_do_opennull(struct compound_state *cs, struct svc_req *req,
6968 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6969 {
6970 change_info4 *cinfo = &resp->cinfo;
6971 bitmap4 *attrset = &resp->attrset;
6972
6973 if (args->opentype == OPEN4_NOCREATE)
6974 resp->status = rfs4_lookupfile(&args->open_claim4_u.file,
6975 req, cs, args->share_access, cinfo);
6976 else {
6977 /* inhibit delegation grants during exclusive create */
6978
6979 if (args->mode == EXCLUSIVE4)
6980 rfs4_disable_delegation();
6981
6982 resp->status = rfs4_createfile(args, req, cs, cinfo, attrset,
6983 oo->ro_client->rc_clientid);
6984 }
6985
6986 if (resp->status == NFS4_OK) {
6987
6988 /* cs->vp cs->fh now reference the desired file */
6989
6990 rfs4_do_open(cs, req, oo,
6991 oo->ro_need_confirm ? DELEG_NONE : DELEG_ANY,
6992 args->share_access, args->share_deny, resp, 0);
6993
6994 /*
6995 * If rfs4_createfile set attrset, we must
6996 * clear this attrset before the response is copied.
6997 */
6998 if (resp->status != NFS4_OK && resp->attrset) {
6999 resp->attrset = 0;
7000 }
7001 }
7002 else
7003 *cs->statusp = resp->status;
7004
7005 if (args->mode == EXCLUSIVE4)
7006 rfs4_enable_delegation();
7007 }
7008
7009 /*ARGSUSED*/
7010 static void
7011 rfs4_do_openprev(struct compound_state *cs, struct svc_req *req,
7012 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7013 {
7014 change_info4 *cinfo = &resp->cinfo;
7015 vattr_t va;
7016 vtype_t v_type = cs->vp->v_type;
7017 int error = 0;
7018
7019 /* Verify that we have a regular file */
7020 if (v_type != VREG) {
7021 if (v_type == VDIR)
7022 resp->status = NFS4ERR_ISDIR;
7023 else if (v_type == VLNK)
7024 resp->status = NFS4ERR_SYMLINK;
7025 else
7026 resp->status = NFS4ERR_INVAL;
7027 return;
7028 }
7029
7030 va.va_mask = AT_MODE|AT_UID;
7031 error = VOP_GETATTR(cs->vp, &va, 0, cs->cr, NULL);
7032 if (error) {
7033 resp->status = puterrno4(error);
7034 return;
7035 }
7036
7037 cs->mandlock = MANDLOCK(cs->vp, va.va_mode);
7038
7039 /*
7040 * Check if we have access to the file, Note the the file
7041 * could have originally been open UNCHECKED or GUARDED
7042 * with mode bits that will now fail, but there is nothing
7043 * we can really do about that except in the case that the
7044 * owner of the file is the one requesting the open.
7045 */
7046 if (crgetuid(cs->cr) != va.va_uid) {
7047 resp->status = check_open_access(args->share_access, cs, req);
7048 if (resp->status != NFS4_OK) {
7049 return;
7050 }
7051 }
7052
7053 /*
7054 * cinfo on a CLAIM_PREVIOUS is undefined, initialize to zero
7055 */
7056 cinfo->before = 0;
7057 cinfo->after = 0;
7058 cinfo->atomic = FALSE;
7059
7060 rfs4_do_open(cs, req, oo,
7061 NFS4_DELEG4TYPE2REQTYPE(args->open_claim4_u.delegate_type),
7062 args->share_access, args->share_deny, resp, 0);
7063 }
7064
7065 static void
7066 rfs4_do_opendelcur(struct compound_state *cs, struct svc_req *req,
7067 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7068 {
7069 int error;
7070 nfsstat4 status;
7071 stateid4 stateid =
7072 args->open_claim4_u.delegate_cur_info.delegate_stateid;
7073 rfs4_deleg_state_t *dsp;
7074
7075 /*
7076 * Find the state info from the stateid and confirm that the
7077 * file is delegated. If the state openowner is the same as
7078 * the supplied openowner we're done. If not, get the file
7079 * info from the found state info. Use that file info to
7080 * create the state for this lock owner. Note solaris doen't
7081 * really need the pathname to find the file. We may want to
7082 * lookup the pathname and make sure that the vp exist and
7083 * matches the vp in the file structure. However it is
7084 * possible that the pathname nolonger exists (local process
7085 * unlinks the file), so this may not be that useful.
7086 */
7087
7088 status = rfs4_get_deleg_state(&stateid, &dsp);
7089 if (status != NFS4_OK) {
7090 resp->status = status;
7091 return;
7092 }
7093
7094 ASSERT(dsp->rds_finfo->rf_dinfo.rd_dtype != OPEN_DELEGATE_NONE);
7095
7096 /*
7097 * New lock owner, create state. Since this was probably called
7098 * in response to a CB_RECALL we set deleg to DELEG_NONE
7099 */
7100
7101 ASSERT(cs->vp != NULL);
7102 VN_RELE(cs->vp);
7103 VN_HOLD(dsp->rds_finfo->rf_vp);
7104 cs->vp = dsp->rds_finfo->rf_vp;
7105
7106 if (error = makefh4(&cs->fh, cs->vp, cs->exi)) {
7107 rfs4_deleg_state_rele(dsp);
7108 *cs->statusp = resp->status = puterrno4(error);
7109 return;
7110 }
7111
7112 /* Mark progress for delegation returns */
7113 dsp->rds_finfo->rf_dinfo.rd_time_lastwrite = gethrestime_sec();
7114 rfs4_deleg_state_rele(dsp);
7115 rfs4_do_open(cs, req, oo, DELEG_NONE,
7116 args->share_access, args->share_deny, resp, 1);
7117 }
7118
7119 /*ARGSUSED*/
7120 static void
7121 rfs4_do_opendelprev(struct compound_state *cs, struct svc_req *req,
7122 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7123 {
7124 /*
7125 * Lookup the pathname, it must already exist since this file
7126 * was delegated.
7127 *
7128 * Find the file and state info for this vp and open owner pair.
7129 * check that they are in fact delegated.
7130 * check that the state access and deny modes are the same.
7131 *
7132 * Return the delgation possibly seting the recall flag.
7133 */
7134 rfs4_file_t *fp;
7135 rfs4_state_t *sp;
7136 bool_t create = FALSE;
7137 bool_t dcreate = FALSE;
7138 rfs4_deleg_state_t *dsp;
7139 nfsace4 *ace;
7140
7141 /* Note we ignore oflags */
7142 resp->status = rfs4_lookupfile(&args->open_claim4_u.file_delegate_prev,
7143 req, cs, args->share_access, &resp->cinfo);
7144
7145 if (resp->status != NFS4_OK) {
7146 return;
7147 }
7148
7149 /* get the file struct and hold a lock on it during initial open */
7150 fp = rfs4_findfile_withlock(cs->vp, NULL, &create);
7151 if (fp == NULL) {
7152 resp->status = NFS4ERR_RESOURCE;
7153 DTRACE_PROBE1(nfss__e__do_opendelprev1, nfsstat4, resp->status);
7154 return;
7155 }
7156
7157 sp = rfs4_findstate_by_owner_file(oo, fp, &create);
7158 if (sp == NULL) {
7159 resp->status = NFS4ERR_SERVERFAULT;
7160 DTRACE_PROBE1(nfss__e__do_opendelprev2, nfsstat4, resp->status);
7161 rw_exit(&fp->rf_file_rwlock);
7162 rfs4_file_rele(fp);
7163 return;
7164 }
7165
7166 rfs4_dbe_lock(sp->rs_dbe);
7167 rfs4_dbe_lock(fp->rf_dbe);
7168 if (args->share_access != sp->rs_share_access ||
7169 args->share_deny != sp->rs_share_deny ||
7170 sp->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
7171 NFS4_DEBUG(rfs4_debug,
7172 (CE_NOTE, "rfs4_do_opendelprev: state mixup"));
7173 rfs4_dbe_unlock(fp->rf_dbe);
7174 rfs4_dbe_unlock(sp->rs_dbe);
7175 rfs4_file_rele(fp);
7176 rfs4_state_rele(sp);
7177 resp->status = NFS4ERR_SERVERFAULT;
7178 return;
7179 }
7180 rfs4_dbe_unlock(fp->rf_dbe);
7181 rfs4_dbe_unlock(sp->rs_dbe);
7182
7183 dsp = rfs4_finddeleg(sp, &dcreate);
7184 if (dsp == NULL) {
7185 rfs4_state_rele(sp);
7186 rfs4_file_rele(fp);
7187 resp->status = NFS4ERR_SERVERFAULT;
7188 return;
7189 }
7190
7191 next_stateid(&sp->rs_stateid);
7192
7193 resp->stateid = sp->rs_stateid.stateid;
7194
7195 resp->delegation.delegation_type = dsp->rds_dtype;
7196
7197 if (dsp->rds_dtype == OPEN_DELEGATE_READ) {
7198 open_read_delegation4 *rv =
7199 &resp->delegation.open_delegation4_u.read;
7200
7201 rv->stateid = dsp->rds_delegid.stateid;
7202 rv->recall = FALSE; /* no policy in place to set to TRUE */
7203 ace = &rv->permissions;
7204 } else {
7205 open_write_delegation4 *rv =
7206 &resp->delegation.open_delegation4_u.write;
7207
7208 rv->stateid = dsp->rds_delegid.stateid;
7209 rv->recall = FALSE; /* no policy in place to set to TRUE */
7210 ace = &rv->permissions;
7211 rv->space_limit.limitby = NFS_LIMIT_SIZE;
7212 rv->space_limit.nfs_space_limit4_u.filesize = UINT64_MAX;
7213 }
7214
7215 /* XXX For now */
7216 ace->type = ACE4_ACCESS_ALLOWED_ACE_TYPE;
7217 ace->flag = 0;
7218 ace->access_mask = 0;
7219 ace->who.utf8string_len = 0;
7220 ace->who.utf8string_val = 0;
7221
7222 rfs4_deleg_state_rele(dsp);
7223 rfs4_state_rele(sp);
7224 rfs4_file_rele(fp);
7225 }
7226
/*
 * Outcome of a sequence id check.  NFS4_CHKSEQ_OKAY must stay zero.
 */
typedef enum {
	/* request carries the next expected sequence id */
	NFS4_CHKSEQ_OKAY = 0,
	/* same sequence id and same operation as the last request */
	NFS4_CHKSEQ_REPLAY,
	/* any other sequence id */
	NFS4_CHKSEQ_BAD
} rfs4_chkseq_t;
7232
7233 /*
7234 * Generic function for sequence number checks.
7235 */
7236 static rfs4_chkseq_t
7237 rfs4_check_seqid(seqid4 seqid, nfs_resop4 *lastop,
7238 seqid4 rqst_seq, nfs_resop4 *resop, bool_t copyres)
7239 {
7240 /* Same sequence ids and matching operations? */
7241 if (seqid == rqst_seq && resop->resop == lastop->resop) {
7242 if (copyres == TRUE) {
7243 rfs4_free_reply(resop);
7244 rfs4_copy_reply(resop, lastop);
7245 }
7246 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
7247 "Replayed SEQID %d\n", seqid));
7248 return (NFS4_CHKSEQ_REPLAY);
7249 }
7250
7251 /* If the incoming sequence is not the next expected then it is bad */
7252 if (rqst_seq != seqid + 1) {
7253 if (rqst_seq == seqid) {
7254 NFS4_DEBUG(rfs4_debug,
7255 (CE_NOTE, "BAD SEQID: Replayed sequence id "
7256 "but last op was %d current op is %d\n",
7257 lastop->resop, resop->resop));
7258 return (NFS4_CHKSEQ_BAD);
7259 }
7260 NFS4_DEBUG(rfs4_debug,
7261 (CE_NOTE, "BAD SEQID: got %u expecting %u\n",
7262 rqst_seq, seqid));
7263 return (NFS4_CHKSEQ_BAD);
7264 }
7265
7266 /* Everything okay -- next expected */
7267 return (NFS4_CHKSEQ_OKAY);
7268 }
7269
7270
7271 static rfs4_chkseq_t
7272 rfs4_check_open_seqid(seqid4 seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7273 {
7274 rfs4_chkseq_t rc;
7275
7276 rfs4_dbe_lock(op->ro_dbe);
7277 rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply, seqid, resop,
7278 TRUE);
7279 rfs4_dbe_unlock(op->ro_dbe);
7280
7281 if (rc == NFS4_CHKSEQ_OKAY)
7282 rfs4_update_lease(op->ro_client);
7283
7284 return (rc);
7285 }
7286
7287 static rfs4_chkseq_t
7288 rfs4_check_olo_seqid(seqid4 olo_seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7289 {
7290 rfs4_chkseq_t rc;
7291
7292 rfs4_dbe_lock(op->ro_dbe);
7293 rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply,
7294 olo_seqid, resop, FALSE);
7295 rfs4_dbe_unlock(op->ro_dbe);
7296
7297 return (rc);
7298 }
7299
7300 static rfs4_chkseq_t
7301 rfs4_check_lock_seqid(seqid4 seqid, rfs4_lo_state_t *lsp, nfs_resop4 *resop)
7302 {
7303 rfs4_chkseq_t rc = NFS4_CHKSEQ_OKAY;
7304
7305 rfs4_dbe_lock(lsp->rls_dbe);
7306 if (!lsp->rls_skip_seqid_check)
7307 rc = rfs4_check_seqid(lsp->rls_seqid, &lsp->rls_reply, seqid,
7308 resop, TRUE);
7309 rfs4_dbe_unlock(lsp->rls_dbe);
7310
7311 return (rc);
7312 }
7313
/*
 * rfs4_op_open: handler for the OPEN operation.
 *
 * argop/resop	argument and result slots for this operation
 * req		the RPC request
 * cs		compound state; cs->vp is the current filehandle's vnode
 *
 * In order, the function:
 *   - requires a current filehandle and a known client whose lease has
 *     not expired;
 *   - looks up the open owner and takes its ro_sw for the remainder of
 *     the operation;
 *   - for an owner that already existed, checks the sequence id (bad,
 *     replay, or next expected);
 *   - applies the grace-period rules for the claim type;
 *   - validates the share access/deny bits;
 *   - dispatches to the rfs4_do_open*() routine for the claim type;
 *   - at "out", decides from resp->status whether the owner's lease,
 *     saved reply and sequence id are updated.
 *
 * Exit labels: "out" releases the client and performs the sequence id
 * handling, "finish" publishes the status and releases the open owner,
 * "end" only fires the done probe.
 */
static void
rfs4_op_open(nfs_argop4 *argop, nfs_resop4 *resop,
    struct svc_req *req, struct compound_state *cs)
{
	OPEN4args *args = &argop->nfs_argop4_u.opopen;
	OPEN4res *resp = &resop->nfs_resop4_u.opopen;
	open_owner4 *owner = &args->owner;
	open_claim_type4 claim = args->claim;
	rfs4_client_t *cp;
	rfs4_openowner_t *oo;
	bool_t create;
	bool_t replay = FALSE;
	int can_reclaim;

	DTRACE_NFSV4_2(op__open__start, struct compound_state *, cs,
	    OPEN4args *, args);

	if (cs->vp == NULL) {
		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
		goto end;
	}

	/*
	 * Need to check clientid and lease expiration first based on
	 * error ordering and incrementing sequence id.
	 */
	cp = rfs4_findclient_by_id(owner->clientid, FALSE);
	if (cp == NULL) {
		*cs->statusp = resp->status =
		    rfs4_check_clientid(&owner->clientid, 0);
		goto end;
	}

	if (rfs4_lease_expired(cp)) {
		rfs4_client_close(cp);
		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
		goto end;
	}
	can_reclaim = cp->rc_can_reclaim;

	/*
	 * Find the open_owner for use from this point forward.  Take
	 * care in updating the sequence id based on the type of error
	 * being returned.
	 */
retry:
	/* reset on every pass; tested below to tell new from existing */
	create = TRUE;
	oo = rfs4_findopenowner(owner, &create, args->seqid);
	if (oo == NULL) {
		*cs->statusp = resp->status = NFS4ERR_RESOURCE;
		rfs4_client_rele(cp);
		goto end;
	}

	/* Hold off access to the sequence space while the open is done */
	rfs4_sw_enter(&oo->ro_sw);

	/*
	 * If the open_owner existed before at the server, then check
	 * the sequence id.
	 */
	if (!create && !oo->ro_postpone_confirm) {
		switch (rfs4_check_open_seqid(args->seqid, oo, resop)) {
		case NFS4_CHKSEQ_BAD:
			/*
			 * An unconfirmed owner used with a sequence id
			 * ahead of the recorded one: free its opens, let
			 * go of it and look the owner up again.
			 * NOTE(review): presumably the retry then finds
			 * or creates a fresh owner -- confirm against
			 * rfs4_free_opens().
			 */
			if ((args->seqid > oo->ro_open_seqid) &&
			    oo->ro_need_confirm) {
				rfs4_free_opens(oo, TRUE, FALSE);
				rfs4_sw_exit(&oo->ro_sw);
				rfs4_openowner_rele(oo);
				goto retry;
			}
			resp->status = NFS4ERR_BAD_SEQID;
			goto out;
		case NFS4_CHKSEQ_REPLAY: /* replay of previous request */
			replay = TRUE;
			goto out;
		default:
			break;
		}

		/*
		 * Sequence was ok and open owner exists
		 * check to see if we have yet to see an
		 * open_confirm.
		 */
		if (oo->ro_need_confirm) {
			rfs4_free_opens(oo, TRUE, FALSE);
			rfs4_sw_exit(&oo->ro_sw);
			rfs4_openowner_rele(oo);
			goto retry;
		}
	}
	/* Grace only applies to regular-type OPENs */
	if (rfs4_clnt_in_grace(cp) &&
	    (claim == CLAIM_NULL || claim == CLAIM_DELEGATE_CUR)) {
		*cs->statusp = resp->status = NFS4ERR_GRACE;
		goto out;
	}

	/*
	 * If previous state at the server existed then can_reclaim
	 * will be set.  If not reply NFS4ERR_NO_GRACE to the
	 * client.
	 */
	if (rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS && !can_reclaim) {
		*cs->statusp = resp->status = NFS4ERR_NO_GRACE;
		goto out;
	}


	/*
	 * Reject the open if the client has missed the grace period
	 */
	if (!rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS) {
		*cs->statusp = resp->status = NFS4ERR_NO_GRACE;
		goto out;
	}

	/* Couple of up-front bookkeeping items */
	if (oo->ro_need_confirm) {
		/*
		 * If this is a reclaim OPEN then we should not ask
		 * for a confirmation of the open_owner per the
		 * protocol specification.
		 */
		if (claim == CLAIM_PREVIOUS)
			oo->ro_need_confirm = FALSE;
		else
			resp->rflags |= OPEN4_RESULT_CONFIRM;
	}
	resp->rflags |= OPEN4_RESULT_LOCKTYPE_POSIX;

	/*
	 * If there is an unshared filesystem mounted on this vnode,
	 * do not allow to open/create in this directory.
	 */
	if (vn_ismntpt(cs->vp)) {
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		goto out;
	}

	/*
	 * access must be READ, WRITE, or BOTH.  No access is invalid.
	 * deny can be READ, WRITE, BOTH, or NONE.
	 * bits not defined for access/deny are invalid.
	 */
	if (! (args->share_access & OPEN4_SHARE_ACCESS_BOTH) ||
	    (args->share_access & ~OPEN4_SHARE_ACCESS_BOTH) ||
	    (args->share_deny & ~OPEN4_SHARE_DENY_BOTH)) {
		*cs->statusp = resp->status = NFS4ERR_INVAL;
		goto out;
	}


	/*
	 * make sure attrset is zero before response is built.
	 */
	resp->attrset = 0;

	/* Each claim type has its own routine; all report via resp->status */
	switch (claim) {
	case CLAIM_NULL:
		rfs4_do_opennull(cs, req, args, oo, resp);
		break;
	case CLAIM_PREVIOUS:
		rfs4_do_openprev(cs, req, args, oo, resp);
		break;
	case CLAIM_DELEGATE_CUR:
		rfs4_do_opendelcur(cs, req, args, oo, resp);
		break;
	case CLAIM_DELEGATE_PREV:
		rfs4_do_opendelprev(cs, req, args, oo, resp);
		break;
	default:
		resp->status = NFS4ERR_INVAL;
		break;
	}

out:
	/* cp is released on every path that reaches this label */
	rfs4_client_rele(cp);

	/* Catch sequence id handling here to make it a little easier */
	switch (resp->status) {
	case NFS4ERR_BADXDR:
	case NFS4ERR_BAD_SEQID:
	case NFS4ERR_BAD_STATEID:
	case NFS4ERR_NOFILEHANDLE:
	case NFS4ERR_RESOURCE:
	case NFS4ERR_STALE_CLIENTID:
	case NFS4ERR_STALE_STATEID:
		/*
		 * The protocol states that if any of these errors are
		 * being returned, the sequence id should not be
		 * incremented.  Any other return requires an
		 * increment.
		 */
		break;
	default:
		/* Always update the lease in this case */
		rfs4_update_lease(oo->ro_client);

		/* Regular response - copy the result */
		if (!replay)
			rfs4_update_open_resp(oo, resop, &cs->fh);

		/*
		 * REPLAY case: Only if the previous response was OK
		 * do we copy the filehandle.  If not OK, no
		 * filehandle to copy.
		 */
		if (replay == TRUE &&
		    resp->status == NFS4_OK &&
		    oo->ro_reply_fh.nfs_fh4_val) {
			/*
			 * If this is a replay, we must restore the
			 * current filehandle/vp to that of what was
			 * returned originally.  Try our best to do
			 * it.
			 */
			nfs_fh4_fmt_t *fh_fmtp =
			    (nfs_fh4_fmt_t *)oo->ro_reply_fh.nfs_fh4_val;

			cs->exi = checkexport4(&fh_fmtp->fh4_fsid,
			    (fid_t *)&fh_fmtp->fh4_xlen, NULL);

			if (cs->exi == NULL) {
				resp->status = NFS4ERR_STALE;
				goto finish;
			}

			/* swap the current vnode for the saved reply's one */
			VN_RELE(cs->vp);

			cs->vp = nfs4_fhtovp(&oo->ro_reply_fh, cs->exi,
			    &resp->status);

			if (cs->vp == NULL)
				goto finish;

			nfs_fh4_copy(&oo->ro_reply_fh, &cs->fh);
		}

		/*
		 * If this was a replay, no need to update the
		 * sequence id.  If the open_owner was not created on
		 * this pass, then update.  The first use of an
		 * open_owner will not bump the sequence id.
		 */
		if (replay == FALSE && !create)
			rfs4_update_open_sequence(oo);
		/*
		 * If the client is receiving an error and the
		 * open_owner needs to be confirmed, there is no way
		 * to notify the client of this fact ignoring the fact
		 * that the server has no method of returning a
		 * stateid to confirm.  Therefore, the server needs to
		 * mark this open_owner in a way as to avoid the
		 * sequence id checking the next time the client uses
		 * this open_owner.
		 */
		if (resp->status != NFS4_OK && oo->ro_need_confirm)
			oo->ro_postpone_confirm = TRUE;
		/*
		 * If OK response then clear the postpone flag and
		 * reset the sequence id to keep in sync with the
		 * client.
		 */
		if (resp->status == NFS4_OK && oo->ro_postpone_confirm) {
			oo->ro_postpone_confirm = FALSE;
			oo->ro_open_seqid = args->seqid;
		}
		break;
	}

finish:
	/* publish the final status, then let go of the open owner */
	*cs->statusp = resp->status;

	rfs4_sw_exit(&oo->ro_sw);
	rfs4_openowner_rele(oo);

end:
	DTRACE_NFSV4_2(op__open__done, struct compound_state *, cs,
	    OPEN4res *, resp);
}
7596
7597 /*ARGSUSED*/
7598 void
7599 rfs4_op_open_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
7600 struct svc_req *req, struct compound_state *cs)
7601 {
7602 OPEN_CONFIRM4args *args = &argop->nfs_argop4_u.opopen_confirm;
7603 OPEN_CONFIRM4res *resp = &resop->nfs_resop4_u.opopen_confirm;
7604 rfs4_state_t *sp;
7605 nfsstat4 status;
7606
7607 DTRACE_NFSV4_2(op__open__confirm__start, struct compound_state *, cs,
7608 OPEN_CONFIRM4args *, args);
7609
7610 if (cs->vp == NULL) {
7611 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7612 goto out;
7613 }
7614
7615 if (cs->vp->v_type != VREG) {
7616 *cs->statusp = resp->status =
7617 cs->vp->v_type == VDIR ? NFS4ERR_ISDIR : NFS4ERR_INVAL;
7618 return;
7619 }
7620
7621 status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7622 if (status != NFS4_OK) {
7623 *cs->statusp = resp->status = status;
7624 goto out;
7625 }
7626
7627 /* Ensure specified filehandle matches */
7628 if (cs->vp != sp->rs_finfo->rf_vp) {
7629 rfs4_state_rele(sp);
7630 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7631 goto out;
7632 }
7633
7634 /* hold off other access to open_owner while we tinker */
7635 rfs4_sw_enter(&sp->rs_owner->ro_sw);
7636
7637 switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7638 case NFS4_CHECK_STATEID_OKAY:
7639 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7640 resop) != 0) {
7641 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7642 break;
7643 }
7644 /*
7645 * If it is the appropriate stateid and determined to
7646 * be "OKAY" then this means that the stateid does not
7647 * need to be confirmed and the client is in error for
7648 * sending an OPEN_CONFIRM.
7649 */
7650 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7651 break;
7652 case NFS4_CHECK_STATEID_OLD:
7653 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7654 break;
7655 case NFS4_CHECK_STATEID_BAD:
7656 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7657 break;
7658 case NFS4_CHECK_STATEID_EXPIRED:
7659 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7660 break;
7661 case NFS4_CHECK_STATEID_CLOSED:
7662 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7663 break;
7664 case NFS4_CHECK_STATEID_REPLAY:
7665 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7666 resop)) {
7667 case NFS4_CHKSEQ_OKAY:
7668 /*
7669 * This is replayed stateid; if seqid matches
7670 * next expected, then client is using wrong seqid.
7671 */
7672 /* fall through */
7673 case NFS4_CHKSEQ_BAD:
7674 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7675 break;
7676 case NFS4_CHKSEQ_REPLAY:
7677 /*
7678 * Note this case is the duplicate case so
7679 * resp->status is already set.
7680 */
7681 *cs->statusp = resp->status;
7682 rfs4_update_lease(sp->rs_owner->ro_client);
7683 break;
7684 }
7685 break;
7686 case NFS4_CHECK_STATEID_UNCONFIRMED:
7687 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7688 resop) != NFS4_CHKSEQ_OKAY) {
7689 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7690 break;
7691 }
7692 *cs->statusp = resp->status = NFS4_OK;
7693
7694 next_stateid(&sp->rs_stateid);
7695 resp->open_stateid = sp->rs_stateid.stateid;
7696 sp->rs_owner->ro_need_confirm = FALSE;
7697 rfs4_update_lease(sp->rs_owner->ro_client);
7698 rfs4_update_open_sequence(sp->rs_owner);
7699 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7700 break;
7701 default:
7702 ASSERT(FALSE);
7703 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7704 break;
7705 }
7706 rfs4_sw_exit(&sp->rs_owner->ro_sw);
7707 rfs4_state_rele(sp);
7708
7709 out:
7710 DTRACE_NFSV4_2(op__open__confirm__done, struct compound_state *, cs,
7711 OPEN_CONFIRM4res *, resp);
7712 }
7713
7714 /*ARGSUSED*/
7715 void
7716 rfs4_op_open_downgrade(nfs_argop4 *argop, nfs_resop4 *resop,
7717 struct svc_req *req, struct compound_state *cs)
7718 {
7719 OPEN_DOWNGRADE4args *args = &argop->nfs_argop4_u.opopen_downgrade;
7720 OPEN_DOWNGRADE4res *resp = &resop->nfs_resop4_u.opopen_downgrade;
7721 uint32_t access = args->share_access;
7722 uint32_t deny = args->share_deny;
7723 nfsstat4 status;
7724 rfs4_state_t *sp;
7725 rfs4_file_t *fp;
7726 int fflags = 0;
7727
7728 DTRACE_NFSV4_2(op__open__downgrade__start, struct compound_state *, cs,
7729 OPEN_DOWNGRADE4args *, args);
7730
7731 if (cs->vp == NULL) {
7732 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7733 goto out;
7734 }
7735
7736 if (cs->vp->v_type != VREG) {
7737 *cs->statusp = resp->status = NFS4ERR_INVAL;
7738 return;
7739 }
7740
7741 status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7742 if (status != NFS4_OK) {
7743 *cs->statusp = resp->status = status;
7744 goto out;
7745 }
7746
7747 /* Ensure specified filehandle matches */
7748 if (cs->vp != sp->rs_finfo->rf_vp) {
7749 rfs4_state_rele(sp);
7750 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7751 goto out;
7752 }
7753
7754 /* hold off other access to open_owner while we tinker */
7755 rfs4_sw_enter(&sp->rs_owner->ro_sw);
7756
7757 switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7758 case NFS4_CHECK_STATEID_OKAY:
7759 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7760 resop) != NFS4_CHKSEQ_OKAY) {
7761 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7762 goto end;
7763 }
7764 break;
7765 case NFS4_CHECK_STATEID_OLD:
7766 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7767 goto end;
7768 case NFS4_CHECK_STATEID_BAD:
7769 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7770 goto end;
7771 case NFS4_CHECK_STATEID_EXPIRED:
7772 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7773 goto end;
7774 case NFS4_CHECK_STATEID_CLOSED:
7775 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7776 goto end;
7777 case NFS4_CHECK_STATEID_UNCONFIRMED:
7778 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7779 goto end;
7780 case NFS4_CHECK_STATEID_REPLAY:
7781 /* Check the sequence id for the open owner */
7782 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7783 resop)) {
7784 case NFS4_CHKSEQ_OKAY:
7785 /*
7786 * This is replayed stateid; if seqid matches
7787 * next expected, then client is using wrong seqid.
7788 */
7789 /* fall through */
7790 case NFS4_CHKSEQ_BAD:
7791 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7792 goto end;
7793 case NFS4_CHKSEQ_REPLAY:
7794 /*
7795 * Note this case is the duplicate case so
7796 * resp->status is already set.
7797 */
7798 *cs->statusp = resp->status;
7799 rfs4_update_lease(sp->rs_owner->ro_client);
7800 goto end;
7801 }
7802 break;
7803 default:
7804 ASSERT(FALSE);
7805 break;
7806 }
7807
7808 rfs4_dbe_lock(sp->rs_dbe);
7809 /*
7810 * Check that the new access modes and deny modes are valid.
7811 * Check that no invalid bits are set.
7812 */
7813 if ((access & ~(OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) ||
7814 (deny & ~(OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE))) {
7815 *cs->statusp = resp->status = NFS4ERR_INVAL;
7816 rfs4_update_open_sequence(sp->rs_owner);
7817 rfs4_dbe_unlock(sp->rs_dbe);
7818 goto end;
7819 }
7820
7821 /*
7822 * The new modes must be a subset of the current modes and
7823 * the access must specify at least one mode. To test that
7824 * the new mode is a subset of the current modes we bitwise
7825 * AND them together and check that the result equals the new
7826 * mode. For example:
7827 * New mode, access == R and current mode, sp->rs_open_access == RW
7828 * access & sp->rs_open_access == R == access, so the new access mode
7829 * is valid. Consider access == RW, sp->rs_open_access = R
7830 * access & sp->rs_open_access == R != access, so the new access mode
7831 * is invalid.
7832 */
7833 if ((access & sp->rs_open_access) != access ||
7834 (deny & sp->rs_open_deny) != deny ||
7835 (access &
7836 (OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) == 0) {
7837 *cs->statusp = resp->status = NFS4ERR_INVAL;
7838 rfs4_update_open_sequence(sp->rs_owner);
7839 rfs4_dbe_unlock(sp->rs_dbe);
7840 goto end;
7841 }
7842
7843 /*
7844 * Release any share locks associated with this stateID.
7845 * Strictly speaking, this violates the spec because the
7846 * spec effectively requires that open downgrade be atomic.
7847 * At present, fs_shrlock does not have this capability.
7848 */
7849 (void) rfs4_unshare(sp);
7850
7851 status = rfs4_share(sp, access, deny);
7852 if (status != NFS4_OK) {
7853 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7854 rfs4_update_open_sequence(sp->rs_owner);
7855 rfs4_dbe_unlock(sp->rs_dbe);
7856 goto end;
7857 }
7858
7859 fp = sp->rs_finfo;
7860 rfs4_dbe_lock(fp->rf_dbe);
7861
7862 /*
7863 * If the current mode has deny read and the new mode
7864 * does not, decrement the number of deny read mode bits
7865 * and if it goes to zero turn off the deny read bit
7866 * on the file.
7867 */
7868 if ((sp->rs_open_deny & OPEN4_SHARE_DENY_READ) &&
7869 (deny & OPEN4_SHARE_DENY_READ) == 0) {
7870 fp->rf_deny_read--;
7871 if (fp->rf_deny_read == 0)
7872 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
7873 }
7874
7875 /*
7876 * If the current mode has deny write and the new mode
7877 * does not, decrement the number of deny write mode bits
7878 * and if it goes to zero turn off the deny write bit
7879 * on the file.
7880 */
7881 if ((sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) &&
7882 (deny & OPEN4_SHARE_DENY_WRITE) == 0) {
7883 fp->rf_deny_write--;
7884 if (fp->rf_deny_write == 0)
7885 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
7886 }
7887
7888 /*
7889 * If the current mode has access read and the new mode
7890 * does not, decrement the number of access read mode bits
7891 * and if it goes to zero turn off the access read bit
7892 * on the file. set fflags to FREAD for the call to
7893 * vn_open_downgrade().
7894 */
7895 if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) &&
7896 (access & OPEN4_SHARE_ACCESS_READ) == 0) {
7897 fp->rf_access_read--;
7898 if (fp->rf_access_read == 0)
7899 fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
7900 fflags |= FREAD;
7901 }
7902
7903 /*
7904 * If the current mode has access write and the new mode
7905 * does not, decrement the number of access write mode bits
7906 * and if it goes to zero turn off the access write bit
7907 * on the file. set fflags to FWRITE for the call to
7908 * vn_open_downgrade().
7909 */
7910 if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) &&
7911 (access & OPEN4_SHARE_ACCESS_WRITE) == 0) {
7912 fp->rf_access_write--;
7913 if (fp->rf_access_write == 0)
7914 fp->rf_share_deny &= ~OPEN4_SHARE_ACCESS_WRITE;
7915 fflags |= FWRITE;
7916 }
7917
7918 /* Check that the file is still accessible */
7919 ASSERT(fp->rf_share_access);
7920
7921 rfs4_dbe_unlock(fp->rf_dbe);
7922
7923 /* now set the new open access and deny modes */
7924 sp->rs_open_access = access;
7925 sp->rs_open_deny = deny;
7926
7927 /*
7928 * we successfully downgraded the share lock, now we need to downgrade
7929 * the open. it is possible that the downgrade was only for a deny
7930 * mode and we have nothing else to do.
7931 */
7932 if ((fflags & (FREAD|FWRITE)) != 0)
7933 vn_open_downgrade(cs->vp, fflags);
7934
7935 /* Update the stateid */
7936 next_stateid(&sp->rs_stateid);
7937 resp->open_stateid = sp->rs_stateid.stateid;
7938
7939 rfs4_dbe_unlock(sp->rs_dbe);
7940
7941 *cs->statusp = resp->status = NFS4_OK;
7942 /* Update the lease */
7943 rfs4_update_lease(sp->rs_owner->ro_client);
7944 /* And the sequence */
7945 rfs4_update_open_sequence(sp->rs_owner);
7946 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7947
7948 end:
7949 rfs4_sw_exit(&sp->rs_owner->ro_sw);
7950 rfs4_state_rele(sp);
7951 out:
7952 DTRACE_NFSV4_2(op__open__downgrade__done, struct compound_state *, cs,
7953 OPEN_DOWNGRADE4res *, resp);
7954 }
7955
/*
 * Search the first n bytes of the buffer s1 (which need not be
 * NUL-terminated) for the NUL-terminated string s2.
 *
 * Returns a pointer to the first byte of the first match, or NULL when
 * s2 does not occur within the n bytes examined.
 */
static void *
memstr(const void *s1, const char *s2, size_t n)
{
	size_t patlen = strlen(s2);
	char *cur = (char *)s1;
	size_t remaining;

	/* Stop as soon as fewer bytes remain than the pattern needs. */
	for (remaining = n; remaining >= patlen; remaining--, cur++) {
		if (bcmp(cur, s2, patlen) == 0)
			return (cur);
	}

	return (NULL);
}
7971
7972 /*
7973 * The logic behind this function is detailed in the NFSv4 RFC in the
7974 * SETCLIENTID operation description under IMPLEMENTATION. Refer to
7975 * that section for explicit guidance to server behavior for
7976 * SETCLIENTID.
7977 */
7978 void
7979 rfs4_op_setclientid(nfs_argop4 *argop, nfs_resop4 *resop,
7980 struct svc_req *req, struct compound_state *cs)
7981 {
7982 SETCLIENTID4args *args = &argop->nfs_argop4_u.opsetclientid;
7983 SETCLIENTID4res *res = &resop->nfs_resop4_u.opsetclientid;
7984 rfs4_client_t *cp, *newcp, *cp_confirmed, *cp_unconfirmed;
7985 rfs4_clntip_t *ci;
7986 bool_t create;
7987 char *addr, *netid;
7988 int len;
7989
7990 DTRACE_NFSV4_2(op__setclientid__start, struct compound_state *, cs,
7991 SETCLIENTID4args *, args);
7992 retry:
7993 newcp = cp_confirmed = cp_unconfirmed = NULL;
7994
7995 /*
7996 * Save the caller's IP address
7997 */
7998 args->client.cl_addr =
7999 (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
8000
8001 /*
8002 * Record if it is a Solaris client that cannot handle referrals.
8003 */
8004 if (memstr(args->client.id_val, "Solaris", args->client.id_len) &&
8005 !memstr(args->client.id_val, "+referrals", args->client.id_len)) {
8006 /* Add a "yes, it's downrev" record */
8007 create = TRUE;
8008 ci = rfs4_find_clntip(args->client.cl_addr, &create);
8009 ASSERT(ci != NULL);
8010 rfs4_dbe_rele(ci->ri_dbe);
8011 } else {
8012 /* Remove any previous record */
8013 rfs4_invalidate_clntip(args->client.cl_addr);
8014 }
8015
8016 /*
8017 * In search of an EXISTING client matching the incoming
8018 * request to establish a new client identifier at the server
8019 */
8020 create = TRUE;
8021 cp = rfs4_findclient(&args->client, &create, NULL);
8022
8023 /* Should never happen */
8024 ASSERT(cp != NULL);
8025
8026 if (cp == NULL) {
8027 *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8028 goto out;
8029 }
8030
8031 /*
8032 * Easiest case. Client identifier is newly created and is
8033 * unconfirmed. Also note that for this case, no other
8034 * entries exist for the client identifier. Nothing else to
8035 * check. Just setup the response and respond.
8036 */
8037 if (create) {
8038 *cs->statusp = res->status = NFS4_OK;
8039 res->SETCLIENTID4res_u.resok4.clientid = cp->rc_clientid;
8040 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8041 cp->rc_confirm_verf;
8042 /* Setup callback information; CB_NULL confirmation later */
8043 rfs4_client_setcb(cp, &args->callback, args->callback_ident);
8044
8045 rfs4_client_rele(cp);
8046 goto out;
8047 }
8048
8049 /*
8050 * An existing, confirmed client may exist but it may not have
8051 * been active for at least one lease period. If so, then
8052 * "close" the client and create a new client identifier
8053 */
8054 if (rfs4_lease_expired(cp)) {
8055 rfs4_client_close(cp);
8056 goto retry;
8057 }
8058
8059 if (cp->rc_need_confirm == TRUE)
8060 cp_unconfirmed = cp;
8061 else
8062 cp_confirmed = cp;
8063
8064 cp = NULL;
8065
8066 /*
8067 * We have a confirmed client, now check for an
8068 * unconfimred entry
8069 */
8070 if (cp_confirmed) {
8071 /* If creds don't match then client identifier is inuse */
8072 if (!creds_ok(cp_confirmed->rc_cr_set, req, cs)) {
8073 rfs4_cbinfo_t *cbp;
8074 /*
8075 * Some one else has established this client
8076 * id. Try and say * who they are. We will use
8077 * the call back address supplied by * the
8078 * first client.
8079 */
8080 *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8081
8082 addr = netid = NULL;
8083
8084 cbp = &cp_confirmed->rc_cbinfo;
8085 if (cbp->cb_callback.cb_location.r_addr &&
8086 cbp->cb_callback.cb_location.r_netid) {
8087 cb_client4 *cbcp = &cbp->cb_callback;
8088
8089 len = strlen(cbcp->cb_location.r_addr)+1;
8090 addr = kmem_alloc(len, KM_SLEEP);
8091 bcopy(cbcp->cb_location.r_addr, addr, len);
8092 len = strlen(cbcp->cb_location.r_netid)+1;
8093 netid = kmem_alloc(len, KM_SLEEP);
8094 bcopy(cbcp->cb_location.r_netid, netid, len);
8095 }
8096
8097 res->SETCLIENTID4res_u.client_using.r_addr = addr;
8098 res->SETCLIENTID4res_u.client_using.r_netid = netid;
8099
8100 rfs4_client_rele(cp_confirmed);
8101 }
8102
8103 /*
8104 * Confirmed, creds match, and verifier matches; must
8105 * be an update of the callback info
8106 */
8107 if (cp_confirmed->rc_nfs_client.verifier ==
8108 args->client.verifier) {
8109 /* Setup callback information */
8110 rfs4_client_setcb(cp_confirmed, &args->callback,
8111 args->callback_ident);
8112
8113 /* everything okay -- move ahead */
8114 *cs->statusp = res->status = NFS4_OK;
8115 res->SETCLIENTID4res_u.resok4.clientid =
8116 cp_confirmed->rc_clientid;
8117
8118 /* update the confirm_verifier and return it */
8119 rfs4_client_scv_next(cp_confirmed);
8120 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8121 cp_confirmed->rc_confirm_verf;
8122
8123 rfs4_client_rele(cp_confirmed);
8124 goto out;
8125 }
8126
8127 /*
8128 * Creds match but the verifier doesn't. Must search
8129 * for an unconfirmed client that would be replaced by
8130 * this request.
8131 */
8132 create = FALSE;
8133 cp_unconfirmed = rfs4_findclient(&args->client, &create,
8134 cp_confirmed);
8135 }
8136
8137 /*
8138 * At this point, we have taken care of the brand new client
8139 * struct, INUSE case, update of an existing, and confirmed
8140 * client struct.
8141 */
8142
8143 /*
8144 * check to see if things have changed while we originally
8145 * picked up the client struct. If they have, then return and
8146 * retry the processing of this SETCLIENTID request.
8147 */
8148 if (cp_unconfirmed) {
8149 rfs4_dbe_lock(cp_unconfirmed->rc_dbe);
8150 if (!cp_unconfirmed->rc_need_confirm) {
8151 rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8152 rfs4_client_rele(cp_unconfirmed);
8153 if (cp_confirmed)
8154 rfs4_client_rele(cp_confirmed);
8155 goto retry;
8156 }
8157 /* do away with the old unconfirmed one */
8158 rfs4_dbe_invalidate(cp_unconfirmed->rc_dbe);
8159 rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8160 rfs4_client_rele(cp_unconfirmed);
8161 cp_unconfirmed = NULL;
8162 }
8163
8164 /*
8165 * This search will temporarily hide the confirmed client
8166 * struct while a new client struct is created as the
8167 * unconfirmed one.
8168 */
8169 create = TRUE;
8170 newcp = rfs4_findclient(&args->client, &create, cp_confirmed);
8171
8172 ASSERT(newcp != NULL);
8173
8174 if (newcp == NULL) {
8175 *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8176 rfs4_client_rele(cp_confirmed);
8177 goto out;
8178 }
8179
8180 /*
8181 * If one was not created, then a similar request must be in
8182 * process so release and start over with this one
8183 */
8184 if (create != TRUE) {
8185 rfs4_client_rele(newcp);
8186 if (cp_confirmed)
8187 rfs4_client_rele(cp_confirmed);
8188 goto retry;
8189 }
8190
8191 *cs->statusp = res->status = NFS4_OK;
8192 res->SETCLIENTID4res_u.resok4.clientid = newcp->rc_clientid;
8193 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8194 newcp->rc_confirm_verf;
8195 /* Setup callback information; CB_NULL confirmation later */
8196 rfs4_client_setcb(newcp, &args->callback, args->callback_ident);
8197
8198 newcp->rc_cp_confirmed = cp_confirmed;
8199
8200 rfs4_client_rele(newcp);
8201
8202 out:
8203 DTRACE_NFSV4_2(op__setclientid__done, struct compound_state *, cs,
8204 SETCLIENTID4res *, res);
8205 }
8206
8207 /*ARGSUSED*/
8208 void
8209 rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
8210 struct svc_req *req, struct compound_state *cs)
8211 {
8212 SETCLIENTID_CONFIRM4args *args =
8213 &argop->nfs_argop4_u.opsetclientid_confirm;
8214 SETCLIENTID_CONFIRM4res *res =
8215 &resop->nfs_resop4_u.opsetclientid_confirm;
8216 rfs4_client_t *cp, *cptoclose = NULL;
8217 nfs4_srv_t *nsrv4;
8218
8219 DTRACE_NFSV4_2(op__setclientid__confirm__start,
8220 struct compound_state *, cs,
8221 SETCLIENTID_CONFIRM4args *, args);
8222
8223 nsrv4 = nfs4_get_srv();
8224 *cs->statusp = res->status = NFS4_OK;
8225
8226 cp = rfs4_findclient_by_id(args->clientid, TRUE);
8227
8228 if (cp == NULL) {
8229 *cs->statusp = res->status =
8230 rfs4_check_clientid(&args->clientid, 1);
8231 goto out;
8232 }
8233
8234 if (!creds_ok(cp, req, cs)) {
8235 *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8236 rfs4_client_rele(cp);
8237 goto out;
8238 }
8239
8240 /* If the verifier doesn't match, the record doesn't match */
8241 if (cp->rc_confirm_verf != args->setclientid_confirm) {
8242 *cs->statusp = res->status = NFS4ERR_STALE_CLIENTID;
8243 rfs4_client_rele(cp);
8244 goto out;
8245 }
8246
8247 rfs4_dbe_lock(cp->rc_dbe);
8248 cp->rc_need_confirm = FALSE;
8249 if (cp->rc_cp_confirmed) {
8250 cptoclose = cp->rc_cp_confirmed;
8251 cptoclose->rc_ss_remove = 1;
8252 cp->rc_cp_confirmed = NULL;
8253 }
8254
8255 /*
8256 * Update the client's associated server instance, if it's changed
8257 * since the client was created.
8258 */
8259 if (rfs4_servinst(cp) != nsrv4->nfs4_cur_servinst)
8260 rfs4_servinst_assign(nsrv4, cp, nsrv4->nfs4_cur_servinst);
8261
8262 /*
8263 * Record clientid in stable storage.
8264 * Must be done after server instance has been assigned.
8265 */
8266 rfs4_ss_clid(nsrv4, cp);
8267
8268 rfs4_dbe_unlock(cp->rc_dbe);
8269
8270 if (cptoclose)
8271 /* don't need to rele, client_close does it */
8272 rfs4_client_close(cptoclose);
8273
8274 /* If needed, initiate CB_NULL call for callback path */
8275 rfs4_deleg_cb_check(cp);
8276 rfs4_update_lease(cp);
8277
8278 /*
8279 * Check to see if client can perform reclaims
8280 */
8281 rfs4_ss_chkclid(nsrv4, cp);
8282
8283 rfs4_client_rele(cp);
8284
8285 out:
8286 DTRACE_NFSV4_2(op__setclientid__confirm__done,
8287 struct compound_state *, cs,
8288 SETCLIENTID_CONFIRM4 *, res);
8289 }
8290
8291
8292 /*ARGSUSED*/
8293 void
8294 rfs4_op_close(nfs_argop4 *argop, nfs_resop4 *resop,
8295 struct svc_req *req, struct compound_state *cs)
8296 {
8297 CLOSE4args *args = &argop->nfs_argop4_u.opclose;
8298 CLOSE4res *resp = &resop->nfs_resop4_u.opclose;
8299 rfs4_state_t *sp;
8300 nfsstat4 status;
8301
8302 DTRACE_NFSV4_2(op__close__start, struct compound_state *, cs,
8303 CLOSE4args *, args);
8304
8305 if (cs->vp == NULL) {
8306 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8307 goto out;
8308 }
8309
8310 status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_INVALID);
8311 if (status != NFS4_OK) {
8312 *cs->statusp = resp->status = status;
8313 goto out;
8314 }
8315
8316 /* Ensure specified filehandle matches */
8317 if (cs->vp != sp->rs_finfo->rf_vp) {
8318 rfs4_state_rele(sp);
8319 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8320 goto out;
8321 }
8322
8323 /* hold off other access to open_owner while we tinker */
8324 rfs4_sw_enter(&sp->rs_owner->ro_sw);
8325
8326 switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
8327 case NFS4_CHECK_STATEID_OKAY:
8328 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8329 resop) != NFS4_CHKSEQ_OKAY) {
8330 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8331 goto end;
8332 }
8333 break;
8334 case NFS4_CHECK_STATEID_OLD:
8335 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8336 goto end;
8337 case NFS4_CHECK_STATEID_BAD:
8338 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8339 goto end;
8340 case NFS4_CHECK_STATEID_EXPIRED:
8341 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8342 goto end;
8343 case NFS4_CHECK_STATEID_CLOSED:
8344 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8345 goto end;
8346 case NFS4_CHECK_STATEID_UNCONFIRMED:
8347 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8348 goto end;
8349 case NFS4_CHECK_STATEID_REPLAY:
8350 /* Check the sequence id for the open owner */
8351 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8352 resop)) {
8353 case NFS4_CHKSEQ_OKAY:
8354 /*
8355 * This is replayed stateid; if seqid matches
8356 * next expected, then client is using wrong seqid.
8357 */
8358 /* FALL THROUGH */
8359 case NFS4_CHKSEQ_BAD:
8360 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8361 goto end;
8362 case NFS4_CHKSEQ_REPLAY:
8363 /*
8364 * Note this case is the duplicate case so
8365 * resp->status is already set.
8366 */
8367 *cs->statusp = resp->status;
8368 rfs4_update_lease(sp->rs_owner->ro_client);
8369 goto end;
8370 }
8371 break;
8372 default:
8373 ASSERT(FALSE);
8374 break;
8375 }
8376
8377 rfs4_dbe_lock(sp->rs_dbe);
8378
8379 /* Update the stateid. */
8380 next_stateid(&sp->rs_stateid);
8381 resp->open_stateid = sp->rs_stateid.stateid;
8382
8383 rfs4_dbe_unlock(sp->rs_dbe);
8384
8385 rfs4_update_lease(sp->rs_owner->ro_client);
8386 rfs4_update_open_sequence(sp->rs_owner);
8387 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8388
8389 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
8390
8391 *cs->statusp = resp->status = status;
8392
8393 end:
8394 rfs4_sw_exit(&sp->rs_owner->ro_sw);
8395 rfs4_state_rele(sp);
8396 out:
8397 DTRACE_NFSV4_2(op__close__done, struct compound_state *, cs,
8398 CLOSE4res *, resp);
8399 }
8400
8401 /*
8402 * Manage the counts on the file struct and close all file locks
8403 */
8404 /*ARGSUSED*/
8405 void
8406 rfs4_release_share_lock_state(rfs4_state_t *sp, cred_t *cr,
8407 bool_t close_of_client)
8408 {
8409 rfs4_file_t *fp = sp->rs_finfo;
8410 rfs4_lo_state_t *lsp;
8411 int fflags = 0;
8412
8413 /*
8414 * If this call is part of the larger closing down of client
8415 * state then it is just easier to release all locks
8416 * associated with this client instead of going through each
8417 * individual file and cleaning locks there.
8418 */
8419 if (close_of_client) {
8420 if (sp->rs_owner->ro_client->rc_unlksys_completed == FALSE &&
8421 !list_is_empty(&sp->rs_lostatelist) &&
8422 sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID) {
8423 /* Is the PxFS kernel module loaded? */
8424 if (lm_remove_file_locks != NULL) {
8425 int new_sysid;
8426
8427 /* Encode the cluster nodeid in new sysid */
8428 new_sysid = sp->rs_owner->ro_client->rc_sysidt;
8429 lm_set_nlmid_flk(&new_sysid);
8430
8431 /*
8432 * This PxFS routine removes file locks for a
8433 * client over all nodes of a cluster.
8434 */
8435 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
8436 "lm_remove_file_locks(sysid=0x%x)\n",
8437 new_sysid));
8438 (*lm_remove_file_locks)(new_sysid);
8439 } else {
8440 struct flock64 flk;
8441
8442 /* Release all locks for this client */
8443 flk.l_type = F_UNLKSYS;
8444 flk.l_whence = 0;
8445 flk.l_start = 0;
8446 flk.l_len = 0;
8447 flk.l_sysid =
8448 sp->rs_owner->ro_client->rc_sysidt;
8449 flk.l_pid = 0;
8450 (void) VOP_FRLOCK(sp->rs_finfo->rf_vp, F_SETLK,
8451 &flk, F_REMOTELOCK | FREAD | FWRITE,
8452 (u_offset_t)0, NULL, CRED(), NULL);
8453 }
8454
8455 sp->rs_owner->ro_client->rc_unlksys_completed = TRUE;
8456 }
8457 }
8458
8459 /*
8460 * Release all locks on this file by this lock owner or at
8461 * least mark the locks as having been released
8462 */
8463 for (lsp = list_head(&sp->rs_lostatelist); lsp != NULL;
8464 lsp = list_next(&sp->rs_lostatelist, lsp)) {
8465 lsp->rls_locks_cleaned = TRUE;
8466
8467 /* Was this already taken care of above? */
8468 if (!close_of_client &&
8469 sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8470 (void) cleanlocks(sp->rs_finfo->rf_vp,
8471 lsp->rls_locker->rl_pid,
8472 lsp->rls_locker->rl_client->rc_sysidt);
8473 }
8474
8475 /*
8476 * Release any shrlocks associated with this open state ID.
8477 * This must be done before the rfs4_state gets marked closed.
8478 */
8479 if (sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8480 (void) rfs4_unshare(sp);
8481
8482 if (sp->rs_open_access) {
8483 rfs4_dbe_lock(fp->rf_dbe);
8484
8485 /*
8486 * Decrement the count for each access and deny bit that this
8487 * state has contributed to the file.
8488 * If the file counts go to zero
8489 * clear the appropriate bit in the appropriate mask.
8490 */
8491 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) {
8492 fp->rf_access_read--;
8493 fflags |= FREAD;
8494 if (fp->rf_access_read == 0)
8495 fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
8496 }
8497 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) {
8498 fp->rf_access_write--;
8499 fflags |= FWRITE;
8500 if (fp->rf_access_write == 0)
8501 fp->rf_share_access &=
8502 ~OPEN4_SHARE_ACCESS_WRITE;
8503 }
8504 if (sp->rs_open_deny & OPEN4_SHARE_DENY_READ) {
8505 fp->rf_deny_read--;
8506 if (fp->rf_deny_read == 0)
8507 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
8508 }
8509 if (sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) {
8510 fp->rf_deny_write--;
8511 if (fp->rf_deny_write == 0)
8512 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
8513 }
8514
8515 (void) VOP_CLOSE(fp->rf_vp, fflags, 1, (offset_t)0, cr, NULL);
8516
8517 rfs4_dbe_unlock(fp->rf_dbe);
8518
8519 sp->rs_open_access = 0;
8520 sp->rs_open_deny = 0;
8521 }
8522 }
8523
8524 /*
8525 * lock_denied: Fill in a LOCK4deneid structure given an flock64 structure.
8526 */
8527 static nfsstat4
8528 lock_denied(LOCK4denied *dp, struct flock64 *flk)
8529 {
8530 rfs4_lockowner_t *lo;
8531 rfs4_client_t *cp;
8532 uint32_t len;
8533
8534 lo = rfs4_findlockowner_by_pid(flk->l_pid);
8535 if (lo != NULL) {
8536 cp = lo->rl_client;
8537 if (rfs4_lease_expired(cp)) {
8538 rfs4_lockowner_rele(lo);
8539 rfs4_dbe_hold(cp->rc_dbe);
8540 rfs4_client_close(cp);
8541 return (NFS4ERR_EXPIRED);
8542 }
8543 dp->owner.clientid = lo->rl_owner.clientid;
8544 len = lo->rl_owner.owner_len;
8545 dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8546 bcopy(lo->rl_owner.owner_val, dp->owner.owner_val, len);
8547 dp->owner.owner_len = len;
8548 rfs4_lockowner_rele(lo);
8549 goto finish;
8550 }
8551
8552 /*
8553 * Its not a NFS4 lock. We take advantage that the upper 32 bits
8554 * of the client id contain the boot time for a NFS4 lock. So we
8555 * fabricate and identity by setting clientid to the sysid, and
8556 * the lock owner to the pid.
8557 */
8558 dp->owner.clientid = flk->l_sysid;
8559 len = sizeof (pid_t);
8560 dp->owner.owner_len = len;
8561 dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8562 bcopy(&flk->l_pid, dp->owner.owner_val, len);
8563 finish:
8564 dp->offset = flk->l_start;
8565 dp->length = flk->l_len;
8566
8567 if (flk->l_type == F_RDLCK)
8568 dp->locktype = READ_LT;
8569 else if (flk->l_type == F_WRLCK)
8570 dp->locktype = WRITE_LT;
8571 else
8572 return (NFS4ERR_INVAL); /* no mapping from POSIX ltype to v4 */
8573
8574 return (NFS4_OK);
8575 }
8576
8577 /*
8578 * The NFSv4.0 LOCK operation does not support the blocking lock (at the
8579 * NFSv4.0 protocol level) so the client needs to resend the LOCK request in a
8580 * case the lock is denied by the NFSv4.0 server. NFSv4.0 clients are prepared
8581 * for that (obviously); they are sending the LOCK requests with some delays
8582 * between the attempts. See nfs4frlock() and nfs4_block_and_wait() for the
8583 * locking and delay implementation at the client side.
8584 *
8585 * To make the life of the clients easier, the NFSv4.0 server tries to do some
8586 * fast retries on its own (the for loop below) in a hope the lock will be
8587 * available soon. And if not, the client won't need to resend the LOCK
8588 * requests so fast to check the lock availability. This basically saves some
8589 * network traffic and tries to make sure the client gets the lock ASAP.
8590 */
8591 static int
8592 setlock(vnode_t *vp, struct flock64 *flock, int flag, cred_t *cred)
8593 {
8594 int error;
8595 struct flock64 flk;
8596 int i;
8597 clock_t delaytime;
8598 int cmd;
8599 int spin_cnt = 0;
8600
8601 cmd = nbl_need_check(vp) ? F_SETLK_NBMAND : F_SETLK;
8602 retry:
8603 delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
8604
8605 for (i = 0; i < rfs4_maxlock_tries; i++) {
8606 LOCK_PRINT(rfs4_debug, "setlock", cmd, flock);
8607 error = VOP_FRLOCK(vp, cmd,
8608 flock, flag, (u_offset_t)0, NULL, cred, NULL);
8609
8610 if (error != EAGAIN && error != EACCES)
8611 break;
8612
8613 if (i < rfs4_maxlock_tries - 1) {
8614 delay(delaytime);
8615 delaytime *= 2;
8616 }
8617 }
8618
8619 if (error == EAGAIN || error == EACCES) {
8620 /* Get the owner of the lock */
8621 flk = *flock;
8622 LOCK_PRINT(rfs4_debug, "setlock", F_GETLK, &flk);
8623 if (VOP_FRLOCK(vp, F_GETLK, &flk, flag, 0, NULL, cred,
8624 NULL) == 0) {
8625 /*
8626 * There's a race inherent in the current VOP_FRLOCK
8627 * design where:
8628 * a: "other guy" takes a lock that conflicts with a
8629 * lock we want
8630 * b: we attempt to take our lock (non-blocking) and
8631 * the attempt fails.
8632 * c: "other guy" releases the conflicting lock
8633 * d: we ask what lock conflicts with the lock we want,
8634 * getting F_UNLCK (no lock blocks us)
8635 *
8636 * If we retry the non-blocking lock attempt in this
8637 * case (restart at step 'b') there's some possibility
8638 * that many such attempts might fail. However a test
8639 * designed to actually provoke this race shows that
8640 * the vast majority of cases require no retry, and
8641 * only a few took as many as three retries. Here's
8642 * the test outcome:
8643 *
8644 * number of retries how many times we needed
8645 * that many retries
8646 * 0 79461
8647 * 1 862
8648 * 2 49
8649 * 3 5
8650 *
8651 * Given those empirical results, we arbitrarily limit
8652 * the retry count to ten.
8653 *
8654 * If we actually make to ten retries and give up,
8655 * nothing catastrophic happens, but we're unable to
8656 * return the information about the conflicting lock to
8657 * the NFS client. That's an acceptable trade off vs.
8658 * letting this retry loop run forever.
8659 */
8660 if (flk.l_type == F_UNLCK) {
8661 if (spin_cnt++ < 10) {
8662 /* No longer locked, retry */
8663 goto retry;
8664 }
8665 } else {
8666 *flock = flk;
8667 LOCK_PRINT(rfs4_debug, "setlock(blocking lock)",
8668 F_GETLK, &flk);
8669 }
8670 }
8671 }
8672
8673 return (error);
8674 }
8675
8676 /*ARGSUSED*/
8677 static nfsstat4
8678 rfs4_do_lock(rfs4_lo_state_t *lsp, nfs_lock_type4 locktype,
8679 offset4 offset, length4 length, cred_t *cred, nfs_resop4 *resop)
8680 {
8681 nfsstat4 status;
8682 rfs4_lockowner_t *lo = lsp->rls_locker;
8683 rfs4_state_t *sp = lsp->rls_state;
8684 struct flock64 flock;
8685 int16_t ltype;
8686 int flag;
8687 int error;
8688 sysid_t sysid;
8689 LOCK4res *lres;
8690 vnode_t *vp;
8691
8692 if (rfs4_lease_expired(lo->rl_client)) {
8693 return (NFS4ERR_EXPIRED);
8694 }
8695
8696 if ((status = rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
8697 return (status);
8698
8699 /* Check for zero length. To lock to end of file use all ones for V4 */
8700 if (length == 0)
8701 return (NFS4ERR_INVAL);
8702 else if (length == (length4)(~0))
8703 length = 0; /* Posix to end of file */
8704
8705 retry:
8706 rfs4_dbe_lock(sp->rs_dbe);
8707 if (sp->rs_closed == TRUE) {
8708 rfs4_dbe_unlock(sp->rs_dbe);
8709 return (NFS4ERR_OLD_STATEID);
8710 }
8711
8712 if (resop->resop != OP_LOCKU) {
8713 switch (locktype) {
8714 case READ_LT:
8715 case READW_LT:
8716 if ((sp->rs_share_access
8717 & OPEN4_SHARE_ACCESS_READ) == 0) {
8718 rfs4_dbe_unlock(sp->rs_dbe);
8719
8720 return (NFS4ERR_OPENMODE);
8721 }
8722 ltype = F_RDLCK;
8723 break;
8724 case WRITE_LT:
8725 case WRITEW_LT:
8726 if ((sp->rs_share_access
8727 & OPEN4_SHARE_ACCESS_WRITE) == 0) {
8728 rfs4_dbe_unlock(sp->rs_dbe);
8729
8730 return (NFS4ERR_OPENMODE);
8731 }
8732 ltype = F_WRLCK;
8733 break;
8734 }
8735 } else
8736 ltype = F_UNLCK;
8737
8738 flock.l_type = ltype;
8739 flock.l_whence = 0; /* SEEK_SET */
8740 flock.l_start = offset;
8741 flock.l_len = length;
8742 flock.l_sysid = sysid;
8743 flock.l_pid = lsp->rls_locker->rl_pid;
8744
8745 /* Note that length4 is uint64_t but l_len and l_start are off64_t */
8746 if (flock.l_len < 0 || flock.l_start < 0) {
8747 rfs4_dbe_unlock(sp->rs_dbe);
8748 return (NFS4ERR_INVAL);
8749 }
8750
8751 /*
8752 * N.B. FREAD has the same value as OPEN4_SHARE_ACCESS_READ and
8753 * FWRITE has the same value as OPEN4_SHARE_ACCESS_WRITE.
8754 */
8755 flag = (int)sp->rs_share_access | F_REMOTELOCK;
8756
8757 vp = sp->rs_finfo->rf_vp;
8758 VN_HOLD(vp);
8759
8760 /*
8761 * We need to unlock sp before we call the underlying filesystem to
8762 * acquire the file lock.
8763 */
8764 rfs4_dbe_unlock(sp->rs_dbe);
8765
8766 error = setlock(vp, &flock, flag, cred);
8767
8768 /*
8769 * Make sure the file is still open. In a case the file was closed in
8770 * the meantime, clean the lock we acquired using the setlock() call
8771 * above, and return the appropriate error.
8772 */
8773 rfs4_dbe_lock(sp->rs_dbe);
8774 if (sp->rs_closed == TRUE) {
8775 cleanlocks(vp, lsp->rls_locker->rl_pid, sysid);
8776 rfs4_dbe_unlock(sp->rs_dbe);
8777
8778 VN_RELE(vp);
8779
8780 return (NFS4ERR_OLD_STATEID);
8781 }
8782 rfs4_dbe_unlock(sp->rs_dbe);
8783
8784 VN_RELE(vp);
8785
8786 if (error == 0) {
8787 rfs4_dbe_lock(lsp->rls_dbe);
8788 next_stateid(&lsp->rls_lockid);
8789 rfs4_dbe_unlock(lsp->rls_dbe);
8790 }
8791
8792 /*
8793 * N.B. We map error values to nfsv4 errors. This is differrent
8794 * than puterrno4 routine.
8795 */
8796 switch (error) {
8797 case 0:
8798 status = NFS4_OK;
8799 break;
8800 case EAGAIN:
8801 case EACCES: /* Old value */
8802 /* Can only get here if op is OP_LOCK */
8803 ASSERT(resop->resop == OP_LOCK);
8804 lres = &resop->nfs_resop4_u.oplock;
8805 status = NFS4ERR_DENIED;
8806 if (lock_denied(&lres->LOCK4res_u.denied, &flock)
8807 == NFS4ERR_EXPIRED)
8808 goto retry;
8809 break;
8810 case ENOLCK:
8811 status = NFS4ERR_DELAY;
8812 break;
8813 case EOVERFLOW:
8814 status = NFS4ERR_INVAL;
8815 break;
8816 case EINVAL:
8817 status = NFS4ERR_NOTSUPP;
8818 break;
8819 default:
8820 status = NFS4ERR_SERVERFAULT;
8821 break;
8822 }
8823
8824 return (status);
8825 }
8826
8827 /*ARGSUSED*/
8828 void
8829 rfs4_op_lock(nfs_argop4 *argop, nfs_resop4 *resop,
8830 struct svc_req *req, struct compound_state *cs)
8831 {
8832 LOCK4args *args = &argop->nfs_argop4_u.oplock;
8833 LOCK4res *resp = &resop->nfs_resop4_u.oplock;
8834 nfsstat4 status;
8835 stateid4 *stateid;
8836 rfs4_lockowner_t *lo;
8837 rfs4_client_t *cp;
8838 rfs4_state_t *sp = NULL;
8839 rfs4_lo_state_t *lsp = NULL;
8840 bool_t ls_sw_held = FALSE;
8841 bool_t create = TRUE;
8842 bool_t lcreate = TRUE;
8843 bool_t dup_lock = FALSE;
8844 int rc;
8845
8846 DTRACE_NFSV4_2(op__lock__start, struct compound_state *, cs,
8847 LOCK4args *, args);
8848
8849 if (cs->vp == NULL) {
8850 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8851 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8852 cs, LOCK4res *, resp);
8853 return;
8854 }
8855
8856 if (args->locker.new_lock_owner) {
8857 /* Create a new lockowner for this instance */
8858 open_to_lock_owner4 *olo = &args->locker.locker4_u.open_owner;
8859
8860 NFS4_DEBUG(rfs4_debug, (CE_NOTE, "Creating new lock owner"));
8861
8862 stateid = &olo->open_stateid;
8863 status = rfs4_get_state(stateid, &sp, RFS4_DBS_VALID);
8864 if (status != NFS4_OK) {
8865 NFS4_DEBUG(rfs4_debug,
8866 (CE_NOTE, "Get state failed in lock %d", status));
8867 *cs->statusp = resp->status = status;
8868 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8869 cs, LOCK4res *, resp);
8870 return;
8871 }
8872
8873 /* Ensure specified filehandle matches */
8874 if (cs->vp != sp->rs_finfo->rf_vp) {
8875 rfs4_state_rele(sp);
8876 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8877 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8878 cs, LOCK4res *, resp);
8879 return;
8880 }
8881
8882 /* hold off other access to open_owner while we tinker */
8883 rfs4_sw_enter(&sp->rs_owner->ro_sw);
8884
8885 switch (rc = rfs4_check_stateid_seqid(sp, stateid)) {
8886 case NFS4_CHECK_STATEID_OLD:
8887 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8888 goto end;
8889 case NFS4_CHECK_STATEID_BAD:
8890 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8891 goto end;
8892 case NFS4_CHECK_STATEID_EXPIRED:
8893 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8894 goto end;
8895 case NFS4_CHECK_STATEID_UNCONFIRMED:
8896 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8897 goto end;
8898 case NFS4_CHECK_STATEID_CLOSED:
8899 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8900 goto end;
8901 case NFS4_CHECK_STATEID_OKAY:
8902 case NFS4_CHECK_STATEID_REPLAY:
8903 switch (rfs4_check_olo_seqid(olo->open_seqid,
8904 sp->rs_owner, resop)) {
8905 case NFS4_CHKSEQ_OKAY:
8906 if (rc == NFS4_CHECK_STATEID_OKAY)
8907 break;
8908 /*
8909 * This is replayed stateid; if seqid
8910 * matches next expected, then client
8911 * is using wrong seqid.
8912 */
8913 /* FALLTHROUGH */
8914 case NFS4_CHKSEQ_BAD:
8915 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8916 goto end;
8917 case NFS4_CHKSEQ_REPLAY:
8918 /* This is a duplicate LOCK request */
8919 dup_lock = TRUE;
8920
8921 /*
8922 * For a duplicate we do not want to
8923 * create a new lockowner as it should
8924 * already exist.
8925 * Turn off the lockowner create flag.
8926 */
8927 lcreate = FALSE;
8928 }
8929 break;
8930 }
8931
8932 lo = rfs4_findlockowner(&olo->lock_owner, &lcreate);
8933 if (lo == NULL) {
8934 NFS4_DEBUG(rfs4_debug,
8935 (CE_NOTE, "rfs4_op_lock: no lock owner"));
8936 *cs->statusp = resp->status = NFS4ERR_RESOURCE;
8937 goto end;
8938 }
8939
8940 lsp = rfs4_findlo_state_by_owner(lo, sp, &create);
8941 if (lsp == NULL) {
8942 rfs4_update_lease(sp->rs_owner->ro_client);
8943 /*
8944 * Only update theh open_seqid if this is not
8945 * a duplicate request
8946 */
8947 if (dup_lock == FALSE) {
8948 rfs4_update_open_sequence(sp->rs_owner);
8949 }
8950
8951 NFS4_DEBUG(rfs4_debug,
8952 (CE_NOTE, "rfs4_op_lock: no state"));
8953 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
8954 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8955 rfs4_lockowner_rele(lo);
8956 goto end;
8957 }
8958
8959 /*
8960 * This is the new_lock_owner branch and the client is
8961 * supposed to be associating a new lock_owner with
8962 * the open file at this point. If we find that a
8963 * lock_owner/state association already exists and a
8964 * successful LOCK request was returned to the client,
8965 * an error is returned to the client since this is
8966 * not appropriate. The client should be using the
8967 * existing lock_owner branch.
8968 */
8969 if (dup_lock == FALSE && create == FALSE) {
8970 if (lsp->rls_lock_completed == TRUE) {
8971 *cs->statusp =
8972 resp->status = NFS4ERR_BAD_SEQID;
8973 rfs4_lockowner_rele(lo);
8974 goto end;
8975 }
8976 }
8977
8978 rfs4_update_lease(sp->rs_owner->ro_client);
8979
8980 /*
8981 * Only update theh open_seqid if this is not
8982 * a duplicate request
8983 */
8984 if (dup_lock == FALSE) {
8985 rfs4_update_open_sequence(sp->rs_owner);
8986 }
8987
8988 /*
8989 * If this is a duplicate lock request, just copy the
8990 * previously saved reply and return.
8991 */
8992 if (dup_lock == TRUE) {
8993 /* verify that lock_seqid's match */
8994 if (lsp->rls_seqid != olo->lock_seqid) {
8995 NFS4_DEBUG(rfs4_debug,
8996 (CE_NOTE, "rfs4_op_lock: Dup-Lock seqid bad"
8997 "lsp->seqid=%d old->seqid=%d",
8998 lsp->rls_seqid, olo->lock_seqid));
8999 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9000 } else {
9001 rfs4_copy_reply(resop, &lsp->rls_reply);
9002 /*
9003 * Make sure to copy the just
9004 * retrieved reply status into the
9005 * overall compound status
9006 */
9007 *cs->statusp = resp->status;
9008 }
9009 rfs4_lockowner_rele(lo);
9010 goto end;
9011 }
9012
9013 rfs4_dbe_lock(lsp->rls_dbe);
9014
9015 /* Make sure to update the lock sequence id */
9016 lsp->rls_seqid = olo->lock_seqid;
9017
9018 NFS4_DEBUG(rfs4_debug,
9019 (CE_NOTE, "Lock seqid established as %d", lsp->rls_seqid));
9020
9021 /*
9022 * This is used to signify the newly created lockowner
9023 * stateid and its sequence number. The checks for
9024 * sequence number and increment don't occur on the
9025 * very first lock request for a lockowner.
9026 */
9027 lsp->rls_skip_seqid_check = TRUE;
9028
9029 /* hold off other access to lsp while we tinker */
9030 rfs4_sw_enter(&lsp->rls_sw);
9031 ls_sw_held = TRUE;
9032
9033 rfs4_dbe_unlock(lsp->rls_dbe);
9034
9035 rfs4_lockowner_rele(lo);
9036 } else {
9037 stateid = &args->locker.locker4_u.lock_owner.lock_stateid;
9038 /* get lsp and hold the lock on the underlying file struct */
9039 if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE))
9040 != NFS4_OK) {
9041 *cs->statusp = resp->status = status;
9042 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9043 cs, LOCK4res *, resp);
9044 return;
9045 }
9046 create = FALSE; /* We didn't create lsp */
9047
9048 /* Ensure specified filehandle matches */
9049 if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
9050 rfs4_lo_state_rele(lsp, TRUE);
9051 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9052 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9053 cs, LOCK4res *, resp);
9054 return;
9055 }
9056
9057 /* hold off other access to lsp while we tinker */
9058 rfs4_sw_enter(&lsp->rls_sw);
9059 ls_sw_held = TRUE;
9060
9061 switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
9062 /*
9063 * The stateid looks like it was okay (expected to be
9064 * the next one)
9065 */
9066 case NFS4_CHECK_STATEID_OKAY:
9067 /*
9068 * The sequence id is now checked. Determine
9069 * if this is a replay or if it is in the
9070 * expected (next) sequence. In the case of a
9071 * replay, there are two replay conditions
9072 * that may occur. The first is the normal
9073 * condition where a LOCK is done with a
9074 * NFS4_OK response and the stateid is
9075 * updated. That case is handled below when
9076 * the stateid is identified as a REPLAY. The
9077 * second is the case where an error is
9078 * returned, like NFS4ERR_DENIED, and the
9079 * sequence number is updated but the stateid
9080 * is not updated. This second case is dealt
9081 * with here. So it may seem odd that the
9082 * stateid is okay but the sequence id is a
9083 * replay but it is okay.
9084 */
9085 switch (rfs4_check_lock_seqid(
9086 args->locker.locker4_u.lock_owner.lock_seqid,
9087 lsp, resop)) {
9088 case NFS4_CHKSEQ_REPLAY:
9089 if (resp->status != NFS4_OK) {
9090 /*
9091 * Here is our replay and need
9092 * to verify that the last
9093 * response was an error.
9094 */
9095 *cs->statusp = resp->status;
9096 goto end;
9097 }
9098 /*
9099 * This is done since the sequence id
9100 * looked like a replay but it didn't
9101 * pass our check so a BAD_SEQID is
9102 * returned as a result.
9103 */
9104 /*FALLTHROUGH*/
9105 case NFS4_CHKSEQ_BAD:
9106 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9107 goto end;
9108 case NFS4_CHKSEQ_OKAY:
9109 /* Everything looks okay move ahead */
9110 break;
9111 }
9112 break;
9113 case NFS4_CHECK_STATEID_OLD:
9114 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9115 goto end;
9116 case NFS4_CHECK_STATEID_BAD:
9117 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9118 goto end;
9119 case NFS4_CHECK_STATEID_EXPIRED:
9120 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9121 goto end;
9122 case NFS4_CHECK_STATEID_CLOSED:
9123 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9124 goto end;
9125 case NFS4_CHECK_STATEID_REPLAY:
9126 switch (rfs4_check_lock_seqid(
9127 args->locker.locker4_u.lock_owner.lock_seqid,
9128 lsp, resop)) {
9129 case NFS4_CHKSEQ_OKAY:
9130 /*
9131 * This is a replayed stateid; if
9132 * seqid matches the next expected,
9133 * then client is using wrong seqid.
9134 */
9135 case NFS4_CHKSEQ_BAD:
9136 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9137 goto end;
9138 case NFS4_CHKSEQ_REPLAY:
9139 rfs4_update_lease(lsp->rls_locker->rl_client);
9140 *cs->statusp = status = resp->status;
9141 goto end;
9142 }
9143 break;
9144 default:
9145 ASSERT(FALSE);
9146 break;
9147 }
9148
9149 rfs4_update_lock_sequence(lsp);
9150 rfs4_update_lease(lsp->rls_locker->rl_client);
9151 }
9152
9153 /*
9154 * NFS4 only allows locking on regular files, so
9155 * verify type of object.
9156 */
9157 if (cs->vp->v_type != VREG) {
9158 if (cs->vp->v_type == VDIR)
9159 status = NFS4ERR_ISDIR;
9160 else
9161 status = NFS4ERR_INVAL;
9162 goto out;
9163 }
9164
9165 cp = lsp->rls_state->rs_owner->ro_client;
9166
9167 if (rfs4_clnt_in_grace(cp) && !args->reclaim) {
9168 status = NFS4ERR_GRACE;
9169 goto out;
9170 }
9171
9172 if (rfs4_clnt_in_grace(cp) && args->reclaim && !cp->rc_can_reclaim) {
9173 status = NFS4ERR_NO_GRACE;
9174 goto out;
9175 }
9176
9177 if (!rfs4_clnt_in_grace(cp) && args->reclaim) {
9178 status = NFS4ERR_NO_GRACE;
9179 goto out;
9180 }
9181
9182 if (lsp->rls_state->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE)
9183 cs->deleg = TRUE;
9184
9185 status = rfs4_do_lock(lsp, args->locktype,
9186 args->offset, args->length, cs->cr, resop);
9187
9188 out:
9189 lsp->rls_skip_seqid_check = FALSE;
9190
9191 *cs->statusp = resp->status = status;
9192
9193 if (status == NFS4_OK) {
9194 resp->LOCK4res_u.lock_stateid = lsp->rls_lockid.stateid;
9195 lsp->rls_lock_completed = TRUE;
9196 }
9197 /*
9198 * Only update the "OPEN" response here if this was a new
9199 * lock_owner
9200 */
9201 if (sp)
9202 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
9203
9204 rfs4_update_lock_resp(lsp, resop);
9205
9206 end:
9207 if (lsp) {
9208 if (ls_sw_held)
9209 rfs4_sw_exit(&lsp->rls_sw);
9210 /*
9211 * If an sp obtained, then the lsp does not represent
9212 * a lock on the file struct.
9213 */
9214 if (sp != NULL)
9215 rfs4_lo_state_rele(lsp, FALSE);
9216 else
9217 rfs4_lo_state_rele(lsp, TRUE);
9218 }
9219 if (sp) {
9220 rfs4_sw_exit(&sp->rs_owner->ro_sw);
9221 rfs4_state_rele(sp);
9222 }
9223
9224 DTRACE_NFSV4_2(op__lock__done, struct compound_state *, cs,
9225 LOCK4res *, resp);
9226 }
9227
9228 /* free function for LOCK/LOCKT */
9229 static void
9230 lock_denied_free(nfs_resop4 *resop)
9231 {
9232 LOCK4denied *dp = NULL;
9233
9234 switch (resop->resop) {
9235 case OP_LOCK:
9236 if (resop->nfs_resop4_u.oplock.status == NFS4ERR_DENIED)
9237 dp = &resop->nfs_resop4_u.oplock.LOCK4res_u.denied;
9238 break;
9239 case OP_LOCKT:
9240 if (resop->nfs_resop4_u.oplockt.status == NFS4ERR_DENIED)
9241 dp = &resop->nfs_resop4_u.oplockt.denied;
9242 break;
9243 default:
9244 break;
9245 }
9246
9247 if (dp)
9248 kmem_free(dp->owner.owner_val, dp->owner.owner_len);
9249 }
9250
9251 /*ARGSUSED*/
9252 void
9253 rfs4_op_locku(nfs_argop4 *argop, nfs_resop4 *resop,
9254 struct svc_req *req, struct compound_state *cs)
9255 {
9256 LOCKU4args *args = &argop->nfs_argop4_u.oplocku;
9257 LOCKU4res *resp = &resop->nfs_resop4_u.oplocku;
9258 nfsstat4 status;
9259 stateid4 *stateid = &args->lock_stateid;
9260 rfs4_lo_state_t *lsp;
9261
9262 DTRACE_NFSV4_2(op__locku__start, struct compound_state *, cs,
9263 LOCKU4args *, args);
9264
9265 if (cs->vp == NULL) {
9266 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9267 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9268 LOCKU4res *, resp);
9269 return;
9270 }
9271
9272 if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE)) != NFS4_OK) {
9273 *cs->statusp = resp->status = status;
9274 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9275 LOCKU4res *, resp);
9276 return;
9277 }
9278
9279 /* Ensure specified filehandle matches */
9280 if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
9281 rfs4_lo_state_rele(lsp, TRUE);
9282 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9283 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9284 LOCKU4res *, resp);
9285 return;
9286 }
9287
9288 /* hold off other access to lsp while we tinker */
9289 rfs4_sw_enter(&lsp->rls_sw);
9290
9291 switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
9292 case NFS4_CHECK_STATEID_OKAY:
9293 if (rfs4_check_lock_seqid(args->seqid, lsp, resop)
9294 != NFS4_CHKSEQ_OKAY) {
9295 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9296 goto end;
9297 }
9298 break;
9299 case NFS4_CHECK_STATEID_OLD:
9300 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9301 goto end;
9302 case NFS4_CHECK_STATEID_BAD:
9303 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9304 goto end;
9305 case NFS4_CHECK_STATEID_EXPIRED:
9306 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9307 goto end;
9308 case NFS4_CHECK_STATEID_CLOSED:
9309 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9310 goto end;
9311 case NFS4_CHECK_STATEID_REPLAY:
9312 switch (rfs4_check_lock_seqid(args->seqid, lsp, resop)) {
9313 case NFS4_CHKSEQ_OKAY:
9314 /*
9315 * This is a replayed stateid; if
9316 * seqid matches the next expected,
9317 * then client is using wrong seqid.
9318 */
9319 case NFS4_CHKSEQ_BAD:
9320 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9321 goto end;
9322 case NFS4_CHKSEQ_REPLAY:
9323 rfs4_update_lease(lsp->rls_locker->rl_client);
9324 *cs->statusp = status = resp->status;
9325 goto end;
9326 }
9327 break;
9328 default:
9329 ASSERT(FALSE);
9330 break;
9331 }
9332
9333 rfs4_update_lock_sequence(lsp);
9334 rfs4_update_lease(lsp->rls_locker->rl_client);
9335
9336 /*
9337 * NFS4 only allows locking on regular files, so
9338 * verify type of object.
9339 */
9340 if (cs->vp->v_type != VREG) {
9341 if (cs->vp->v_type == VDIR)
9342 status = NFS4ERR_ISDIR;
9343 else
9344 status = NFS4ERR_INVAL;
9345 goto out;
9346 }
9347
9348 if (rfs4_clnt_in_grace(lsp->rls_state->rs_owner->ro_client)) {
9349 status = NFS4ERR_GRACE;
9350 goto out;
9351 }
9352
9353 status = rfs4_do_lock(lsp, args->locktype,
9354 args->offset, args->length, cs->cr, resop);
9355
9356 out:
9357 *cs->statusp = resp->status = status;
9358
9359 if (status == NFS4_OK)
9360 resp->lock_stateid = lsp->rls_lockid.stateid;
9361
9362 rfs4_update_lock_resp(lsp, resop);
9363
9364 end:
9365 rfs4_sw_exit(&lsp->rls_sw);
9366 rfs4_lo_state_rele(lsp, TRUE);
9367
9368 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9369 LOCKU4res *, resp);
9370 }
9371
9372 /*
9373 * LOCKT is a best effort routine, the client can not be guaranteed that
9374 * the status return is still in effect by the time the reply is received.
9375 * They are numerous race conditions in this routine, but we are not required
9376 * and can not be accurate.
9377 */
9378 /*ARGSUSED*/
9379 void
9380 rfs4_op_lockt(nfs_argop4 *argop, nfs_resop4 *resop,
9381 struct svc_req *req, struct compound_state *cs)
9382 {
9383 LOCKT4args *args = &argop->nfs_argop4_u.oplockt;
9384 LOCKT4res *resp = &resop->nfs_resop4_u.oplockt;
9385 rfs4_lockowner_t *lo;
9386 rfs4_client_t *cp;
9387 bool_t create = FALSE;
9388 struct flock64 flk;
9389 int error;
9390 int flag = FREAD | FWRITE;
9391 int ltype;
9392 length4 posix_length;
9393 sysid_t sysid;
9394 pid_t pid;
9395
9396 DTRACE_NFSV4_2(op__lockt__start, struct compound_state *, cs,
9397 LOCKT4args *, args);
9398
9399 if (cs->vp == NULL) {
9400 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9401 goto out;
9402 }
9403
9404 /*
9405 * NFS4 only allows locking on regular files, so
9406 * verify type of object.
9407 */
9408 if (cs->vp->v_type != VREG) {
9409 if (cs->vp->v_type == VDIR)
9410 *cs->statusp = resp->status = NFS4ERR_ISDIR;
9411 else
9412 *cs->statusp = resp->status = NFS4ERR_INVAL;
9413 goto out;
9414 }
9415
9416 /*
9417 * Check out the clientid to ensure the server knows about it
9418 * so that we correctly inform the client of a server reboot.
9419 */
9420 if ((cp = rfs4_findclient_by_id(args->owner.clientid, FALSE))
9421 == NULL) {
9422 *cs->statusp = resp->status =
9423 rfs4_check_clientid(&args->owner.clientid, 0);
9424 goto out;
9425 }
9426 if (rfs4_lease_expired(cp)) {
9427 rfs4_client_close(cp);
9428 /*
9429 * Protocol doesn't allow returning NFS4ERR_STALE as
9430 * other operations do on this check so STALE_CLIENTID
9431 * is returned instead
9432 */
9433 *cs->statusp = resp->status = NFS4ERR_STALE_CLIENTID;
9434 goto out;
9435 }
9436
9437 if (rfs4_clnt_in_grace(cp) && !(cp->rc_can_reclaim)) {
9438 *cs->statusp = resp->status = NFS4ERR_GRACE;
9439 rfs4_client_rele(cp);
9440 goto out;
9441 }
9442 rfs4_client_rele(cp);
9443
9444 resp->status = NFS4_OK;
9445
9446 switch (args->locktype) {
9447 case READ_LT:
9448 case READW_LT:
9449 ltype = F_RDLCK;
9450 break;
9451 case WRITE_LT:
9452 case WRITEW_LT:
9453 ltype = F_WRLCK;
9454 break;
9455 }
9456
9457 posix_length = args->length;
9458 /* Check for zero length. To lock to end of file use all ones for V4 */
9459 if (posix_length == 0) {
9460 *cs->statusp = resp->status = NFS4ERR_INVAL;
9461 goto out;
9462 } else if (posix_length == (length4)(~0)) {
9463 posix_length = 0; /* Posix to end of file */
9464 }
9465
9466 /* Find or create a lockowner */
9467 lo = rfs4_findlockowner(&args->owner, &create);
9468
9469 if (lo) {
9470 pid = lo->rl_pid;
9471 if ((resp->status =
9472 rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
9473 goto err;
9474 } else {
9475 pid = 0;
9476 sysid = lockt_sysid;
9477 }
9478 retry:
9479 flk.l_type = ltype;
9480 flk.l_whence = 0; /* SEEK_SET */
9481 flk.l_start = args->offset;
9482 flk.l_len = posix_length;
9483 flk.l_sysid = sysid;
9484 flk.l_pid = pid;
9485 flag |= F_REMOTELOCK;
9486
9487 LOCK_PRINT(rfs4_debug, "rfs4_op_lockt", F_GETLK, &flk);
9488
9489 /* Note that length4 is uint64_t but l_len and l_start are off64_t */
9490 if (flk.l_len < 0 || flk.l_start < 0) {
9491 resp->status = NFS4ERR_INVAL;
9492 goto err;
9493 }
9494 error = VOP_FRLOCK(cs->vp, F_GETLK, &flk, flag, (u_offset_t)0,
9495 NULL, cs->cr, NULL);
9496
9497 /*
9498 * N.B. We map error values to nfsv4 errors. This is differrent
9499 * than puterrno4 routine.
9500 */
9501 switch (error) {
9502 case 0:
9503 if (flk.l_type == F_UNLCK)
9504 resp->status = NFS4_OK;
9505 else {
9506 if (lock_denied(&resp->denied, &flk) == NFS4ERR_EXPIRED)
9507 goto retry;
9508 resp->status = NFS4ERR_DENIED;
9509 }
9510 break;
9511 case EOVERFLOW:
9512 resp->status = NFS4ERR_INVAL;
9513 break;
9514 case EINVAL:
9515 resp->status = NFS4ERR_NOTSUPP;
9516 break;
9517 default:
9518 cmn_err(CE_WARN, "rfs4_op_lockt: unexpected errno (%d)",
9519 error);
9520 resp->status = NFS4ERR_SERVERFAULT;
9521 break;
9522 }
9523
9524 err:
9525 if (lo)
9526 rfs4_lockowner_rele(lo);
9527 *cs->statusp = resp->status;
9528 out:
9529 DTRACE_NFSV4_2(op__lockt__done, struct compound_state *, cs,
9530 LOCKT4res *, resp);
9531 }
9532
9533 int
9534 rfs4_share(rfs4_state_t *sp, uint32_t access, uint32_t deny)
9535 {
9536 int err;
9537 int cmd;
9538 vnode_t *vp;
9539 struct shrlock shr;
9540 struct shr_locowner shr_loco;
9541 int fflags = 0;
9542
9543 ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9544 ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9545
9546 if (sp->rs_closed)
9547 return (NFS4ERR_OLD_STATEID);
9548
9549 vp = sp->rs_finfo->rf_vp;
9550 ASSERT(vp);
9551
9552 shr.s_access = shr.s_deny = 0;
9553
9554 if (access & OPEN4_SHARE_ACCESS_READ) {
9555 fflags |= FREAD;
9556 shr.s_access |= F_RDACC;
9557 }
9558 if (access & OPEN4_SHARE_ACCESS_WRITE) {
9559 fflags |= FWRITE;
9560 shr.s_access |= F_WRACC;
9561 }
9562 ASSERT(shr.s_access);
9563
9564 if (deny & OPEN4_SHARE_DENY_READ)
9565 shr.s_deny |= F_RDDNY;
9566 if (deny & OPEN4_SHARE_DENY_WRITE)
9567 shr.s_deny |= F_WRDNY;
9568
9569 shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9570 shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9571 shr_loco.sl_pid = shr.s_pid;
9572 shr_loco.sl_id = shr.s_sysid;
9573 shr.s_owner = (caddr_t)&shr_loco;
9574 shr.s_own_len = sizeof (shr_loco);
9575
9576 cmd = nbl_need_check(vp) ? F_SHARE_NBMAND : F_SHARE;
9577
9578 err = VOP_SHRLOCK(vp, cmd, &shr, fflags, CRED(), NULL);
9579 if (err != 0) {
9580 if (err == EAGAIN)
9581 err = NFS4ERR_SHARE_DENIED;
9582 else
9583 err = puterrno4(err);
9584 return (err);
9585 }
9586
9587 sp->rs_share_access |= access;
9588 sp->rs_share_deny |= deny;
9589
9590 return (0);
9591 }
9592
9593 int
9594 rfs4_unshare(rfs4_state_t *sp)
9595 {
9596 int err;
9597 struct shrlock shr;
9598 struct shr_locowner shr_loco;
9599
9600 ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9601
9602 if (sp->rs_closed || sp->rs_share_access == 0)
9603 return (0);
9604
9605 ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9606 ASSERT(sp->rs_finfo->rf_vp);
9607
9608 shr.s_access = shr.s_deny = 0;
9609 shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9610 shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9611 shr_loco.sl_pid = shr.s_pid;
9612 shr_loco.sl_id = shr.s_sysid;
9613 shr.s_owner = (caddr_t)&shr_loco;
9614 shr.s_own_len = sizeof (shr_loco);
9615
9616 err = VOP_SHRLOCK(sp->rs_finfo->rf_vp, F_UNSHARE, &shr, 0, CRED(),
9617 NULL);
9618 if (err != 0) {
9619 err = puterrno4(err);
9620 return (err);
9621 }
9622
9623 sp->rs_share_access = 0;
9624 sp->rs_share_deny = 0;
9625
9626 return (0);
9627
9628 }
9629
9630 static int
9631 rdma_setup_read_data4(READ4args *args, READ4res *rok)
9632 {
9633 struct clist *wcl;
9634 count4 count = rok->data_len;
9635 int wlist_len;
9636
9637 wcl = args->wlist;
9638 if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
9639 return (FALSE);
9640 }
9641 wcl = args->wlist;
9642 rok->wlist_len = wlist_len;
9643 rok->wlist = wcl;
9644 return (TRUE);
9645 }
9646
/*
 * Tunable to disable server referrals.  When set to a non-zero value,
 * vn_is_nfs_reparse() reports B_FALSE for every vnode.
 */
int rfs4_no_referrals = 0;
9649
9650 /*
9651 * Find an NFS record in reparse point data.
9652 * Returns 0 for success and <0 or an errno value on failure.
9653 */
9654 int
9655 vn_find_nfs_record(vnode_t *vp, nvlist_t **nvlp, char **svcp, char **datap)
9656 {
9657 int err;
9658 char *stype, *val;
9659 nvlist_t *nvl;
9660 nvpair_t *curr;
9661
9662 if ((nvl = reparse_init()) == NULL)
9663 return (-1);
9664
9665 if ((err = reparse_vnode_parse(vp, nvl)) != 0) {
9666 reparse_free(nvl);
9667 return (err);
9668 }
9669
9670 curr = NULL;
9671 while ((curr = nvlist_next_nvpair(nvl, curr)) != NULL) {
9672 if ((stype = nvpair_name(curr)) == NULL) {
9673 reparse_free(nvl);
9674 return (-2);
9675 }
9676 if (strncasecmp(stype, "NFS", 3) == 0)
9677 break;
9678 }
9679
9680 if ((curr == NULL) ||
9681 (nvpair_value_string(curr, &val))) {
9682 reparse_free(nvl);
9683 return (-3);
9684 }
9685 *nvlp = nvl;
9686 *svcp = stype;
9687 *datap = val;
9688 return (0);
9689 }
9690
9691 int
9692 vn_is_nfs_reparse(vnode_t *vp, cred_t *cr)
9693 {
9694 nvlist_t *nvl;
9695 char *s, *d;
9696
9697 if (rfs4_no_referrals != 0)
9698 return (B_FALSE);
9699
9700 if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9701 return (B_FALSE);
9702
9703 if (vn_find_nfs_record(vp, &nvl, &s, &d) != 0)
9704 return (B_FALSE);
9705
9706 reparse_free(nvl);
9707
9708 return (B_TRUE);
9709 }
9710
9711 /*
9712 * There is a user-level copy of this routine in ref_subr.c.
9713 * Changes should be kept in sync.
9714 */
9715 static int
9716 nfs4_create_components(char *path, component4 *comp4)
9717 {
9718 int slen, plen, ncomp;
9719 char *ori_path, *nxtc, buf[MAXNAMELEN];
9720
9721 if (path == NULL)
9722 return (0);
9723
9724 plen = strlen(path) + 1; /* include the terminator */
9725 ori_path = path;
9726 ncomp = 0;
9727
9728 /* count number of components in the path */
9729 for (nxtc = path; nxtc < ori_path + plen; nxtc++) {
9730 if (*nxtc == '/' || *nxtc == '\0' || *nxtc == '\n') {
9731 if ((slen = nxtc - path) == 0) {
9732 path = nxtc + 1;
9733 continue;
9734 }
9735
9736 if (comp4 != NULL) {
9737 bcopy(path, buf, slen);
9738 buf[slen] = '\0';
9739 (void) str_to_utf8(buf, &comp4[ncomp]);
9740 }
9741
9742 ncomp++; /* 1 valid component */
9743 path = nxtc + 1;
9744 }
9745 if (*nxtc == '\0' || *nxtc == '\n')
9746 break;
9747 }
9748
9749 return (ncomp);
9750 }
9751
9752 /*
9753 * There is a user-level copy of this routine in ref_subr.c.
9754 * Changes should be kept in sync.
9755 */
9756 static int
9757 make_pathname4(char *path, pathname4 *pathname)
9758 {
9759 int ncomp;
9760 component4 *comp4;
9761
9762 if (pathname == NULL)
9763 return (0);
9764
9765 if (path == NULL) {
9766 pathname->pathname4_val = NULL;
9767 pathname->pathname4_len = 0;
9768 return (0);
9769 }
9770
9771 /* count number of components to alloc buffer */
9772 if ((ncomp = nfs4_create_components(path, NULL)) == 0) {
9773 pathname->pathname4_val = NULL;
9774 pathname->pathname4_len = 0;
9775 return (0);
9776 }
9777 comp4 = kmem_zalloc(ncomp * sizeof (component4), KM_SLEEP);
9778
9779 /* copy components into allocated buffer */
9780 ncomp = nfs4_create_components(path, comp4);
9781
9782 pathname->pathname4_val = comp4;
9783 pathname->pathname4_len = ncomp;
9784
9785 return (ncomp);
9786 }
9787
9788 #define xdr_fs_locations4 xdr_fattr4_fs_locations
9789
9790 fs_locations4 *
9791 fetch_referral(vnode_t *vp, cred_t *cr)
9792 {
9793 nvlist_t *nvl;
9794 char *stype, *sdata;
9795 fs_locations4 *result;
9796 char buf[1024];
9797 size_t bufsize;
9798 XDR xdr;
9799 int err;
9800
9801 /*
9802 * Check attrs to ensure it's a reparse point
9803 */
9804 if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9805 return (NULL);
9806
9807 /*
9808 * Look for an NFS record and get the type and data
9809 */
9810 if (vn_find_nfs_record(vp, &nvl, &stype, &sdata) != 0)
9811 return (NULL);
9812
9813 /*
9814 * With the type and data, upcall to get the referral
9815 */
9816 bufsize = sizeof (buf);
9817 bzero(buf, sizeof (buf));
9818 err = reparse_kderef((const char *)stype, (const char *)sdata,
9819 buf, &bufsize);
9820 reparse_free(nvl);
9821
9822 DTRACE_PROBE4(nfs4serv__func__referral__upcall,
9823 char *, stype, char *, sdata, char *, buf, int, err);
9824 if (err) {
9825 cmn_err(CE_NOTE,
9826 "reparsed daemon not running: unable to get referral (%d)",
9827 err);
9828 return (NULL);
9829 }
9830
9831 /*
9832 * We get an XDR'ed record back from the kderef call
9833 */
9834 xdrmem_create(&xdr, buf, bufsize, XDR_DECODE);
9835 result = kmem_alloc(sizeof (fs_locations4), KM_SLEEP);
9836 err = xdr_fs_locations4(&xdr, result);
9837 XDR_DESTROY(&xdr);
9838 if (err != TRUE) {
9839 DTRACE_PROBE1(nfs4serv__func__referral__upcall__xdrfail,
9840 int, err);
9841 return (NULL);
9842 }
9843
9844 /*
9845 * Look at path to recover fs_root, ignoring the leading '/'
9846 */
9847 (void) make_pathname4(vp->v_path, &result->fs_root);
9848
9849 return (result);
9850 }
9851
9852 char *
9853 build_symlink(vnode_t *vp, cred_t *cr, size_t *strsz)
9854 {
9855 fs_locations4 *fsl;
9856 fs_location4 *fs;
9857 char *server, *path, *symbuf;
9858 static char *prefix = "/net/";
9859 int i, size, npaths;
9860 uint_t len;
9861
9862 /* Get the referral */
9863 if ((fsl = fetch_referral(vp, cr)) == NULL)
9864 return (NULL);
9865
9866 /* Deal with only the first location and first server */
9867 fs = &fsl->locations_val[0];
9868 server = utf8_to_str(&fs->server_val[0], &len, NULL);
9869 if (server == NULL) {
9870 rfs4_free_fs_locations4(fsl);
9871 kmem_free(fsl, sizeof (fs_locations4));
9872 return (NULL);
9873 }
9874
9875 /* Figure out size for "/net/" + host + /path/path/path + NULL */
9876 size = strlen(prefix) + len;
9877 for (i = 0; i < fs->rootpath.pathname4_len; i++)
9878 size += fs->rootpath.pathname4_val[i].utf8string_len + 1;
9879
9880 /* Allocate the symlink buffer and fill it */
9881 symbuf = kmem_zalloc(size, KM_SLEEP);
9882 (void) strcat(symbuf, prefix);
9883 (void) strcat(symbuf, server);
9884 kmem_free(server, len);
9885
9886 npaths = 0;
9887 for (i = 0; i < fs->rootpath.pathname4_len; i++) {
9888 path = utf8_to_str(&fs->rootpath.pathname4_val[i], &len, NULL);
9889 if (path == NULL)
9890 continue;
9891 (void) strcat(symbuf, "/");
9892 (void) strcat(symbuf, path);
9893 npaths++;
9894 kmem_free(path, len);
9895 }
9896
9897 rfs4_free_fs_locations4(fsl);
9898 kmem_free(fsl, sizeof (fs_locations4));
9899
9900 if (strsz != NULL)
9901 *strsz = size;
9902 return (symbuf);
9903 }
9904
9905 /*
9906 * Check to see if we have a downrev Solaris client, so that we
9907 * can send it a symlink instead of a referral.
9908 */
9909 int
9910 client_is_downrev(struct svc_req *req)
9911 {
9912 struct sockaddr *ca;
9913 rfs4_clntip_t *ci;
9914 bool_t create = FALSE;
9915 int is_downrev;
9916
9917 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
9918 ASSERT(ca);
9919 ci = rfs4_find_clntip(ca, &create);
9920 if (ci == NULL)
9921 return (0);
9922 is_downrev = ci->ri_no_referrals;
9923 rfs4_dbe_rele(ci->ri_dbe);
9924 return (is_downrev);
9925 }
9926
9927 /*
9928 * Do the main work of handling HA-NFSv4 Resource Group failover on
9929 * Sun Cluster.
9930 * We need to detect whether any RG admin paths have been added or removed,
9931 * and adjust resources accordingly.
9932 * Currently we're using a very inefficient algorithm, ~ 2 * O(n**2). In
9933 * order to scale, the list and array of paths need to be held in more
9934 * suitable data structures.
9935 */
9936 static void
9937 hanfsv4_failover(nfs4_srv_t *nsrv4)
9938 {
9939 int i, start_grace, numadded_paths = 0;
9940 char **added_paths = NULL;
9941 rfs4_dss_path_t *dss_path;
9942
9943 /*
9944 * Note: currently, dss_pathlist cannot be NULL, since
9945 * it will always include an entry for NFS4_DSS_VAR_DIR. If we
9946 * make the latter dynamically specified too, the following will
9947 * need to be adjusted.
9948 */
9949
9950 /*
9951 * First, look for removed paths: RGs that have been failed-over
9952 * away from this node.
9953 * Walk the "currently-serving" dss_pathlist and, for each
9954 * path, check if it is on the "passed-in" rfs4_dss_newpaths array
9955 * from nfsd. If not, that RG path has been removed.
9956 *
9957 * Note that nfsd has sorted rfs4_dss_newpaths for us, and removed
9958 * any duplicates.
9959 */
9960 dss_path = nsrv4->dss_pathlist;
9961 do {
9962 int found = 0;
9963 char *path = dss_path->path;
9964
9965 /* used only for non-HA so may not be removed */
9966 if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
9967 dss_path = dss_path->next;
9968 continue;
9969 }
9970
9971 for (i = 0; i < rfs4_dss_numnewpaths; i++) {
9972 int cmpret;
9973 char *newpath = rfs4_dss_newpaths[i];
9974
9975 /*
9976 * Since nfsd has sorted rfs4_dss_newpaths for us,
9977 * once the return from strcmp is negative we know
9978 * we've passed the point where "path" should be,
9979 * and can stop searching: "path" has been removed.
9980 */
9981 cmpret = strcmp(path, newpath);
9982 if (cmpret < 0)
9983 break;
9984 if (cmpret == 0) {
9985 found = 1;
9986 break;
9987 }
9988 }
9989
9990 if (found == 0) {
9991 unsigned index = dss_path->index;
9992 rfs4_servinst_t *sip = dss_path->sip;
9993 rfs4_dss_path_t *path_next = dss_path->next;
9994
9995 /*
9996 * This path has been removed.
9997 * We must clear out the servinst reference to
9998 * it, since it's now owned by another
9999 * node: we should not attempt to touch it.
10000 */
10001 ASSERT(dss_path == sip->dss_paths[index]);
10002 sip->dss_paths[index] = NULL;
10003
10004 /* remove from "currently-serving" list, and destroy */
10005 remque(dss_path);
10006 /* allow for NUL */
10007 kmem_free(dss_path->path, strlen(dss_path->path) + 1);
10008 kmem_free(dss_path, sizeof (rfs4_dss_path_t));
10009
10010 dss_path = path_next;
10011 } else {
10012 /* path was found; not removed */
10013 dss_path = dss_path->next;
10014 }
10015 } while (dss_path != nsrv4->dss_pathlist);
10016
10017 /*
10018 * Now, look for added paths: RGs that have been failed-over
10019 * to this node.
10020 * Walk the "passed-in" rfs4_dss_newpaths array from nfsd and,
10021 * for each path, check if it is on the "currently-serving"
10022 * dss_pathlist. If not, that RG path has been added.
10023 *
10024 * Note: we don't do duplicate detection here; nfsd does that for us.
10025 *
10026 * Note: numadded_paths <= rfs4_dss_numnewpaths, which gives us
10027 * an upper bound for the size needed for added_paths[numadded_paths].
10028 */
10029
10030 /* probably more space than we need, but guaranteed to be enough */
10031 if (rfs4_dss_numnewpaths > 0) {
10032 size_t sz = rfs4_dss_numnewpaths * sizeof (char *);
10033 added_paths = kmem_zalloc(sz, KM_SLEEP);
10034 }
10035
10036 /* walk the "passed-in" rfs4_dss_newpaths array from nfsd */
10037 for (i = 0; i < rfs4_dss_numnewpaths; i++) {
10038 int found = 0;
10039 char *newpath = rfs4_dss_newpaths[i];
10040
10041 dss_path = nsrv4->dss_pathlist;
10042 do {
10043 char *path = dss_path->path;
10044
10045 /* used only for non-HA */
10046 if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
10047 dss_path = dss_path->next;
10048 continue;
10049 }
10050
10051 if (strncmp(path, newpath, strlen(path)) == 0) {
10052 found = 1;
10053 break;
10054 }
10055
10056 dss_path = dss_path->next;
10057 } while (dss_path != nsrv4->dss_pathlist);
10058
10059 if (found == 0) {
10060 added_paths[numadded_paths] = newpath;
10061 numadded_paths++;
10062 }
10063 }
10064
10065 /* did we find any added paths? */
10066 if (numadded_paths > 0) {
10067
10068 /* create a new server instance, and start its grace period */
10069 start_grace = 1;
10070 /* CSTYLED */
10071 rfs4_servinst_create(nsrv4, start_grace, numadded_paths, added_paths);
10072
10073 /* read in the stable storage state from these paths */
10074 rfs4_dss_readstate(nsrv4, numadded_paths, added_paths);
10075
10076 /*
10077 * Multiple failovers during a grace period will cause
10078 * clients of the same resource group to be partitioned
10079 * into different server instances, with different
10080 * grace periods. Since clients of the same resource
10081 * group must be subject to the same grace period,
10082 * we need to reset all currently active grace periods.
10083 */
10084 rfs4_grace_reset_all(nsrv4);
10085 }
10086
10087 if (rfs4_dss_numnewpaths > 0)
10088 kmem_free(added_paths, rfs4_dss_numnewpaths * sizeof (char *));
10089 }