1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26 /*
27 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
28 * All Rights Reserved
29 */
30
31 /*
32 * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
33 * Copyright 2019 Nexenta Systems, Inc.
34 * Copyright 2019 Nexenta by DDN, Inc.
35 */
36
37 #include <sys/param.h>
38 #include <sys/types.h>
39 #include <sys/systm.h>
40 #include <sys/cred.h>
41 #include <sys/buf.h>
42 #include <sys/vfs.h>
43 #include <sys/vfs_opreg.h>
44 #include <sys/vnode.h>
45 #include <sys/uio.h>
46 #include <sys/errno.h>
47 #include <sys/sysmacros.h>
48 #include <sys/statvfs.h>
49 #include <sys/kmem.h>
50 #include <sys/dirent.h>
51 #include <sys/cmn_err.h>
52 #include <sys/debug.h>
53 #include <sys/systeminfo.h>
54 #include <sys/flock.h>
55 #include <sys/pathname.h>
56 #include <sys/nbmlock.h>
57 #include <sys/share.h>
58 #include <sys/atomic.h>
59 #include <sys/policy.h>
60 #include <sys/fem.h>
61 #include <sys/sdt.h>
62 #include <sys/ddi.h>
63 #include <sys/zone.h>
64
65 #include <fs/fs_reparse.h>
66
67 #include <rpc/types.h>
68 #include <rpc/auth.h>
69 #include <rpc/rpcsec_gss.h>
70 #include <rpc/svc.h>
71
72 #include <nfs/nfs.h>
73 #include <nfs/nfssys.h>
74 #include <nfs/export.h>
75 #include <nfs/nfs_cmd.h>
76 #include <nfs/lm.h>
77 #include <nfs/nfs4.h>
78 #include <nfs/nfs4_drc.h>
79
80 #include <sys/strsubr.h>
81 #include <sys/strsun.h>
82
83 #include <inet/common.h>
84 #include <inet/ip.h>
85 #include <inet/ip6.h>
86
87 #include <sys/tsol/label.h>
88 #include <sys/tsol/tndb.h>
89
90 #define RFS4_MAXLOCK_TRIES 4 /* Try to get the lock this many times */
91 static int rfs4_maxlock_tries = RFS4_MAXLOCK_TRIES;
92 #define RFS4_LOCK_DELAY 10 /* Milliseconds */
93 static clock_t rfs4_lock_delay = RFS4_LOCK_DELAY;
94 extern struct svc_ops rdma_svc_ops;
95 extern int nfs_loaned_buffers;
96 /* End of Tunables */
97
98 static int rdma_setup_read_data4(READ4args *, READ4res *);
99
100 /*
101 * Used to bump the stateid4.seqid value and show changes in the stateid
102 */
103 #define next_stateid(sp) (++(sp)->bits.chgseq)
104
105 /*
106 * RFS4_MINLEN_ENTRY4: XDR-encoded size of smallest possible dirent.
107 * This is used to return NFS4ERR_TOOSMALL when clients specify
108 * maxcount that isn't large enough to hold the smallest possible
109 * XDR encoded dirent.
110 *
111 * sizeof cookie (8 bytes) +
112 * sizeof name_len (4 bytes) +
113 * sizeof smallest (padded) name (4 bytes) +
114 * sizeof bitmap4_len (12 bytes) + NOTE: we always encode len=2 bm4
115 * sizeof attrlist4_len (4 bytes) +
116 * sizeof next boolean (4 bytes)
117 *
118 * RFS4_MINLEN_RDDIR4: XDR-encoded size of READDIR op reply containing
119 * the smallest possible entry4 (assumes no attrs requested).
120 * sizeof nfsstat4 (4 bytes) +
121 * sizeof verifier4 (8 bytes) +
122 * sizeof entry4list bool (4 bytes) +
123 * sizeof entry4 (36 bytes) +
124 * sizeof eof bool (4 bytes)
125 *
126 * RFS4_MINLEN_RDDIR_BUF: minimum length of buffer server will provide to
127 * VOP_READDIR. Its value is the size of the maximum possible dirent
128 * for solaris. The DIRENT64_RECLEN macro returns the size of dirent
129 * required for a given name length. MAXNAMELEN is the maximum
130 * filename length allowed in Solaris. The first two DIRENT64_RECLEN()
131 * macros are to allow for . and .. entries -- just a minor tweak to try
132 * and guarantee that buffer we give to VOP_READDIR will be large enough
133 * to hold ., .., and the largest possible solaris dirent64.
134 */
135 #define RFS4_MINLEN_ENTRY4 36
136 #define RFS4_MINLEN_RDDIR4 (4 + NFS4_VERIFIER_SIZE + 4 + RFS4_MINLEN_ENTRY4 + 4)
137 #define RFS4_MINLEN_RDDIR_BUF \
138 (DIRENT64_RECLEN(1) + DIRENT64_RECLEN(2) + DIRENT64_RECLEN(MAXNAMELEN))
139
140 /*
141 * It would be better to pad to 4 bytes since that's what XDR would do,
142 * but the dirents UFS gives us are already padded to 8, so just take
143 * what we're given. Dircount is only a hint anyway. Currently the
144 * solaris kernel is ASCII only, so there's no point in calling the
145 * UTF8 functions.
146 *
 * dirent64: name padded to provide 8 byte struct alignment
148 * d_ino(8) + d_off(8) + d_reclen(2) + d_name(namelen + null(1) + pad)
149 *
150 * cookie: uint64_t + utf8namelen: uint_t + utf8name padded to 8 bytes
151 *
152 */
153 #define DIRENT64_TO_DIRCOUNT(dp) \
154 (3 * BYTES_PER_XDR_UNIT + DIRENT64_NAMELEN((dp)->d_reclen))
155
156 zone_key_t rfs4_zone_key;
157
158 static sysid_t lockt_sysid; /* dummy sysid for all LOCKT calls */
159
160 u_longlong_t nfs4_srv_caller_id;
161 uint_t nfs4_srv_vkey = 0;
162
163 void rfs4_init_compound_state(struct compound_state *);
164
165 static void nullfree(caddr_t);
166 static void rfs4_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
167 struct compound_state *);
168 static void rfs4_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
169 struct compound_state *);
170 static void rfs4_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
171 struct compound_state *);
172 static void rfs4_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
173 struct compound_state *);
174 static void rfs4_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
175 struct compound_state *);
176 static void rfs4_op_create_free(nfs_resop4 *resop);
177 static void rfs4_op_delegreturn(nfs_argop4 *, nfs_resop4 *,
178 struct svc_req *, struct compound_state *);
179 static void rfs4_op_delegpurge(nfs_argop4 *, nfs_resop4 *,
180 struct svc_req *, struct compound_state *);
181 static void rfs4_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
182 struct compound_state *);
183 static void rfs4_op_getattr_free(nfs_resop4 *);
184 static void rfs4_op_getfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
185 struct compound_state *);
186 static void rfs4_op_getfh_free(nfs_resop4 *);
187 static void rfs4_op_illegal(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
188 struct compound_state *);
189 static void rfs4_op_link(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
190 struct compound_state *);
191 static void rfs4_op_lock(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
192 struct compound_state *);
193 static void lock_denied_free(nfs_resop4 *);
194 static void rfs4_op_locku(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
195 struct compound_state *);
196 static void rfs4_op_lockt(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
197 struct compound_state *);
198 static void rfs4_op_lookup(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
199 struct compound_state *);
200 static void rfs4_op_lookupp(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
201 struct compound_state *);
202 static void rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop,
203 struct svc_req *req, struct compound_state *cs);
204 static void rfs4_op_nverify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
205 struct compound_state *);
206 static void rfs4_op_open(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
207 struct compound_state *);
208 static void rfs4_op_open_confirm(nfs_argop4 *, nfs_resop4 *,
209 struct svc_req *, struct compound_state *);
210 static void rfs4_op_open_downgrade(nfs_argop4 *, nfs_resop4 *,
211 struct svc_req *, struct compound_state *);
212 static void rfs4_op_putfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
213 struct compound_state *);
214 static void rfs4_op_putpubfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
215 struct compound_state *);
216 static void rfs4_op_putrootfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
217 struct compound_state *);
218 static void rfs4_op_read(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
219 struct compound_state *);
220 static void rfs4_op_read_free(nfs_resop4 *);
221 static void rfs4_op_readdir_free(nfs_resop4 *resop);
222 static void rfs4_op_readlink(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
223 struct compound_state *);
224 static void rfs4_op_readlink_free(nfs_resop4 *);
225 static void rfs4_op_release_lockowner(nfs_argop4 *, nfs_resop4 *,
226 struct svc_req *, struct compound_state *);
227 static void rfs4_op_remove(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
228 struct compound_state *);
229 static void rfs4_op_rename(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
230 struct compound_state *);
231 static void rfs4_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
232 struct compound_state *);
233 static void rfs4_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
234 struct compound_state *);
235 static void rfs4_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
236 struct compound_state *);
237 static void rfs4_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
238 struct compound_state *);
239 static void rfs4_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
240 struct compound_state *);
241 static void rfs4_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
242 struct compound_state *);
243 static void rfs4_op_setclientid(nfs_argop4 *, nfs_resop4 *,
244 struct svc_req *, struct compound_state *);
245 static void rfs4_op_setclientid_confirm(nfs_argop4 *, nfs_resop4 *,
246 struct svc_req *req, struct compound_state *);
247 static void rfs4_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
248 struct compound_state *);
249 static void rfs4_op_secinfo_free(nfs_resop4 *);
250
251 static nfsstat4 check_open_access(uint32_t, struct compound_state *,
252 struct svc_req *);
253 nfsstat4 rfs4_client_sysid(rfs4_client_t *, sysid_t *);
254 void rfs4_ss_clid(nfs4_srv_t *, rfs4_client_t *);
255
256
257 /*
258 * translation table for attrs
259 */
260 struct nfs4_ntov_table {
261 union nfs4_attr_u *na;
262 uint8_t amap[NFS4_MAXNUM_ATTRS];
263 int attrcnt;
264 bool_t vfsstat;
265 };
266
267 static void nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp);
268 static void nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
269 struct nfs4_svgetit_arg *sargp);
270
271 static nfsstat4 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp,
272 struct compound_state *cs, struct nfs4_svgetit_arg *sargp,
273 struct nfs4_ntov_table *ntovp, nfs4_attr_cmd_t cmd);
274
275 static void hanfsv4_failover(nfs4_srv_t *);
276
277 fem_t *deleg_rdops;
278 fem_t *deleg_wrops;
279
280 /*
281 * NFS4 op dispatch table
282 */
283
284 struct rfsv4disp {
285 void (*dis_proc)(); /* proc to call */
286 void (*dis_resfree)(); /* frees space allocated by proc */
287 int dis_flags; /* RPC_IDEMPOTENT, etc... */
288 };
289
290 static struct rfsv4disp rfsv4disptab[] = {
291 /*
292 * NFS VERSION 4
293 */
294
295 /* RFS_NULL = 0 */
296 {rfs4_op_illegal, nullfree, 0},
297
298 /* UNUSED = 1 */
299 {rfs4_op_illegal, nullfree, 0},
300
301 /* UNUSED = 2 */
302 {rfs4_op_illegal, nullfree, 0},
303
304 /* OP_ACCESS = 3 */
305 {rfs4_op_access, nullfree, RPC_IDEMPOTENT},
306
307 /* OP_CLOSE = 4 */
308 {rfs4_op_close, nullfree, 0},
309
310 /* OP_COMMIT = 5 */
311 {rfs4_op_commit, nullfree, RPC_IDEMPOTENT},
312
313 /* OP_CREATE = 6 */
314 {rfs4_op_create, nullfree, 0},
315
316 /* OP_DELEGPURGE = 7 */
317 {rfs4_op_delegpurge, nullfree, 0},
318
319 /* OP_DELEGRETURN = 8 */
320 {rfs4_op_delegreturn, nullfree, 0},
321
322 /* OP_GETATTR = 9 */
323 {rfs4_op_getattr, rfs4_op_getattr_free, RPC_IDEMPOTENT},
324
325 /* OP_GETFH = 10 */
326 {rfs4_op_getfh, rfs4_op_getfh_free, RPC_ALL},
327
328 /* OP_LINK = 11 */
329 {rfs4_op_link, nullfree, 0},
330
331 /* OP_LOCK = 12 */
332 {rfs4_op_lock, lock_denied_free, 0},
333
334 /* OP_LOCKT = 13 */
335 {rfs4_op_lockt, lock_denied_free, 0},
336
337 /* OP_LOCKU = 14 */
338 {rfs4_op_locku, nullfree, 0},
339
340 /* OP_LOOKUP = 15 */
341 {rfs4_op_lookup, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},
342
343 /* OP_LOOKUPP = 16 */
344 {rfs4_op_lookupp, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},
345
346 /* OP_NVERIFY = 17 */
347 {rfs4_op_nverify, nullfree, RPC_IDEMPOTENT},
348
349 /* OP_OPEN = 18 */
350 {rfs4_op_open, rfs4_free_reply, 0},
351
352 /* OP_OPENATTR = 19 */
353 {rfs4_op_openattr, nullfree, 0},
354
355 /* OP_OPEN_CONFIRM = 20 */
356 {rfs4_op_open_confirm, nullfree, 0},
357
358 /* OP_OPEN_DOWNGRADE = 21 */
359 {rfs4_op_open_downgrade, nullfree, 0},
360
	/* OP_PUTFH = 22 */
362 {rfs4_op_putfh, nullfree, RPC_ALL},
363
364 /* OP_PUTPUBFH = 23 */
365 {rfs4_op_putpubfh, nullfree, RPC_ALL},
366
367 /* OP_PUTROOTFH = 24 */
368 {rfs4_op_putrootfh, nullfree, RPC_ALL},
369
370 /* OP_READ = 25 */
371 {rfs4_op_read, rfs4_op_read_free, RPC_IDEMPOTENT},
372
373 /* OP_READDIR = 26 */
374 {rfs4_op_readdir, rfs4_op_readdir_free, RPC_IDEMPOTENT},
375
376 /* OP_READLINK = 27 */
377 {rfs4_op_readlink, rfs4_op_readlink_free, RPC_IDEMPOTENT},
378
379 /* OP_REMOVE = 28 */
380 {rfs4_op_remove, nullfree, 0},
381
382 /* OP_RENAME = 29 */
383 {rfs4_op_rename, nullfree, 0},
384
385 /* OP_RENEW = 30 */
386 {rfs4_op_renew, nullfree, 0},
387
388 /* OP_RESTOREFH = 31 */
389 {rfs4_op_restorefh, nullfree, RPC_ALL},
390
391 /* OP_SAVEFH = 32 */
392 {rfs4_op_savefh, nullfree, RPC_ALL},
393
394 /* OP_SECINFO = 33 */
395 {rfs4_op_secinfo, rfs4_op_secinfo_free, 0},
396
397 /* OP_SETATTR = 34 */
398 {rfs4_op_setattr, nullfree, 0},
399
400 /* OP_SETCLIENTID = 35 */
401 {rfs4_op_setclientid, nullfree, 0},
402
403 /* OP_SETCLIENTID_CONFIRM = 36 */
404 {rfs4_op_setclientid_confirm, nullfree, 0},
405
406 /* OP_VERIFY = 37 */
407 {rfs4_op_verify, nullfree, RPC_IDEMPOTENT},
408
409 /* OP_WRITE = 38 */
410 {rfs4_op_write, nullfree, 0},
411
412 /* OP_RELEASE_LOCKOWNER = 39 */
413 {rfs4_op_release_lockowner, nullfree, 0},
414 };
415
416 static uint_t rfsv4disp_cnt = sizeof (rfsv4disptab) / sizeof (rfsv4disptab[0]);
417
418 #define OP_ILLEGAL_IDX (rfsv4disp_cnt)
419
420 #ifdef DEBUG
421
422 int rfs4_fillone_debug = 0;
423 int rfs4_no_stub_access = 1;
424 int rfs4_rddir_debug = 0;
425
/*
 * Printable names for the NFSv4 operations, one entry per operation
 * number (0 through OP_RELEASE_LOCKOWNER = 39) followed by a final
 * entry for illegal operations.  The order must stay in step with the
 * op dispatch table.
 */
static char *rfs4_op_string[] = {
	"rfs4_op_null",
	"rfs4_op_1 unused",
	"rfs4_op_2 unused",
	"rfs4_op_access",
	"rfs4_op_close",
	"rfs4_op_commit",
	"rfs4_op_create",
	"rfs4_op_delegpurge",
	"rfs4_op_delegreturn",
	"rfs4_op_getattr",
	"rfs4_op_getfh",
	"rfs4_op_link",
	"rfs4_op_lock",
	"rfs4_op_lockt",
	"rfs4_op_locku",
	"rfs4_op_lookup",
	"rfs4_op_lookupp",
	"rfs4_op_nverify",
	"rfs4_op_open",
	"rfs4_op_openattr",
	"rfs4_op_open_confirm",
	"rfs4_op_open_downgrade",
	"rfs4_op_putfh",
	"rfs4_op_putpubfh",
	"rfs4_op_putrootfh",
	"rfs4_op_read",
	"rfs4_op_readdir",
	"rfs4_op_readlink",
	"rfs4_op_remove",
	"rfs4_op_rename",
	"rfs4_op_renew",
	"rfs4_op_restorefh",
	"rfs4_op_savefh",
	"rfs4_op_secinfo",
	"rfs4_op_setattr",
	"rfs4_op_setclientid",
	/* was "rfs4_op_setclient_confirm"; match the handler's name */
	"rfs4_op_setclientid_confirm",
	"rfs4_op_verify",
	"rfs4_op_write",
	"rfs4_op_release_lockowner",
	"rfs4_op_illegal"
};
469 #endif
470
471 void rfs4_ss_chkclid(nfs4_srv_t *, rfs4_client_t *);
472
473 extern size_t strlcpy(char *dst, const char *src, size_t dstsize);
474
475 extern void rfs4_free_fs_locations4(fs_locations4 *);
476
477 #ifdef nextdp
478 #undef nextdp
479 #endif
480 #define nextdp(dp) ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
481
482 static const fs_operation_def_t nfs4_rd_deleg_tmpl[] = {
483 VOPNAME_OPEN, { .femop_open = deleg_rd_open },
484 VOPNAME_WRITE, { .femop_write = deleg_rd_write },
485 VOPNAME_SETATTR, { .femop_setattr = deleg_rd_setattr },
486 VOPNAME_RWLOCK, { .femop_rwlock = deleg_rd_rwlock },
487 VOPNAME_SPACE, { .femop_space = deleg_rd_space },
488 VOPNAME_SETSECATTR, { .femop_setsecattr = deleg_rd_setsecattr },
489 VOPNAME_VNEVENT, { .femop_vnevent = deleg_rd_vnevent },
490 NULL, NULL
491 };
492 static const fs_operation_def_t nfs4_wr_deleg_tmpl[] = {
493 VOPNAME_OPEN, { .femop_open = deleg_wr_open },
494 VOPNAME_READ, { .femop_read = deleg_wr_read },
495 VOPNAME_WRITE, { .femop_write = deleg_wr_write },
496 VOPNAME_SETATTR, { .femop_setattr = deleg_wr_setattr },
497 VOPNAME_RWLOCK, { .femop_rwlock = deleg_wr_rwlock },
498 VOPNAME_SPACE, { .femop_space = deleg_wr_space },
499 VOPNAME_SETSECATTR, { .femop_setsecattr = deleg_wr_setsecattr },
500 VOPNAME_VNEVENT, { .femop_vnevent = deleg_wr_vnevent },
501 NULL, NULL
502 };
503
504 /* ARGSUSED */
505 static void *
506 rfs4_zone_init(zoneid_t zoneid)
507 {
508 nfs4_srv_t *nsrv4;
509 timespec32_t verf;
510
511 nsrv4 = kmem_zalloc(sizeof (*nsrv4), KM_SLEEP);
512
513 /*
514 * The following algorithm attempts to find a unique verifier
515 * to be used as the write verifier returned from the server
516 * to the client. It is important that this verifier change
517 * whenever the server reboots. Of secondary importance, it
518 * is important for the verifier to be unique between two
519 * different servers.
520 *
521 * Thus, an attempt is made to use the system hostid and the
522 * current time in seconds when the nfssrv kernel module is
523 * loaded. It is assumed that an NFS server will not be able
524 * to boot and then to reboot in less than a second. If the
525 * hostid has not been set, then the current high resolution
526 * time is used. This will ensure different verifiers each
527 * time the server reboots and minimize the chances that two
528 * different servers will have the same verifier.
529 * XXX - this is broken on LP64 kernels.
530 */
531 verf.tv_sec = (time_t)zone_get_hostid(NULL);
532 if (verf.tv_sec != 0) {
533 verf.tv_nsec = gethrestime_sec();
534 } else {
535 timespec_t tverf;
536
537 gethrestime(&tverf);
538 verf.tv_sec = (time_t)tverf.tv_sec;
539 verf.tv_nsec = tverf.tv_nsec;
540 }
541 nsrv4->write4verf = *(uint64_t *)&verf;
542
543 /* Used to manage create/destroy of server state */
544 nsrv4->nfs4_server_state = NULL;
545 nsrv4->nfs4_cur_servinst = NULL;
546 nsrv4->nfs4_deleg_policy = SRV_NEVER_DELEGATE;
547 mutex_init(&nsrv4->deleg_lock, NULL, MUTEX_DEFAULT, NULL);
548 mutex_init(&nsrv4->state_lock, NULL, MUTEX_DEFAULT, NULL);
549 mutex_init(&nsrv4->servinst_lock, NULL, MUTEX_DEFAULT, NULL);
550 rw_init(&nsrv4->deleg_policy_lock, NULL, RW_DEFAULT, NULL);
551
552 return (nsrv4);
553 }
554
555 /* ARGSUSED */
556 static void
557 rfs4_zone_fini(zoneid_t zoneid, void *data)
558 {
559 nfs4_srv_t *nsrv4 = data;
560
561 mutex_destroy(&nsrv4->deleg_lock);
562 mutex_destroy(&nsrv4->state_lock);
563 mutex_destroy(&nsrv4->servinst_lock);
564 rw_destroy(&nsrv4->deleg_policy_lock);
565
566 kmem_free(nsrv4, sizeof (*nsrv4));
567 }
568
569 void
570 rfs4_srvrinit(void)
571 {
572 extern void rfs4_attr_init();
573
574 zone_key_create(&rfs4_zone_key, rfs4_zone_init, NULL, rfs4_zone_fini);
575
576 rfs4_attr_init();
577
578
579 if (fem_create("deleg_rdops", nfs4_rd_deleg_tmpl, &deleg_rdops) != 0) {
580 rfs4_disable_delegation();
581 } else if (fem_create("deleg_wrops", nfs4_wr_deleg_tmpl,
582 &deleg_wrops) != 0) {
583 rfs4_disable_delegation();
584 fem_free(deleg_rdops);
585 }
586
587 nfs4_srv_caller_id = fs_new_caller_id();
588 lockt_sysid = lm_alloc_sysidt();
589 vsd_create(&nfs4_srv_vkey, NULL);
590 rfs4_state_g_init();
591 }
592
593 void
594 rfs4_srvrfini(void)
595 {
596 if (lockt_sysid != LM_NOSYSID) {
597 lm_free_sysidt(lockt_sysid);
598 lockt_sysid = LM_NOSYSID;
599 }
600
601 rfs4_state_g_fini();
602
603 fem_free(deleg_rdops);
604 fem_free(deleg_wrops);
605
606 (void) zone_key_delete(rfs4_zone_key);
607 }
608
609 void
610 rfs4_do_server_start(int server_upordown,
611 int srv_delegation, int cluster_booted)
612 {
613 nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
614
615 /* Is this a warm start? */
616 if (server_upordown == NFS_SERVER_QUIESCED) {
617 cmn_err(CE_NOTE, "nfs4_srv: "
618 "server was previously quiesced; "
619 "existing NFSv4 state will be re-used");
620
621 /*
622 * HA-NFSv4: this is also the signal
623 * that a Resource Group failover has
624 * occurred.
625 */
626 if (cluster_booted)
627 hanfsv4_failover(nsrv4);
628 } else {
629 /* Cold start */
630 nsrv4->rfs4_start_time = 0;
631 rfs4_state_zone_init(nsrv4);
632 nsrv4->nfs4_drc = rfs4_init_drc(nfs4_drc_max,
633 nfs4_drc_hash);
634
635 /*
636 * The nfsd service was started with the -s option
637 * we need to pull in any state from the paths indicated.
638 */
639 if (curzone == global_zone && rfs4_dss_numnewpaths > 0) {
640 /* read in the stable storage state from these paths */
641 rfs4_dss_readstate(nsrv4, rfs4_dss_numnewpaths,
642 rfs4_dss_newpaths);
643 }
644 }
645
646 /* Check if delegation is to be enabled */
647 if (srv_delegation != FALSE)
648 rfs4_set_deleg_policy(nsrv4, SRV_NORMAL_DELEGATE);
649 }
650
651 void
652 rfs4_init_compound_state(struct compound_state *cs)
653 {
654 bzero(cs, sizeof (*cs));
655 cs->cont = TRUE;
656 cs->access = CS_ACCESS_DENIED;
657 cs->deleg = FALSE;
658 cs->mandlock = FALSE;
659 cs->fh.nfs_fh4_val = cs->fhbuf;
660 }
661
662 void
663 rfs4_grace_start(rfs4_servinst_t *sip)
664 {
665 rw_enter(&sip->rwlock, RW_WRITER);
666 sip->start_time = (time_t)TICK_TO_SEC(ddi_get_lbolt());
667 sip->grace_period = rfs4_grace_period;
668 rw_exit(&sip->rwlock);
669 }
670
671 /*
672 * returns true if the instance's grace period has never been started
673 */
674 int
675 rfs4_servinst_grace_new(rfs4_servinst_t *sip)
676 {
677 time_t start_time;
678
679 rw_enter(&sip->rwlock, RW_READER);
680 start_time = sip->start_time;
681 rw_exit(&sip->rwlock);
682
683 return (start_time == 0);
684 }
685
686 /*
687 * Indicates if server instance is within the
688 * grace period.
689 */
690 int
691 rfs4_servinst_in_grace(rfs4_servinst_t *sip)
692 {
693 time_t grace_expiry;
694
695 rw_enter(&sip->rwlock, RW_READER);
696 grace_expiry = sip->start_time + sip->grace_period;
697 rw_exit(&sip->rwlock);
698
699 return (((time_t)TICK_TO_SEC(ddi_get_lbolt())) < grace_expiry);
700 }
701
702 int
703 rfs4_clnt_in_grace(rfs4_client_t *cp)
704 {
705 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
706
707 return (rfs4_servinst_in_grace(cp->rc_server_instance));
708 }
709
710 /*
711 * reset all currently active grace periods
712 */
713 void
714 rfs4_grace_reset_all(nfs4_srv_t *nsrv4)
715 {
716 rfs4_servinst_t *sip;
717
718 mutex_enter(&nsrv4->servinst_lock);
719 for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev)
720 if (rfs4_servinst_in_grace(sip))
721 rfs4_grace_start(sip);
722 mutex_exit(&nsrv4->servinst_lock);
723 }
724
725 /*
726 * start any new instances' grace periods
727 */
728 void
729 rfs4_grace_start_new(nfs4_srv_t *nsrv4)
730 {
731 rfs4_servinst_t *sip;
732
733 mutex_enter(&nsrv4->servinst_lock);
734 for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev)
735 if (rfs4_servinst_grace_new(sip))
736 rfs4_grace_start(sip);
737 mutex_exit(&nsrv4->servinst_lock);
738 }
739
740 static rfs4_dss_path_t *
741 rfs4_dss_newpath(nfs4_srv_t *nsrv4, rfs4_servinst_t *sip,
742 char *path, unsigned index)
743 {
744 size_t len;
745 rfs4_dss_path_t *dss_path;
746
747 dss_path = kmem_alloc(sizeof (rfs4_dss_path_t), KM_SLEEP);
748
749 /*
750 * Take a copy of the string, since the original may be overwritten.
751 * Sadly, no strdup() in the kernel.
752 */
753 /* allow for NUL */
754 len = strlen(path) + 1;
755 dss_path->path = kmem_alloc(len, KM_SLEEP);
756 (void) strlcpy(dss_path->path, path, len);
757
758 /* associate with servinst */
759 dss_path->sip = sip;
760 dss_path->index = index;
761
762 /*
763 * Add to list of served paths.
764 * No locking required, as we're only ever called at startup.
765 */
766 if (nsrv4->dss_pathlist == NULL) {
767 /* this is the first dss_path_t */
768
769 /* needed for insque/remque */
770 dss_path->next = dss_path->prev = dss_path;
771
772 nsrv4->dss_pathlist = dss_path;
773 } else {
774 insque(dss_path, nsrv4->dss_pathlist);
775 }
776
777 return (dss_path);
778 }
779
780 /*
781 * Create a new server instance, and make it the currently active instance.
782 * Note that starting the grace period too early will reduce the clients'
783 * recovery window.
784 */
785 void
786 rfs4_servinst_create(nfs4_srv_t *nsrv4, int start_grace,
787 int dss_npaths, char **dss_paths)
788 {
789 unsigned i;
790 rfs4_servinst_t *sip;
791 rfs4_oldstate_t *oldstate;
792
793 sip = kmem_alloc(sizeof (rfs4_servinst_t), KM_SLEEP);
794 rw_init(&sip->rwlock, NULL, RW_DEFAULT, NULL);
795
796 sip->start_time = (time_t)0;
797 sip->grace_period = (time_t)0;
798 sip->next = NULL;
799 sip->prev = NULL;
800
801 rw_init(&sip->oldstate_lock, NULL, RW_DEFAULT, NULL);
802 /*
803 * This initial dummy entry is required to setup for insque/remque.
804 * It must be skipped over whenever the list is traversed.
805 */
806 oldstate = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP);
807 /* insque/remque require initial list entry to be self-terminated */
808 oldstate->next = oldstate;
809 oldstate->prev = oldstate;
810 sip->oldstate = oldstate;
811
812
813 sip->dss_npaths = dss_npaths;
814 sip->dss_paths = kmem_alloc(dss_npaths *
815 sizeof (rfs4_dss_path_t *), KM_SLEEP);
816
817 for (i = 0; i < dss_npaths; i++) {
818 sip->dss_paths[i] =
819 rfs4_dss_newpath(nsrv4, sip, dss_paths[i], i);
820 }
821
822 mutex_enter(&nsrv4->servinst_lock);
823 if (nsrv4->nfs4_cur_servinst != NULL) {
824 /* add to linked list */
825 sip->prev = nsrv4->nfs4_cur_servinst;
826 nsrv4->nfs4_cur_servinst->next = sip;
827 }
828 if (start_grace)
829 rfs4_grace_start(sip);
830 /* make the new instance "current" */
831 nsrv4->nfs4_cur_servinst = sip;
832
833 mutex_exit(&nsrv4->servinst_lock);
834 }
835
836 /*
837 * In future, we might add a rfs4_servinst_destroy(sip) but, for now, destroy
838 * all instances directly.
839 */
840 void
841 rfs4_servinst_destroy_all(nfs4_srv_t *nsrv4)
842 {
843 rfs4_servinst_t *sip, *prev, *current;
844 #ifdef DEBUG
845 int n = 0;
846 #endif
847
848 mutex_enter(&nsrv4->servinst_lock);
849 ASSERT(nsrv4->nfs4_cur_servinst != NULL);
850 current = nsrv4->nfs4_cur_servinst;
851 nsrv4->nfs4_cur_servinst = NULL;
852 for (sip = current; sip != NULL; sip = prev) {
853 prev = sip->prev;
854 rw_destroy(&sip->rwlock);
855 if (sip->oldstate)
856 kmem_free(sip->oldstate, sizeof (rfs4_oldstate_t));
857 if (sip->dss_paths) {
858 int i = sip->dss_npaths;
859
860 while (i > 0) {
861 i--;
862 if (sip->dss_paths[i] != NULL) {
863 char *path = sip->dss_paths[i]->path;
864
865 if (path != NULL) {
866 kmem_free(path,
867 strlen(path) + 1);
868 }
869 kmem_free(sip->dss_paths[i],
870 sizeof (rfs4_dss_path_t));
871 }
872 }
873 kmem_free(sip->dss_paths,
874 sip->dss_npaths * sizeof (rfs4_dss_path_t *));
875 }
876 kmem_free(sip, sizeof (rfs4_servinst_t));
877 #ifdef DEBUG
878 n++;
879 #endif
880 }
881 mutex_exit(&nsrv4->servinst_lock);
882 }
883
884 /*
885 * Assign the current server instance to a client_t.
886 * Should be called with cp->rc_dbe held.
887 */
888 void
889 rfs4_servinst_assign(nfs4_srv_t *nsrv4, rfs4_client_t *cp,
890 rfs4_servinst_t *sip)
891 {
892 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
893
894 /*
895 * The lock ensures that if the current instance is in the process
896 * of changing, we will see the new one.
897 */
898 mutex_enter(&nsrv4->servinst_lock);
899 cp->rc_server_instance = sip;
900 mutex_exit(&nsrv4->servinst_lock);
901 }
902
903 rfs4_servinst_t *
904 rfs4_servinst(rfs4_client_t *cp)
905 {
906 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
907
908 return (cp->rc_server_instance);
909 }
910
911 /* ARGSUSED */
912 static void
913 nullfree(caddr_t resop)
914 {
915 }
916
917 /*
918 * This is a fall-through for invalid or not implemented (yet) ops
919 */
920 /* ARGSUSED */
921 static void
922 rfs4_op_inval(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
923 struct compound_state *cs)
924 {
925 *cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_INVAL;
926 }
927
928 /*
929 * Check if the security flavor, nfsnum, is in the flavor_list.
930 */
931 bool_t
932 in_flavor_list(int nfsnum, int *flavor_list, int count)
933 {
934 int i;
935
936 for (i = 0; i < count; i++) {
937 if (nfsnum == flavor_list[i])
938 return (TRUE);
939 }
940 return (FALSE);
941 }
942
943 /*
944 * Used by rfs4_op_secinfo to get the security information from the
945 * export structure associated with the component.
946 */
947 /* ARGSUSED */
948 static nfsstat4
949 do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
950 {
951 int error, different_export = 0;
952 vnode_t *dvp, *vp;
953 struct exportinfo *exi = NULL;
954 fid_t fid;
955 uint_t count, i;
956 secinfo4 *resok_val;
957 struct secinfo *secp;
958 seconfig_t *si;
959 bool_t did_traverse = FALSE;
960 int dotdot, walk;
961 nfs_export_t *ne = nfs_get_export();
962
963 dvp = cs->vp;
964 dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
965
966 /*
967 * If dotdotting, then need to check whether it's above the
968 * root of a filesystem, or above an export point.
969 */
970 if (dotdot) {
971
972 /*
973 * If dotdotting at the root of a filesystem, then
974 * need to traverse back to the mounted-on filesystem
975 * and do the dotdot lookup there.
976 */
977 if ((cs->vp->v_flag & VROOT) || VN_IS_CURZONEROOT(cs->vp)) {
978
979 /*
980 * If at the system root, then can
981 * go up no further.
982 */
983 if (VN_CMP(dvp, ZONE_ROOTVP()))
984 return (puterrno4(ENOENT));
985
986 /*
987 * Traverse back to the mounted-on filesystem
988 */
989 dvp = untraverse(cs->vp);
990
991 /*
992 * Set the different_export flag so we remember
993 * to pick up a new exportinfo entry for
994 * this new filesystem.
995 */
996 different_export = 1;
997 } else {
998
999 /*
1000 * If dotdotting above an export point then set
1001 * the different_export to get new export info.
1002 */
1003 different_export = nfs_exported(cs->exi, cs->vp);
1004 }
1005 }
1006
1007 /*
1008 * Get the vnode for the component "nm".
1009 */
1010 error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cs->cr,
1011 NULL, NULL, NULL);
1012 if (error)
1013 return (puterrno4(error));
1014
1015 /*
1016 * If the vnode is in a pseudo filesystem, or if the security flavor
1017 * used in the request is valid but not an explicitly shared flavor,
1018 * or the access bit indicates that this is a limited access,
1019 * check whether this vnode is visible.
1020 */
1021 if (!different_export &&
1022 (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
1023 cs->access & CS_ACCESS_LIMITED)) {
1024 if (! nfs_visible(cs->exi, vp, &different_export)) {
1025 VN_RELE(vp);
1026 return (puterrno4(ENOENT));
1027 }
1028 }
1029
1030 /*
1031 * If it's a mountpoint, then traverse it.
1032 */
1033 if (vn_ismntpt(vp)) {
1034 if ((error = traverse(&vp)) != 0) {
1035 VN_RELE(vp);
1036 return (puterrno4(error));
1037 }
1038 /* remember that we had to traverse mountpoint */
1039 did_traverse = TRUE;
1040 different_export = 1;
1041 } else if (vp->v_vfsp != dvp->v_vfsp) {
1042 /*
1043 * If vp isn't a mountpoint and the vfs ptrs aren't the same,
1044 * then vp is probably an LOFS object. We don't need the
1045 * realvp, we just need to know that we might have crossed
1046 * a server fs boundary and need to call checkexport4.
1047 * (LOFS lookup hides server fs mountpoints, and actually calls
1048 * traverse)
1049 */
1050 different_export = 1;
1051 }
1052
1053 /*
1054 * Get the export information for it.
1055 */
1056 if (different_export) {
1057
1058 bzero(&fid, sizeof (fid));
1059 fid.fid_len = MAXFIDSZ;
1060 error = vop_fid_pseudo(vp, &fid);
1061 if (error) {
1062 VN_RELE(vp);
1063 return (puterrno4(error));
1064 }
1065
1066 if (dotdot)
1067 exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
1068 else
1069 exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
1070
1071 if (exi == NULL) {
1072 if (did_traverse == TRUE) {
1073 /*
1074 * If this vnode is a mounted-on vnode,
1075 * but the mounted-on file system is not
1076 * exported, send back the secinfo for
1077 * the exported node that the mounted-on
1078 * vnode lives in.
1079 */
1080 exi = cs->exi;
1081 } else {
1082 VN_RELE(vp);
1083 return (puterrno4(EACCES));
1084 }
1085 }
1086 } else {
1087 exi = cs->exi;
1088 }
1089 ASSERT(exi != NULL);
1090
1091
1092 /*
1093 * Create the secinfo result based on the security information
1094 * from the exportinfo structure (exi).
1095 *
1096 * Return all flavors for a pseudo node.
1097 * For a real export node, return the flavor that the client
1098 * has access with.
1099 */
1100 ASSERT(RW_LOCK_HELD(&ne->exported_lock));
1101 if (PSEUDO(exi)) {
1102 count = exi->exi_export.ex_seccnt; /* total sec count */
1103 resok_val = kmem_alloc(count * sizeof (secinfo4), KM_SLEEP);
1104 secp = exi->exi_export.ex_secinfo;
1105
1106 for (i = 0; i < count; i++) {
1107 si = &secp[i].s_secinfo;
1108 resok_val[i].flavor = si->sc_rpcnum;
1109 if (resok_val[i].flavor == RPCSEC_GSS) {
1110 rpcsec_gss_info *info;
1111
1112 info = &resok_val[i].flavor_info;
1113 info->qop = si->sc_qop;
1114 info->service = (rpc_gss_svc_t)si->sc_service;
1115
1116 /* get oid opaque data */
1117 info->oid.sec_oid4_len =
1118 si->sc_gss_mech_type->length;
1119 info->oid.sec_oid4_val = kmem_alloc(
1120 si->sc_gss_mech_type->length, KM_SLEEP);
1121 bcopy(
1122 si->sc_gss_mech_type->elements,
1123 info->oid.sec_oid4_val,
1124 info->oid.sec_oid4_len);
1125 }
1126 }
1127 resp->SECINFO4resok_len = count;
1128 resp->SECINFO4resok_val = resok_val;
1129 } else {
1130 int ret_cnt = 0, k = 0;
1131 int *flavor_list;
1132
1133 count = exi->exi_export.ex_seccnt; /* total sec count */
1134 secp = exi->exi_export.ex_secinfo;
1135
1136 flavor_list = kmem_alloc(count * sizeof (int), KM_SLEEP);
1137 /* find out which flavors to return */
1138 for (i = 0; i < count; i ++) {
1139 int access, flavor, perm;
1140
1141 flavor = secp[i].s_secinfo.sc_nfsnum;
1142 perm = secp[i].s_flags;
1143
1144 access = nfsauth4_secinfo_access(exi, cs->req,
1145 flavor, perm, cs->basecr);
1146
1147 if (! (access & NFSAUTH_DENIED) &&
1148 ! (access & NFSAUTH_WRONGSEC)) {
1149 flavor_list[ret_cnt] = flavor;
1150 ret_cnt++;
1151 }
1152 }
1153
1154 /* Create the returning SECINFO value */
1155 resok_val = kmem_alloc(ret_cnt * sizeof (secinfo4), KM_SLEEP);
1156
1157 for (i = 0; i < count; i++) {
1158 /*
1159 * If the flavor is in the flavor list,
1160 * fill in resok_val.
1161 */
1162 si = &secp[i].s_secinfo;
1163 if (in_flavor_list(si->sc_nfsnum,
1164 flavor_list, ret_cnt)) {
1165 resok_val[k].flavor = si->sc_rpcnum;
1166 if (resok_val[k].flavor == RPCSEC_GSS) {
1167 rpcsec_gss_info *info;
1168
1169 info = &resok_val[k].flavor_info;
1170 info->qop = si->sc_qop;
1171 info->service = (rpc_gss_svc_t)
1172 si->sc_service;
1173
1174 /* get oid opaque data */
1175 info->oid.sec_oid4_len =
1176 si->sc_gss_mech_type->length;
1177 info->oid.sec_oid4_val = kmem_alloc(
1178 si->sc_gss_mech_type->length,
1179 KM_SLEEP);
1180 bcopy(si->sc_gss_mech_type->elements,
1181 info->oid.sec_oid4_val,
1182 info->oid.sec_oid4_len);
1183 }
1184 k++;
1185 }
1186 if (k >= ret_cnt)
1187 break;
1188 }
1189 resp->SECINFO4resok_len = ret_cnt;
1190 resp->SECINFO4resok_val = resok_val;
1191 kmem_free(flavor_list, count * sizeof (int));
1192 }
1193
1194 VN_RELE(vp);
1195 return (NFS4_OK);
1196 }
1197
1198 /*
1199 * SECINFO (Operation 33): Obtain required security information on
1200 * the component name in the format of (security-mechanism-oid, qop, service)
1201 * triplets.
1202 */
1203 /* ARGSUSED */
1204 static void
1205 rfs4_op_secinfo(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1206 struct compound_state *cs)
1207 {
1208 SECINFO4args *args = &argop->nfs_argop4_u.opsecinfo;
1209 SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1210 utf8string *utfnm = &args->name;
1211 uint_t len;
1212 char *nm;
1213 struct sockaddr *ca;
1214 char *name = NULL;
1215 nfsstat4 status = NFS4_OK;
1216
1217 DTRACE_NFSV4_2(op__secinfo__start, struct compound_state *, cs,
1218 SECINFO4args *, args);
1219
1220 /*
1221 * Current file handle (cfh) should have been set before getting
1222 * into this function. If not, return error.
1223 */
1224 if (cs->vp == NULL) {
1225 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1226 goto out;
1227 }
1228
1229 if (cs->vp->v_type != VDIR) {
1230 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1231 goto out;
1232 }
1233
1234 /*
1235 * Verify the component name. If failed, error out, but
1236 * do not error out if the component name is a "..".
1237 * SECINFO will return its parents secinfo data for SECINFO "..".
1238 */
1239 status = utf8_dir_verify(utfnm);
1240 if (status != NFS4_OK) {
1241 if (utfnm->utf8string_len != 2 ||
1242 utfnm->utf8string_val[0] != '.' ||
1243 utfnm->utf8string_val[1] != '.') {
1244 *cs->statusp = resp->status = status;
1245 goto out;
1246 }
1247 }
1248
1249 nm = utf8_to_str(utfnm, &len, NULL);
1250 if (nm == NULL) {
1251 *cs->statusp = resp->status = NFS4ERR_INVAL;
1252 goto out;
1253 }
1254
1255 if (len > MAXNAMELEN) {
1256 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1257 kmem_free(nm, len);
1258 goto out;
1259 }
1260
1261 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1262 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1263 MAXPATHLEN + 1);
1264
1265 if (name == NULL) {
1266 *cs->statusp = resp->status = NFS4ERR_INVAL;
1267 kmem_free(nm, len);
1268 goto out;
1269 }
1270
1271
1272 *cs->statusp = resp->status = do_rfs4_op_secinfo(cs, name, resp);
1273
1274 if (name != nm)
1275 kmem_free(name, MAXPATHLEN + 1);
1276 kmem_free(nm, len);
1277
1278 out:
1279 DTRACE_NFSV4_2(op__secinfo__done, struct compound_state *, cs,
1280 SECINFO4res *, resp);
1281 }
1282
1283 /*
1284 * Free SECINFO result.
1285 */
1286 /* ARGSUSED */
1287 static void
1288 rfs4_op_secinfo_free(nfs_resop4 *resop)
1289 {
1290 SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1291 int count, i;
1292 secinfo4 *resok_val;
1293
1294 /* If this is not an Ok result, nothing to free. */
1295 if (resp->status != NFS4_OK) {
1296 return;
1297 }
1298
1299 count = resp->SECINFO4resok_len;
1300 resok_val = resp->SECINFO4resok_val;
1301
1302 for (i = 0; i < count; i++) {
1303 if (resok_val[i].flavor == RPCSEC_GSS) {
1304 rpcsec_gss_info *info;
1305
1306 info = &resok_val[i].flavor_info;
1307 kmem_free(info->oid.sec_oid4_val,
1308 info->oid.sec_oid4_len);
1309 }
1310 }
1311 kmem_free(resok_val, count * sizeof (secinfo4));
1312 resp->SECINFO4resok_len = 0;
1313 resp->SECINFO4resok_val = NULL;
1314 }
1315
1316 /* ARGSUSED */
1317 static void
1318 rfs4_op_access(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1319 struct compound_state *cs)
1320 {
1321 ACCESS4args *args = &argop->nfs_argop4_u.opaccess;
1322 ACCESS4res *resp = &resop->nfs_resop4_u.opaccess;
1323 int error;
1324 vnode_t *vp;
1325 struct vattr va;
1326 int checkwriteperm;
1327 cred_t *cr = cs->cr;
1328 bslabel_t *clabel, *slabel;
1329 ts_label_t *tslabel;
1330 boolean_t admin_low_client;
1331
1332 DTRACE_NFSV4_2(op__access__start, struct compound_state *, cs,
1333 ACCESS4args *, args);
1334
1335 #if 0 /* XXX allow access even if !cs->access. Eventually only pseudo fs */
1336 if (cs->access == CS_ACCESS_DENIED) {
1337 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1338 goto out;
1339 }
1340 #endif
1341 if (cs->vp == NULL) {
1342 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1343 goto out;
1344 }
1345
1346 ASSERT(cr != NULL);
1347
1348 vp = cs->vp;
1349
1350 /*
1351 * If the file system is exported read only, it is not appropriate
1352 * to check write permissions for regular files and directories.
1353 * Special files are interpreted by the client, so the underlying
1354 * permissions are sent back to the client for interpretation.
1355 */
1356 if (rdonly4(req, cs) &&
1357 (vp->v_type == VREG || vp->v_type == VDIR))
1358 checkwriteperm = 0;
1359 else
1360 checkwriteperm = 1;
1361
1362 /*
1363 * XXX
1364 * We need the mode so that we can correctly determine access
1365 * permissions relative to a mandatory lock file. Access to
1366 * mandatory lock files is denied on the server, so it might
1367 * as well be reflected to the server during the open.
1368 */
1369 va.va_mask = AT_MODE;
1370 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1371 if (error) {
1372 *cs->statusp = resp->status = puterrno4(error);
1373 goto out;
1374 }
1375 resp->access = 0;
1376 resp->supported = 0;
1377
1378 if (is_system_labeled()) {
1379 ASSERT(req->rq_label != NULL);
1380 clabel = req->rq_label;
1381 DTRACE_PROBE2(tx__rfs4__log__info__opaccess__clabel, char *,
1382 "got client label from request(1)",
1383 struct svc_req *, req);
1384 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1385 if ((tslabel = nfs_getflabel(vp, cs->exi)) == NULL) {
1386 *cs->statusp = resp->status = puterrno4(EACCES);
1387 goto out;
1388 }
1389 slabel = label2bslabel(tslabel);
1390 DTRACE_PROBE3(tx__rfs4__log__info__opaccess__slabel,
1391 char *, "got server label(1) for vp(2)",
1392 bslabel_t *, slabel, vnode_t *, vp);
1393
1394 admin_low_client = B_FALSE;
1395 } else
1396 admin_low_client = B_TRUE;
1397 }
1398
1399 if (args->access & ACCESS4_READ) {
1400 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
1401 if (!error && !MANDLOCK(vp, va.va_mode) &&
1402 (!is_system_labeled() || admin_low_client ||
1403 bldominates(clabel, slabel)))
1404 resp->access |= ACCESS4_READ;
1405 resp->supported |= ACCESS4_READ;
1406 }
1407 if ((args->access & ACCESS4_LOOKUP) && vp->v_type == VDIR) {
1408 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1409 if (!error && (!is_system_labeled() || admin_low_client ||
1410 bldominates(clabel, slabel)))
1411 resp->access |= ACCESS4_LOOKUP;
1412 resp->supported |= ACCESS4_LOOKUP;
1413 }
1414 if (checkwriteperm &&
1415 (args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND))) {
1416 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1417 if (!error && !MANDLOCK(vp, va.va_mode) &&
1418 (!is_system_labeled() || admin_low_client ||
1419 blequal(clabel, slabel)))
1420 resp->access |=
1421 (args->access & (ACCESS4_MODIFY | ACCESS4_EXTEND));
1422 resp->supported |=
1423 resp->access & (ACCESS4_MODIFY | ACCESS4_EXTEND);
1424 }
1425
1426 if (checkwriteperm &&
1427 (args->access & ACCESS4_DELETE) && vp->v_type == VDIR) {
1428 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1429 if (!error && (!is_system_labeled() || admin_low_client ||
1430 blequal(clabel, slabel)))
1431 resp->access |= ACCESS4_DELETE;
1432 resp->supported |= ACCESS4_DELETE;
1433 }
1434 if (args->access & ACCESS4_EXECUTE && vp->v_type != VDIR) {
1435 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1436 if (!error && !MANDLOCK(vp, va.va_mode) &&
1437 (!is_system_labeled() || admin_low_client ||
1438 bldominates(clabel, slabel)))
1439 resp->access |= ACCESS4_EXECUTE;
1440 resp->supported |= ACCESS4_EXECUTE;
1441 }
1442
1443 if (is_system_labeled() && !admin_low_client)
1444 label_rele(tslabel);
1445
1446 *cs->statusp = resp->status = NFS4_OK;
1447 out:
1448 DTRACE_NFSV4_2(op__access__done, struct compound_state *, cs,
1449 ACCESS4res *, resp);
1450 }
1451
1452 /* ARGSUSED */
1453 static void
1454 rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1455 struct compound_state *cs)
1456 {
1457 COMMIT4args *args = &argop->nfs_argop4_u.opcommit;
1458 COMMIT4res *resp = &resop->nfs_resop4_u.opcommit;
1459 int error;
1460 vnode_t *vp = cs->vp;
1461 cred_t *cr = cs->cr;
1462 vattr_t va;
1463 nfs4_srv_t *nsrv4;
1464
1465 DTRACE_NFSV4_2(op__commit__start, struct compound_state *, cs,
1466 COMMIT4args *, args);
1467
1468 if (vp == NULL) {
1469 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1470 goto out;
1471 }
1472 if (cs->access == CS_ACCESS_DENIED) {
1473 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1474 goto out;
1475 }
1476
1477 if (args->offset + args->count < args->offset) {
1478 *cs->statusp = resp->status = NFS4ERR_INVAL;
1479 goto out;
1480 }
1481
1482 va.va_mask = AT_UID;
1483 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1484
1485 /*
1486 * If we can't get the attributes, then we can't do the
1487 * right access checking. So, we'll fail the request.
1488 */
1489 if (error) {
1490 *cs->statusp = resp->status = puterrno4(error);
1491 goto out;
1492 }
1493 if (rdonly4(req, cs)) {
1494 *cs->statusp = resp->status = NFS4ERR_ROFS;
1495 goto out;
1496 }
1497
1498 if (vp->v_type != VREG) {
1499 if (vp->v_type == VDIR)
1500 resp->status = NFS4ERR_ISDIR;
1501 else
1502 resp->status = NFS4ERR_INVAL;
1503 *cs->statusp = resp->status;
1504 goto out;
1505 }
1506
1507 if (crgetuid(cr) != va.va_uid &&
1508 (error = VOP_ACCESS(vp, VWRITE, 0, cs->cr, NULL))) {
1509 *cs->statusp = resp->status = puterrno4(error);
1510 goto out;
1511 }
1512
1513 error = VOP_FSYNC(vp, FSYNC, cr, NULL);
1514
1515 if (error) {
1516 *cs->statusp = resp->status = puterrno4(error);
1517 goto out;
1518 }
1519
1520 nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
1521 *cs->statusp = resp->status = NFS4_OK;
1522 resp->writeverf = nsrv4->write4verf;
1523 out:
1524 DTRACE_NFSV4_2(op__commit__done, struct compound_state *, cs,
1525 COMMIT4res *, resp);
1526 }
1527
1528 /*
1529 * rfs4_op_mknod is called from rfs4_op_create after all initial verification
1530 * was completed. It does the nfsv4 create for special files.
1531 */
1532 /* ARGSUSED */
1533 static vnode_t *
1534 do_rfs4_op_mknod(CREATE4args *args, CREATE4res *resp, struct svc_req *req,
1535 struct compound_state *cs, vattr_t *vap, char *nm)
1536 {
1537 int error;
1538 cred_t *cr = cs->cr;
1539 vnode_t *dvp = cs->vp;
1540 vnode_t *vp = NULL;
1541 int mode;
1542 enum vcexcl excl;
1543
1544 switch (args->type) {
1545 case NF4CHR:
1546 case NF4BLK:
1547 if (secpolicy_sys_devices(cr) != 0) {
1548 *cs->statusp = resp->status = NFS4ERR_PERM;
1549 return (NULL);
1550 }
1551 if (args->type == NF4CHR)
1552 vap->va_type = VCHR;
1553 else
1554 vap->va_type = VBLK;
1555 vap->va_rdev = makedevice(args->ftype4_u.devdata.specdata1,
1556 args->ftype4_u.devdata.specdata2);
1557 vap->va_mask |= AT_RDEV;
1558 break;
1559 case NF4SOCK:
1560 vap->va_type = VSOCK;
1561 break;
1562 case NF4FIFO:
1563 vap->va_type = VFIFO;
1564 break;
1565 default:
1566 *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1567 return (NULL);
1568 }
1569
1570 /*
1571 * Must specify the mode.
1572 */
1573 if (!(vap->va_mask & AT_MODE)) {
1574 *cs->statusp = resp->status = NFS4ERR_INVAL;
1575 return (NULL);
1576 }
1577
1578 excl = EXCL;
1579
1580 mode = 0;
1581
1582 error = VOP_CREATE(dvp, nm, vap, excl, mode, &vp, cr, 0, NULL, NULL);
1583 if (error) {
1584 *cs->statusp = resp->status = puterrno4(error);
1585 return (NULL);
1586 }
1587 return (vp);
1588 }
1589
1590 /*
1591 * nfsv4 create is used to create non-regular files. For regular files,
1592 * use nfsv4 open.
1593 */
1594 /* ARGSUSED */
1595 static void
1596 rfs4_op_create(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1597 struct compound_state *cs)
1598 {
1599 CREATE4args *args = &argop->nfs_argop4_u.opcreate;
1600 CREATE4res *resp = &resop->nfs_resop4_u.opcreate;
1601 int error;
1602 struct vattr bva, iva, iva2, ava, *vap;
1603 cred_t *cr = cs->cr;
1604 vnode_t *dvp = cs->vp;
1605 vnode_t *vp = NULL;
1606 vnode_t *realvp;
1607 char *nm, *lnm;
1608 uint_t len, llen;
1609 int syncval = 0;
1610 struct nfs4_svgetit_arg sarg;
1611 struct nfs4_ntov_table ntov;
1612 struct statvfs64 sb;
1613 nfsstat4 status;
1614 struct sockaddr *ca;
1615 char *name = NULL;
1616 char *lname = NULL;
1617
1618 DTRACE_NFSV4_2(op__create__start, struct compound_state *, cs,
1619 CREATE4args *, args);
1620
1621 resp->attrset = 0;
1622
1623 if (dvp == NULL) {
1624 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1625 goto out;
1626 }
1627
1628 /*
1629 * If there is an unshared filesystem mounted on this vnode,
1630 * do not allow to create an object in this directory.
1631 */
1632 if (vn_ismntpt(dvp)) {
1633 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1634 goto out;
1635 }
1636
1637 /* Verify that type is correct */
1638 switch (args->type) {
1639 case NF4LNK:
1640 case NF4BLK:
1641 case NF4CHR:
1642 case NF4SOCK:
1643 case NF4FIFO:
1644 case NF4DIR:
1645 break;
1646 default:
1647 *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1648 goto out;
1649 };
1650
1651 if (cs->access == CS_ACCESS_DENIED) {
1652 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1653 goto out;
1654 }
1655 if (dvp->v_type != VDIR) {
1656 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1657 goto out;
1658 }
1659 status = utf8_dir_verify(&args->objname);
1660 if (status != NFS4_OK) {
1661 *cs->statusp = resp->status = status;
1662 goto out;
1663 }
1664
1665 if (rdonly4(req, cs)) {
1666 *cs->statusp = resp->status = NFS4ERR_ROFS;
1667 goto out;
1668 }
1669
1670 /*
1671 * Name of newly created object
1672 */
1673 nm = utf8_to_fn(&args->objname, &len, NULL);
1674 if (nm == NULL) {
1675 *cs->statusp = resp->status = NFS4ERR_INVAL;
1676 goto out;
1677 }
1678
1679 if (len > MAXNAMELEN) {
1680 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1681 kmem_free(nm, len);
1682 goto out;
1683 }
1684
1685 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1686 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1687 MAXPATHLEN + 1);
1688
1689 if (name == NULL) {
1690 *cs->statusp = resp->status = NFS4ERR_INVAL;
1691 kmem_free(nm, len);
1692 goto out;
1693 }
1694
1695 resp->attrset = 0;
1696
1697 sarg.sbp = &sb;
1698 sarg.is_referral = B_FALSE;
1699 nfs4_ntov_table_init(&ntov);
1700
1701 status = do_rfs4_set_attrs(&resp->attrset,
1702 &args->createattrs, cs, &sarg, &ntov, NFS4ATTR_SETIT);
1703
1704 if (sarg.vap->va_mask == 0 && status == NFS4_OK)
1705 status = NFS4ERR_INVAL;
1706
1707 if (status != NFS4_OK) {
1708 *cs->statusp = resp->status = status;
1709 if (name != nm)
1710 kmem_free(name, MAXPATHLEN + 1);
1711 kmem_free(nm, len);
1712 nfs4_ntov_table_free(&ntov, &sarg);
1713 resp->attrset = 0;
1714 goto out;
1715 }
1716
1717 /* Get "before" change value */
1718 bva.va_mask = AT_CTIME|AT_SEQ|AT_MODE;
1719 error = VOP_GETATTR(dvp, &bva, 0, cr, NULL);
1720 if (error) {
1721 *cs->statusp = resp->status = puterrno4(error);
1722 if (name != nm)
1723 kmem_free(name, MAXPATHLEN + 1);
1724 kmem_free(nm, len);
1725 nfs4_ntov_table_free(&ntov, &sarg);
1726 resp->attrset = 0;
1727 goto out;
1728 }
1729 NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bva.va_ctime)
1730
1731 vap = sarg.vap;
1732
1733 /*
1734 * Set the default initial values for attributes when the parent
1735 * directory does not have the VSUID/VSGID bit set and they have
1736 * not been specified in createattrs.
1737 */
1738 if (!(bva.va_mode & VSUID) && (vap->va_mask & AT_UID) == 0) {
1739 vap->va_uid = crgetuid(cr);
1740 vap->va_mask |= AT_UID;
1741 }
1742 if (!(bva.va_mode & VSGID) && (vap->va_mask & AT_GID) == 0) {
1743 vap->va_gid = crgetgid(cr);
1744 vap->va_mask |= AT_GID;
1745 }
1746
1747 vap->va_mask |= AT_TYPE;
1748 switch (args->type) {
1749 case NF4DIR:
1750 vap->va_type = VDIR;
1751 if ((vap->va_mask & AT_MODE) == 0) {
1752 vap->va_mode = 0700; /* default: owner rwx only */
1753 vap->va_mask |= AT_MODE;
1754 }
1755 error = VOP_MKDIR(dvp, name, vap, &vp, cr, NULL, 0, NULL);
1756 if (error)
1757 break;
1758
1759 /*
1760 * Get the initial "after" sequence number, if it fails,
1761 * set to zero
1762 */
1763 iva.va_mask = AT_SEQ;
1764 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1765 iva.va_seq = 0;
1766 break;
1767 case NF4LNK:
1768 vap->va_type = VLNK;
1769 if ((vap->va_mask & AT_MODE) == 0) {
1770 vap->va_mode = 0700; /* default: owner rwx only */
1771 vap->va_mask |= AT_MODE;
1772 }
1773
1774 /*
1775 * symlink names must be treated as data
1776 */
1777 lnm = utf8_to_str((utf8string *)&args->ftype4_u.linkdata,
1778 &llen, NULL);
1779
1780 if (lnm == NULL) {
1781 *cs->statusp = resp->status = NFS4ERR_INVAL;
1782 if (name != nm)
1783 kmem_free(name, MAXPATHLEN + 1);
1784 kmem_free(nm, len);
1785 nfs4_ntov_table_free(&ntov, &sarg);
1786 resp->attrset = 0;
1787 goto out;
1788 }
1789
1790 if (llen > MAXPATHLEN) {
1791 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1792 if (name != nm)
1793 kmem_free(name, MAXPATHLEN + 1);
1794 kmem_free(nm, len);
1795 kmem_free(lnm, llen);
1796 nfs4_ntov_table_free(&ntov, &sarg);
1797 resp->attrset = 0;
1798 goto out;
1799 }
1800
1801 lname = nfscmd_convname(ca, cs->exi, lnm,
1802 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
1803
1804 if (lname == NULL) {
1805 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
1806 if (name != nm)
1807 kmem_free(name, MAXPATHLEN + 1);
1808 kmem_free(nm, len);
1809 kmem_free(lnm, llen);
1810 nfs4_ntov_table_free(&ntov, &sarg);
1811 resp->attrset = 0;
1812 goto out;
1813 }
1814
1815 error = VOP_SYMLINK(dvp, name, vap, lname, cr, NULL, 0);
1816 if (lname != lnm)
1817 kmem_free(lname, MAXPATHLEN + 1);
1818 kmem_free(lnm, llen);
1819 if (error)
1820 break;
1821
1822 /*
1823 * Get the initial "after" sequence number, if it fails,
1824 * set to zero
1825 */
1826 iva.va_mask = AT_SEQ;
1827 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1828 iva.va_seq = 0;
1829
1830 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
1831 NULL, NULL, NULL);
1832 if (error)
1833 break;
1834
1835 /*
1836 * va_seq is not safe over VOP calls, check it again
1837 * if it has changed zero out iva to force atomic = FALSE.
1838 */
1839 iva2.va_mask = AT_SEQ;
1840 if (VOP_GETATTR(dvp, &iva2, 0, cs->cr, NULL) ||
1841 iva2.va_seq != iva.va_seq)
1842 iva.va_seq = 0;
1843 break;
1844 default:
1845 /*
1846 * probably a special file.
1847 */
1848 if ((vap->va_mask & AT_MODE) == 0) {
1849 vap->va_mode = 0600; /* default: owner rw only */
1850 vap->va_mask |= AT_MODE;
1851 }
1852 syncval = FNODSYNC;
1853 /*
1854 * We know this will only generate one VOP call
1855 */
1856 vp = do_rfs4_op_mknod(args, resp, req, cs, vap, name);
1857
1858 if (vp == NULL) {
1859 if (name != nm)
1860 kmem_free(name, MAXPATHLEN + 1);
1861 kmem_free(nm, len);
1862 nfs4_ntov_table_free(&ntov, &sarg);
1863 resp->attrset = 0;
1864 goto out;
1865 }
1866
1867 /*
1868 * Get the initial "after" sequence number, if it fails,
1869 * set to zero
1870 */
1871 iva.va_mask = AT_SEQ;
1872 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1873 iva.va_seq = 0;
1874
1875 break;
1876 }
1877 if (name != nm)
1878 kmem_free(name, MAXPATHLEN + 1);
1879 kmem_free(nm, len);
1880
1881 if (error) {
1882 *cs->statusp = resp->status = puterrno4(error);
1883 }
1884
1885 /*
1886 * Force modified data and metadata out to stable storage.
1887 */
1888 (void) VOP_FSYNC(dvp, 0, cr, NULL);
1889
1890 if (resp->status != NFS4_OK) {
1891 if (vp != NULL)
1892 VN_RELE(vp);
1893 nfs4_ntov_table_free(&ntov, &sarg);
1894 resp->attrset = 0;
1895 goto out;
1896 }
1897
1898 /*
1899 * Finish setup of cinfo response, "before" value already set.
1900 * Get "after" change value, if it fails, simply return the
1901 * before value.
1902 */
1903 ava.va_mask = AT_CTIME|AT_SEQ;
1904 if (VOP_GETATTR(dvp, &ava, 0, cr, NULL)) {
1905 ava.va_ctime = bva.va_ctime;
1906 ava.va_seq = 0;
1907 }
1908 NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, ava.va_ctime);
1909
1910 /*
1911 * True verification that object was created with correct
1912 * attrs is impossible. The attrs could have been changed
1913 * immediately after object creation. If attributes did
1914 * not verify, the only recourse for the server is to
1915 * destroy the object. Maybe if some attrs (like gid)
1916 * are set incorrectly, the object should be destroyed;
1917 * however, seems bad as a default policy. Do we really
1918 * want to destroy an object over one of the times not
1919 * verifying correctly? For these reasons, the server
1920 * currently sets bits in attrset for createattrs
1921 * that were set; however, no verification is done.
1922 *
1923 * vmask_to_nmask accounts for vattr bits set on create
1924 * [do_rfs4_set_attrs() only sets resp bits for
1925 * non-vattr/vfs bits.]
1926 * Mask off any bits set by default so as not to return
1927 * more attrset bits than were requested in createattrs
1928 */
1929 nfs4_vmask_to_nmask(sarg.vap->va_mask, &resp->attrset);
1930 resp->attrset &= args->createattrs.attrmask;
1931 nfs4_ntov_table_free(&ntov, &sarg);
1932
1933 error = makefh4(&cs->fh, vp, cs->exi);
1934 if (error) {
1935 *cs->statusp = resp->status = puterrno4(error);
1936 }
1937
1938 /*
1939 * The cinfo.atomic = TRUE only if we got no errors, we have
1940 * non-zero va_seq's, and it has incremented by exactly one
1941 * during the creation and it didn't change during the VOP_LOOKUP
1942 * or VOP_FSYNC.
1943 */
1944 if (!error && bva.va_seq && iva.va_seq && ava.va_seq &&
1945 iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
1946 resp->cinfo.atomic = TRUE;
1947 else
1948 resp->cinfo.atomic = FALSE;
1949
1950 /*
1951 * Force modified metadata out to stable storage.
1952 *
1953 * if a underlying vp exists, pass it to VOP_FSYNC
1954 */
1955 if (VOP_REALVP(vp, &realvp, NULL) == 0)
1956 (void) VOP_FSYNC(realvp, syncval, cr, NULL);
1957 else
1958 (void) VOP_FSYNC(vp, syncval, cr, NULL);
1959
1960 if (resp->status != NFS4_OK) {
1961 VN_RELE(vp);
1962 goto out;
1963 }
1964 if (cs->vp)
1965 VN_RELE(cs->vp);
1966
1967 cs->vp = vp;
1968 *cs->statusp = resp->status = NFS4_OK;
1969 out:
1970 DTRACE_NFSV4_2(op__create__done, struct compound_state *, cs,
1971 CREATE4res *, resp);
1972 }
1973
1974 /*ARGSUSED*/
1975 static void
1976 rfs4_op_delegpurge(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1977 struct compound_state *cs)
1978 {
1979 DTRACE_NFSV4_2(op__delegpurge__start, struct compound_state *, cs,
1980 DELEGPURGE4args *, &argop->nfs_argop4_u.opdelegpurge);
1981
1982 rfs4_op_inval(argop, resop, req, cs);
1983
1984 DTRACE_NFSV4_2(op__delegpurge__done, struct compound_state *, cs,
1985 DELEGPURGE4res *, &resop->nfs_resop4_u.opdelegpurge);
1986 }
1987
1988 /*ARGSUSED*/
1989 static void
1990 rfs4_op_delegreturn(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1991 struct compound_state *cs)
1992 {
1993 DELEGRETURN4args *args = &argop->nfs_argop4_u.opdelegreturn;
1994 DELEGRETURN4res *resp = &resop->nfs_resop4_u.opdelegreturn;
1995 rfs4_deleg_state_t *dsp;
1996 nfsstat4 status;
1997
1998 DTRACE_NFSV4_2(op__delegreturn__start, struct compound_state *, cs,
1999 DELEGRETURN4args *, args);
2000
2001 status = rfs4_get_deleg_state(&args->deleg_stateid, &dsp);
2002 resp->status = *cs->statusp = status;
2003 if (status != NFS4_OK)
2004 goto out;
2005
2006 /* Ensure specified filehandle matches */
2007 if (cs->vp != dsp->rds_finfo->rf_vp) {
2008 resp->status = *cs->statusp = NFS4ERR_BAD_STATEID;
2009 } else
2010 rfs4_return_deleg(dsp, FALSE);
2011
2012 rfs4_update_lease(dsp->rds_client);
2013
2014 rfs4_deleg_state_rele(dsp);
2015 out:
2016 DTRACE_NFSV4_2(op__delegreturn__done, struct compound_state *, cs,
2017 DELEGRETURN4res *, resp);
2018 }
2019
2020 /*
2021 * Check to see if a given "flavor" is an explicitly shared flavor.
2022 * The assumption of this routine is the "flavor" is already a valid
2023 * flavor in the secinfo list of "exi".
2024 *
2025 * e.g.
2026 * # share -o sec=flavor1 /export
2027 * # share -o sec=flavor2 /export/home
2028 *
2029 * flavor2 is not an explicitly shared flavor for /export,
2030 * however it is in the secinfo list for /export thru the
2031 * server namespace setup.
2032 */
2033 int
2034 is_exported_sec(int flavor, struct exportinfo *exi)
2035 {
2036 int i;
2037 struct secinfo *sp;
2038
2039 sp = exi->exi_export.ex_secinfo;
2040 for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
2041 if (flavor == sp[i].s_secinfo.sc_nfsnum ||
2042 sp[i].s_secinfo.sc_nfsnum == AUTH_NONE) {
2043 return (SEC_REF_EXPORTED(&sp[i]));
2044 }
2045 }
2046
2047 /* Should not reach this point based on the assumption */
2048 return (0);
2049 }
2050
2051 /*
2052 * Check if the security flavor used in the request matches what is
2053 * required at the export point or at the root pseudo node (exi_root).
2054 *
2055 * returns 1 if there's a match or if exported with AUTH_NONE; 0 otherwise.
2056 *
2057 */
2058 static int
2059 secinfo_match_or_authnone(struct compound_state *cs)
2060 {
2061 int i;
2062 struct secinfo *sp;
2063
2064 /*
2065 * Check cs->nfsflavor (from the request) against
2066 * the current export data in cs->exi.
2067 */
2068 sp = cs->exi->exi_export.ex_secinfo;
2069 for (i = 0; i < cs->exi->exi_export.ex_seccnt; i++) {
2070 if (cs->nfsflavor == sp[i].s_secinfo.sc_nfsnum ||
2071 sp[i].s_secinfo.sc_nfsnum == AUTH_NONE)
2072 return (1);
2073 }
2074
2075 return (0);
2076 }
2077
2078 /*
2079 * Check the access authority for the client and return the correct error.
2080 */
2081 nfsstat4
2082 call_checkauth4(struct compound_state *cs, struct svc_req *req)
2083 {
2084 int authres;
2085
2086 /*
2087 * First, check if the security flavor used in the request
2088 * are among the flavors set in the server namespace.
2089 */
2090 if (!secinfo_match_or_authnone(cs)) {
2091 *cs->statusp = NFS4ERR_WRONGSEC;
2092 return (*cs->statusp);
2093 }
2094
2095 authres = checkauth4(cs, req);
2096
2097 if (authres > 0) {
2098 *cs->statusp = NFS4_OK;
2099 if (! (cs->access & CS_ACCESS_LIMITED))
2100 cs->access = CS_ACCESS_OK;
2101 } else if (authres == 0) {
2102 *cs->statusp = NFS4ERR_ACCESS;
2103 } else if (authres == -2) {
2104 *cs->statusp = NFS4ERR_WRONGSEC;
2105 } else {
2106 *cs->statusp = NFS4ERR_DELAY;
2107 }
2108 return (*cs->statusp);
2109 }
2110
2111 /*
2112 * bitmap4_to_attrmask is called by getattr and readdir.
2113 * It sets up the vattr mask and determines whether vfsstat call is needed
2114 * based on the input bitmap.
2115 * Returns nfsv4 status.
2116 */
2117 static nfsstat4
2118 bitmap4_to_attrmask(bitmap4 breq, struct nfs4_svgetit_arg *sargp)
2119 {
2120 int i;
2121 uint_t va_mask;
2122 struct statvfs64 *sbp = sargp->sbp;
2123
2124 sargp->sbp = NULL;
2125 sargp->flag = 0;
2126 sargp->rdattr_error = NFS4_OK;
2127 sargp->mntdfid_set = FALSE;
2128 if (sargp->cs->vp)
2129 sargp->xattr = get_fh4_flag(&sargp->cs->fh,
2130 FH4_ATTRDIR | FH4_NAMEDATTR);
2131 else
2132 sargp->xattr = 0;
2133
2134 /*
2135 * Set rdattr_error_req to true if return error per
2136 * failed entry rather than fail the readdir.
2137 */
2138 if (breq & FATTR4_RDATTR_ERROR_MASK)
2139 sargp->rdattr_error_req = 1;
2140 else
2141 sargp->rdattr_error_req = 0;
2142
2143 /*
2144 * generate the va_mask
2145 * Handle the easy cases first
2146 */
2147 switch (breq) {
2148 case NFS4_NTOV_ATTR_MASK:
2149 sargp->vap->va_mask = NFS4_NTOV_ATTR_AT_MASK;
2150 return (NFS4_OK);
2151
2152 case NFS4_FS_ATTR_MASK:
2153 sargp->vap->va_mask = NFS4_FS_ATTR_AT_MASK;
2154 sargp->sbp = sbp;
2155 return (NFS4_OK);
2156
2157 case NFS4_NTOV_ATTR_CACHE_MASK:
2158 sargp->vap->va_mask = NFS4_NTOV_ATTR_CACHE_AT_MASK;
2159 return (NFS4_OK);
2160
2161 case FATTR4_LEASE_TIME_MASK:
2162 sargp->vap->va_mask = 0;
2163 return (NFS4_OK);
2164
2165 default:
2166 va_mask = 0;
2167 for (i = 0; i < nfs4_ntov_map_size; i++) {
2168 if ((breq & nfs4_ntov_map[i].fbit) &&
2169 nfs4_ntov_map[i].vbit)
2170 va_mask |= nfs4_ntov_map[i].vbit;
2171 }
2172
2173 /*
2174 * Check is vfsstat is needed
2175 */
2176 if (breq & NFS4_FS_ATTR_MASK)
2177 sargp->sbp = sbp;
2178
2179 sargp->vap->va_mask = va_mask;
2180 return (NFS4_OK);
2181 }
2182 /* NOTREACHED */
2183 }
2184
2185 /*
2186 * bitmap4_get_sysattrs is called by getattr and readdir.
2187 * It calls both VOP_GETATTR and VFS_STATVFS calls to get the attrs.
2188 * Returns nfsv4 status.
2189 */
2190 static nfsstat4
2191 bitmap4_get_sysattrs(struct nfs4_svgetit_arg *sargp)
2192 {
2193 int error;
2194 struct compound_state *cs = sargp->cs;
2195 vnode_t *vp = cs->vp;
2196
2197 if (sargp->sbp != NULL) {
2198 if (error = VFS_STATVFS(vp->v_vfsp, sargp->sbp)) {
2199 sargp->sbp = NULL; /* to identify error */
2200 return (puterrno4(error));
2201 }
2202 }
2203
2204 return (rfs4_vop_getattr(vp, sargp->vap, 0, cs->cr));
2205 }
2206
2207 static void
2208 nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp)
2209 {
2210 ntovp->na = kmem_zalloc(sizeof (union nfs4_attr_u) * nfs4_ntov_map_size,
2211 KM_SLEEP);
2212 ntovp->attrcnt = 0;
2213 ntovp->vfsstat = FALSE;
2214 }
2215
2216 static void
2217 nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
2218 struct nfs4_svgetit_arg *sargp)
2219 {
2220 int i;
2221 union nfs4_attr_u *na;
2222 uint8_t *amap;
2223
2224 /*
2225 * XXX Should do the same checks for whether the bit is set
2226 */
2227 for (i = 0, na = ntovp->na, amap = ntovp->amap;
2228 i < ntovp->attrcnt; i++, na++, amap++) {
2229 (void) (*nfs4_ntov_map[*amap].sv_getit)(
2230 NFS4ATTR_FREEIT, sargp, na);
2231 }
2232 if ((sargp->op == NFS4ATTR_SETIT) || (sargp->op == NFS4ATTR_VERIT)) {
2233 /*
2234 * xdr_free for getattr will be done later
2235 */
2236 for (i = 0, na = ntovp->na, amap = ntovp->amap;
2237 i < ntovp->attrcnt; i++, na++, amap++) {
2238 xdr_free(nfs4_ntov_map[*amap].xfunc, (caddr_t)na);
2239 }
2240 }
2241 kmem_free(ntovp->na, sizeof (union nfs4_attr_u) * nfs4_ntov_map_size);
2242 }
2243
2244 /*
2245 * do_rfs4_op_getattr gets the system attrs and converts into fattr4.
2246 */
2247 static nfsstat4
2248 do_rfs4_op_getattr(bitmap4 breq, fattr4 *fattrp,
2249 struct nfs4_svgetit_arg *sargp)
2250 {
2251 int error = 0;
2252 int i, k;
2253 struct nfs4_ntov_table ntov;
2254 XDR xdr;
2255 ulong_t xdr_size;
2256 char *xdr_attrs;
2257 nfsstat4 status = NFS4_OK;
2258 nfsstat4 prev_rdattr_error = sargp->rdattr_error;
2259 union nfs4_attr_u *na;
2260 uint8_t *amap;
2261
2262 sargp->op = NFS4ATTR_GETIT;
2263 sargp->flag = 0;
2264
2265 fattrp->attrmask = 0;
2266 /* if no bits requested, then return empty fattr4 */
2267 if (breq == 0) {
2268 fattrp->attrlist4_len = 0;
2269 fattrp->attrlist4 = NULL;
2270 return (NFS4_OK);
2271 }
2272
2273 /*
2274 * return NFS4ERR_INVAL when client requests write-only attrs
2275 */
2276 if (breq & (FATTR4_TIME_ACCESS_SET_MASK | FATTR4_TIME_MODIFY_SET_MASK))
2277 return (NFS4ERR_INVAL);
2278
2279 nfs4_ntov_table_init(&ntov);
2280 na = ntov.na;
2281 amap = ntov.amap;
2282
2283 /*
2284 * Now loop to get or verify the attrs
2285 */
2286 for (i = 0; i < nfs4_ntov_map_size; i++) {
2287 if (breq & nfs4_ntov_map[i].fbit) {
2288 if ((*nfs4_ntov_map[i].sv_getit)(
2289 NFS4ATTR_SUPPORTED, sargp, NULL) == 0) {
2290
2291 error = (*nfs4_ntov_map[i].sv_getit)(
2292 NFS4ATTR_GETIT, sargp, na);
2293
2294 /*
2295 * Possible error values:
2296 * >0 if sv_getit failed to
2297 * get the attr; 0 if succeeded;
2298 * <0 if rdattr_error and the
2299 * attribute cannot be returned.
2300 */
2301 if (error && !(sargp->rdattr_error_req))
2302 goto done;
2303 /*
2304 * If error then just for entry
2305 */
2306 if (error == 0) {
2307 fattrp->attrmask |=
2308 nfs4_ntov_map[i].fbit;
2309 *amap++ =
2310 (uint8_t)nfs4_ntov_map[i].nval;
2311 na++;
2312 (ntov.attrcnt)++;
2313 } else if ((error > 0) &&
2314 (sargp->rdattr_error == NFS4_OK)) {
2315 sargp->rdattr_error = puterrno4(error);
2316 }
2317 error = 0;
2318 }
2319 }
2320 }
2321
2322 /*
2323 * If rdattr_error was set after the return value for it was assigned,
2324 * update it.
2325 */
2326 if (prev_rdattr_error != sargp->rdattr_error) {
2327 na = ntov.na;
2328 amap = ntov.amap;
2329 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2330 k = *amap;
2331 if (k < FATTR4_RDATTR_ERROR) {
2332 continue;
2333 }
2334 if ((k == FATTR4_RDATTR_ERROR) &&
2335 ((*nfs4_ntov_map[k].sv_getit)(
2336 NFS4ATTR_SUPPORTED, sargp, NULL) == 0)) {
2337
2338 (void) (*nfs4_ntov_map[k].sv_getit)(
2339 NFS4ATTR_GETIT, sargp, na);
2340 }
2341 break;
2342 }
2343 }
2344
2345 xdr_size = 0;
2346 na = ntov.na;
2347 amap = ntov.amap;
2348 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2349 xdr_size += xdr_sizeof(nfs4_ntov_map[*amap].xfunc, na);
2350 }
2351
2352 fattrp->attrlist4_len = xdr_size;
2353 if (xdr_size) {
2354 /* freed by rfs4_op_getattr_free() */
2355 fattrp->attrlist4 = xdr_attrs = kmem_zalloc(xdr_size, KM_SLEEP);
2356
2357 xdrmem_create(&xdr, xdr_attrs, xdr_size, XDR_ENCODE);
2358
2359 na = ntov.na;
2360 amap = ntov.amap;
2361 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2362 if (!(*nfs4_ntov_map[*amap].xfunc)(&xdr, na)) {
2363 DTRACE_PROBE1(nfss__e__getattr4_encfail,
2364 int, *amap);
2365 status = NFS4ERR_SERVERFAULT;
2366 break;
2367 }
2368 }
2369 /* xdrmem_destroy(&xdrs); */ /* NO-OP */
2370 } else {
2371 fattrp->attrlist4 = NULL;
2372 }
2373 done:
2374
2375 nfs4_ntov_table_free(&ntov, sargp);
2376
2377 if (error != 0)
2378 status = puterrno4(error);
2379
2380 return (status);
2381 }
2382
2383 /* ARGSUSED */
2384 static void
2385 rfs4_op_getattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2386 struct compound_state *cs)
2387 {
2388 GETATTR4args *args = &argop->nfs_argop4_u.opgetattr;
2389 GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2390 struct nfs4_svgetit_arg sarg;
2391 struct statvfs64 sb;
2392 nfsstat4 status;
2393
2394 DTRACE_NFSV4_2(op__getattr__start, struct compound_state *, cs,
2395 GETATTR4args *, args);
2396
2397 if (cs->vp == NULL) {
2398 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2399 goto out;
2400 }
2401
2402 if (cs->access == CS_ACCESS_DENIED) {
2403 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2404 goto out;
2405 }
2406
2407 sarg.sbp = &sb;
2408 sarg.cs = cs;
2409 sarg.is_referral = B_FALSE;
2410
2411 status = bitmap4_to_attrmask(args->attr_request, &sarg);
2412 if (status == NFS4_OK) {
2413
2414 status = bitmap4_get_sysattrs(&sarg);
2415 if (status == NFS4_OK) {
2416
2417 /* Is this a referral? */
2418 if (vn_is_nfs_reparse(cs->vp, cs->cr)) {
2419 /* Older V4 Solaris client sees a link */
2420 if (client_is_downrev(req))
2421 sarg.vap->va_type = VLNK;
2422 else
2423 sarg.is_referral = B_TRUE;
2424 }
2425
2426 status = do_rfs4_op_getattr(args->attr_request,
2427 &resp->obj_attributes, &sarg);
2428 }
2429 }
2430 *cs->statusp = resp->status = status;
2431 out:
2432 DTRACE_NFSV4_2(op__getattr__done, struct compound_state *, cs,
2433 GETATTR4res *, resp);
2434 }
2435
2436 static void
2437 rfs4_op_getattr_free(nfs_resop4 *resop)
2438 {
2439 GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2440
2441 nfs4_fattr4_free(&resp->obj_attributes);
2442 }
2443
2444 /* ARGSUSED */
2445 static void
2446 rfs4_op_getfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2447 struct compound_state *cs)
2448 {
2449 GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2450
2451 DTRACE_NFSV4_1(op__getfh__start, struct compound_state *, cs);
2452
2453 if (cs->vp == NULL) {
2454 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2455 goto out;
2456 }
2457 if (cs->access == CS_ACCESS_DENIED) {
2458 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2459 goto out;
2460 }
2461
2462 /* check for reparse point at the share point */
2463 if (cs->exi->exi_moved || vn_is_nfs_reparse(cs->exi->exi_vp, cs->cr)) {
2464 /* it's all bad */
2465 cs->exi->exi_moved = 1;
2466 *cs->statusp = resp->status = NFS4ERR_MOVED;
2467 DTRACE_PROBE2(nfs4serv__func__referral__shared__moved,
2468 vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2469 return;
2470 }
2471
2472 /* check for reparse point at vp */
2473 if (vn_is_nfs_reparse(cs->vp, cs->cr) && !client_is_downrev(req)) {
2474 /* it's not all bad */
2475 *cs->statusp = resp->status = NFS4ERR_MOVED;
2476 DTRACE_PROBE2(nfs4serv__func__referral__moved,
2477 vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2478 return;
2479 }
2480
2481 resp->object.nfs_fh4_val =
2482 kmem_alloc(cs->fh.nfs_fh4_len, KM_SLEEP);
2483 nfs_fh4_copy(&cs->fh, &resp->object);
2484 *cs->statusp = resp->status = NFS4_OK;
2485 out:
2486 DTRACE_NFSV4_2(op__getfh__done, struct compound_state *, cs,
2487 GETFH4res *, resp);
2488 }
2489
2490 static void
2491 rfs4_op_getfh_free(nfs_resop4 *resop)
2492 {
2493 GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2494
2495 if (resp->status == NFS4_OK &&
2496 resp->object.nfs_fh4_val != NULL) {
2497 kmem_free(resp->object.nfs_fh4_val, resp->object.nfs_fh4_len);
2498 resp->object.nfs_fh4_val = NULL;
2499 resp->object.nfs_fh4_len = 0;
2500 }
2501 }
2502
2503 /*
2504 * illegal: args: void
2505 * res : status (NFS4ERR_OP_ILLEGAL)
2506 */
2507 /* ARGSUSED */
2508 static void
2509 rfs4_op_illegal(nfs_argop4 *argop, nfs_resop4 *resop,
2510 struct svc_req *req, struct compound_state *cs)
2511 {
2512 ILLEGAL4res *resp = &resop->nfs_resop4_u.opillegal;
2513
2514 resop->resop = OP_ILLEGAL;
2515 *cs->statusp = resp->status = NFS4ERR_OP_ILLEGAL;
2516 }
2517
2518 /*
2519 * link: args: SAVED_FH: file, CURRENT_FH: target directory
2520 * res: status. If success - CURRENT_FH unchanged, return change_info
2521 */
2522 /* ARGSUSED */
2523 static void
2524 rfs4_op_link(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2525 struct compound_state *cs)
2526 {
2527 LINK4args *args = &argop->nfs_argop4_u.oplink;
2528 LINK4res *resp = &resop->nfs_resop4_u.oplink;
2529 int error;
2530 vnode_t *vp;
2531 vnode_t *dvp;
2532 struct vattr bdva, idva, adva;
2533 char *nm;
2534 uint_t len;
2535 struct sockaddr *ca;
2536 char *name = NULL;
2537 nfsstat4 status;
2538
2539 DTRACE_NFSV4_2(op__link__start, struct compound_state *, cs,
2540 LINK4args *, args);
2541
2542 /* SAVED_FH: source object */
2543 vp = cs->saved_vp;
2544 if (vp == NULL) {
2545 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2546 goto out;
2547 }
2548
2549 /* CURRENT_FH: target directory */
2550 dvp = cs->vp;
2551 if (dvp == NULL) {
2552 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2553 goto out;
2554 }
2555
2556 /*
2557 * If there is a non-shared filesystem mounted on this vnode,
2558 * do not allow to link any file in this directory.
2559 */
2560 if (vn_ismntpt(dvp)) {
2561 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2562 goto out;
2563 }
2564
2565 if (cs->access == CS_ACCESS_DENIED) {
2566 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2567 goto out;
2568 }
2569
2570 /* Check source object's type validity */
2571 if (vp->v_type == VDIR) {
2572 *cs->statusp = resp->status = NFS4ERR_ISDIR;
2573 goto out;
2574 }
2575
2576 /* Check target directory's type */
2577 if (dvp->v_type != VDIR) {
2578 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2579 goto out;
2580 }
2581
2582 if (cs->saved_exi != cs->exi) {
2583 *cs->statusp = resp->status = NFS4ERR_XDEV;
2584 goto out;
2585 }
2586
2587 status = utf8_dir_verify(&args->newname);
2588 if (status != NFS4_OK) {
2589 *cs->statusp = resp->status = status;
2590 goto out;
2591 }
2592
2593 nm = utf8_to_fn(&args->newname, &len, NULL);
2594 if (nm == NULL) {
2595 *cs->statusp = resp->status = NFS4ERR_INVAL;
2596 goto out;
2597 }
2598
2599 if (len > MAXNAMELEN) {
2600 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2601 kmem_free(nm, len);
2602 goto out;
2603 }
2604
2605 if (rdonly4(req, cs)) {
2606 *cs->statusp = resp->status = NFS4ERR_ROFS;
2607 kmem_free(nm, len);
2608 goto out;
2609 }
2610
2611 /* Get "before" change value */
2612 bdva.va_mask = AT_CTIME|AT_SEQ;
2613 error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
2614 if (error) {
2615 *cs->statusp = resp->status = puterrno4(error);
2616 kmem_free(nm, len);
2617 goto out;
2618 }
2619
2620 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2621 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
2622 MAXPATHLEN + 1);
2623
2624 if (name == NULL) {
2625 *cs->statusp = resp->status = NFS4ERR_INVAL;
2626 kmem_free(nm, len);
2627 goto out;
2628 }
2629
2630 NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
2631
2632 error = VOP_LINK(dvp, vp, name, cs->cr, NULL, 0);
2633
2634 if (nm != name)
2635 kmem_free(name, MAXPATHLEN + 1);
2636 kmem_free(nm, len);
2637
2638 /*
2639 * Get the initial "after" sequence number, if it fails, set to zero
2640 */
2641 idva.va_mask = AT_SEQ;
2642 if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
2643 idva.va_seq = 0;
2644
2645 /*
2646 * Force modified data and metadata out to stable storage.
2647 */
2648 (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
2649 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
2650
2651 if (error) {
2652 *cs->statusp = resp->status = puterrno4(error);
2653 goto out;
2654 }
2655
2656 /*
2657 * Get "after" change value, if it fails, simply return the
2658 * before value.
2659 */
2660 adva.va_mask = AT_CTIME|AT_SEQ;
2661 if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
2662 adva.va_ctime = bdva.va_ctime;
2663 adva.va_seq = 0;
2664 }
2665
2666 NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
2667
2668 /*
2669 * The cinfo.atomic = TRUE only if we have
2670 * non-zero va_seq's, and it has incremented by exactly one
2671 * during the VOP_LINK and it didn't change during the VOP_FSYNC.
2672 */
2673 if (bdva.va_seq && idva.va_seq && adva.va_seq &&
2674 idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
2675 resp->cinfo.atomic = TRUE;
2676 else
2677 resp->cinfo.atomic = FALSE;
2678
2679 *cs->statusp = resp->status = NFS4_OK;
2680 out:
2681 DTRACE_NFSV4_2(op__link__done, struct compound_state *, cs,
2682 LINK4res *, resp);
2683 }
2684
2685 /*
2686 * Used by rfs4_op_lookup and rfs4_op_lookupp to do the actual work.
2687 */
2688
2689 /* ARGSUSED */
2690 static nfsstat4
2691 do_rfs4_op_lookup(char *nm, struct svc_req *req, struct compound_state *cs)
2692 {
2693 int error;
2694 int different_export = 0;
2695 vnode_t *vp, *pre_tvp = NULL, *oldvp = NULL;
2696 struct exportinfo *exi = NULL, *pre_exi = NULL;
2697 nfsstat4 stat;
2698 fid_t fid;
2699 int attrdir, dotdot, walk;
2700 bool_t is_newvp = FALSE;
2701
2702 if (cs->vp->v_flag & V_XATTRDIR) {
2703 attrdir = 1;
2704 ASSERT(get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2705 } else {
2706 attrdir = 0;
2707 ASSERT(! get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2708 }
2709
2710 dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
2711
2712 /*
2713 * If dotdotting, then need to check whether it's
2714 * above the root of a filesystem, or above an
2715 * export point.
2716 */
2717 if (dotdot) {
2718
2719 /*
2720 * If dotdotting at the root of a filesystem, then
2721 * need to traverse back to the mounted-on filesystem
2722 * and do the dotdot lookup there.
2723 */
2724 if ((cs->vp->v_flag & VROOT) || VN_IS_CURZONEROOT(cs->vp)) {
2725
2726 /*
2727 * If at the system root, then can
2728 * go up no further.
2729 */
2730 if (VN_CMP(cs->vp, ZONE_ROOTVP()))
2731 return (puterrno4(ENOENT));
2732
2733 /*
2734 * Traverse back to the mounted-on filesystem
2735 */
2736 cs->vp = untraverse(cs->vp);
2737
2738 /*
2739 * Set the different_export flag so we remember
2740 * to pick up a new exportinfo entry for
2741 * this new filesystem.
2742 */
2743 different_export = 1;
2744 } else {
2745
2746 /*
2747 * If dotdotting above an export point then set
2748 * the different_export to get new export info.
2749 */
2750 different_export = nfs_exported(cs->exi, cs->vp);
2751 }
2752 }
2753
2754 error = VOP_LOOKUP(cs->vp, nm, &vp, NULL, 0, NULL, cs->cr,
2755 NULL, NULL, NULL);
2756 if (error)
2757 return (puterrno4(error));
2758
2759 /*
2760 * If the vnode is in a pseudo filesystem, check whether it is visible.
2761 *
2762 * XXX if the vnode is a symlink and it is not visible in
2763 * a pseudo filesystem, return ENOENT (not following symlink).
2764 * V4 client can not mount such symlink. This is a regression
2765 * from V2/V3.
2766 *
2767 * In the same exported filesystem, if the security flavor used
2768 * is not an explicitly shared flavor, limit the view to the visible
2769 * list entries only. This is not a WRONGSEC case because it's already
2770 * checked via PUTROOTFH/PUTPUBFH or PUTFH.
2771 */
2772 if (!different_export &&
2773 (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
2774 cs->access & CS_ACCESS_LIMITED)) {
2775 if (! nfs_visible(cs->exi, vp, &different_export)) {
2776 VN_RELE(vp);
2777 return (puterrno4(ENOENT));
2778 }
2779 }
2780
2781 /*
2782 * If it's a mountpoint, then traverse it.
2783 */
2784 if (vn_ismntpt(vp)) {
2785 pre_exi = cs->exi; /* save pre-traversed exportinfo */
2786 pre_tvp = vp; /* save pre-traversed vnode */
2787
2788 /*
2789 * hold pre_tvp to counteract rele by traverse. We will
2790 * need pre_tvp below if checkexport4 fails
2791 */
2792 VN_HOLD(pre_tvp);
2793 if ((error = traverse(&vp)) != 0) {
2794 VN_RELE(vp);
2795 VN_RELE(pre_tvp);
2796 return (puterrno4(error));
2797 }
2798 different_export = 1;
2799 } else if (vp->v_vfsp != cs->vp->v_vfsp) {
2800 /*
2801 * The vfsp comparison is to handle the case where
2802 * a LOFS mount is shared. lo_lookup traverses mount points,
2803 * and NFS is unaware of local fs transistions because
2804 * v_vfsmountedhere isn't set. For this special LOFS case,
2805 * the dir and the obj returned by lookup will have different
2806 * vfs ptrs.
2807 */
2808 different_export = 1;
2809 }
2810
2811 if (different_export) {
2812
2813 bzero(&fid, sizeof (fid));
2814 fid.fid_len = MAXFIDSZ;
2815 error = vop_fid_pseudo(vp, &fid);
2816 if (error) {
2817 VN_RELE(vp);
2818 if (pre_tvp)
2819 VN_RELE(pre_tvp);
2820 return (puterrno4(error));
2821 }
2822
2823 if (dotdot)
2824 exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
2825 else
2826 exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
2827
2828 if (exi == NULL) {
2829 if (pre_tvp) {
2830 /*
2831 * If this vnode is a mounted-on vnode,
2832 * but the mounted-on file system is not
2833 * exported, send back the filehandle for
2834 * the mounted-on vnode, not the root of
2835 * the mounted-on file system.
2836 */
2837 VN_RELE(vp);
2838 vp = pre_tvp;
2839 exi = pre_exi;
2840 } else {
2841 VN_RELE(vp);
2842 return (puterrno4(EACCES));
2843 }
2844 } else if (pre_tvp) {
2845 /* we're done with pre_tvp now. release extra hold */
2846 VN_RELE(pre_tvp);
2847 }
2848
2849 cs->exi = exi;
2850
2851 /*
2852 * Now we do a checkauth4. The reason is that
2853 * this client/user may not have access to the new
2854 * exported file system, and if they do,
2855 * the client/user may be mapped to a different uid.
2856 *
2857 * We start with a new cr, because the checkauth4 done
2858 * in the PUT*FH operation over wrote the cred's uid,
2859 * gid, etc, and we want the real thing before calling
2860 * checkauth4()
2861 */
2862 crfree(cs->cr);
2863 cs->cr = crdup(cs->basecr);
2864
2865 oldvp = cs->vp;
2866 cs->vp = vp;
2867 is_newvp = TRUE;
2868
2869 stat = call_checkauth4(cs, req);
2870 if (stat != NFS4_OK) {
2871 VN_RELE(cs->vp);
2872 cs->vp = oldvp;
2873 return (stat);
2874 }
2875 }
2876
2877 /*
2878 * After various NFS checks, do a label check on the path
2879 * component. The label on this path should either be the
2880 * global zone's label or a zone's label. We are only
2881 * interested in the zone's label because exported files
2882 * in global zone is accessible (though read-only) to
2883 * clients. The exportability/visibility check is already
2884 * done before reaching this code.
2885 */
2886 if (is_system_labeled()) {
2887 bslabel_t *clabel;
2888
2889 ASSERT(req->rq_label != NULL);
2890 clabel = req->rq_label;
2891 DTRACE_PROBE2(tx__rfs4__log__info__oplookup__clabel, char *,
2892 "got client label from request(1)", struct svc_req *, req);
2893
2894 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2895 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
2896 cs->exi)) {
2897 error = EACCES;
2898 goto err_out;
2899 }
2900 } else {
2901 /*
2902 * We grant access to admin_low label clients
2903 * only if the client is trusted, i.e. also
2904 * running Solaris Trusted Extension.
2905 */
2906 struct sockaddr *ca;
2907 int addr_type;
2908 void *ipaddr;
2909 tsol_tpc_t *tp;
2910
2911 ca = (struct sockaddr *)svc_getrpccaller(
2912 req->rq_xprt)->buf;
2913 if (ca->sa_family == AF_INET) {
2914 addr_type = IPV4_VERSION;
2915 ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
2916 } else if (ca->sa_family == AF_INET6) {
2917 addr_type = IPV6_VERSION;
2918 ipaddr = &((struct sockaddr_in6 *)
2919 ca)->sin6_addr;
2920 }
2921 tp = find_tpc(ipaddr, addr_type, B_FALSE);
2922 if (tp == NULL || tp->tpc_tp.tp_doi !=
2923 l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
2924 SUN_CIPSO) {
2925 if (tp != NULL)
2926 TPC_RELE(tp);
2927 error = EACCES;
2928 goto err_out;
2929 }
2930 TPC_RELE(tp);
2931 }
2932 }
2933
2934 error = makefh4(&cs->fh, vp, cs->exi);
2935
2936 err_out:
2937 if (error) {
2938 if (is_newvp) {
2939 VN_RELE(cs->vp);
2940 cs->vp = oldvp;
2941 } else
2942 VN_RELE(vp);
2943 return (puterrno4(error));
2944 }
2945
2946 if (!is_newvp) {
2947 if (cs->vp)
2948 VN_RELE(cs->vp);
2949 cs->vp = vp;
2950 } else if (oldvp)
2951 VN_RELE(oldvp);
2952
2953 /*
2954 * if did lookup on attrdir and didn't lookup .., set named
2955 * attr fh flag
2956 */
2957 if (attrdir && ! dotdot)
2958 set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
2959
2960 /* Assume false for now, open proc will set this */
2961 cs->mandlock = FALSE;
2962
2963 return (NFS4_OK);
2964 }
2965
2966 /* ARGSUSED */
2967 static void
2968 rfs4_op_lookup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2969 struct compound_state *cs)
2970 {
2971 LOOKUP4args *args = &argop->nfs_argop4_u.oplookup;
2972 LOOKUP4res *resp = &resop->nfs_resop4_u.oplookup;
2973 char *nm;
2974 uint_t len;
2975 struct sockaddr *ca;
2976 char *name = NULL;
2977 nfsstat4 status;
2978
2979 DTRACE_NFSV4_2(op__lookup__start, struct compound_state *, cs,
2980 LOOKUP4args *, args);
2981
2982 if (cs->vp == NULL) {
2983 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2984 goto out;
2985 }
2986
2987 if (cs->vp->v_type == VLNK) {
2988 *cs->statusp = resp->status = NFS4ERR_SYMLINK;
2989 goto out;
2990 }
2991
2992 if (cs->vp->v_type != VDIR) {
2993 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2994 goto out;
2995 }
2996
2997 status = utf8_dir_verify(&args->objname);
2998 if (status != NFS4_OK) {
2999 *cs->statusp = resp->status = status;
3000 goto out;
3001 }
3002
3003 nm = utf8_to_str(&args->objname, &len, NULL);
3004 if (nm == NULL) {
3005 *cs->statusp = resp->status = NFS4ERR_INVAL;
3006 goto out;
3007 }
3008
3009 if (len > MAXNAMELEN) {
3010 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
3011 kmem_free(nm, len);
3012 goto out;
3013 }
3014
3015 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3016 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
3017 MAXPATHLEN + 1);
3018
3019 if (name == NULL) {
3020 *cs->statusp = resp->status = NFS4ERR_INVAL;
3021 kmem_free(nm, len);
3022 goto out;
3023 }
3024
3025 *cs->statusp = resp->status = do_rfs4_op_lookup(name, req, cs);
3026
3027 if (name != nm)
3028 kmem_free(name, MAXPATHLEN + 1);
3029 kmem_free(nm, len);
3030
3031 out:
3032 DTRACE_NFSV4_2(op__lookup__done, struct compound_state *, cs,
3033 LOOKUP4res *, resp);
3034 }
3035
3036 /* ARGSUSED */
3037 static void
3038 rfs4_op_lookupp(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3039 struct compound_state *cs)
3040 {
3041 LOOKUPP4res *resp = &resop->nfs_resop4_u.oplookupp;
3042
3043 DTRACE_NFSV4_1(op__lookupp__start, struct compound_state *, cs);
3044
3045 if (cs->vp == NULL) {
3046 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3047 goto out;
3048 }
3049
3050 if (cs->vp->v_type != VDIR) {
3051 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
3052 goto out;
3053 }
3054
3055 *cs->statusp = resp->status = do_rfs4_op_lookup("..", req, cs);
3056
3057 /*
3058 * From NFSV4 Specification, LOOKUPP should not check for
3059 * NFS4ERR_WRONGSEC. Retrun NFS4_OK instead.
3060 */
3061 if (resp->status == NFS4ERR_WRONGSEC) {
3062 *cs->statusp = resp->status = NFS4_OK;
3063 }
3064
3065 out:
3066 DTRACE_NFSV4_2(op__lookupp__done, struct compound_state *, cs,
3067 LOOKUPP4res *, resp);
3068 }
3069
3070
3071 /*ARGSUSED2*/
3072 static void
3073 rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3074 struct compound_state *cs)
3075 {
3076 OPENATTR4args *args = &argop->nfs_argop4_u.opopenattr;
3077 OPENATTR4res *resp = &resop->nfs_resop4_u.opopenattr;
3078 vnode_t *avp = NULL;
3079 int lookup_flags = LOOKUP_XATTR, error;
3080 int exp_ro = 0;
3081
3082 DTRACE_NFSV4_2(op__openattr__start, struct compound_state *, cs,
3083 OPENATTR4args *, args);
3084
3085 if (cs->vp == NULL) {
3086 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3087 goto out;
3088 }
3089
3090 if ((cs->vp->v_vfsp->vfs_flag & VFS_XATTR) == 0 &&
3091 !vfs_has_feature(cs->vp->v_vfsp, VFSFT_SYSATTR_VIEWS)) {
3092 *cs->statusp = resp->status = puterrno4(ENOTSUP);
3093 goto out;
3094 }
3095
3096 /*
3097 * If file system supports passing ACE mask to VOP_ACCESS then
3098 * check for ACE_READ_NAMED_ATTRS, otherwise do legacy checks
3099 */
3100
3101 if (vfs_has_feature(cs->vp->v_vfsp, VFSFT_ACEMASKONACCESS))
3102 error = VOP_ACCESS(cs->vp, ACE_READ_NAMED_ATTRS,
3103 V_ACE_MASK, cs->cr, NULL);
3104 else
3105 error = ((VOP_ACCESS(cs->vp, VREAD, 0, cs->cr, NULL) != 0) &&
3106 (VOP_ACCESS(cs->vp, VWRITE, 0, cs->cr, NULL) != 0) &&
3107 (VOP_ACCESS(cs->vp, VEXEC, 0, cs->cr, NULL) != 0));
3108
3109 if (error) {
3110 *cs->statusp = resp->status = puterrno4(EACCES);
3111 goto out;
3112 }
3113
3114 /*
3115 * The CREATE_XATTR_DIR VOP flag cannot be specified if
3116 * the file system is exported read-only -- regardless of
3117 * createdir flag. Otherwise the attrdir would be created
3118 * (assuming server fs isn't mounted readonly locally). If
3119 * VOP_LOOKUP returns ENOENT in this case, the error will
3120 * be translated into EROFS. ENOSYS is mapped to ENOTSUP
3121 * because specfs has no VOP_LOOKUP op, so the macro would
3122 * return ENOSYS. EINVAL is returned by all (current)
3123 * Solaris file system implementations when any of their
3124 * restrictions are violated (xattr(dir) can't have xattrdir).
3125 * Returning NOTSUPP is more appropriate in this case
3126 * because the object will never be able to have an attrdir.
3127 */
3128 if (args->createdir && ! (exp_ro = rdonly4(req, cs)))
3129 lookup_flags |= CREATE_XATTR_DIR;
3130
3131 error = VOP_LOOKUP(cs->vp, "", &avp, NULL, lookup_flags, NULL, cs->cr,
3132 NULL, NULL, NULL);
3133
3134 if (error) {
3135 if (error == ENOENT && args->createdir && exp_ro)
3136 *cs->statusp = resp->status = puterrno4(EROFS);
3137 else if (error == EINVAL || error == ENOSYS)
3138 *cs->statusp = resp->status = puterrno4(ENOTSUP);
3139 else
3140 *cs->statusp = resp->status = puterrno4(error);
3141 goto out;
3142 }
3143
3144 ASSERT(avp->v_flag & V_XATTRDIR);
3145
3146 error = makefh4(&cs->fh, avp, cs->exi);
3147
3148 if (error) {
3149 VN_RELE(avp);
3150 *cs->statusp = resp->status = puterrno4(error);
3151 goto out;
3152 }
3153
3154 VN_RELE(cs->vp);
3155 cs->vp = avp;
3156
3157 /*
3158 * There is no requirement for an attrdir fh flag
3159 * because the attrdir has a vnode flag to distinguish
3160 * it from regular (non-xattr) directories. The
3161 * FH4_ATTRDIR flag is set for future sanity checks.
3162 */
3163 set_fh4_flag(&cs->fh, FH4_ATTRDIR);
3164 *cs->statusp = resp->status = NFS4_OK;
3165
3166 out:
3167 DTRACE_NFSV4_2(op__openattr__done, struct compound_state *, cs,
3168 OPENATTR4res *, resp);
3169 }
3170
3171 static int
3172 do_io(int direction, vnode_t *vp, struct uio *uio, int ioflag, cred_t *cred,
3173 caller_context_t *ct)
3174 {
3175 int error;
3176 int i;
3177 clock_t delaytime;
3178
3179 delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
3180
3181 /*
3182 * Don't block on mandatory locks. If this routine returns
3183 * EAGAIN, the caller should return NFS4ERR_LOCKED.
3184 */
3185 uio->uio_fmode = FNONBLOCK;
3186
3187 for (i = 0; i < rfs4_maxlock_tries; i++) {
3188
3189
3190 if (direction == FREAD) {
3191 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, ct);
3192 error = VOP_READ(vp, uio, ioflag, cred, ct);
3193 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, ct);
3194 } else {
3195 (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, ct);
3196 error = VOP_WRITE(vp, uio, ioflag, cred, ct);
3197 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, ct);
3198 }
3199
3200 if (error != EAGAIN)
3201 break;
3202
3203 if (i < rfs4_maxlock_tries - 1) {
3204 delay(delaytime);
3205 delaytime *= 2;
3206 }
3207 }
3208
3209 return (error);
3210 }
3211
3212 /* ARGSUSED */
3213 static void
3214 rfs4_op_read(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3215 struct compound_state *cs)
3216 {
3217 READ4args *args = &argop->nfs_argop4_u.opread;
3218 READ4res *resp = &resop->nfs_resop4_u.opread;
3219 int error;
3220 int verror;
3221 vnode_t *vp;
3222 struct vattr va;
3223 struct iovec iov, *iovp = NULL;
3224 int iovcnt;
3225 struct uio uio;
3226 u_offset_t offset;
3227 bool_t *deleg = &cs->deleg;
3228 nfsstat4 stat;
3229 int in_crit = 0;
3230 mblk_t *mp = NULL;
3231 int alloc_err = 0;
3232 int rdma_used = 0;
3233 int loaned_buffers;
3234 caller_context_t ct;
3235 struct uio *uiop;
3236
3237 DTRACE_NFSV4_2(op__read__start, struct compound_state *, cs,
3238 READ4args, args);
3239
3240 vp = cs->vp;
3241 if (vp == NULL) {
3242 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3243 goto out;
3244 }
3245 if (cs->access == CS_ACCESS_DENIED) {
3246 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3247 goto out;
3248 }
3249
3250 if ((stat = rfs4_check_stateid(FREAD, vp, &args->stateid, FALSE,
3251 deleg, TRUE, &ct)) != NFS4_OK) {
3252 *cs->statusp = resp->status = stat;
3253 goto out;
3254 }
3255
3256 /*
3257 * Enter the critical region before calling VOP_RWLOCK
3258 * to avoid a deadlock with write requests.
3259 */
3260 if (nbl_need_check(vp)) {
3261 nbl_start_crit(vp, RW_READER);
3262 in_crit = 1;
3263 if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
3264 &ct)) {
3265 *cs->statusp = resp->status = NFS4ERR_LOCKED;
3266 goto out;
3267 }
3268 }
3269
3270 if (args->wlist) {
3271 if (args->count > clist_len(args->wlist)) {
3272 *cs->statusp = resp->status = NFS4ERR_INVAL;
3273 goto out;
3274 }
3275 rdma_used = 1;
3276 }
3277
3278 /* use loaned buffers for TCP */
3279 loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;
3280
3281 va.va_mask = AT_MODE|AT_SIZE|AT_UID;
3282 verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3283
3284 /*
3285 * If we can't get the attributes, then we can't do the
3286 * right access checking. So, we'll fail the request.
3287 */
3288 if (verror) {
3289 *cs->statusp = resp->status = puterrno4(verror);
3290 goto out;
3291 }
3292
3293 if (vp->v_type != VREG) {
3294 *cs->statusp = resp->status =
3295 ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
3296 goto out;
3297 }
3298
3299 if (crgetuid(cs->cr) != va.va_uid &&
3300 (error = VOP_ACCESS(vp, VREAD, 0, cs->cr, &ct)) &&
3301 (error = VOP_ACCESS(vp, VEXEC, 0, cs->cr, &ct))) {
3302 *cs->statusp = resp->status = puterrno4(error);
3303 goto out;
3304 }
3305
3306 if (MANDLOCK(vp, va.va_mode)) { /* XXX - V4 supports mand locking */
3307 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3308 goto out;
3309 }
3310
3311 offset = args->offset;
3312 if (offset >= va.va_size) {
3313 *cs->statusp = resp->status = NFS4_OK;
3314 resp->eof = TRUE;
3315 resp->data_len = 0;
3316 resp->data_val = NULL;
3317 resp->mblk = NULL;
3318 /* RDMA */
3319 resp->wlist = args->wlist;
3320 resp->wlist_len = resp->data_len;
3321 *cs->statusp = resp->status = NFS4_OK;
3322 if (resp->wlist)
3323 clist_zero_len(resp->wlist);
3324 goto out;
3325 }
3326
3327 if (args->count == 0) {
3328 *cs->statusp = resp->status = NFS4_OK;
3329 resp->eof = FALSE;
3330 resp->data_len = 0;
3331 resp->data_val = NULL;
3332 resp->mblk = NULL;
3333 /* RDMA */
3334 resp->wlist = args->wlist;
3335 resp->wlist_len = resp->data_len;
3336 if (resp->wlist)
3337 clist_zero_len(resp->wlist);
3338 goto out;
3339 }
3340
3341 /*
3342 * Do not allocate memory more than maximum allowed
3343 * transfer size
3344 */
3345 if (args->count > rfs4_tsize(req))
3346 args->count = rfs4_tsize(req);
3347
3348 if (loaned_buffers) {
3349 uiop = (uio_t *)rfs_setup_xuio(vp);
3350 ASSERT(uiop != NULL);
3351 uiop->uio_segflg = UIO_SYSSPACE;
3352 uiop->uio_loffset = args->offset;
3353 uiop->uio_resid = args->count;
3354
3355 /* Jump to do the read if successful */
3356 if (!VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cs->cr, &ct)) {
3357 /*
3358 * Need to hold the vnode until after VOP_RETZCBUF()
3359 * is called.
3360 */
3361 VN_HOLD(vp);
3362 goto doio_read;
3363 }
3364
3365 DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
3366 uiop->uio_loffset, int, uiop->uio_resid);
3367
3368 uiop->uio_extflg = 0;
3369
3370 /* failure to setup for zero copy */
3371 rfs_free_xuio((void *)uiop);
3372 loaned_buffers = 0;
3373 }
3374
3375 /*
3376 * If returning data via RDMA Write, then grab the chunk list. If we
3377 * aren't returning READ data w/RDMA_WRITE, then grab a mblk.
3378 */
3379 if (rdma_used) {
3380 mp = NULL;
3381 (void) rdma_get_wchunk(req, &iov, args->wlist);
3382 uio.uio_iov = &iov;
3383 uio.uio_iovcnt = 1;
3384 } else {
3385 /*
3386 * mp will contain the data to be sent out in the read reply.
3387 * It will be freed after the reply has been sent.
3388 */
3389 mp = rfs_read_alloc(args->count, &iovp, &iovcnt);
3390 ASSERT(mp != NULL);
3391 ASSERT(alloc_err == 0);
3392 uio.uio_iov = iovp;
3393 uio.uio_iovcnt = iovcnt;
3394 }
3395
3396 uio.uio_segflg = UIO_SYSSPACE;
3397 uio.uio_extflg = UIO_COPY_CACHED;
3398 uio.uio_loffset = args->offset;
3399 uio.uio_resid = args->count;
3400 uiop = &uio;
3401
3402 doio_read:
3403 error = do_io(FREAD, vp, uiop, 0, cs->cr, &ct);
3404
3405 va.va_mask = AT_SIZE;
3406 verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3407
3408 if (error) {
3409 if (mp)
3410 freemsg(mp);
3411 *cs->statusp = resp->status = puterrno4(error);
3412 goto out;
3413 }
3414
3415 /* make mblk using zc buffers */
3416 if (loaned_buffers) {
3417 mp = uio_to_mblk(uiop);
3418 ASSERT(mp != NULL);
3419 }
3420
3421 *cs->statusp = resp->status = NFS4_OK;
3422
3423 ASSERT(uiop->uio_resid >= 0);
3424 resp->data_len = args->count - uiop->uio_resid;
3425 if (mp) {
3426 resp->data_val = (char *)mp->b_datap->db_base;
3427 rfs_rndup_mblks(mp, resp->data_len, loaned_buffers);
3428 } else {
3429 resp->data_val = (caddr_t)iov.iov_base;
3430 }
3431
3432 resp->mblk = mp;
3433
3434 if (!verror && offset + resp->data_len == va.va_size)
3435 resp->eof = TRUE;
3436 else
3437 resp->eof = FALSE;
3438
3439 if (rdma_used) {
3440 if (!rdma_setup_read_data4(args, resp)) {
3441 *cs->statusp = resp->status = NFS4ERR_INVAL;
3442 }
3443 } else {
3444 resp->wlist = NULL;
3445 }
3446
3447 out:
3448 if (in_crit)
3449 nbl_end_crit(vp);
3450
3451 if (iovp != NULL)
3452 kmem_free(iovp, iovcnt * sizeof (struct iovec));
3453
3454 DTRACE_NFSV4_2(op__read__done, struct compound_state *, cs,
3455 READ4res *, resp);
3456 }
3457
3458 static void
3459 rfs4_op_read_free(nfs_resop4 *resop)
3460 {
3461 READ4res *resp = &resop->nfs_resop4_u.opread;
3462
3463 if (resp->status == NFS4_OK && resp->mblk != NULL) {
3464 freemsg(resp->mblk);
3465 resp->mblk = NULL;
3466 resp->data_val = NULL;
3467 resp->data_len = 0;
3468 }
3469 }
3470
3471 static void
3472 rfs4_op_readdir_free(nfs_resop4 * resop)
3473 {
3474 READDIR4res *resp = &resop->nfs_resop4_u.opreaddir;
3475
3476 if (resp->status == NFS4_OK && resp->mblk != NULL) {
3477 freeb(resp->mblk);
3478 resp->mblk = NULL;
3479 resp->data_len = 0;
3480 }
3481 }
3482
3483
3484 /* ARGSUSED */
3485 static void
3486 rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3487 struct compound_state *cs)
3488 {
3489 PUTPUBFH4res *resp = &resop->nfs_resop4_u.opputpubfh;
3490 int error;
3491 vnode_t *vp;
3492 struct exportinfo *exi, *sav_exi;
3493 nfs_fh4_fmt_t *fh_fmtp;
3494 nfs_export_t *ne = nfs_get_export();
3495
3496 DTRACE_NFSV4_1(op__putpubfh__start, struct compound_state *, cs);
3497
3498 if (cs->vp) {
3499 VN_RELE(cs->vp);
3500 cs->vp = NULL;
3501 }
3502
3503 if (cs->cr)
3504 crfree(cs->cr);
3505
3506 cs->cr = crdup(cs->basecr);
3507
3508 vp = ne->exi_public->exi_vp;
3509 if (vp == NULL) {
3510 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3511 goto out;
3512 }
3513
3514 error = makefh4(&cs->fh, vp, ne->exi_public);
3515 if (error != 0) {
3516 *cs->statusp = resp->status = puterrno4(error);
3517 goto out;
3518 }
3519 sav_exi = cs->exi;
3520 if (ne->exi_public == ne->exi_root) {
3521 /*
3522 * No filesystem is actually shared public, so we default
3523 * to exi_root. In this case, we must check whether root
3524 * is exported.
3525 */
3526 fh_fmtp = (nfs_fh4_fmt_t *)cs->fh.nfs_fh4_val;
3527
3528 /*
3529 * if root filesystem is exported, the exportinfo struct that we
3530 * should use is what checkexport4 returns, because root_exi is
3531 * actually a mostly empty struct.
3532 */
3533 exi = checkexport4(&fh_fmtp->fh4_fsid,
3534 (fid_t *)&fh_fmtp->fh4_xlen, NULL);
3535 cs->exi = ((exi != NULL) ? exi : ne->exi_public);
3536 } else {
3537 /*
3538 * it's a properly shared filesystem
3539 */
3540 cs->exi = ne->exi_public;
3541 }
3542
3543 if (is_system_labeled()) {
3544 bslabel_t *clabel;
3545
3546 ASSERT(req->rq_label != NULL);
3547 clabel = req->rq_label;
3548 DTRACE_PROBE2(tx__rfs4__log__info__opputpubfh__clabel, char *,
3549 "got client label from request(1)",
3550 struct svc_req *, req);
3551 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3552 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3553 cs->exi)) {
3554 *cs->statusp = resp->status =
3555 NFS4ERR_SERVERFAULT;
3556 goto out;
3557 }
3558 }
3559 }
3560
3561 VN_HOLD(vp);
3562 cs->vp = vp;
3563
3564 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3565 VN_RELE(cs->vp);
3566 cs->vp = NULL;
3567 cs->exi = sav_exi;
3568 goto out;
3569 }
3570
3571 *cs->statusp = resp->status = NFS4_OK;
3572 out:
3573 DTRACE_NFSV4_2(op__putpubfh__done, struct compound_state *, cs,
3574 PUTPUBFH4res *, resp);
3575 }
3576
3577 /*
3578 * XXX - issue with put*fh operations. Suppose /export/home is exported.
3579 * Suppose an NFS client goes to mount /export/home/joe. If /export, home,
3580 * or joe have restrictive search permissions, then we shouldn't let
3581 * the client get a file handle. This is easy to enforce. However, we
3582 * don't know what security flavor should be used until we resolve the
3583 * path name. Another complication is uid mapping. If root is
3584 * the user, then it will be mapped to the anonymous user by default,
3585 * but we won't know that till we've resolved the path name. And we won't
3586 * know what the anonymous user is.
3587 * Luckily, SECINFO is specified to take a full filename.
3588 * So what we will have to in rfs4_op_lookup is check that flavor of
3589 * the target object matches that of the request, and if root was the
3590 * caller, check for the root= and anon= options, and if necessary,
3591 * repeat the lookup using the right cred_t. But that's not done yet.
3592 */
3593 /* ARGSUSED */
3594 static void
3595 rfs4_op_putfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3596 struct compound_state *cs)
3597 {
3598 PUTFH4args *args = &argop->nfs_argop4_u.opputfh;
3599 PUTFH4res *resp = &resop->nfs_resop4_u.opputfh;
3600 nfs_fh4_fmt_t *fh_fmtp;
3601
3602 DTRACE_NFSV4_2(op__putfh__start, struct compound_state *, cs,
3603 PUTFH4args *, args);
3604
3605 if (cs->vp) {
3606 VN_RELE(cs->vp);
3607 cs->vp = NULL;
3608 }
3609
3610 if (cs->cr) {
3611 crfree(cs->cr);
3612 cs->cr = NULL;
3613 }
3614
3615
3616 if (args->object.nfs_fh4_len < NFS_FH4_LEN) {
3617 *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
3618 goto out;
3619 }
3620
3621 fh_fmtp = (nfs_fh4_fmt_t *)args->object.nfs_fh4_val;
3622 cs->exi = checkexport4(&fh_fmtp->fh4_fsid, (fid_t *)&fh_fmtp->fh4_xlen,
3623 NULL);
3624
3625 if (cs->exi == NULL) {
3626 *cs->statusp = resp->status = NFS4ERR_STALE;
3627 goto out;
3628 }
3629
3630 cs->cr = crdup(cs->basecr);
3631
3632 ASSERT(cs->cr != NULL);
3633
3634 if (! (cs->vp = nfs4_fhtovp(&args->object, cs->exi, &resp->status))) {
3635 *cs->statusp = resp->status;
3636 goto out;
3637 }
3638
3639 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3640 VN_RELE(cs->vp);
3641 cs->vp = NULL;
3642 goto out;
3643 }
3644
3645 nfs_fh4_copy(&args->object, &cs->fh);
3646 *cs->statusp = resp->status = NFS4_OK;
3647 cs->deleg = FALSE;
3648
3649 out:
3650 DTRACE_NFSV4_2(op__putfh__done, struct compound_state *, cs,
3651 PUTFH4res *, resp);
3652 }
3653
3654 /* ARGSUSED */
3655 static void
3656 rfs4_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3657 struct compound_state *cs)
3658 {
3659 PUTROOTFH4res *resp = &resop->nfs_resop4_u.opputrootfh;
3660 int error;
3661 fid_t fid;
3662 struct exportinfo *exi, *sav_exi;
3663
3664 DTRACE_NFSV4_1(op__putrootfh__start, struct compound_state *, cs);
3665
3666 if (cs->vp) {
3667 VN_RELE(cs->vp);
3668 cs->vp = NULL;
3669 }
3670
3671 if (cs->cr)
3672 crfree(cs->cr);
3673
3674 cs->cr = crdup(cs->basecr);
3675
3676 /*
3677 * Using rootdir, the system root vnode,
3678 * get its fid.
3679 */
3680 bzero(&fid, sizeof (fid));
3681 fid.fid_len = MAXFIDSZ;
3682 error = vop_fid_pseudo(ZONE_ROOTVP(), &fid);
3683 if (error != 0) {
3684 *cs->statusp = resp->status = puterrno4(error);
3685 goto out;
3686 }
3687
3688 /*
3689 * Then use the root fsid & fid it to find out if it's exported
3690 *
3691 * If the server root isn't exported directly, then
3692 * it should at least be a pseudo export based on
3693 * one or more exports further down in the server's
3694 * file tree.
3695 */
3696 exi = checkexport4(&ZONE_ROOTVP()->v_vfsp->vfs_fsid, &fid, NULL);
3697 if (exi == NULL || exi->exi_export.ex_flags & EX_PUBLIC) {
3698 NFS4_DEBUG(rfs4_debug,
3699 (CE_WARN, "rfs4_op_putrootfh: export check failure"));
3700 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3701 goto out;
3702 }
3703
3704 /*
3705 * Now make a filehandle based on the root
3706 * export and root vnode.
3707 */
3708 error = makefh4(&cs->fh, ZONE_ROOTVP(), exi);
3709 if (error != 0) {
3710 *cs->statusp = resp->status = puterrno4(error);
3711 goto out;
3712 }
3713
3714 sav_exi = cs->exi;
3715 cs->exi = exi;
3716
3717 VN_HOLD(ZONE_ROOTVP());
3718 cs->vp = ZONE_ROOTVP();
3719
3720 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3721 VN_RELE(cs->vp);
3722 cs->vp = NULL;
3723 cs->exi = sav_exi;
3724 goto out;
3725 }
3726
3727 *cs->statusp = resp->status = NFS4_OK;
3728 cs->deleg = FALSE;
3729 out:
3730 DTRACE_NFSV4_2(op__putrootfh__done, struct compound_state *, cs,
3731 PUTROOTFH4res *, resp);
3732 }
3733
3734 /*
3735 * readlink: args: CURRENT_FH.
3736 * res: status. If success - CURRENT_FH unchanged, return linktext.
3737 */
3738
3739 /* ARGSUSED */
3740 static void
3741 rfs4_op_readlink(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3742 struct compound_state *cs)
3743 {
3744 READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3745 int error;
3746 vnode_t *vp;
3747 struct iovec iov;
3748 struct vattr va;
3749 struct uio uio;
3750 char *data;
3751 struct sockaddr *ca;
3752 char *name = NULL;
3753 int is_referral;
3754
3755 DTRACE_NFSV4_1(op__readlink__start, struct compound_state *, cs);
3756
3757 /* CURRENT_FH: directory */
3758 vp = cs->vp;
3759 if (vp == NULL) {
3760 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3761 goto out;
3762 }
3763
3764 if (cs->access == CS_ACCESS_DENIED) {
3765 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3766 goto out;
3767 }
3768
3769 /* Is it a referral? */
3770 if (vn_is_nfs_reparse(vp, cs->cr) && client_is_downrev(req)) {
3771
3772 is_referral = 1;
3773
3774 } else {
3775
3776 is_referral = 0;
3777
3778 if (vp->v_type == VDIR) {
3779 *cs->statusp = resp->status = NFS4ERR_ISDIR;
3780 goto out;
3781 }
3782
3783 if (vp->v_type != VLNK) {
3784 *cs->statusp = resp->status = NFS4ERR_INVAL;
3785 goto out;
3786 }
3787
3788 }
3789
3790 va.va_mask = AT_MODE;
3791 error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
3792 if (error) {
3793 *cs->statusp = resp->status = puterrno4(error);
3794 goto out;
3795 }
3796
3797 if (MANDLOCK(vp, va.va_mode)) {
3798 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3799 goto out;
3800 }
3801
3802 data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
3803
3804 if (is_referral) {
3805 char *s;
3806 size_t strsz;
3807
3808 /* Get an artificial symlink based on a referral */
3809 s = build_symlink(vp, cs->cr, &strsz);
3810 global_svstat_ptr[4][NFS_REFERLINKS].value.ui64++;
3811 DTRACE_PROBE2(nfs4serv__func__referral__reflink,
3812 vnode_t *, vp, char *, s);
3813 if (s == NULL)
3814 error = EINVAL;
3815 else {
3816 error = 0;
3817 (void) strlcpy(data, s, MAXPATHLEN + 1);
3818 kmem_free(s, strsz);
3819 }
3820
3821 } else {
3822
3823 iov.iov_base = data;
3824 iov.iov_len = MAXPATHLEN;
3825 uio.uio_iov = &iov;
3826 uio.uio_iovcnt = 1;
3827 uio.uio_segflg = UIO_SYSSPACE;
3828 uio.uio_extflg = UIO_COPY_CACHED;
3829 uio.uio_loffset = 0;
3830 uio.uio_resid = MAXPATHLEN;
3831
3832 error = VOP_READLINK(vp, &uio, cs->cr, NULL);
3833
3834 if (!error)
3835 *(data + MAXPATHLEN - uio.uio_resid) = '\0';
3836 }
3837
3838 if (error) {
3839 kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3840 *cs->statusp = resp->status = puterrno4(error);
3841 goto out;
3842 }
3843
3844 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3845 name = nfscmd_convname(ca, cs->exi, data, NFSCMD_CONV_OUTBOUND,
3846 MAXPATHLEN + 1);
3847
3848 if (name == NULL) {
3849 /*
3850 * Even though the conversion failed, we return
3851 * something. We just don't translate it.
3852 */
3853 name = data;
3854 }
3855
3856 /*
3857 * treat link name as data
3858 */
3859 (void) str_to_utf8(name, (utf8string *)&resp->link);
3860
3861 if (name != data)
3862 kmem_free(name, MAXPATHLEN + 1);
3863 kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3864 *cs->statusp = resp->status = NFS4_OK;
3865
3866 out:
3867 DTRACE_NFSV4_2(op__readlink__done, struct compound_state *, cs,
3868 READLINK4res *, resp);
3869 }
3870
3871 static void
3872 rfs4_op_readlink_free(nfs_resop4 *resop)
3873 {
3874 READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3875 utf8string *symlink = (utf8string *)&resp->link;
3876
3877 if (symlink->utf8string_val) {
3878 UTF8STRING_FREE(*symlink)
3879 }
3880 }
3881
3882 /*
3883 * release_lockowner:
3884 * Release any state associated with the supplied
3885 * lockowner. Note if any lo_state is holding locks we will not
3886 * rele that lo_state and thus the lockowner will not be destroyed.
3887 * A client using lock after the lock owner stateid has been released
3888 * will suffer the consequence of NFS4ERR_BAD_STATEID and would have
3889 * to reissue the lock with new_lock_owner set to TRUE.
3890 * args: lock_owner
3891 * res: status
3892 */
3893 /* ARGSUSED */
3894 static void
3895 rfs4_op_release_lockowner(nfs_argop4 *argop, nfs_resop4 *resop,
3896 struct svc_req *req, struct compound_state *cs)
3897 {
3898 RELEASE_LOCKOWNER4args *ap = &argop->nfs_argop4_u.oprelease_lockowner;
3899 RELEASE_LOCKOWNER4res *resp = &resop->nfs_resop4_u.oprelease_lockowner;
3900 rfs4_lockowner_t *lo;
3901 rfs4_openowner_t *oo;
3902 rfs4_state_t *sp;
3903 rfs4_lo_state_t *lsp;
3904 rfs4_client_t *cp;
3905 bool_t create = FALSE;
3906 locklist_t *llist;
3907 sysid_t sysid;
3908
3909 DTRACE_NFSV4_2(op__release__lockowner__start, struct compound_state *,
3910 cs, RELEASE_LOCKOWNER4args *, ap);
3911
3912 /* Make sure there is a clientid around for this request */
3913 cp = rfs4_findclient_by_id(ap->lock_owner.clientid, FALSE);
3914
3915 if (cp == NULL) {
3916 *cs->statusp = resp->status =
3917 rfs4_check_clientid(&ap->lock_owner.clientid, 0);
3918 goto out;
3919 }
3920 rfs4_client_rele(cp);
3921
3922 lo = rfs4_findlockowner(&ap->lock_owner, &create);
3923 if (lo == NULL) {
3924 *cs->statusp = resp->status = NFS4_OK;
3925 goto out;
3926 }
3927 ASSERT(lo->rl_client != NULL);
3928
3929 /*
3930 * Check for EXPIRED client. If so will reap state with in a lease
3931 * period or on next set_clientid_confirm step
3932 */
3933 if (rfs4_lease_expired(lo->rl_client)) {
3934 rfs4_lockowner_rele(lo);
3935 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
3936 goto out;
3937 }
3938
3939 /*
3940 * If no sysid has been assigned, then no locks exist; just return.
3941 */
3942 rfs4_dbe_lock(lo->rl_client->rc_dbe);
3943 if (lo->rl_client->rc_sysidt == LM_NOSYSID) {
3944 rfs4_lockowner_rele(lo);
3945 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3946 goto out;
3947 }
3948
3949 sysid = lo->rl_client->rc_sysidt;
3950 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3951
3952 /*
3953 * Mark the lockowner invalid.
3954 */
3955 rfs4_dbe_hide(lo->rl_dbe);
3956
3957 /*
3958 * sysid-pid pair should now not be used since the lockowner is
3959 * invalid. If the client were to instantiate the lockowner again
3960 * it would be assigned a new pid. Thus we can get the list of
3961 * current locks.
3962 */
3963
3964 llist = flk_get_active_locks(sysid, lo->rl_pid);
3965 /* If we are still holding locks fail */
3966 if (llist != NULL) {
3967
3968 *cs->statusp = resp->status = NFS4ERR_LOCKS_HELD;
3969
3970 flk_free_locklist(llist);
3971 /*
3972 * We need to unhide the lockowner so the client can
3973 * try it again. The bad thing here is if the client
3974 * has a logic error that took it here in the first place
3975 * they probably have lost accounting of the locks that it
3976 * is holding. So we may have dangling state until the
3977 * open owner state is reaped via close. One scenario
3978 * that could possibly occur is that the client has
3979 * sent the unlock request(s) in separate threads
3980 * and has not waited for the replies before sending the
3981 * RELEASE_LOCKOWNER request. Presumably, it would expect
3982 * and deal appropriately with NFS4ERR_LOCKS_HELD, by
3983 * reissuing the request.
3984 */
3985 rfs4_dbe_unhide(lo->rl_dbe);
3986 rfs4_lockowner_rele(lo);
3987 goto out;
3988 }
3989
3990 /*
3991 * For the corresponding client we need to check each open
3992 * owner for any opens that have lockowner state associated
3993 * with this lockowner.
3994 */
3995
3996 rfs4_dbe_lock(lo->rl_client->rc_dbe);
3997 for (oo = list_head(&lo->rl_client->rc_openownerlist); oo != NULL;
3998 oo = list_next(&lo->rl_client->rc_openownerlist, oo)) {
3999
4000 rfs4_dbe_lock(oo->ro_dbe);
4001 for (sp = list_head(&oo->ro_statelist); sp != NULL;
4002 sp = list_next(&oo->ro_statelist, sp)) {
4003
4004 rfs4_dbe_lock(sp->rs_dbe);
4005 for (lsp = list_head(&sp->rs_lostatelist);
4006 lsp != NULL;
4007 lsp = list_next(&sp->rs_lostatelist, lsp)) {
4008 if (lsp->rls_locker == lo) {
4009 rfs4_dbe_lock(lsp->rls_dbe);
4010 rfs4_dbe_invalidate(lsp->rls_dbe);
4011 rfs4_dbe_unlock(lsp->rls_dbe);
4012 }
4013 }
4014 rfs4_dbe_unlock(sp->rs_dbe);
4015 }
4016 rfs4_dbe_unlock(oo->ro_dbe);
4017 }
4018 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
4019
4020 rfs4_lockowner_rele(lo);
4021
4022 *cs->statusp = resp->status = NFS4_OK;
4023
4024 out:
4025 DTRACE_NFSV4_2(op__release__lockowner__done, struct compound_state *,
4026 cs, RELEASE_LOCKOWNER4res *, resp);
4027 }
4028
4029 /*
4030 * short utility function to lookup a file and recall the delegation
4031 */
4032 static rfs4_file_t *
4033 rfs4_lookup_and_findfile(vnode_t *dvp, char *nm, vnode_t **vpp,
4034 int *lkup_error, cred_t *cr)
4035 {
4036 vnode_t *vp;
4037 rfs4_file_t *fp = NULL;
4038 bool_t fcreate = FALSE;
4039 int error;
4040
4041 if (vpp)
4042 *vpp = NULL;
4043
4044 if ((error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cr, NULL, NULL,
4045 NULL)) == 0) {
4046 if (vp->v_type == VREG)
4047 fp = rfs4_findfile(vp, NULL, &fcreate);
4048 if (vpp)
4049 *vpp = vp;
4050 else
4051 VN_RELE(vp);
4052 }
4053
4054 if (lkup_error)
4055 *lkup_error = error;
4056
4057 return (fp);
4058 }
4059
4060 /*
4061 * remove: args: CURRENT_FH: directory; name.
4062 * res: status. If success - CURRENT_FH unchanged, return change_info
4063 * for directory.
4064 */
4065 /* ARGSUSED */
4066 static void
4067 rfs4_op_remove(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4068 struct compound_state *cs)
4069 {
4070 REMOVE4args *args = &argop->nfs_argop4_u.opremove;
4071 REMOVE4res *resp = &resop->nfs_resop4_u.opremove;
4072 int error;
4073 vnode_t *dvp, *vp;
4074 struct vattr bdva, idva, adva;
4075 char *nm;
4076 uint_t len;
4077 rfs4_file_t *fp;
4078 int in_crit = 0;
4079 bslabel_t *clabel;
4080 struct sockaddr *ca;
4081 char *name = NULL;
4082 nfsstat4 status;
4083
4084 DTRACE_NFSV4_2(op__remove__start, struct compound_state *, cs,
4085 REMOVE4args *, args);
4086
4087 /* CURRENT_FH: directory */
4088 dvp = cs->vp;
4089 if (dvp == NULL) {
4090 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4091 goto out;
4092 }
4093
4094 if (cs->access == CS_ACCESS_DENIED) {
4095 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4096 goto out;
4097 }
4098
4099 /*
4100 * If there is an unshared filesystem mounted on this vnode,
4101 * Do not allow to remove anything in this directory.
4102 */
4103 if (vn_ismntpt(dvp)) {
4104 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4105 goto out;
4106 }
4107
4108 if (dvp->v_type != VDIR) {
4109 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4110 goto out;
4111 }
4112
4113 status = utf8_dir_verify(&args->target);
4114 if (status != NFS4_OK) {
4115 *cs->statusp = resp->status = status;
4116 goto out;
4117 }
4118
4119 /*
4120 * Lookup the file so that we can check if it's a directory
4121 */
4122 nm = utf8_to_fn(&args->target, &len, NULL);
4123 if (nm == NULL) {
4124 *cs->statusp = resp->status = NFS4ERR_INVAL;
4125 goto out;
4126 }
4127
4128 if (len > MAXNAMELEN) {
4129 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4130 kmem_free(nm, len);
4131 goto out;
4132 }
4133
4134 if (rdonly4(req, cs)) {
4135 *cs->statusp = resp->status = NFS4ERR_ROFS;
4136 kmem_free(nm, len);
4137 goto out;
4138 }
4139
4140 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4141 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
4142 MAXPATHLEN + 1);
4143
4144 if (name == NULL) {
4145 *cs->statusp = resp->status = NFS4ERR_INVAL;
4146 kmem_free(nm, len);
4147 goto out;
4148 }
4149
4150 /*
4151 * Lookup the file to determine type and while we are see if
4152 * there is a file struct around and check for delegation.
4153 * We don't need to acquire va_seq before this lookup, if
4154 * it causes an update, cinfo.before will not match, which will
4155 * trigger a cache flush even if atomic is TRUE.
4156 */
4157 if (fp = rfs4_lookup_and_findfile(dvp, name, &vp, &error, cs->cr)) {
4158 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4159 NULL)) {
4160 VN_RELE(vp);
4161 rfs4_file_rele(fp);
4162 *cs->statusp = resp->status = NFS4ERR_DELAY;
4163 if (nm != name)
4164 kmem_free(name, MAXPATHLEN + 1);
4165 kmem_free(nm, len);
4166 goto out;
4167 }
4168 }
4169
4170 /* Didn't find anything to remove */
4171 if (vp == NULL) {
4172 *cs->statusp = resp->status = error;
4173 if (nm != name)
4174 kmem_free(name, MAXPATHLEN + 1);
4175 kmem_free(nm, len);
4176 goto out;
4177 }
4178
4179 if (nbl_need_check(vp)) {
4180 nbl_start_crit(vp, RW_READER);
4181 in_crit = 1;
4182 if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0, NULL)) {
4183 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4184 if (nm != name)
4185 kmem_free(name, MAXPATHLEN + 1);
4186 kmem_free(nm, len);
4187 nbl_end_crit(vp);
4188 VN_RELE(vp);
4189 if (fp) {
4190 rfs4_clear_dont_grant(fp);
4191 rfs4_file_rele(fp);
4192 }
4193 goto out;
4194 }
4195 }
4196
4197 /* check label before allowing removal */
4198 if (is_system_labeled()) {
4199 ASSERT(req->rq_label != NULL);
4200 clabel = req->rq_label;
4201 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
4202 "got client label from request(1)",
4203 struct svc_req *, req);
4204 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4205 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4206 cs->exi)) {
4207 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4208 if (name != nm)
4209 kmem_free(name, MAXPATHLEN + 1);
4210 kmem_free(nm, len);
4211 if (in_crit)
4212 nbl_end_crit(vp);
4213 VN_RELE(vp);
4214 if (fp) {
4215 rfs4_clear_dont_grant(fp);
4216 rfs4_file_rele(fp);
4217 }
4218 goto out;
4219 }
4220 }
4221 }
4222
4223 /* Get dir "before" change value */
4224 bdva.va_mask = AT_CTIME|AT_SEQ;
4225 error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
4226 if (error) {
4227 *cs->statusp = resp->status = puterrno4(error);
4228 if (nm != name)
4229 kmem_free(name, MAXPATHLEN + 1);
4230 kmem_free(nm, len);
4231 if (in_crit)
4232 nbl_end_crit(vp);
4233 VN_RELE(vp);
4234 if (fp) {
4235 rfs4_clear_dont_grant(fp);
4236 rfs4_file_rele(fp);
4237 }
4238 goto out;
4239 }
4240 NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
4241
4242 /* Actually do the REMOVE operation */
4243 if (vp->v_type == VDIR) {
4244 /*
4245 * Can't remove a directory that has a mounted-on filesystem.
4246 */
4247 if (vn_ismntpt(vp)) {
4248 error = EACCES;
4249 } else {
4250 /*
4251 * System V defines rmdir to return EEXIST,
4252 * not ENOTEMPTY, if the directory is not
4253 * empty. A System V NFS server needs to map
4254 * NFS4ERR_EXIST to NFS4ERR_NOTEMPTY to
4255 * transmit over the wire.
4256 */
4257 if ((error = VOP_RMDIR(dvp, name, ZONE_ROOTVP(), cs->cr,
4258 NULL, 0)) == EEXIST)
4259 error = ENOTEMPTY;
4260 }
4261 } else {
4262 if ((error = VOP_REMOVE(dvp, name, cs->cr, NULL, 0)) == 0 &&
4263 fp != NULL) {
4264 struct vattr va;
4265 vnode_t *tvp;
4266
4267 rfs4_dbe_lock(fp->rf_dbe);
4268 tvp = fp->rf_vp;
4269 if (tvp)
4270 VN_HOLD(tvp);
4271 rfs4_dbe_unlock(fp->rf_dbe);
4272
4273 if (tvp) {
4274 /*
4275 * This is va_seq safe because we are not
4276 * manipulating dvp.
4277 */
4278 va.va_mask = AT_NLINK;
4279 if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4280 va.va_nlink == 0) {
4281 /* Remove state on file remove */
4282 if (in_crit) {
4283 nbl_end_crit(vp);
4284 in_crit = 0;
4285 }
4286 rfs4_close_all_state(fp);
4287 }
4288 VN_RELE(tvp);
4289 }
4290 }
4291 }
4292
4293 if (in_crit)
4294 nbl_end_crit(vp);
4295 VN_RELE(vp);
4296
4297 if (fp) {
4298 rfs4_clear_dont_grant(fp);
4299 rfs4_file_rele(fp);
4300 }
4301 if (nm != name)
4302 kmem_free(name, MAXPATHLEN + 1);
4303 kmem_free(nm, len);
4304
4305 if (error) {
4306 *cs->statusp = resp->status = puterrno4(error);
4307 goto out;
4308 }
4309
4310 /*
4311 * Get the initial "after" sequence number, if it fails, set to zero
4312 */
4313 idva.va_mask = AT_SEQ;
4314 if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
4315 idva.va_seq = 0;
4316
4317 /*
4318 * Force modified data and metadata out to stable storage.
4319 */
4320 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
4321
4322 /*
4323 * Get "after" change value, if it fails, simply return the
4324 * before value.
4325 */
4326 adva.va_mask = AT_CTIME|AT_SEQ;
4327 if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
4328 adva.va_ctime = bdva.va_ctime;
4329 adva.va_seq = 0;
4330 }
4331
4332 NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
4333
4334 /*
4335 * The cinfo.atomic = TRUE only if we have
4336 * non-zero va_seq's, and it has incremented by exactly one
4337 * during the VOP_REMOVE/RMDIR and it didn't change during
4338 * the VOP_FSYNC.
4339 */
4340 if (bdva.va_seq && idva.va_seq && adva.va_seq &&
4341 idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
4342 resp->cinfo.atomic = TRUE;
4343 else
4344 resp->cinfo.atomic = FALSE;
4345
4346 *cs->statusp = resp->status = NFS4_OK;
4347
4348 out:
4349 DTRACE_NFSV4_2(op__remove__done, struct compound_state *, cs,
4350 REMOVE4res *, resp);
4351 }
4352
4353 /*
4354 * rename: args: SAVED_FH: from directory, CURRENT_FH: target directory,
4355 * oldname and newname.
4356 * res: status. If success - CURRENT_FH unchanged, return change_info
4357 * for both from and target directories.
4358 */
4359 /* ARGSUSED */
4360 static void
4361 rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4362 struct compound_state *cs)
4363 {
4364 RENAME4args *args = &argop->nfs_argop4_u.oprename;
4365 RENAME4res *resp = &resop->nfs_resop4_u.oprename;
4366 int error;
4367 vnode_t *odvp;
4368 vnode_t *ndvp;
4369 vnode_t *srcvp, *targvp, *tvp;
4370 struct vattr obdva, oidva, oadva;
4371 struct vattr nbdva, nidva, nadva;
4372 char *onm, *nnm;
4373 uint_t olen, nlen;
4374 rfs4_file_t *fp, *sfp;
4375 int in_crit_src, in_crit_targ;
4376 int fp_rele_grant_hold, sfp_rele_grant_hold;
4377 int unlinked;
4378 bslabel_t *clabel;
4379 struct sockaddr *ca;
4380 char *converted_onm = NULL;
4381 char *converted_nnm = NULL;
4382 nfsstat4 status;
4383
4384 DTRACE_NFSV4_2(op__rename__start, struct compound_state *, cs,
4385 RENAME4args *, args);
4386
4387 fp = sfp = NULL;
4388 srcvp = targvp = tvp = NULL;
4389 in_crit_src = in_crit_targ = 0;
4390 fp_rele_grant_hold = sfp_rele_grant_hold = 0;
4391 unlinked = 0;
4392
4393 /* CURRENT_FH: target directory */
4394 ndvp = cs->vp;
4395 if (ndvp == NULL) {
4396 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4397 goto out;
4398 }
4399
4400 /* SAVED_FH: from directory */
4401 odvp = cs->saved_vp;
4402 if (odvp == NULL) {
4403 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4404 goto out;
4405 }
4406
4407 if (cs->access == CS_ACCESS_DENIED) {
4408 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4409 goto out;
4410 }
4411
4412 /*
4413 * If there is an unshared filesystem mounted on this vnode,
4414 * do not allow to rename objects in this directory.
4415 */
4416 if (vn_ismntpt(odvp)) {
4417 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4418 goto out;
4419 }
4420
4421 /*
4422 * If there is an unshared filesystem mounted on this vnode,
4423 * do not allow to rename to this directory.
4424 */
4425 if (vn_ismntpt(ndvp)) {
4426 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4427 goto out;
4428 }
4429
4430 if (odvp->v_type != VDIR || ndvp->v_type != VDIR) {
4431 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4432 goto out;
4433 }
4434
4435 if (cs->saved_exi != cs->exi) {
4436 *cs->statusp = resp->status = NFS4ERR_XDEV;
4437 goto out;
4438 }
4439
4440 status = utf8_dir_verify(&args->oldname);
4441 if (status != NFS4_OK) {
4442 *cs->statusp = resp->status = status;
4443 goto out;
4444 }
4445
4446 status = utf8_dir_verify(&args->newname);
4447 if (status != NFS4_OK) {
4448 *cs->statusp = resp->status = status;
4449 goto out;
4450 }
4451
4452 onm = utf8_to_fn(&args->oldname, &olen, NULL);
4453 if (onm == NULL) {
4454 *cs->statusp = resp->status = NFS4ERR_INVAL;
4455 goto out;
4456 }
4457 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4458 nlen = MAXPATHLEN + 1;
4459 converted_onm = nfscmd_convname(ca, cs->exi, onm, NFSCMD_CONV_INBOUND,
4460 nlen);
4461
4462 if (converted_onm == NULL) {
4463 *cs->statusp = resp->status = NFS4ERR_INVAL;
4464 kmem_free(onm, olen);
4465 goto out;
4466 }
4467
4468 nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4469 if (nnm == NULL) {
4470 *cs->statusp = resp->status = NFS4ERR_INVAL;
4471 if (onm != converted_onm)
4472 kmem_free(converted_onm, MAXPATHLEN + 1);
4473 kmem_free(onm, olen);
4474 goto out;
4475 }
4476 converted_nnm = nfscmd_convname(ca, cs->exi, nnm, NFSCMD_CONV_INBOUND,
4477 MAXPATHLEN + 1);
4478
4479 if (converted_nnm == NULL) {
4480 *cs->statusp = resp->status = NFS4ERR_INVAL;
4481 kmem_free(nnm, nlen);
4482 nnm = NULL;
4483 if (onm != converted_onm)
4484 kmem_free(converted_onm, MAXPATHLEN + 1);
4485 kmem_free(onm, olen);
4486 goto out;
4487 }
4488
4489
4490 if (olen > MAXNAMELEN || nlen > MAXNAMELEN) {
4491 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4492 kmem_free(onm, olen);
4493 kmem_free(nnm, nlen);
4494 goto out;
4495 }
4496
4497
4498 if (rdonly4(req, cs)) {
4499 *cs->statusp = resp->status = NFS4ERR_ROFS;
4500 if (onm != converted_onm)
4501 kmem_free(converted_onm, MAXPATHLEN + 1);
4502 kmem_free(onm, olen);
4503 if (nnm != converted_nnm)
4504 kmem_free(converted_nnm, MAXPATHLEN + 1);
4505 kmem_free(nnm, nlen);
4506 goto out;
4507 }
4508
4509 /* check label of the target dir */
4510 if (is_system_labeled()) {
4511 ASSERT(req->rq_label != NULL);
4512 clabel = req->rq_label;
4513 DTRACE_PROBE2(tx__rfs4__log__info__oprename__clabel, char *,
4514 "got client label from request(1)",
4515 struct svc_req *, req);
4516 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4517 if (!do_rfs_label_check(clabel, ndvp,
4518 EQUALITY_CHECK, cs->exi)) {
4519 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4520 goto err_out;
4521 }
4522 }
4523 }
4524
4525 /*
4526 * Is the source a file and have a delegation?
4527 * We don't need to acquire va_seq before these lookups, if
4528 * it causes an update, cinfo.before will not match, which will
4529 * trigger a cache flush even if atomic is TRUE.
4530 */
4531 if (sfp = rfs4_lookup_and_findfile(odvp, converted_onm, &srcvp,
4532 &error, cs->cr)) {
4533 if (rfs4_check_delegated_byfp(FWRITE, sfp, TRUE, TRUE, TRUE,
4534 NULL)) {
4535 *cs->statusp = resp->status = NFS4ERR_DELAY;
4536 goto err_out;
4537 }
4538 }
4539
4540 if (srcvp == NULL) {
4541 *cs->statusp = resp->status = puterrno4(error);
4542 if (onm != converted_onm)
4543 kmem_free(converted_onm, MAXPATHLEN + 1);
4544 kmem_free(onm, olen);
4545 if (nnm != converted_nnm)
4546 kmem_free(converted_nnm, MAXPATHLEN + 1);
4547 kmem_free(nnm, nlen);
4548 goto out;
4549 }
4550
4551 sfp_rele_grant_hold = 1;
4552
4553 /* Does the destination exist and a file and have a delegation? */
4554 if (fp = rfs4_lookup_and_findfile(ndvp, converted_nnm, &targvp,
4555 NULL, cs->cr)) {
4556 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4557 NULL)) {
4558 *cs->statusp = resp->status = NFS4ERR_DELAY;
4559 goto err_out;
4560 }
4561 }
4562 fp_rele_grant_hold = 1;
4563
4564 /* Check for NBMAND lock on both source and target */
4565 if (nbl_need_check(srcvp)) {
4566 nbl_start_crit(srcvp, RW_READER);
4567 in_crit_src = 1;
4568 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
4569 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4570 goto err_out;
4571 }
4572 }
4573
4574 if (targvp && nbl_need_check(targvp)) {
4575 nbl_start_crit(targvp, RW_READER);
4576 in_crit_targ = 1;
4577 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
4578 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4579 goto err_out;
4580 }
4581 }
4582
4583 /* Get source "before" change value */
4584 obdva.va_mask = AT_CTIME|AT_SEQ;
4585 error = VOP_GETATTR(odvp, &obdva, 0, cs->cr, NULL);
4586 if (!error) {
4587 nbdva.va_mask = AT_CTIME|AT_SEQ;
4588 error = VOP_GETATTR(ndvp, &nbdva, 0, cs->cr, NULL);
4589 }
4590 if (error) {
4591 *cs->statusp = resp->status = puterrno4(error);
4592 goto err_out;
4593 }
4594
4595 NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.before, obdva.va_ctime)
4596 NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.before, nbdva.va_ctime)
4597
4598 error = VOP_RENAME(odvp, converted_onm, ndvp, converted_nnm, cs->cr,
4599 NULL, 0);
4600
4601 /*
4602 * If target existed and was unlinked by VOP_RENAME, state will need
4603 * closed. To avoid deadlock, rfs4_close_all_state will be done after
4604 * any necessary nbl_end_crit on srcvp and tgtvp.
4605 */
4606 if (error == 0 && fp != NULL) {
4607 rfs4_dbe_lock(fp->rf_dbe);
4608 tvp = fp->rf_vp;
4609 if (tvp)
4610 VN_HOLD(tvp);
4611 rfs4_dbe_unlock(fp->rf_dbe);
4612
4613 if (tvp) {
4614 struct vattr va;
4615 va.va_mask = AT_NLINK;
4616
4617 if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4618 va.va_nlink == 0) {
4619 unlinked = 1;
4620
4621 /* DEBUG data */
4622 if ((srcvp == targvp) || (tvp != targvp)) {
4623 cmn_err(CE_WARN, "rfs4_op_rename: "
4624 "srcvp %p, targvp: %p, tvp: %p",
4625 (void *)srcvp, (void *)targvp,
4626 (void *)tvp);
4627 }
4628 } else {
4629 VN_RELE(tvp);
4630 }
4631 }
4632 }
4633 if (error == 0)
4634 vn_renamepath(ndvp, srcvp, nnm, nlen - 1);
4635
4636 if (in_crit_src)
4637 nbl_end_crit(srcvp);
4638 if (srcvp)
4639 VN_RELE(srcvp);
4640 if (in_crit_targ)
4641 nbl_end_crit(targvp);
4642 if (targvp)
4643 VN_RELE(targvp);
4644
4645 if (unlinked) {
4646 ASSERT(fp != NULL);
4647 ASSERT(tvp != NULL);
4648
4649 /* DEBUG data */
4650 if (RW_READ_HELD(&tvp->v_nbllock)) {
4651 cmn_err(CE_WARN, "rfs4_op_rename: "
4652 "RW_READ_HELD(%p)", (void *)tvp);
4653 }
4654
4655 /* The file is gone and so should the state */
4656 rfs4_close_all_state(fp);
4657 VN_RELE(tvp);
4658 }
4659
4660 if (sfp) {
4661 rfs4_clear_dont_grant(sfp);
4662 rfs4_file_rele(sfp);
4663 }
4664 if (fp) {
4665 rfs4_clear_dont_grant(fp);
4666 rfs4_file_rele(fp);
4667 }
4668
4669 if (converted_onm != onm)
4670 kmem_free(converted_onm, MAXPATHLEN + 1);
4671 kmem_free(onm, olen);
4672 if (converted_nnm != nnm)
4673 kmem_free(converted_nnm, MAXPATHLEN + 1);
4674 kmem_free(nnm, nlen);
4675
4676 /*
4677 * Get the initial "after" sequence number, if it fails, set to zero
4678 */
4679 oidva.va_mask = AT_SEQ;
4680 if (VOP_GETATTR(odvp, &oidva, 0, cs->cr, NULL))
4681 oidva.va_seq = 0;
4682
4683 nidva.va_mask = AT_SEQ;
4684 if (VOP_GETATTR(ndvp, &nidva, 0, cs->cr, NULL))
4685 nidva.va_seq = 0;
4686
4687 /*
4688 * Force modified data and metadata out to stable storage.
4689 */
4690 (void) VOP_FSYNC(odvp, 0, cs->cr, NULL);
4691 (void) VOP_FSYNC(ndvp, 0, cs->cr, NULL);
4692
4693 if (error) {
4694 *cs->statusp = resp->status = puterrno4(error);
4695 goto out;
4696 }
4697
4698 /*
4699 * Get "after" change values, if it fails, simply return the
4700 * before value.
4701 */
4702 oadva.va_mask = AT_CTIME|AT_SEQ;
4703 if (VOP_GETATTR(odvp, &oadva, 0, cs->cr, NULL)) {
4704 oadva.va_ctime = obdva.va_ctime;
4705 oadva.va_seq = 0;
4706 }
4707
4708 nadva.va_mask = AT_CTIME|AT_SEQ;
4709 if (VOP_GETATTR(odvp, &nadva, 0, cs->cr, NULL)) {
4710 nadva.va_ctime = nbdva.va_ctime;
4711 nadva.va_seq = 0;
4712 }
4713
4714 NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.after, oadva.va_ctime)
4715 NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.after, nadva.va_ctime)
4716
4717 /*
4718 * The cinfo.atomic = TRUE only if we have
4719 * non-zero va_seq's, and it has incremented by exactly one
4720 * during the VOP_RENAME and it didn't change during the VOP_FSYNC.
4721 */
4722 if (obdva.va_seq && oidva.va_seq && oadva.va_seq &&
4723 oidva.va_seq == (obdva.va_seq + 1) && oidva.va_seq == oadva.va_seq)
4724 resp->source_cinfo.atomic = TRUE;
4725 else
4726 resp->source_cinfo.atomic = FALSE;
4727
4728 if (nbdva.va_seq && nidva.va_seq && nadva.va_seq &&
4729 nidva.va_seq == (nbdva.va_seq + 1) && nidva.va_seq == nadva.va_seq)
4730 resp->target_cinfo.atomic = TRUE;
4731 else
4732 resp->target_cinfo.atomic = FALSE;
4733
4734 #ifdef VOLATILE_FH_TEST
4735 {
4736 extern void add_volrnm_fh(struct exportinfo *, vnode_t *);
4737
4738 /*
4739 * Add the renamed file handle to the volatile rename list
4740 */
4741 if (cs->exi->exi_export.ex_flags & EX_VOLRNM) {
4742 /* file handles may expire on rename */
4743 vnode_t *vp;
4744
4745 nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4746 /*
4747 * Already know that nnm will be a valid string
4748 */
4749 error = VOP_LOOKUP(ndvp, nnm, &vp, NULL, 0, NULL, cs->cr,
4750 NULL, NULL, NULL);
4751 kmem_free(nnm, nlen);
4752 if (!error) {
4753 add_volrnm_fh(cs->exi, vp);
4754 VN_RELE(vp);
4755 }
4756 }
4757 }
4758 #endif /* VOLATILE_FH_TEST */
4759
4760 *cs->statusp = resp->status = NFS4_OK;
4761 out:
4762 DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4763 RENAME4res *, resp);
4764 return;
4765
4766 err_out:
4767 if (onm != converted_onm)
4768 kmem_free(converted_onm, MAXPATHLEN + 1);
4769 if (onm != NULL)
4770 kmem_free(onm, olen);
4771 if (nnm != converted_nnm)
4772 kmem_free(converted_nnm, MAXPATHLEN + 1);
4773 if (nnm != NULL)
4774 kmem_free(nnm, nlen);
4775
4776 if (in_crit_src) nbl_end_crit(srcvp);
4777 if (in_crit_targ) nbl_end_crit(targvp);
4778 if (targvp) VN_RELE(targvp);
4779 if (srcvp) VN_RELE(srcvp);
4780 if (sfp) {
4781 if (sfp_rele_grant_hold) rfs4_clear_dont_grant(sfp);
4782 rfs4_file_rele(sfp);
4783 }
4784 if (fp) {
4785 if (fp_rele_grant_hold) rfs4_clear_dont_grant(fp);
4786 rfs4_file_rele(fp);
4787 }
4788
4789 DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4790 RENAME4res *, resp);
4791 }
4792
4793 /* ARGSUSED */
4794 static void
4795 rfs4_op_renew(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4796 struct compound_state *cs)
4797 {
4798 RENEW4args *args = &argop->nfs_argop4_u.oprenew;
4799 RENEW4res *resp = &resop->nfs_resop4_u.oprenew;
4800 rfs4_client_t *cp;
4801
4802 DTRACE_NFSV4_2(op__renew__start, struct compound_state *, cs,
4803 RENEW4args *, args);
4804
4805 if ((cp = rfs4_findclient_by_id(args->clientid, FALSE)) == NULL) {
4806 *cs->statusp = resp->status =
4807 rfs4_check_clientid(&args->clientid, 0);
4808 goto out;
4809 }
4810
4811 if (rfs4_lease_expired(cp)) {
4812 rfs4_client_rele(cp);
4813 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
4814 goto out;
4815 }
4816
4817 rfs4_update_lease(cp);
4818
4819 mutex_enter(cp->rc_cbinfo.cb_lock);
4820 if (cp->rc_cbinfo.cb_notified_of_cb_path_down == FALSE) {
4821 cp->rc_cbinfo.cb_notified_of_cb_path_down = TRUE;
4822 *cs->statusp = resp->status = NFS4ERR_CB_PATH_DOWN;
4823 } else {
4824 *cs->statusp = resp->status = NFS4_OK;
4825 }
4826 mutex_exit(cp->rc_cbinfo.cb_lock);
4827
4828 rfs4_client_rele(cp);
4829
4830 out:
4831 DTRACE_NFSV4_2(op__renew__done, struct compound_state *, cs,
4832 RENEW4res *, resp);
4833 }
4834
4835 /* ARGSUSED */
4836 static void
4837 rfs4_op_restorefh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
4838 struct compound_state *cs)
4839 {
4840 RESTOREFH4res *resp = &resop->nfs_resop4_u.oprestorefh;
4841
4842 DTRACE_NFSV4_1(op__restorefh__start, struct compound_state *, cs);
4843
4844 /* No need to check cs->access - we are not accessing any object */
4845 if ((cs->saved_vp == NULL) || (cs->saved_fh.nfs_fh4_val == NULL)) {
4846 *cs->statusp = resp->status = NFS4ERR_RESTOREFH;
4847 goto out;
4848 }
4849 if (cs->vp != NULL) {
4850 VN_RELE(cs->vp);
4851 }
4852 cs->vp = cs->saved_vp;
4853 cs->saved_vp = NULL;
4854 cs->exi = cs->saved_exi;
4855 nfs_fh4_copy(&cs->saved_fh, &cs->fh);
4856 *cs->statusp = resp->status = NFS4_OK;
4857 cs->deleg = FALSE;
4858
4859 out:
4860 DTRACE_NFSV4_2(op__restorefh__done, struct compound_state *, cs,
4861 RESTOREFH4res *, resp);
4862 }
4863
4864 /* ARGSUSED */
4865 static void
4866 rfs4_op_savefh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4867 struct compound_state *cs)
4868 {
4869 SAVEFH4res *resp = &resop->nfs_resop4_u.opsavefh;
4870
4871 DTRACE_NFSV4_1(op__savefh__start, struct compound_state *, cs);
4872
4873 /* No need to check cs->access - we are not accessing any object */
4874 if (cs->vp == NULL) {
4875 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4876 goto out;
4877 }
4878 if (cs->saved_vp != NULL) {
4879 VN_RELE(cs->saved_vp);
4880 }
4881 cs->saved_vp = cs->vp;
4882 VN_HOLD(cs->saved_vp);
4883 cs->saved_exi = cs->exi;
4884 /*
4885 * since SAVEFH is fairly rare, don't alloc space for its fh
4886 * unless necessary.
4887 */
4888 if (cs->saved_fh.nfs_fh4_val == NULL) {
4889 cs->saved_fh.nfs_fh4_val = kmem_alloc(NFS4_FHSIZE, KM_SLEEP);
4890 }
4891 nfs_fh4_copy(&cs->fh, &cs->saved_fh);
4892 *cs->statusp = resp->status = NFS4_OK;
4893
4894 out:
4895 DTRACE_NFSV4_2(op__savefh__done, struct compound_state *, cs,
4896 SAVEFH4res *, resp);
4897 }
4898
4899 /*
4900 * rfs4_verify_attr is called when nfsv4 Setattr failed, but we wish to
4901 * return the bitmap of attrs that were set successfully. It is also
4902 * called by Verify/Nverify to test the vattr/vfsstat attrs. It should
4903 * always be called only after rfs4_do_set_attrs().
4904 *
4905 * Verify that the attributes are same as the expected ones. sargp->vap
4906 * and sargp->sbp contain the input attributes as translated from fattr4.
4907 *
4908 * This function verifies only the attrs that correspond to a vattr or
4909 * vfsstat struct. That is because of the extra step needed to get the
4910 * corresponding system structs. Other attributes have already been set or
4911 * verified by do_rfs4_set_attrs.
4912 *
4913 * Return 0 if all attrs match, -1 if some don't, error if error processing.
4914 */
4915 static int
4916 rfs4_verify_attr(struct nfs4_svgetit_arg *sargp,
4917 bitmap4 *resp, struct nfs4_ntov_table *ntovp)
4918 {
4919 int error, ret_error = 0;
4920 int i, k;
4921 uint_t sva_mask = sargp->vap->va_mask;
4922 uint_t vbit;
4923 union nfs4_attr_u *na;
4924 uint8_t *amap;
4925 bool_t getsb = ntovp->vfsstat;
4926
4927 if (sva_mask != 0) {
4928 /*
4929 * Okay to overwrite sargp->vap because we verify based
4930 * on the incoming values.
4931 */
4932 ret_error = VOP_GETATTR(sargp->cs->vp, sargp->vap, 0,
4933 sargp->cs->cr, NULL);
4934 if (ret_error) {
4935 if (resp == NULL)
4936 return (ret_error);
4937 /*
4938 * Must return bitmap of successful attrs
4939 */
4940 sva_mask = 0; /* to prevent checking vap later */
4941 } else {
4942 /*
4943 * Some file systems clobber va_mask. it is probably
4944 * wrong of them to do so, nonethless we practice
4945 * defensive coding.
4946 * See bug id 4276830.
4947 */
4948 sargp->vap->va_mask = sva_mask;
4949 }
4950 }
4951
4952 if (getsb) {
4953 /*
4954 * Now get the superblock and loop on the bitmap, as there is
4955 * no simple way of translating from superblock to bitmap4.
4956 */
4957 ret_error = VFS_STATVFS(sargp->cs->vp->v_vfsp, sargp->sbp);
4958 if (ret_error) {
4959 if (resp == NULL)
4960 goto errout;
4961 getsb = FALSE;
4962 }
4963 }
4964
4965 /*
4966 * Now loop and verify each attribute which getattr returned
4967 * whether it's the same as the input.
4968 */
4969 if (resp == NULL && !getsb && (sva_mask == 0))
4970 goto errout;
4971
4972 na = ntovp->na;
4973 amap = ntovp->amap;
4974 k = 0;
4975 for (i = 0; i < ntovp->attrcnt; i++, na++, amap++) {
4976 k = *amap;
4977 ASSERT(nfs4_ntov_map[k].nval == k);
4978 vbit = nfs4_ntov_map[k].vbit;
4979
4980 /*
4981 * If vattr attribute but VOP_GETATTR failed, or it's
4982 * superblock attribute but VFS_STATVFS failed, skip
4983 */
4984 if (vbit) {
4985 if ((vbit & sva_mask) == 0)
4986 continue;
4987 } else if (!(getsb && nfs4_ntov_map[k].vfsstat)) {
4988 continue;
4989 }
4990 error = (*nfs4_ntov_map[k].sv_getit)(NFS4ATTR_VERIT, sargp, na);
4991 if (resp != NULL) {
4992 if (error)
4993 ret_error = -1; /* not all match */
4994 else /* update response bitmap */
4995 *resp |= nfs4_ntov_map[k].fbit;
4996 continue;
4997 }
4998 if (error) {
4999 ret_error = -1; /* not all match */
5000 break;
5001 }
5002 }
5003 errout:
5004 return (ret_error);
5005 }
5006
5007 /*
5008 * Decode the attribute to be set/verified. If the attr requires a sys op
5009 * (VOP_GETATTR, VFS_VFSSTAT), and the request is to verify, then don't
5010 * call the sv_getit function for it, because the sys op hasn't yet been done.
5011 * Return 0 for success, error code if failed.
5012 *
5013 * Note: the decoded arg is not freed here but in nfs4_ntov_table_free.
5014 */
5015 static int
5016 decode_fattr4_attr(nfs4_attr_cmd_t cmd, struct nfs4_svgetit_arg *sargp,
5017 int k, XDR *xdrp, bitmap4 *resp_bval, union nfs4_attr_u *nap)
5018 {
5019 int error = 0;
5020 bool_t set_later;
5021
5022 sargp->vap->va_mask |= nfs4_ntov_map[k].vbit;
5023
5024 if ((*nfs4_ntov_map[k].xfunc)(xdrp, nap)) {
5025 set_later = nfs4_ntov_map[k].vbit || nfs4_ntov_map[k].vfsstat;
5026 /*
5027 * don't verify yet if a vattr or sb dependent attr,
5028 * because we don't have their sys values yet.
5029 * Will be done later.
5030 */
5031 if (! (set_later && (cmd == NFS4ATTR_VERIT))) {
5032 /*
5033 * ACLs are a special case, since setting the MODE
5034 * conflicts with setting the ACL. We delay setting
5035 * the ACL until all other attributes have been set.
5036 * The ACL gets set in do_rfs4_op_setattr().
5037 */
5038 if (nfs4_ntov_map[k].fbit != FATTR4_ACL_MASK) {
5039 error = (*nfs4_ntov_map[k].sv_getit)(cmd,
5040 sargp, nap);
5041 if (error) {
5042 xdr_free(nfs4_ntov_map[k].xfunc,
5043 (caddr_t)nap);
5044 }
5045 }
5046 }
5047 } else {
5048 #ifdef DEBUG
5049 cmn_err(CE_NOTE, "decode_fattr4_attr: error "
5050 "decoding attribute %d\n", k);
5051 #endif
5052 error = EINVAL;
5053 }
5054 if (!error && resp_bval && !set_later) {
5055 *resp_bval |= nfs4_ntov_map[k].fbit;
5056 }
5057
5058 return (error);
5059 }
5060
5061 /*
5062 * Set vattr based on incoming fattr4 attrs - used by setattr.
5063 * Set response mask. Ignore any values that are not writable vattr attrs.
5064 */
5065 static nfsstat4
5066 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5067 struct nfs4_svgetit_arg *sargp, struct nfs4_ntov_table *ntovp,
5068 nfs4_attr_cmd_t cmd)
5069 {
5070 int error = 0;
5071 int i;
5072 char *attrs = fattrp->attrlist4;
5073 uint32_t attrslen = fattrp->attrlist4_len;
5074 XDR xdr;
5075 nfsstat4 status = NFS4_OK;
5076 vnode_t *vp = cs->vp;
5077 union nfs4_attr_u *na;
5078 uint8_t *amap;
5079
5080 #ifndef lint
5081 /*
5082 * Make sure that maximum attribute number can be expressed as an
5083 * 8 bit quantity.
5084 */
5085 ASSERT(NFS4_MAXNUM_ATTRS <= (UINT8_MAX + 1));
5086 #endif
5087
5088 if (vp == NULL) {
5089 if (resp)
5090 *resp = 0;
5091 return (NFS4ERR_NOFILEHANDLE);
5092 }
5093 if (cs->access == CS_ACCESS_DENIED) {
5094 if (resp)
5095 *resp = 0;
5096 return (NFS4ERR_ACCESS);
5097 }
5098
5099 sargp->op = cmd;
5100 sargp->cs = cs;
5101 sargp->flag = 0; /* may be set later */
5102 sargp->vap->va_mask = 0;
5103 sargp->rdattr_error = NFS4_OK;
5104 sargp->rdattr_error_req = FALSE;
5105 /* sargp->sbp is set by the caller */
5106
5107 xdrmem_create(&xdr, attrs, attrslen, XDR_DECODE);
5108
5109 na = ntovp->na;
5110 amap = ntovp->amap;
5111
5112 /*
5113 * The following loop iterates on the nfs4_ntov_map checking
5114 * if the fbit is set in the requested bitmap.
5115 * If set then we process the arguments using the
5116 * rfs4_fattr4 conversion functions to populate the setattr
5117 * vattr and va_mask. Any settable attrs that are not using vattr
5118 * will be set in this loop.
5119 */
5120 for (i = 0; i < nfs4_ntov_map_size; i++) {
5121 if (!(fattrp->attrmask & nfs4_ntov_map[i].fbit)) {
5122 continue;
5123 }
5124 /*
5125 * If setattr, must be a writable attr.
5126 * If verify/nverify, must be a readable attr.
5127 */
5128 if ((error = (*nfs4_ntov_map[i].sv_getit)(
5129 NFS4ATTR_SUPPORTED, sargp, NULL)) != 0) {
5130 /*
5131 * Client tries to set/verify an
5132 * unsupported attribute, tries to set
5133 * a read only attr or verify a write
5134 * only one - error!
5135 */
5136 break;
5137 }
5138 /*
5139 * Decode the attribute to set/verify
5140 */
5141 error = decode_fattr4_attr(cmd, sargp, nfs4_ntov_map[i].nval,
5142 &xdr, resp ? resp : NULL, na);
5143 if (error)
5144 break;
5145 *amap++ = (uint8_t)nfs4_ntov_map[i].nval;
5146 na++;
5147 (ntovp->attrcnt)++;
5148 if (nfs4_ntov_map[i].vfsstat)
5149 ntovp->vfsstat = TRUE;
5150 }
5151
5152 if (error != 0)
5153 status = (error == ENOTSUP ? NFS4ERR_ATTRNOTSUPP :
5154 puterrno4(error));
5155 /* xdrmem_destroy(&xdrs); */ /* NO-OP */
5156 return (status);
5157 }
5158
5159 static nfsstat4
5160 do_rfs4_op_setattr(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5161 stateid4 *stateid)
5162 {
5163 int error = 0;
5164 struct nfs4_svgetit_arg sarg;
5165 bool_t trunc;
5166
5167 nfsstat4 status = NFS4_OK;
5168 cred_t *cr = cs->cr;
5169 vnode_t *vp = cs->vp;
5170 struct nfs4_ntov_table ntov;
5171 struct statvfs64 sb;
5172 struct vattr bva;
5173 struct flock64 bf;
5174 int in_crit = 0;
5175 uint_t saved_mask = 0;
5176 caller_context_t ct;
5177
5178 *resp = 0;
5179 sarg.sbp = &sb;
5180 sarg.is_referral = B_FALSE;
5181 nfs4_ntov_table_init(&ntov);
5182 status = do_rfs4_set_attrs(resp, fattrp, cs, &sarg, &ntov,
5183 NFS4ATTR_SETIT);
5184 if (status != NFS4_OK) {
5185 /*
5186 * failed set attrs
5187 */
5188 goto done;
5189 }
5190 if ((sarg.vap->va_mask == 0) &&
5191 (! (fattrp->attrmask & FATTR4_ACL_MASK))) {
5192 /*
5193 * no further work to be done
5194 */
5195 goto done;
5196 }
5197
5198 /*
5199 * If we got a request to set the ACL and the MODE, only
5200 * allow changing VSUID, VSGID, and VSVTX. Attempting
5201 * to change any other bits, along with setting an ACL,
5202 * gives NFS4ERR_INVAL.
5203 */
5204 if ((fattrp->attrmask & FATTR4_ACL_MASK) &&
5205 (fattrp->attrmask & FATTR4_MODE_MASK)) {
5206 vattr_t va;
5207
5208 va.va_mask = AT_MODE;
5209 error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
5210 if (error) {
5211 status = puterrno4(error);
5212 goto done;
5213 }
5214 if ((sarg.vap->va_mode ^ va.va_mode) &
5215 ~(VSUID | VSGID | VSVTX)) {
5216 status = NFS4ERR_INVAL;
5217 goto done;
5218 }
5219 }
5220
5221 /* Check stateid only if size has been set */
5222 if (sarg.vap->va_mask & AT_SIZE) {
5223 trunc = (sarg.vap->va_size == 0);
5224 status = rfs4_check_stateid(FWRITE, cs->vp, stateid,
5225 trunc, &cs->deleg, sarg.vap->va_mask & AT_SIZE, &ct);
5226 if (status != NFS4_OK)
5227 goto done;
5228 } else {
5229 ct.cc_sysid = 0;
5230 ct.cc_pid = 0;
5231 ct.cc_caller_id = nfs4_srv_caller_id;
5232 ct.cc_flags = CC_DONTBLOCK;
5233 }
5234
5235 /* XXX start of possible race with delegations */
5236
5237 /*
5238 * We need to specially handle size changes because it is
5239 * possible for the client to create a file with read-only
5240 * modes, but with the file opened for writing. If the client
5241 * then tries to set the file size, e.g. ftruncate(3C),
5242 * fcntl(F_FREESP), the normal access checking done in
5243 * VOP_SETATTR would prevent the client from doing it even though
5244 * it should be allowed to do so. To get around this, we do the
5245 * access checking for ourselves and use VOP_SPACE which doesn't
5246 * do the access checking.
5247 * Also the client should not be allowed to change the file
5248 * size if there is a conflicting non-blocking mandatory lock in
5249 * the region of the change.
5250 */
5251 if (vp->v_type == VREG && (sarg.vap->va_mask & AT_SIZE)) {
5252 u_offset_t offset;
5253 ssize_t length;
5254
5255 /*
5256 * ufs_setattr clears AT_SIZE from vap->va_mask, but
5257 * before returning, sarg.vap->va_mask is used to
5258 * generate the setattr reply bitmap. We also clear
5259 * AT_SIZE below before calling VOP_SPACE. For both
5260 * of these cases, the va_mask needs to be saved here
5261 * and restored after calling VOP_SETATTR.
5262 */
5263 saved_mask = sarg.vap->va_mask;
5264
5265 /*
5266 * Check any possible conflict due to NBMAND locks.
5267 * Get into critical region before VOP_GETATTR, so the
5268 * size attribute is valid when checking conflicts.
5269 */
5270 if (nbl_need_check(vp)) {
5271 nbl_start_crit(vp, RW_READER);
5272 in_crit = 1;
5273 }
5274
5275 bva.va_mask = AT_UID|AT_SIZE;
5276 if (error = VOP_GETATTR(vp, &bva, 0, cr, &ct)) {
5277 status = puterrno4(error);
5278 goto done;
5279 }
5280
5281 if (in_crit) {
5282 if (sarg.vap->va_size < bva.va_size) {
5283 offset = sarg.vap->va_size;
5284 length = bva.va_size - sarg.vap->va_size;
5285 } else {
5286 offset = bva.va_size;
5287 length = sarg.vap->va_size - bva.va_size;
5288 }
5289 if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
5290 &ct)) {
5291 status = NFS4ERR_LOCKED;
5292 goto done;
5293 }
5294 }
5295
5296 if (crgetuid(cr) == bva.va_uid) {
5297 sarg.vap->va_mask &= ~AT_SIZE;
5298 bf.l_type = F_WRLCK;
5299 bf.l_whence = 0;
5300 bf.l_start = (off64_t)sarg.vap->va_size;
5301 bf.l_len = 0;
5302 bf.l_sysid = 0;
5303 bf.l_pid = 0;
5304 error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
5305 (offset_t)sarg.vap->va_size, cr, &ct);
5306 }
5307 }
5308
5309 if (!error && sarg.vap->va_mask != 0)
5310 error = VOP_SETATTR(vp, sarg.vap, sarg.flag, cr, &ct);
5311
5312 /* restore va_mask -- ufs_setattr clears AT_SIZE */
5313 if (saved_mask & AT_SIZE)
5314 sarg.vap->va_mask |= AT_SIZE;
5315
5316 /*
5317 * If an ACL was being set, it has been delayed until now,
5318 * in order to set the mode (via the VOP_SETATTR() above) first.
5319 */
5320 if ((! error) && (fattrp->attrmask & FATTR4_ACL_MASK)) {
5321 int i;
5322
5323 for (i = 0; i < NFS4_MAXNUM_ATTRS; i++)
5324 if (ntov.amap[i] == FATTR4_ACL)
5325 break;
5326 if (i < NFS4_MAXNUM_ATTRS) {
5327 error = (*nfs4_ntov_map[FATTR4_ACL].sv_getit)(
5328 NFS4ATTR_SETIT, &sarg, &ntov.na[i]);
5329 if (error == 0) {
5330 *resp |= FATTR4_ACL_MASK;
5331 } else if (error == ENOTSUP) {
5332 (void) rfs4_verify_attr(&sarg, resp, &ntov);
5333 status = NFS4ERR_ATTRNOTSUPP;
5334 goto done;
5335 }
5336 } else {
5337 NFS4_DEBUG(rfs4_debug,
5338 (CE_NOTE, "do_rfs4_op_setattr: "
5339 "unable to find ACL in fattr4"));
5340 error = EINVAL;
5341 }
5342 }
5343
5344 if (error) {
5345 /* check if a monitor detected a delegation conflict */
5346 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
5347 status = NFS4ERR_DELAY;
5348 else
5349 status = puterrno4(error);
5350
5351 /*
5352 * Set the response bitmap when setattr failed.
5353 * If VOP_SETATTR partially succeeded, test by doing a
5354 * VOP_GETATTR on the object and comparing the data
5355 * to the setattr arguments.
5356 */
5357 (void) rfs4_verify_attr(&sarg, resp, &ntov);
5358 } else {
5359 /*
5360 * Force modified metadata out to stable storage.
5361 */
5362 (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
5363 /*
5364 * Set response bitmap
5365 */
5366 nfs4_vmask_to_nmask_set(sarg.vap->va_mask, resp);
5367 }
5368
5369 /* Return early and already have a NFSv4 error */
5370 done:
5371 /*
5372 * Except for nfs4_vmask_to_nmask_set(), vattr --> fattr
5373 * conversion sets both readable and writeable NFS4 attrs
5374 * for AT_MTIME and AT_ATIME. The line below masks out
5375 * unrequested attrs from the setattr result bitmap. This
5376 * is placed after the done: label to catch the ATTRNOTSUP
5377 * case.
5378 */
5379 *resp &= fattrp->attrmask;
5380
5381 if (in_crit)
5382 nbl_end_crit(vp);
5383
5384 nfs4_ntov_table_free(&ntov, &sarg);
5385
5386 return (status);
5387 }
5388
5389 /* ARGSUSED */
5390 static void
5391 rfs4_op_setattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5392 struct compound_state *cs)
5393 {
5394 SETATTR4args *args = &argop->nfs_argop4_u.opsetattr;
5395 SETATTR4res *resp = &resop->nfs_resop4_u.opsetattr;
5396 bslabel_t *clabel;
5397
5398 DTRACE_NFSV4_2(op__setattr__start, struct compound_state *, cs,
5399 SETATTR4args *, args);
5400
5401 if (cs->vp == NULL) {
5402 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5403 goto out;
5404 }
5405
5406 /*
5407 * If there is an unshared filesystem mounted on this vnode,
5408 * do not allow to setattr on this vnode.
5409 */
5410 if (vn_ismntpt(cs->vp)) {
5411 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5412 goto out;
5413 }
5414
5415 resp->attrsset = 0;
5416
5417 if (rdonly4(req, cs)) {
5418 *cs->statusp = resp->status = NFS4ERR_ROFS;
5419 goto out;
5420 }
5421
5422 /* check label before setting attributes */
5423 if (is_system_labeled()) {
5424 ASSERT(req->rq_label != NULL);
5425 clabel = req->rq_label;
5426 DTRACE_PROBE2(tx__rfs4__log__info__opsetattr__clabel, char *,
5427 "got client label from request(1)",
5428 struct svc_req *, req);
5429 if (!blequal(&l_admin_low->tsl_label, clabel)) {
5430 if (!do_rfs_label_check(clabel, cs->vp,
5431 EQUALITY_CHECK, cs->exi)) {
5432 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5433 goto out;
5434 }
5435 }
5436 }
5437
5438 *cs->statusp = resp->status =
5439 do_rfs4_op_setattr(&resp->attrsset, &args->obj_attributes, cs,
5440 &args->stateid);
5441
5442 out:
5443 DTRACE_NFSV4_2(op__setattr__done, struct compound_state *, cs,
5444 SETATTR4res *, resp);
5445 }
5446
5447 /* ARGSUSED */
5448 static void
5449 rfs4_op_verify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5450 struct compound_state *cs)
5451 {
5452 /*
5453 * verify and nverify are exactly the same, except that nverify
5454 * succeeds when some argument changed, and verify succeeds when
5455 * when none changed.
5456 */
5457
5458 VERIFY4args *args = &argop->nfs_argop4_u.opverify;
5459 VERIFY4res *resp = &resop->nfs_resop4_u.opverify;
5460
5461 int error;
5462 struct nfs4_svgetit_arg sarg;
5463 struct statvfs64 sb;
5464 struct nfs4_ntov_table ntov;
5465
5466 DTRACE_NFSV4_2(op__verify__start, struct compound_state *, cs,
5467 VERIFY4args *, args);
5468
5469 if (cs->vp == NULL) {
5470 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5471 goto out;
5472 }
5473
5474 sarg.sbp = &sb;
5475 sarg.is_referral = B_FALSE;
5476 nfs4_ntov_table_init(&ntov);
5477 resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5478 &sarg, &ntov, NFS4ATTR_VERIT);
5479 if (resp->status != NFS4_OK) {
5480 /*
5481 * do_rfs4_set_attrs will try to verify systemwide attrs,
5482 * so could return -1 for "no match".
5483 */
5484 if (resp->status == -1)
5485 resp->status = NFS4ERR_NOT_SAME;
5486 goto done;
5487 }
5488 error = rfs4_verify_attr(&sarg, NULL, &ntov);
5489 switch (error) {
5490 case 0:
5491 resp->status = NFS4_OK;
5492 break;
5493 case -1:
5494 resp->status = NFS4ERR_NOT_SAME;
5495 break;
5496 default:
5497 resp->status = puterrno4(error);
5498 break;
5499 }
5500 done:
5501 *cs->statusp = resp->status;
5502 nfs4_ntov_table_free(&ntov, &sarg);
5503 out:
5504 DTRACE_NFSV4_2(op__verify__done, struct compound_state *, cs,
5505 VERIFY4res *, resp);
5506 }
5507
5508 /* ARGSUSED */
5509 static void
5510 rfs4_op_nverify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5511 struct compound_state *cs)
5512 {
5513 /*
5514 * verify and nverify are exactly the same, except that nverify
5515 * succeeds when some argument changed, and verify succeeds when
5516 * when none changed.
5517 */
5518
5519 NVERIFY4args *args = &argop->nfs_argop4_u.opnverify;
5520 NVERIFY4res *resp = &resop->nfs_resop4_u.opnverify;
5521
5522 int error;
5523 struct nfs4_svgetit_arg sarg;
5524 struct statvfs64 sb;
5525 struct nfs4_ntov_table ntov;
5526
5527 DTRACE_NFSV4_2(op__nverify__start, struct compound_state *, cs,
5528 NVERIFY4args *, args);
5529
5530 if (cs->vp == NULL) {
5531 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5532 DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5533 NVERIFY4res *, resp);
5534 return;
5535 }
5536 sarg.sbp = &sb;
5537 sarg.is_referral = B_FALSE;
5538 nfs4_ntov_table_init(&ntov);
5539 resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5540 &sarg, &ntov, NFS4ATTR_VERIT);
5541 if (resp->status != NFS4_OK) {
5542 /*
5543 * do_rfs4_set_attrs will try to verify systemwide attrs,
5544 * so could return -1 for "no match".
5545 */
5546 if (resp->status == -1)
5547 resp->status = NFS4_OK;
5548 goto done;
5549 }
5550 error = rfs4_verify_attr(&sarg, NULL, &ntov);
5551 switch (error) {
5552 case 0:
5553 resp->status = NFS4ERR_SAME;
5554 break;
5555 case -1:
5556 resp->status = NFS4_OK;
5557 break;
5558 default:
5559 resp->status = puterrno4(error);
5560 break;
5561 }
5562 done:
5563 *cs->statusp = resp->status;
5564 nfs4_ntov_table_free(&ntov, &sarg);
5565
5566 DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5567 NVERIFY4res *, resp);
5568 }
5569
/*
 * Size of the on-stack iovec array used by rfs4_op_write(); an mblk
 * chain needing more entries than this gets a kmem_alloc'ed array
 * instead.
 *
 * XXX - This should live in an NFS header file.
 */
#define	MAX_IOVECS	12
5574
5575 /* ARGSUSED */
5576 static void
5577 rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5578 struct compound_state *cs)
5579 {
5580 WRITE4args *args = &argop->nfs_argop4_u.opwrite;
5581 WRITE4res *resp = &resop->nfs_resop4_u.opwrite;
5582 int error;
5583 vnode_t *vp;
5584 struct vattr bva;
5585 u_offset_t rlimit;
5586 struct uio uio;
5587 struct iovec iov[MAX_IOVECS];
5588 struct iovec *iovp;
5589 int iovcnt;
5590 int ioflag;
5591 cred_t *savecred, *cr;
5592 bool_t *deleg = &cs->deleg;
5593 nfsstat4 stat;
5594 int in_crit = 0;
5595 caller_context_t ct;
5596 nfs4_srv_t *nsrv4;
5597
5598 DTRACE_NFSV4_2(op__write__start, struct compound_state *, cs,
5599 WRITE4args *, args);
5600
5601 vp = cs->vp;
5602 if (vp == NULL) {
5603 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5604 goto out;
5605 }
5606 if (cs->access == CS_ACCESS_DENIED) {
5607 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5608 goto out;
5609 }
5610
5611 cr = cs->cr;
5612
5613 if ((stat = rfs4_check_stateid(FWRITE, vp, &args->stateid, FALSE,
5614 deleg, TRUE, &ct)) != NFS4_OK) {
5615 *cs->statusp = resp->status = stat;
5616 goto out;
5617 }
5618
5619 /*
5620 * We have to enter the critical region before calling VOP_RWLOCK
5621 * to avoid a deadlock with ufs.
5622 */
5623 if (nbl_need_check(vp)) {
5624 nbl_start_crit(vp, RW_READER);
5625 in_crit = 1;
5626 if (nbl_conflict(vp, NBL_WRITE,
5627 args->offset, args->data_len, 0, &ct)) {
5628 *cs->statusp = resp->status = NFS4ERR_LOCKED;
5629 goto out;
5630 }
5631 }
5632
5633 bva.va_mask = AT_MODE | AT_UID;
5634 error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
5635
5636 /*
5637 * If we can't get the attributes, then we can't do the
5638 * right access checking. So, we'll fail the request.
5639 */
5640 if (error) {
5641 *cs->statusp = resp->status = puterrno4(error);
5642 goto out;
5643 }
5644
5645 if (rdonly4(req, cs)) {
5646 *cs->statusp = resp->status = NFS4ERR_ROFS;
5647 goto out;
5648 }
5649
5650 if (vp->v_type != VREG) {
5651 *cs->statusp = resp->status =
5652 ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
5653 goto out;
5654 }
5655
5656 if (crgetuid(cr) != bva.va_uid &&
5657 (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct))) {
5658 *cs->statusp = resp->status = puterrno4(error);
5659 goto out;
5660 }
5661
5662 if (MANDLOCK(vp, bva.va_mode)) {
5663 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5664 goto out;
5665 }
5666
5667 nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
5668 if (args->data_len == 0) {
5669 *cs->statusp = resp->status = NFS4_OK;
5670 resp->count = 0;
5671 resp->committed = args->stable;
5672 resp->writeverf = nsrv4->write4verf;
5673 goto out;
5674 }
5675
5676 if (args->mblk != NULL) {
5677 mblk_t *m;
5678 uint_t bytes, round_len;
5679
5680 iovcnt = 0;
5681 bytes = 0;
5682 round_len = roundup(args->data_len, BYTES_PER_XDR_UNIT);
5683 for (m = args->mblk;
5684 m != NULL && bytes < round_len;
5685 m = m->b_cont) {
5686 iovcnt++;
5687 bytes += MBLKL(m);
5688 }
5689 #ifdef DEBUG
5690 /* should have ended on an mblk boundary */
5691 if (bytes != round_len) {
5692 printf("bytes=0x%x, round_len=0x%x, req len=0x%x\n",
5693 bytes, round_len, args->data_len);
5694 printf("args=%p, args->mblk=%p, m=%p", (void *)args,
5695 (void *)args->mblk, (void *)m);
5696 ASSERT(bytes == round_len);
5697 }
5698 #endif
5699 if (iovcnt <= MAX_IOVECS) {
5700 iovp = iov;
5701 } else {
5702 iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
5703 }
5704 mblk_to_iov(args->mblk, iovcnt, iovp);
5705 } else if (args->rlist != NULL) {
5706 iovcnt = 1;
5707 iovp = iov;
5708 iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
5709 iovp->iov_len = args->data_len;
5710 } else {
5711 iovcnt = 1;
5712 iovp = iov;
5713 iovp->iov_base = args->data_val;
5714 iovp->iov_len = args->data_len;
5715 }
5716
5717 uio.uio_iov = iovp;
5718 uio.uio_iovcnt = iovcnt;
5719
5720 uio.uio_segflg = UIO_SYSSPACE;
5721 uio.uio_extflg = UIO_COPY_DEFAULT;
5722 uio.uio_loffset = args->offset;
5723 uio.uio_resid = args->data_len;
5724 uio.uio_llimit = curproc->p_fsz_ctl;
5725 rlimit = uio.uio_llimit - args->offset;
5726 if (rlimit < (u_offset_t)uio.uio_resid)
5727 uio.uio_resid = (int)rlimit;
5728
5729 if (args->stable == UNSTABLE4)
5730 ioflag = 0;
5731 else if (args->stable == FILE_SYNC4)
5732 ioflag = FSYNC;
5733 else if (args->stable == DATA_SYNC4)
5734 ioflag = FDSYNC;
5735 else {
5736 if (iovp != iov)
5737 kmem_free(iovp, sizeof (*iovp) * iovcnt);
5738 *cs->statusp = resp->status = NFS4ERR_INVAL;
5739 goto out;
5740 }
5741
5742 /*
5743 * We're changing creds because VM may fault and we need
5744 * the cred of the current thread to be used if quota
5745 * checking is enabled.
5746 */
5747 savecred = curthread->t_cred;
5748 curthread->t_cred = cr;
5749 error = do_io(FWRITE, vp, &uio, ioflag, cr, &ct);
5750 curthread->t_cred = savecred;
5751
5752 if (iovp != iov)
5753 kmem_free(iovp, sizeof (*iovp) * iovcnt);
5754
5755 if (error) {
5756 *cs->statusp = resp->status = puterrno4(error);
5757 goto out;
5758 }
5759
5760 *cs->statusp = resp->status = NFS4_OK;
5761 resp->count = args->data_len - uio.uio_resid;
5762
5763 if (ioflag == 0)
5764 resp->committed = UNSTABLE4;
5765 else
5766 resp->committed = FILE_SYNC4;
5767
5768 resp->writeverf = nsrv4->write4verf;
5769
5770 out:
5771 if (in_crit)
5772 nbl_end_crit(vp);
5773
5774 DTRACE_NFSV4_2(op__write__done, struct compound_state *, cs,
5775 WRITE4res *, resp);
5776 }
5777
5778
5779 /* XXX put in a header file */
5780 extern int sec_svc_getcred(struct svc_req *, cred_t *, caddr_t *, int *);
5781
5782 void
5783 rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi,
5784 struct svc_req *req, cred_t *cr, int *rv)
5785 {
5786 uint_t i;
5787 struct compound_state cs;
5788 nfs4_srv_t *nsrv4;
5789 nfs_export_t *ne = nfs_get_export();
5790
5791 if (rv != NULL)
5792 *rv = 0;
5793 rfs4_init_compound_state(&cs);
5794 /*
5795 * Form a reply tag by copying over the reqeuest tag.
5796 */
5797 resp->tag.utf8string_val =
5798 kmem_alloc(args->tag.utf8string_len, KM_SLEEP);
5799 resp->tag.utf8string_len = args->tag.utf8string_len;
5800 bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
5801 resp->tag.utf8string_len);
5802
5803 cs.statusp = &resp->status;
5804 cs.req = req;
5805 resp->array = NULL;
5806 resp->array_len = 0;
5807
5808 /*
5809 * XXX for now, minorversion should be zero
5810 */
5811 if (args->minorversion != NFS4_MINORVERSION) {
5812 DTRACE_NFSV4_2(compound__start, struct compound_state *,
5813 &cs, COMPOUND4args *, args);
5814 resp->status = NFS4ERR_MINOR_VERS_MISMATCH;
5815 DTRACE_NFSV4_2(compound__done, struct compound_state *,
5816 &cs, COMPOUND4res *, resp);
5817 return;
5818 }
5819
5820 if (args->array_len == 0) {
5821 resp->status = NFS4_OK;
5822 return;
5823 }
5824
5825 ASSERT(exi == NULL);
5826 ASSERT(cr == NULL);
5827
5828 cr = crget();
5829 ASSERT(cr != NULL);
5830
5831 if (sec_svc_getcred(req, cr, &cs.principal, &cs.nfsflavor) == 0) {
5832 DTRACE_NFSV4_2(compound__start, struct compound_state *,
5833 &cs, COMPOUND4args *, args);
5834 crfree(cr);
5835 DTRACE_NFSV4_2(compound__done, struct compound_state *,
5836 &cs, COMPOUND4res *, resp);
5837 svcerr_badcred(req->rq_xprt);
5838 if (rv != NULL)
5839 *rv = 1;
5840 return;
5841 }
5842 resp->array_len = args->array_len;
5843 resp->array = kmem_zalloc(args->array_len * sizeof (nfs_resop4),
5844 KM_SLEEP);
5845
5846 cs.basecr = cr;
5847 nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
5848
5849 DTRACE_NFSV4_2(compound__start, struct compound_state *, &cs,
5850 COMPOUND4args *, args);
5851
5852 /*
5853 * For now, NFS4 compound processing must be protected by
5854 * exported_lock because it can access more than one exportinfo
5855 * per compound and share/unshare can now change multiple
5856 * exinfo structs. The NFS2/3 code only refs 1 exportinfo
5857 * per proc (excluding public exinfo), and exi_count design
5858 * is sufficient to protect concurrent execution of NFS2/3
5859 * ops along with unexport. This lock will be removed as
5860 * part of the NFSv4 phase 2 namespace redesign work.
5861 */
5862 rw_enter(&ne->exported_lock, RW_READER);
5863
5864 /*
5865 * If this is the first compound we've seen, we need to start all
5866 * new instances' grace periods.
5867 */
5868 if (nsrv4->seen_first_compound == 0) {
5869 rfs4_grace_start_new(nsrv4);
5870 /*
5871 * This must be set after rfs4_grace_start_new(), otherwise
5872 * another thread could proceed past here before the former
5873 * is finished.
5874 */
5875 nsrv4->seen_first_compound = 1;
5876 }
5877
5878 for (i = 0; i < args->array_len && cs.cont; i++) {
5879 nfs_argop4 *argop;
5880 nfs_resop4 *resop;
5881 uint_t op;
5882
5883 argop = &args->array[i];
5884 resop = &resp->array[i];
5885 resop->resop = argop->argop;
5886 op = (uint_t)resop->resop;
5887
5888 if (op < rfsv4disp_cnt) {
5889 /*
5890 * Count the individual ops here; NULL and COMPOUND
5891 * are counted in common_dispatch()
5892 */
5893 rfsproccnt_v4_ptr[op].value.ui64++;
5894
5895 NFS4_DEBUG(rfs4_debug > 1,
5896 (CE_NOTE, "Executing %s", rfs4_op_string[op]));
5897 (*rfsv4disptab[op].dis_proc)(argop, resop, req, &cs);
5898 NFS4_DEBUG(rfs4_debug > 1, (CE_NOTE, "%s returned %d",
5899 rfs4_op_string[op], *cs.statusp));
5900 if (*cs.statusp != NFS4_OK)
5901 cs.cont = FALSE;
5902 } else {
5903 /*
5904 * This is effectively dead code since XDR code
5905 * will have already returned BADXDR if op doesn't
5906 * decode to legal value. This only done for a
5907 * day when XDR code doesn't verify v4 opcodes.
5908 */
5909 op = OP_ILLEGAL;
5910 rfsproccnt_v4_ptr[OP_ILLEGAL_IDX].value.ui64++;
5911
5912 rfs4_op_illegal(argop, resop, req, &cs);
5913 cs.cont = FALSE;
5914 }
5915
5916 /*
5917 * If not at last op, and if we are to stop, then
5918 * compact the results array.
5919 */
5920 if ((i + 1) < args->array_len && !cs.cont) {
5921 nfs_resop4 *new_res = kmem_alloc(
5922 (i+1) * sizeof (nfs_resop4), KM_SLEEP);
5923 bcopy(resp->array,
5924 new_res, (i+1) * sizeof (nfs_resop4));
5925 kmem_free(resp->array,
5926 args->array_len * sizeof (nfs_resop4));
5927
5928 resp->array_len = i + 1;
5929 resp->array = new_res;
5930 }
5931 }
5932
5933 rw_exit(&ne->exported_lock);
5934
5935 /*
5936 * clear exportinfo and vnode fields from compound_state before dtrace
5937 * probe, to avoid tracing residual values for path and share path.
5938 */
5939 if (cs.vp)
5940 VN_RELE(cs.vp);
5941 if (cs.saved_vp)
5942 VN_RELE(cs.saved_vp);
5943 cs.exi = cs.saved_exi = NULL;
5944 cs.vp = cs.saved_vp = NULL;
5945
5946 DTRACE_NFSV4_2(compound__done, struct compound_state *, &cs,
5947 COMPOUND4res *, resp);
5948
5949 if (cs.saved_fh.nfs_fh4_val)
5950 kmem_free(cs.saved_fh.nfs_fh4_val, NFS4_FHSIZE);
5951
5952 if (cs.basecr)
5953 crfree(cs.basecr);
5954 if (cs.cr)
5955 crfree(cs.cr);
5956 /*
5957 * done with this compound request, free the label
5958 */
5959
5960 if (req->rq_label != NULL) {
5961 kmem_free(req->rq_label, sizeof (bslabel_t));
5962 req->rq_label = NULL;
5963 }
5964 }
5965
5966 /*
5967 * XXX because of what appears to be duplicate calls to rfs4_compound_free
5968 * XXX zero out the tag and array values. Need to investigate why the
5969 * XXX calls occur, but at least prevent the panic for now.
5970 */
5971 void
5972 rfs4_compound_free(COMPOUND4res *resp)
5973 {
5974 uint_t i;
5975
5976 if (resp->tag.utf8string_val) {
5977 UTF8STRING_FREE(resp->tag)
5978 }
5979
5980 for (i = 0; i < resp->array_len; i++) {
5981 nfs_resop4 *resop;
5982 uint_t op;
5983
5984 resop = &resp->array[i];
5985 op = (uint_t)resop->resop;
5986 if (op < rfsv4disp_cnt) {
5987 (*rfsv4disptab[op].dis_resfree)(resop);
5988 }
5989 }
5990 if (resp->array != NULL) {
5991 kmem_free(resp->array, resp->array_len * sizeof (nfs_resop4));
5992 }
5993 }
5994
5995 /*
5996 * Process the value of the compound request rpc flags, as a bit-AND
5997 * of the individual per-op flags (idempotent, allowork, publicfh_ok)
5998 */
5999 void
6000 rfs4_compound_flagproc(COMPOUND4args *args, int *flagp)
6001 {
6002 int i;
6003 int flag = RPC_ALL;
6004
6005 for (i = 0; flag && i < args->array_len; i++) {
6006 uint_t op;
6007
6008 op = (uint_t)args->array[i].argop;
6009
6010 if (op < rfsv4disp_cnt)
6011 flag &= rfsv4disptab[op].dis_flags;
6012 else
6013 flag = 0;
6014 }
6015 *flagp = flag;
6016 }
6017
6018 nfsstat4
6019 rfs4_client_sysid(rfs4_client_t *cp, sysid_t *sp)
6020 {
6021 nfsstat4 e;
6022
6023 rfs4_dbe_lock(cp->rc_dbe);
6024
6025 if (cp->rc_sysidt != LM_NOSYSID) {
6026 *sp = cp->rc_sysidt;
6027 e = NFS4_OK;
6028
6029 } else if ((cp->rc_sysidt = lm_alloc_sysidt()) != LM_NOSYSID) {
6030 *sp = cp->rc_sysidt;
6031 e = NFS4_OK;
6032
6033 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
6034 "rfs4_client_sysid: allocated 0x%x\n", *sp));
6035 } else
6036 e = NFS4ERR_DELAY;
6037
6038 rfs4_dbe_unlock(cp->rc_dbe);
6039 return (e);
6040 }
6041
#if defined(DEBUG) && ! defined(lint)
/*
 * Debug helper: log one flock64 request via cmn_err(CE_NOTE), prefixed
 * by str.  The fcntl operation and the lock type are shown by name; a
 * zero l_len is printed as all ones.
 */
static void
lock_print(char *str, int operation, struct flock64 *flk)
{
	char *op_name;
	char *type_name;
	longlong_t len;

	if (operation == F_GETLK)
		op_name = "F_GETLK";
	else if (operation == F_SETLK)
		op_name = "F_SETLK";
	else if (operation == F_SETLK_NBMAND)
		op_name = "F_SETLK_NBMAND";
	else
		op_name = "F_UNKNOWN";

	if (flk->l_type == F_UNLCK)
		type_name = "F_UNLCK";
	else if (flk->l_type == F_RDLCK)
		type_name = "F_RDLCK";
	else if (flk->l_type == F_WRLCK)
		type_name = "F_WRLCK";
	else
		type_name = "F_UNKNOWN";

	ASSERT(flk->l_whence == 0);

	len = flk->l_len ? (longlong_t)flk->l_len : ~0LL;
	cmn_err(CE_NOTE, "%s: %s, type = %s, off = %llx len = %llx pid = %d",
	    str, op_name, type_name, (longlong_t)flk->l_start, len,
	    flk->l_pid);
}

/* Call lock_print() only when the debug condition d is true. */
#define	LOCK_PRINT(d, s, t, f) if (d) lock_print(s, t, f)
#else
/* Compiled out entirely in non-DEBUG (or lint) builds. */
#define	LOCK_PRINT(d, s, t, f)
#endif
6078
6079 /*ARGSUSED*/
6080 static bool_t
6081 creds_ok(cred_set_t cr_set, struct svc_req *req, struct compound_state *cs)
6082 {
6083 return (TRUE);
6084 }
6085
6086 /*
6087 * Look up the pathname using the vp in cs as the directory vnode.
6088 * cs->vp will be the vnode for the file on success
6089 */
6090
6091 static nfsstat4
6092 rfs4_lookup(component4 *component, struct svc_req *req,
6093 struct compound_state *cs)
6094 {
6095 char *nm;
6096 uint32_t len;
6097 nfsstat4 status;
6098 struct sockaddr *ca;
6099 char *name;
6100
6101 if (cs->vp == NULL) {
6102 return (NFS4ERR_NOFILEHANDLE);
6103 }
6104 if (cs->vp->v_type != VDIR) {
6105 return (NFS4ERR_NOTDIR);
6106 }
6107
6108 status = utf8_dir_verify(component);
6109 if (status != NFS4_OK)
6110 return (status);
6111
6112 nm = utf8_to_fn(component, &len, NULL);
6113 if (nm == NULL) {
6114 return (NFS4ERR_INVAL);
6115 }
6116
6117 if (len > MAXNAMELEN) {
6118 kmem_free(nm, len);
6119 return (NFS4ERR_NAMETOOLONG);
6120 }
6121
6122 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6123 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6124 MAXPATHLEN + 1);
6125
6126 if (name == NULL) {
6127 kmem_free(nm, len);
6128 return (NFS4ERR_INVAL);
6129 }
6130
6131 status = do_rfs4_op_lookup(name, req, cs);
6132
6133 if (name != nm)
6134 kmem_free(name, MAXPATHLEN + 1);
6135
6136 kmem_free(nm, len);
6137
6138 return (status);
6139 }
6140
6141 static nfsstat4
6142 rfs4_lookupfile(component4 *component, struct svc_req *req,
6143 struct compound_state *cs, uint32_t access, change_info4 *cinfo)
6144 {
6145 nfsstat4 status;
6146 vnode_t *dvp = cs->vp;
6147 vattr_t bva, ava, fva;
6148 int error;
6149
6150 /* Get "before" change value */
6151 bva.va_mask = AT_CTIME|AT_SEQ;
6152 error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6153 if (error)
6154 return (puterrno4(error));
6155
6156 /* rfs4_lookup may VN_RELE directory */
6157 VN_HOLD(dvp);
6158
6159 status = rfs4_lookup(component, req, cs);
6160 if (status != NFS4_OK) {
6161 VN_RELE(dvp);
6162 return (status);
6163 }
6164
6165 /*
6166 * Get "after" change value, if it fails, simply return the
6167 * before value.
6168 */
6169 ava.va_mask = AT_CTIME|AT_SEQ;
6170 if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6171 ava.va_ctime = bva.va_ctime;
6172 ava.va_seq = 0;
6173 }
6174 VN_RELE(dvp);
6175
6176 /*
6177 * Validate the file is a file
6178 */
6179 fva.va_mask = AT_TYPE|AT_MODE;
6180 error = VOP_GETATTR(cs->vp, &fva, 0, cs->cr, NULL);
6181 if (error)
6182 return (puterrno4(error));
6183
6184 if (fva.va_type != VREG) {
6185 if (fva.va_type == VDIR)
6186 return (NFS4ERR_ISDIR);
6187 if (fva.va_type == VLNK)
6188 return (NFS4ERR_SYMLINK);
6189 return (NFS4ERR_INVAL);
6190 }
6191
6192 NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime);
6193 NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6194
6195 /*
6196 * It is undefined if VOP_LOOKUP will change va_seq, so
6197 * cinfo.atomic = TRUE only if we have
6198 * non-zero va_seq's, and they have not changed.
6199 */
6200 if (bva.va_seq && ava.va_seq && ava.va_seq == bva.va_seq)
6201 cinfo->atomic = TRUE;
6202 else
6203 cinfo->atomic = FALSE;
6204
6205 /* Check for mandatory locking */
6206 cs->mandlock = MANDLOCK(cs->vp, fva.va_mode);
6207 return (check_open_access(access, cs, req));
6208 }
6209
6210 static nfsstat4
6211 create_vnode(vnode_t *dvp, char *nm, vattr_t *vap, createmode4 mode,
6212 cred_t *cr, vnode_t **vpp, bool_t *created)
6213 {
6214 int error;
6215 nfsstat4 status = NFS4_OK;
6216 vattr_t va;
6217
6218 tryagain:
6219
6220 /*
6221 * The file open mode used is VWRITE. If the client needs
6222 * some other semantic, then it should do the access checking
6223 * itself. It would have been nice to have the file open mode
6224 * passed as part of the arguments.
6225 */
6226
6227 *created = TRUE;
6228 error = VOP_CREATE(dvp, nm, vap, EXCL, VWRITE, vpp, cr, 0, NULL, NULL);
6229
6230 if (error) {
6231 *created = FALSE;
6232
6233 /*
6234 * If we got something other than file already exists
6235 * then just return this error. Otherwise, we got
6236 * EEXIST. If we were doing a GUARDED create, then
6237 * just return this error. Otherwise, we need to
6238 * make sure that this wasn't a duplicate of an
6239 * exclusive create request.
6240 *
6241 * The assumption is made that a non-exclusive create
6242 * request will never return EEXIST.
6243 */
6244
6245 if (error != EEXIST || mode == GUARDED4) {
6246 status = puterrno4(error);
6247 return (status);
6248 }
6249 error = VOP_LOOKUP(dvp, nm, vpp, NULL, 0, NULL, cr,
6250 NULL, NULL, NULL);
6251
6252 if (error) {
6253 /*
6254 * We couldn't find the file that we thought that
6255 * we just created. So, we'll just try creating
6256 * it again.
6257 */
6258 if (error == ENOENT)
6259 goto tryagain;
6260
6261 status = puterrno4(error);
6262 return (status);
6263 }
6264
6265 if (mode == UNCHECKED4) {
6266 /* existing object must be regular file */
6267 if ((*vpp)->v_type != VREG) {
6268 if ((*vpp)->v_type == VDIR)
6269 status = NFS4ERR_ISDIR;
6270 else if ((*vpp)->v_type == VLNK)
6271 status = NFS4ERR_SYMLINK;
6272 else
6273 status = NFS4ERR_INVAL;
6274 VN_RELE(*vpp);
6275 return (status);
6276 }
6277
6278 return (NFS4_OK);
6279 }
6280
6281 /* Check for duplicate request */
6282 va.va_mask = AT_MTIME;
6283 error = VOP_GETATTR(*vpp, &va, 0, cr, NULL);
6284 if (!error) {
6285 /* We found the file */
6286 const timestruc_t *mtime = &vap->va_mtime;
6287
6288 if (va.va_mtime.tv_sec != mtime->tv_sec ||
6289 va.va_mtime.tv_nsec != mtime->tv_nsec) {
6290 /* but its not our creation */
6291 VN_RELE(*vpp);
6292 return (NFS4ERR_EXIST);
6293 }
6294 *created = TRUE; /* retrans of create == created */
6295 return (NFS4_OK);
6296 }
6297 VN_RELE(*vpp);
6298 return (NFS4ERR_EXIST);
6299 }
6300
6301 return (NFS4_OK);
6302 }
6303
6304 static nfsstat4
6305 check_open_access(uint32_t access, struct compound_state *cs,
6306 struct svc_req *req)
6307 {
6308 int error;
6309 vnode_t *vp;
6310 bool_t readonly;
6311 cred_t *cr = cs->cr;
6312
6313 /* For now we don't allow mandatory locking as per V2/V3 */
6314 if (cs->access == CS_ACCESS_DENIED || cs->mandlock) {
6315 return (NFS4ERR_ACCESS);
6316 }
6317
6318 vp = cs->vp;
6319 ASSERT(cr != NULL && vp->v_type == VREG);
6320
6321 /*
6322 * If the file system is exported read only and we are trying
6323 * to open for write, then return NFS4ERR_ROFS
6324 */
6325
6326 readonly = rdonly4(req, cs);
6327
6328 if ((access & OPEN4_SHARE_ACCESS_WRITE) && readonly)
6329 return (NFS4ERR_ROFS);
6330
6331 if (access & OPEN4_SHARE_ACCESS_READ) {
6332 if ((VOP_ACCESS(vp, VREAD, 0, cr, NULL) != 0) &&
6333 (VOP_ACCESS(vp, VEXEC, 0, cr, NULL) != 0)) {
6334 return (NFS4ERR_ACCESS);
6335 }
6336 }
6337
6338 if (access & OPEN4_SHARE_ACCESS_WRITE) {
6339 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
6340 if (error)
6341 return (NFS4ERR_ACCESS);
6342 }
6343
6344 return (NFS4_OK);
6345 }
6346
6347 static nfsstat4
6348 rfs4_createfile(OPEN4args *args, struct svc_req *req, struct compound_state *cs,
6349 change_info4 *cinfo, bitmap4 *attrset, clientid4 clientid)
6350 {
6351 struct nfs4_svgetit_arg sarg;
6352 struct nfs4_ntov_table ntov;
6353
6354 bool_t ntov_table_init = FALSE;
6355 struct statvfs64 sb;
6356 nfsstat4 status;
6357 vnode_t *vp;
6358 vattr_t bva, ava, iva, cva, *vap;
6359 vnode_t *dvp;
6360 timespec32_t *mtime;
6361 char *nm = NULL;
6362 uint_t buflen;
6363 bool_t created;
6364 bool_t setsize = FALSE;
6365 len_t reqsize;
6366 int error;
6367 bool_t trunc;
6368 caller_context_t ct;
6369 component4 *component;
6370 bslabel_t *clabel;
6371 struct sockaddr *ca;
6372 char *name = NULL;
6373
6374 sarg.sbp = &sb;
6375 sarg.is_referral = B_FALSE;
6376
6377 dvp = cs->vp;
6378
6379 /* Check if the file system is read only */
6380 if (rdonly4(req, cs))
6381 return (NFS4ERR_ROFS);
6382
6383 /* check the label of including directory */
6384 if (is_system_labeled()) {
6385 ASSERT(req->rq_label != NULL);
6386 clabel = req->rq_label;
6387 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
6388 "got client label from request(1)",
6389 struct svc_req *, req);
6390 if (!blequal(&l_admin_low->tsl_label, clabel)) {
6391 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
6392 cs->exi)) {
6393 return (NFS4ERR_ACCESS);
6394 }
6395 }
6396 }
6397
6398 /*
6399 * Get the last component of path name in nm. cs will reference
6400 * the including directory on success.
6401 */
6402 component = &args->open_claim4_u.file;
6403 status = utf8_dir_verify(component);
6404 if (status != NFS4_OK)
6405 return (status);
6406
6407 nm = utf8_to_fn(component, &buflen, NULL);
6408
6409 if (nm == NULL)
6410 return (NFS4ERR_RESOURCE);
6411
6412 if (buflen > MAXNAMELEN) {
6413 kmem_free(nm, buflen);
6414 return (NFS4ERR_NAMETOOLONG);
6415 }
6416
6417 bva.va_mask = AT_TYPE|AT_CTIME|AT_SEQ;
6418 error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6419 if (error) {
6420 kmem_free(nm, buflen);
6421 return (puterrno4(error));
6422 }
6423
6424 if (bva.va_type != VDIR) {
6425 kmem_free(nm, buflen);
6426 return (NFS4ERR_NOTDIR);
6427 }
6428
6429 NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime)
6430
6431 switch (args->mode) {
6432 case GUARDED4:
6433 /*FALLTHROUGH*/
6434 case UNCHECKED4:
6435 nfs4_ntov_table_init(&ntov);
6436 ntov_table_init = TRUE;
6437
6438 *attrset = 0;
6439 status = do_rfs4_set_attrs(attrset,
6440 &args->createhow4_u.createattrs,
6441 cs, &sarg, &ntov, NFS4ATTR_SETIT);
6442
6443 if (status == NFS4_OK && (sarg.vap->va_mask & AT_TYPE) &&
6444 sarg.vap->va_type != VREG) {
6445 if (sarg.vap->va_type == VDIR)
6446 status = NFS4ERR_ISDIR;
6447 else if (sarg.vap->va_type == VLNK)
6448 status = NFS4ERR_SYMLINK;
6449 else
6450 status = NFS4ERR_INVAL;
6451 }
6452
6453 if (status != NFS4_OK) {
6454 kmem_free(nm, buflen);
6455 nfs4_ntov_table_free(&ntov, &sarg);
6456 *attrset = 0;
6457 return (status);
6458 }
6459
6460 vap = sarg.vap;
6461 vap->va_type = VREG;
6462 vap->va_mask |= AT_TYPE;
6463
6464 if ((vap->va_mask & AT_MODE) == 0) {
6465 vap->va_mask |= AT_MODE;
6466 vap->va_mode = (mode_t)0600;
6467 }
6468
6469 if (vap->va_mask & AT_SIZE) {
6470
6471 /* Disallow create with a non-zero size */
6472
6473 if ((reqsize = sarg.vap->va_size) != 0) {
6474 kmem_free(nm, buflen);
6475 nfs4_ntov_table_free(&ntov, &sarg);
6476 *attrset = 0;
6477 return (NFS4ERR_INVAL);
6478 }
6479 setsize = TRUE;
6480 }
6481 break;
6482
6483 case EXCLUSIVE4:
6484 /* prohibit EXCL create of named attributes */
6485 if (dvp->v_flag & V_XATTRDIR) {
6486 kmem_free(nm, buflen);
6487 *attrset = 0;
6488 return (NFS4ERR_INVAL);
6489 }
6490
6491 cva.va_mask = AT_TYPE | AT_MTIME | AT_MODE;
6492 cva.va_type = VREG;
6493 /*
6494 * Ensure no time overflows. Assumes underlying
6495 * filesystem supports at least 32 bits.
6496 * Truncate nsec to usec resolution to allow valid
6497 * compares even if the underlying filesystem truncates.
6498 */
6499 mtime = (timespec32_t *)&args->createhow4_u.createverf;
6500 cva.va_mtime.tv_sec = mtime->tv_sec % TIME32_MAX;
6501 cva.va_mtime.tv_nsec = (mtime->tv_nsec / 1000) * 1000;
6502 cva.va_mode = (mode_t)0;
6503 vap = &cva;
6504
6505 /*
6506 * For EXCL create, attrset is set to the server attr
6507 * used to cache the client's verifier.
6508 */
6509 *attrset = FATTR4_TIME_MODIFY_MASK;
6510 break;
6511 }
6512
6513 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6514 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6515 MAXPATHLEN + 1);
6516
6517 if (name == NULL) {
6518 kmem_free(nm, buflen);
6519 return (NFS4ERR_SERVERFAULT);
6520 }
6521
6522 status = create_vnode(dvp, name, vap, args->mode,
6523 cs->cr, &vp, &created);
6524 if (nm != name)
6525 kmem_free(name, MAXPATHLEN + 1);
6526 kmem_free(nm, buflen);
6527
6528 if (status != NFS4_OK) {
6529 if (ntov_table_init)
6530 nfs4_ntov_table_free(&ntov, &sarg);
6531 *attrset = 0;
6532 return (status);
6533 }
6534
6535 trunc = (setsize && !created);
6536
6537 if (args->mode != EXCLUSIVE4) {
6538 bitmap4 createmask = args->createhow4_u.createattrs.attrmask;
6539
6540 /*
6541 * True verification that object was created with correct
6542 * attrs is impossible. The attrs could have been changed
6543 * immediately after object creation. If attributes did
6544 * not verify, the only recourse for the server is to
6545 * destroy the object. Maybe if some attrs (like gid)
6546 * are set incorrectly, the object should be destroyed;
6547 * however, seems bad as a default policy. Do we really
6548 * want to destroy an object over one of the times not
6549 * verifying correctly? For these reasons, the server
6550 * currently sets bits in attrset for createattrs
6551 * that were set; however, no verification is done.
6552 *
6553 * vmask_to_nmask accounts for vattr bits set on create
6554 * [do_rfs4_set_attrs() only sets resp bits for
6555 * non-vattr/vfs bits.]
6556 * Mask off any bits we set by default so as not to return
6557 * more attrset bits than were requested in createattrs
6558 */
6559 if (created) {
6560 nfs4_vmask_to_nmask(sarg.vap->va_mask, attrset);
6561 *attrset &= createmask;
6562 } else {
6563 /*
6564 * We did not create the vnode (we tried but it
6565 * already existed). In this case, the only createattr
6566 * that the spec allows the server to set is size,
6567 * and even then, it can only be set if it is 0.
6568 */
6569 *attrset = 0;
6570 if (trunc)
6571 *attrset = FATTR4_SIZE_MASK;
6572 }
6573 }
6574 if (ntov_table_init)
6575 nfs4_ntov_table_free(&ntov, &sarg);
6576
6577 /*
6578 * Get the initial "after" sequence number, if it fails,
6579 * set to zero, time to before.
6580 */
6581 iva.va_mask = AT_CTIME|AT_SEQ;
6582 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL)) {
6583 iva.va_seq = 0;
6584 iva.va_ctime = bva.va_ctime;
6585 }
6586
6587 /*
6588 * create_vnode attempts to create the file exclusive,
6589 * if it already exists the VOP_CREATE will fail and
6590 * may not increase va_seq. It is atomic if
6591 * we haven't changed the directory, but if it has changed
6592 * we don't know what changed it.
6593 */
6594 if (!created) {
6595 if (bva.va_seq && iva.va_seq &&
6596 bva.va_seq == iva.va_seq)
6597 cinfo->atomic = TRUE;
6598 else
6599 cinfo->atomic = FALSE;
6600 NFS4_SET_FATTR4_CHANGE(cinfo->after, iva.va_ctime);
6601 } else {
6602 /*
6603 * The entry was created, we need to sync the
6604 * directory metadata.
6605 */
6606 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
6607
6608 /*
6609 * Get "after" change value, if it fails, simply return the
6610 * before value.
6611 */
6612 ava.va_mask = AT_CTIME|AT_SEQ;
6613 if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6614 ava.va_ctime = bva.va_ctime;
6615 ava.va_seq = 0;
6616 }
6617
6618 NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6619
6620 /*
6621 * The cinfo->atomic = TRUE only if we have
6622 * non-zero va_seq's, and it has incremented by exactly one
6623 * during the create_vnode and it didn't
6624 * change during the VOP_FSYNC.
6625 */
6626 if (bva.va_seq && iva.va_seq && ava.va_seq &&
6627 iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
6628 cinfo->atomic = TRUE;
6629 else
6630 cinfo->atomic = FALSE;
6631 }
6632
6633 /* Check for mandatory locking and that the size gets set. */
6634 cva.va_mask = AT_MODE;
6635 if (setsize)
6636 cva.va_mask |= AT_SIZE;
6637
6638 /* Assume the worst */
6639 cs->mandlock = TRUE;
6640
6641 if (VOP_GETATTR(vp, &cva, 0, cs->cr, NULL) == 0) {
6642 cs->mandlock = MANDLOCK(cs->vp, cva.va_mode);
6643
6644 /*
6645 * Truncate the file if necessary; this would be
6646 * the case for create over an existing file.
6647 */
6648
6649 if (trunc) {
6650 int in_crit = 0;
6651 rfs4_file_t *fp;
6652 nfs4_srv_t *nsrv4;
6653 bool_t create = FALSE;
6654
6655 /*
6656 * We are writing over an existing file.
6657 * Check to see if we need to recall a delegation.
6658 */
6659 nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
6660 rfs4_hold_deleg_policy(nsrv4);
6661 if ((fp = rfs4_findfile(vp, NULL, &create)) != NULL) {
6662 if (rfs4_check_delegated_byfp(FWRITE, fp,
6663 (reqsize == 0), FALSE, FALSE, &clientid)) {
6664 rfs4_file_rele(fp);
6665 rfs4_rele_deleg_policy(nsrv4);
6666 VN_RELE(vp);
6667 *attrset = 0;
6668 return (NFS4ERR_DELAY);
6669 }
6670 rfs4_file_rele(fp);
6671 }
6672 rfs4_rele_deleg_policy(nsrv4);
6673
6674 if (nbl_need_check(vp)) {
6675 in_crit = 1;
6676
6677 ASSERT(reqsize == 0);
6678
6679 nbl_start_crit(vp, RW_READER);
6680 if (nbl_conflict(vp, NBL_WRITE, 0,
6681 cva.va_size, 0, NULL)) {
6682 in_crit = 0;
6683 nbl_end_crit(vp);
6684 VN_RELE(vp);
6685 *attrset = 0;
6686 return (NFS4ERR_ACCESS);
6687 }
6688 }
6689 ct.cc_sysid = 0;
6690 ct.cc_pid = 0;
6691 ct.cc_caller_id = nfs4_srv_caller_id;
6692 ct.cc_flags = CC_DONTBLOCK;
6693
6694 cva.va_mask = AT_SIZE;
6695 cva.va_size = reqsize;
6696 (void) VOP_SETATTR(vp, &cva, 0, cs->cr, &ct);
6697 if (in_crit)
6698 nbl_end_crit(vp);
6699 }
6700 }
6701
6702 error = makefh4(&cs->fh, vp, cs->exi);
6703
6704 /*
6705 * Force modified data and metadata out to stable storage.
6706 */
6707 (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
6708
6709 if (error) {
6710 VN_RELE(vp);
6711 *attrset = 0;
6712 return (puterrno4(error));
6713 }
6714
6715 /* if parent dir is attrdir, set namedattr fh flag */
6716 if (dvp->v_flag & V_XATTRDIR)
6717 set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
6718
6719 if (cs->vp)
6720 VN_RELE(cs->vp);
6721
6722 cs->vp = vp;
6723
6724 /*
6725 * if we did not create the file, we will need to check
6726 * the access bits on the file
6727 */
6728
6729 if (!created) {
6730 if (setsize)
6731 args->share_access |= OPEN4_SHARE_ACCESS_WRITE;
6732 status = check_open_access(args->share_access, cs, req);
6733 if (status != NFS4_OK)
6734 *attrset = 0;
6735 }
6736 return (status);
6737 }
6738
6739 /*ARGSUSED*/
6740 static void
6741 rfs4_do_open(struct compound_state *cs, struct svc_req *req,
6742 rfs4_openowner_t *oo, delegreq_t deleg,
6743 uint32_t access, uint32_t deny,
6744 OPEN4res *resp, int deleg_cur)
6745 {
6746 /* XXX Currently not using req */
6747 rfs4_state_t *sp;
6748 rfs4_file_t *fp;
6749 bool_t screate = TRUE;
6750 bool_t fcreate = TRUE;
6751 uint32_t open_a, share_a;
6752 uint32_t open_d, share_d;
6753 rfs4_deleg_state_t *dsp;
6754 sysid_t sysid;
6755 nfsstat4 status;
6756 caller_context_t ct;
6757 int fflags = 0;
6758 int recall = 0;
6759 int err;
6760 int first_open;
6761
6762 /* get the file struct and hold a lock on it during initial open */
6763 fp = rfs4_findfile_withlock(cs->vp, &cs->fh, &fcreate);
6764 if (fp == NULL) {
6765 resp->status = NFS4ERR_RESOURCE;
6766 DTRACE_PROBE1(nfss__e__do__open1, nfsstat4, resp->status);
6767 return;
6768 }
6769
6770 sp = rfs4_findstate_by_owner_file(oo, fp, &screate);
6771 if (sp == NULL) {
6772 resp->status = NFS4ERR_RESOURCE;
6773 DTRACE_PROBE1(nfss__e__do__open2, nfsstat4, resp->status);
6774 /* No need to keep any reference */
6775 rw_exit(&fp->rf_file_rwlock);
6776 rfs4_file_rele(fp);
6777 return;
6778 }
6779
6780 /* try to get the sysid before continuing */
6781 if ((status = rfs4_client_sysid(oo->ro_client, &sysid)) != NFS4_OK) {
6782 resp->status = status;
6783 rfs4_file_rele(fp);
6784 /* Not a fully formed open; "close" it */
6785 if (screate == TRUE)
6786 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6787 rfs4_state_rele(sp);
6788 return;
6789 }
6790
6791 /* Calculate the fflags for this OPEN. */
6792 if (access & OPEN4_SHARE_ACCESS_READ)
6793 fflags |= FREAD;
6794 if (access & OPEN4_SHARE_ACCESS_WRITE)
6795 fflags |= FWRITE;
6796
6797 rfs4_dbe_lock(sp->rs_dbe);
6798
6799 /*
6800 * Calculate the new deny and access mode that this open is adding to
6801 * the file for this open owner;
6802 */
6803 open_d = (deny & ~sp->rs_open_deny);
6804 open_a = (access & ~sp->rs_open_access);
6805
6806 /*
6807 * Calculate the new share access and share deny modes that this open
6808 * is adding to the file for this open owner;
6809 */
6810 share_a = (access & ~sp->rs_share_access);
6811 share_d = (deny & ~sp->rs_share_deny);
6812
6813 first_open = (sp->rs_open_access & OPEN4_SHARE_ACCESS_BOTH) == 0;
6814
6815 /*
6816 * Check to see the client has already sent an open for this
6817 * open owner on this file with the same share/deny modes.
6818 * If so, we don't need to check for a conflict and we don't
6819 * need to add another shrlock. If not, then we need to
6820 * check for conflicts in deny and access before checking for
6821 * conflicts in delegation. We don't want to recall a
6822 * delegation based on an open that will eventually fail based
6823 * on shares modes.
6824 */
6825
6826 if (share_a || share_d) {
6827 if ((err = rfs4_share(sp, access, deny)) != 0) {
6828 rfs4_dbe_unlock(sp->rs_dbe);
6829 resp->status = err;
6830
6831 rfs4_file_rele(fp);
6832 /* Not a fully formed open; "close" it */
6833 if (screate == TRUE)
6834 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6835 rfs4_state_rele(sp);
6836 return;
6837 }
6838 }
6839
6840 rfs4_dbe_lock(fp->rf_dbe);
6841
6842 /*
6843 * Check to see if this file is delegated and if so, if a
6844 * recall needs to be done.
6845 */
6846 if (rfs4_check_recall(sp, access)) {
6847 rfs4_dbe_unlock(fp->rf_dbe);
6848 rfs4_dbe_unlock(sp->rs_dbe);
6849 rfs4_recall_deleg(fp, FALSE, sp->rs_owner->ro_client);
6850 delay(NFS4_DELEGATION_CONFLICT_DELAY);
6851 rfs4_dbe_lock(sp->rs_dbe);
6852
6853 /* if state closed while lock was dropped */
6854 if (sp->rs_closed) {
6855 if (share_a || share_d)
6856 (void) rfs4_unshare(sp);
6857 rfs4_dbe_unlock(sp->rs_dbe);
6858 rfs4_file_rele(fp);
6859 /* Not a fully formed open; "close" it */
6860 if (screate == TRUE)
6861 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6862 rfs4_state_rele(sp);
6863 resp->status = NFS4ERR_OLD_STATEID;
6864 return;
6865 }
6866
6867 rfs4_dbe_lock(fp->rf_dbe);
6868 /* Let's see if the delegation was returned */
6869 if (rfs4_check_recall(sp, access)) {
6870 rfs4_dbe_unlock(fp->rf_dbe);
6871 if (share_a || share_d)
6872 (void) rfs4_unshare(sp);
6873 rfs4_dbe_unlock(sp->rs_dbe);
6874 rfs4_file_rele(fp);
6875 rfs4_update_lease(sp->rs_owner->ro_client);
6876
6877 /* Not a fully formed open; "close" it */
6878 if (screate == TRUE)
6879 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6880 rfs4_state_rele(sp);
6881 resp->status = NFS4ERR_DELAY;
6882 return;
6883 }
6884 }
6885 /*
6886 * the share check passed and any delegation conflict has been
6887 * taken care of, now call vop_open.
6888 * if this is the first open then call vop_open with fflags.
6889 * if not, call vn_open_upgrade with just the upgrade flags.
6890 *
6891 * if the file has been opened already, it will have the current
6892 * access mode in the state struct. if it has no share access, then
6893 * this is a new open.
6894 *
6895 * However, if this is open with CLAIM_DLEGATE_CUR, then don't
6896 * call VOP_OPEN(), just do the open upgrade.
6897 */
6898 if (first_open && !deleg_cur) {
6899 ct.cc_sysid = sysid;
6900 ct.cc_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
6901 ct.cc_caller_id = nfs4_srv_caller_id;
6902 ct.cc_flags = CC_DONTBLOCK;
6903 err = VOP_OPEN(&cs->vp, fflags, cs->cr, &ct);
6904 if (err) {
6905 rfs4_dbe_unlock(fp->rf_dbe);
6906 if (share_a || share_d)
6907 (void) rfs4_unshare(sp);
6908 rfs4_dbe_unlock(sp->rs_dbe);
6909 rfs4_file_rele(fp);
6910
6911 /* Not a fully formed open; "close" it */
6912 if (screate == TRUE)
6913 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6914 rfs4_state_rele(sp);
6915 /* check if a monitor detected a delegation conflict */
6916 if (err == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
6917 resp->status = NFS4ERR_DELAY;
6918 else
6919 resp->status = NFS4ERR_SERVERFAULT;
6920 return;
6921 }
6922 } else { /* open upgrade */
6923 /*
6924 * calculate the fflags for the new mode that is being added
6925 * by this upgrade.
6926 */
6927 fflags = 0;
6928 if (open_a & OPEN4_SHARE_ACCESS_READ)
6929 fflags |= FREAD;
6930 if (open_a & OPEN4_SHARE_ACCESS_WRITE)
6931 fflags |= FWRITE;
6932 vn_open_upgrade(cs->vp, fflags);
6933 }
6934 sp->rs_open_access |= access;
6935 sp->rs_open_deny |= deny;
6936
6937 if (open_d & OPEN4_SHARE_DENY_READ)
6938 fp->rf_deny_read++;
6939 if (open_d & OPEN4_SHARE_DENY_WRITE)
6940 fp->rf_deny_write++;
6941 fp->rf_share_deny |= deny;
6942
6943 if (open_a & OPEN4_SHARE_ACCESS_READ)
6944 fp->rf_access_read++;
6945 if (open_a & OPEN4_SHARE_ACCESS_WRITE)
6946 fp->rf_access_write++;
6947 fp->rf_share_access |= access;
6948
6949 /*
6950 * Check for delegation here. if the deleg argument is not
6951 * DELEG_ANY, then this is a reclaim from a client and
6952 * we must honor the delegation requested. If necessary we can
6953 * set the recall flag.
6954 */
6955
6956 dsp = rfs4_grant_delegation(deleg, sp, &recall);
6957
6958 cs->deleg = (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE);
6959
6960 next_stateid(&sp->rs_stateid);
6961
6962 resp->stateid = sp->rs_stateid.stateid;
6963
6964 rfs4_dbe_unlock(fp->rf_dbe);
6965 rfs4_dbe_unlock(sp->rs_dbe);
6966
6967 if (dsp) {
6968 rfs4_set_deleg_response(dsp, &resp->delegation, NULL, recall);
6969 rfs4_deleg_state_rele(dsp);
6970 }
6971
6972 rfs4_file_rele(fp);
6973 rfs4_state_rele(sp);
6974
6975 resp->status = NFS4_OK;
6976 }
6977
6978 /*ARGSUSED*/
6979 static void
6980 rfs4_do_opennull(struct compound_state *cs, struct svc_req *req,
6981 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6982 {
6983 change_info4 *cinfo = &resp->cinfo;
6984 bitmap4 *attrset = &resp->attrset;
6985
6986 if (args->opentype == OPEN4_NOCREATE)
6987 resp->status = rfs4_lookupfile(&args->open_claim4_u.file,
6988 req, cs, args->share_access, cinfo);
6989 else {
6990 /* inhibit delegation grants during exclusive create */
6991
6992 if (args->mode == EXCLUSIVE4)
6993 rfs4_disable_delegation();
6994
6995 resp->status = rfs4_createfile(args, req, cs, cinfo, attrset,
6996 oo->ro_client->rc_clientid);
6997 }
6998
6999 if (resp->status == NFS4_OK) {
7000
7001 /* cs->vp cs->fh now reference the desired file */
7002
7003 rfs4_do_open(cs, req, oo,
7004 oo->ro_need_confirm ? DELEG_NONE : DELEG_ANY,
7005 args->share_access, args->share_deny, resp, 0);
7006
7007 /*
7008 * If rfs4_createfile set attrset, we must
7009 * clear this attrset before the response is copied.
7010 */
7011 if (resp->status != NFS4_OK && resp->attrset) {
7012 resp->attrset = 0;
7013 }
7014 }
7015 else
7016 *cs->statusp = resp->status;
7017
7018 if (args->mode == EXCLUSIVE4)
7019 rfs4_enable_delegation();
7020 }
7021
7022 /*ARGSUSED*/
7023 static void
7024 rfs4_do_openprev(struct compound_state *cs, struct svc_req *req,
7025 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7026 {
7027 change_info4 *cinfo = &resp->cinfo;
7028 vattr_t va;
7029 vtype_t v_type = cs->vp->v_type;
7030 int error = 0;
7031
7032 /* Verify that we have a regular file */
7033 if (v_type != VREG) {
7034 if (v_type == VDIR)
7035 resp->status = NFS4ERR_ISDIR;
7036 else if (v_type == VLNK)
7037 resp->status = NFS4ERR_SYMLINK;
7038 else
7039 resp->status = NFS4ERR_INVAL;
7040 return;
7041 }
7042
7043 va.va_mask = AT_MODE|AT_UID;
7044 error = VOP_GETATTR(cs->vp, &va, 0, cs->cr, NULL);
7045 if (error) {
7046 resp->status = puterrno4(error);
7047 return;
7048 }
7049
7050 cs->mandlock = MANDLOCK(cs->vp, va.va_mode);
7051
7052 /*
7053 * Check if we have access to the file, Note the the file
7054 * could have originally been open UNCHECKED or GUARDED
7055 * with mode bits that will now fail, but there is nothing
7056 * we can really do about that except in the case that the
7057 * owner of the file is the one requesting the open.
7058 */
7059 if (crgetuid(cs->cr) != va.va_uid) {
7060 resp->status = check_open_access(args->share_access, cs, req);
7061 if (resp->status != NFS4_OK) {
7062 return;
7063 }
7064 }
7065
7066 /*
7067 * cinfo on a CLAIM_PREVIOUS is undefined, initialize to zero
7068 */
7069 cinfo->before = 0;
7070 cinfo->after = 0;
7071 cinfo->atomic = FALSE;
7072
7073 rfs4_do_open(cs, req, oo,
7074 NFS4_DELEG4TYPE2REQTYPE(args->open_claim4_u.delegate_type),
7075 args->share_access, args->share_deny, resp, 0);
7076 }
7077
7078 static void
7079 rfs4_do_opendelcur(struct compound_state *cs, struct svc_req *req,
7080 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7081 {
7082 int error;
7083 nfsstat4 status;
7084 stateid4 stateid =
7085 args->open_claim4_u.delegate_cur_info.delegate_stateid;
7086 rfs4_deleg_state_t *dsp;
7087
7088 /*
7089 * Find the state info from the stateid and confirm that the
7090 * file is delegated. If the state openowner is the same as
7091 * the supplied openowner we're done. If not, get the file
7092 * info from the found state info. Use that file info to
7093 * create the state for this lock owner. Note solaris doen't
7094 * really need the pathname to find the file. We may want to
7095 * lookup the pathname and make sure that the vp exist and
7096 * matches the vp in the file structure. However it is
7097 * possible that the pathname nolonger exists (local process
7098 * unlinks the file), so this may not be that useful.
7099 */
7100
7101 status = rfs4_get_deleg_state(&stateid, &dsp);
7102 if (status != NFS4_OK) {
7103 resp->status = status;
7104 return;
7105 }
7106
7107 ASSERT(dsp->rds_finfo->rf_dinfo.rd_dtype != OPEN_DELEGATE_NONE);
7108
7109 /*
7110 * New lock owner, create state. Since this was probably called
7111 * in response to a CB_RECALL we set deleg to DELEG_NONE
7112 */
7113
7114 ASSERT(cs->vp != NULL);
7115 VN_RELE(cs->vp);
7116 VN_HOLD(dsp->rds_finfo->rf_vp);
7117 cs->vp = dsp->rds_finfo->rf_vp;
7118
7119 if (error = makefh4(&cs->fh, cs->vp, cs->exi)) {
7120 rfs4_deleg_state_rele(dsp);
7121 *cs->statusp = resp->status = puterrno4(error);
7122 return;
7123 }
7124
7125 /* Mark progress for delegation returns */
7126 dsp->rds_finfo->rf_dinfo.rd_time_lastwrite = gethrestime_sec();
7127 rfs4_deleg_state_rele(dsp);
7128 rfs4_do_open(cs, req, oo, DELEG_NONE,
7129 args->share_access, args->share_deny, resp, 1);
7130 }
7131
7132 /*ARGSUSED*/
7133 static void
7134 rfs4_do_opendelprev(struct compound_state *cs, struct svc_req *req,
7135 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7136 {
7137 /*
7138 * Lookup the pathname, it must already exist since this file
7139 * was delegated.
7140 *
7141 * Find the file and state info for this vp and open owner pair.
7142 * check that they are in fact delegated.
7143 * check that the state access and deny modes are the same.
7144 *
7145 * Return the delgation possibly seting the recall flag.
7146 */
7147 rfs4_file_t *fp;
7148 rfs4_state_t *sp;
7149 bool_t create = FALSE;
7150 bool_t dcreate = FALSE;
7151 rfs4_deleg_state_t *dsp;
7152 nfsace4 *ace;
7153
7154 /* Note we ignore oflags */
7155 resp->status = rfs4_lookupfile(&args->open_claim4_u.file_delegate_prev,
7156 req, cs, args->share_access, &resp->cinfo);
7157
7158 if (resp->status != NFS4_OK) {
7159 return;
7160 }
7161
7162 /* get the file struct and hold a lock on it during initial open */
7163 fp = rfs4_findfile_withlock(cs->vp, NULL, &create);
7164 if (fp == NULL) {
7165 resp->status = NFS4ERR_RESOURCE;
7166 DTRACE_PROBE1(nfss__e__do_opendelprev1, nfsstat4, resp->status);
7167 return;
7168 }
7169
7170 sp = rfs4_findstate_by_owner_file(oo, fp, &create);
7171 if (sp == NULL) {
7172 resp->status = NFS4ERR_SERVERFAULT;
7173 DTRACE_PROBE1(nfss__e__do_opendelprev2, nfsstat4, resp->status);
7174 rw_exit(&fp->rf_file_rwlock);
7175 rfs4_file_rele(fp);
7176 return;
7177 }
7178
7179 rfs4_dbe_lock(sp->rs_dbe);
7180 rfs4_dbe_lock(fp->rf_dbe);
7181 if (args->share_access != sp->rs_share_access ||
7182 args->share_deny != sp->rs_share_deny ||
7183 sp->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
7184 NFS4_DEBUG(rfs4_debug,
7185 (CE_NOTE, "rfs4_do_opendelprev: state mixup"));
7186 rfs4_dbe_unlock(fp->rf_dbe);
7187 rfs4_dbe_unlock(sp->rs_dbe);
7188 rfs4_file_rele(fp);
7189 rfs4_state_rele(sp);
7190 resp->status = NFS4ERR_SERVERFAULT;
7191 return;
7192 }
7193 rfs4_dbe_unlock(fp->rf_dbe);
7194 rfs4_dbe_unlock(sp->rs_dbe);
7195
7196 dsp = rfs4_finddeleg(sp, &dcreate);
7197 if (dsp == NULL) {
7198 rfs4_state_rele(sp);
7199 rfs4_file_rele(fp);
7200 resp->status = NFS4ERR_SERVERFAULT;
7201 return;
7202 }
7203
7204 next_stateid(&sp->rs_stateid);
7205
7206 resp->stateid = sp->rs_stateid.stateid;
7207
7208 resp->delegation.delegation_type = dsp->rds_dtype;
7209
7210 if (dsp->rds_dtype == OPEN_DELEGATE_READ) {
7211 open_read_delegation4 *rv =
7212 &resp->delegation.open_delegation4_u.read;
7213
7214 rv->stateid = dsp->rds_delegid.stateid;
7215 rv->recall = FALSE; /* no policy in place to set to TRUE */
7216 ace = &rv->permissions;
7217 } else {
7218 open_write_delegation4 *rv =
7219 &resp->delegation.open_delegation4_u.write;
7220
7221 rv->stateid = dsp->rds_delegid.stateid;
7222 rv->recall = FALSE; /* no policy in place to set to TRUE */
7223 ace = &rv->permissions;
7224 rv->space_limit.limitby = NFS_LIMIT_SIZE;
7225 rv->space_limit.nfs_space_limit4_u.filesize = UINT64_MAX;
7226 }
7227
7228 /* XXX For now */
7229 ace->type = ACE4_ACCESS_ALLOWED_ACE_TYPE;
7230 ace->flag = 0;
7231 ace->access_mask = 0;
7232 ace->who.utf8string_len = 0;
7233 ace->who.utf8string_val = 0;
7234
7235 rfs4_deleg_state_rele(dsp);
7236 rfs4_state_rele(sp);
7237 rfs4_file_rele(fp);
7238 }
7239
/*
 * Result of a sequence id check (see rfs4_check_seqid()).
 * NFS4_CHKSEQ_OKAY must stay 0: at least one caller compares the
 * result against a literal 0.
 */
typedef enum {
	/* request carries the next expected sequence id */
	NFS4_CHKSEQ_OKAY = 0,
	/* same sequence id and same operation as the cached reply */
	NFS4_CHKSEQ_REPLAY,
	/* any other sequence id */
	NFS4_CHKSEQ_BAD
} rfs4_chkseq_t;
7245
7246 /*
7247 * Generic function for sequence number checks.
7248 */
7249 static rfs4_chkseq_t
7250 rfs4_check_seqid(seqid4 seqid, nfs_resop4 *lastop,
7251 seqid4 rqst_seq, nfs_resop4 *resop, bool_t copyres)
7252 {
7253 /* Same sequence ids and matching operations? */
7254 if (seqid == rqst_seq && resop->resop == lastop->resop) {
7255 if (copyres == TRUE) {
7256 rfs4_free_reply(resop);
7257 rfs4_copy_reply(resop, lastop);
7258 }
7259 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
7260 "Replayed SEQID %d\n", seqid));
7261 return (NFS4_CHKSEQ_REPLAY);
7262 }
7263
7264 /* If the incoming sequence is not the next expected then it is bad */
7265 if (rqst_seq != seqid + 1) {
7266 if (rqst_seq == seqid) {
7267 NFS4_DEBUG(rfs4_debug,
7268 (CE_NOTE, "BAD SEQID: Replayed sequence id "
7269 "but last op was %d current op is %d\n",
7270 lastop->resop, resop->resop));
7271 return (NFS4_CHKSEQ_BAD);
7272 }
7273 NFS4_DEBUG(rfs4_debug,
7274 (CE_NOTE, "BAD SEQID: got %u expecting %u\n",
7275 rqst_seq, seqid));
7276 return (NFS4_CHKSEQ_BAD);
7277 }
7278
7279 /* Everything okay -- next expected */
7280 return (NFS4_CHKSEQ_OKAY);
7281 }
7282
7283
7284 static rfs4_chkseq_t
7285 rfs4_check_open_seqid(seqid4 seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7286 {
7287 rfs4_chkseq_t rc;
7288
7289 rfs4_dbe_lock(op->ro_dbe);
7290 rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply, seqid, resop,
7291 TRUE);
7292 rfs4_dbe_unlock(op->ro_dbe);
7293
7294 if (rc == NFS4_CHKSEQ_OKAY)
7295 rfs4_update_lease(op->ro_client);
7296
7297 return (rc);
7298 }
7299
7300 static rfs4_chkseq_t
7301 rfs4_check_olo_seqid(seqid4 olo_seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7302 {
7303 rfs4_chkseq_t rc;
7304
7305 rfs4_dbe_lock(op->ro_dbe);
7306 rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply,
7307 olo_seqid, resop, FALSE);
7308 rfs4_dbe_unlock(op->ro_dbe);
7309
7310 return (rc);
7311 }
7312
7313 static rfs4_chkseq_t
7314 rfs4_check_lock_seqid(seqid4 seqid, rfs4_lo_state_t *lsp, nfs_resop4 *resop)
7315 {
7316 rfs4_chkseq_t rc = NFS4_CHKSEQ_OKAY;
7317
7318 rfs4_dbe_lock(lsp->rls_dbe);
7319 if (!lsp->rls_skip_seqid_check)
7320 rc = rfs4_check_seqid(lsp->rls_seqid, &lsp->rls_reply, seqid,
7321 resop, TRUE);
7322 rfs4_dbe_unlock(lsp->rls_dbe);
7323
7324 return (rc);
7325 }
7326
7327 static void
7328 rfs4_op_open(nfs_argop4 *argop, nfs_resop4 *resop,
7329 struct svc_req *req, struct compound_state *cs)
7330 {
7331 OPEN4args *args = &argop->nfs_argop4_u.opopen;
7332 OPEN4res *resp = &resop->nfs_resop4_u.opopen;
7333 open_owner4 *owner = &args->owner;
7334 open_claim_type4 claim = args->claim;
7335 rfs4_client_t *cp;
7336 rfs4_openowner_t *oo;
7337 bool_t create;
7338 bool_t replay = FALSE;
7339 int can_reclaim;
7340
7341 DTRACE_NFSV4_2(op__open__start, struct compound_state *, cs,
7342 OPEN4args *, args);
7343
7344 if (cs->vp == NULL) {
7345 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7346 goto end;
7347 }
7348
7349 /*
7350 * Need to check clientid and lease expiration first based on
7351 * error ordering and incrementing sequence id.
7352 */
7353 cp = rfs4_findclient_by_id(owner->clientid, FALSE);
7354 if (cp == NULL) {
7355 *cs->statusp = resp->status =
7356 rfs4_check_clientid(&owner->clientid, 0);
7357 goto end;
7358 }
7359
7360 if (rfs4_lease_expired(cp)) {
7361 rfs4_client_close(cp);
7362 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7363 goto end;
7364 }
7365 can_reclaim = cp->rc_can_reclaim;
7366
7367 /*
7368 * Find the open_owner for use from this point forward. Take
7369 * care in updating the sequence id based on the type of error
7370 * being returned.
7371 */
7372 retry:
7373 create = TRUE;
7374 oo = rfs4_findopenowner(owner, &create, args->seqid);
7375 if (oo == NULL) {
7376 *cs->statusp = resp->status = NFS4ERR_RESOURCE;
7377 rfs4_client_rele(cp);
7378 goto end;
7379 }
7380
7381 /* Hold off access to the sequence space while the open is done */
7382 rfs4_sw_enter(&oo->ro_sw);
7383
7384 /*
7385 * If the open_owner existed before at the server, then check
7386 * the sequence id.
7387 */
7388 if (!create && !oo->ro_postpone_confirm) {
7389 switch (rfs4_check_open_seqid(args->seqid, oo, resop)) {
7390 case NFS4_CHKSEQ_BAD:
7391 if ((args->seqid > oo->ro_open_seqid) &&
7392 oo->ro_need_confirm) {
7393 rfs4_free_opens(oo, TRUE, FALSE);
7394 rfs4_sw_exit(&oo->ro_sw);
7395 rfs4_openowner_rele(oo);
7396 goto retry;
7397 }
7398 resp->status = NFS4ERR_BAD_SEQID;
7399 goto out;
7400 case NFS4_CHKSEQ_REPLAY: /* replay of previous request */
7401 replay = TRUE;
7402 goto out;
7403 default:
7404 break;
7405 }
7406
7407 /*
7408 * Sequence was ok and open owner exists
7409 * check to see if we have yet to see an
7410 * open_confirm.
7411 */
7412 if (oo->ro_need_confirm) {
7413 rfs4_free_opens(oo, TRUE, FALSE);
7414 rfs4_sw_exit(&oo->ro_sw);
7415 rfs4_openowner_rele(oo);
7416 goto retry;
7417 }
7418 }
7419 /* Grace only applies to regular-type OPENs */
7420 if (rfs4_clnt_in_grace(cp) &&
7421 (claim == CLAIM_NULL || claim == CLAIM_DELEGATE_CUR)) {
7422 *cs->statusp = resp->status = NFS4ERR_GRACE;
7423 goto out;
7424 }
7425
7426 /*
7427 * If previous state at the server existed then can_reclaim
7428 * will be set. If not reply NFS4ERR_NO_GRACE to the
7429 * client.
7430 */
7431 if (rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS && !can_reclaim) {
7432 *cs->statusp = resp->status = NFS4ERR_NO_GRACE;
7433 goto out;
7434 }
7435
7436
7437 /*
7438 * Reject the open if the client has missed the grace period
7439 */
7440 if (!rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS) {
7441 *cs->statusp = resp->status = NFS4ERR_NO_GRACE;
7442 goto out;
7443 }
7444
7445 /* Couple of up-front bookkeeping items */
7446 if (oo->ro_need_confirm) {
7447 /*
7448 * If this is a reclaim OPEN then we should not ask
7449 * for a confirmation of the open_owner per the
7450 * protocol specification.
7451 */
7452 if (claim == CLAIM_PREVIOUS)
7453 oo->ro_need_confirm = FALSE;
7454 else
7455 resp->rflags |= OPEN4_RESULT_CONFIRM;
7456 }
7457 resp->rflags |= OPEN4_RESULT_LOCKTYPE_POSIX;
7458
7459 /*
7460 * If there is an unshared filesystem mounted on this vnode,
7461 * do not allow to open/create in this directory.
7462 */
7463 if (vn_ismntpt(cs->vp)) {
7464 *cs->statusp = resp->status = NFS4ERR_ACCESS;
7465 goto out;
7466 }
7467
7468 /*
7469 * access must READ, WRITE, or BOTH. No access is invalid.
7470 * deny can be READ, WRITE, BOTH, or NONE.
7471 * bits not defined for access/deny are invalid.
7472 */
7473 if (! (args->share_access & OPEN4_SHARE_ACCESS_BOTH) ||
7474 (args->share_access & ~OPEN4_SHARE_ACCESS_BOTH) ||
7475 (args->share_deny & ~OPEN4_SHARE_DENY_BOTH)) {
7476 *cs->statusp = resp->status = NFS4ERR_INVAL;
7477 goto out;
7478 }
7479
7480
7481 /*
7482 * make sure attrset is zero before response is built.
7483 */
7484 resp->attrset = 0;
7485
7486 switch (claim) {
7487 case CLAIM_NULL:
7488 rfs4_do_opennull(cs, req, args, oo, resp);
7489 break;
7490 case CLAIM_PREVIOUS:
7491 rfs4_do_openprev(cs, req, args, oo, resp);
7492 break;
7493 case CLAIM_DELEGATE_CUR:
7494 rfs4_do_opendelcur(cs, req, args, oo, resp);
7495 break;
7496 case CLAIM_DELEGATE_PREV:
7497 rfs4_do_opendelprev(cs, req, args, oo, resp);
7498 break;
7499 default:
7500 resp->status = NFS4ERR_INVAL;
7501 break;
7502 }
7503
7504 out:
7505 rfs4_client_rele(cp);
7506
7507 /* Catch sequence id handling here to make it a little easier */
7508 switch (resp->status) {
7509 case NFS4ERR_BADXDR:
7510 case NFS4ERR_BAD_SEQID:
7511 case NFS4ERR_BAD_STATEID:
7512 case NFS4ERR_NOFILEHANDLE:
7513 case NFS4ERR_RESOURCE:
7514 case NFS4ERR_STALE_CLIENTID:
7515 case NFS4ERR_STALE_STATEID:
7516 /*
7517 * The protocol states that if any of these errors are
7518 * being returned, the sequence id should not be
7519 * incremented. Any other return requires an
7520 * increment.
7521 */
7522 break;
7523 default:
7524 /* Always update the lease in this case */
7525 rfs4_update_lease(oo->ro_client);
7526
7527 /* Regular response - copy the result */
7528 if (!replay)
7529 rfs4_update_open_resp(oo, resop, &cs->fh);
7530
7531 /*
7532 * REPLAY case: Only if the previous response was OK
7533 * do we copy the filehandle. If not OK, no
7534 * filehandle to copy.
7535 */
7536 if (replay == TRUE &&
7537 resp->status == NFS4_OK &&
7538 oo->ro_reply_fh.nfs_fh4_val) {
7539 /*
7540 * If this is a replay, we must restore the
7541 * current filehandle/vp to that of what was
7542 * returned originally. Try our best to do
7543 * it.
7544 */
7545 nfs_fh4_fmt_t *fh_fmtp =
7546 (nfs_fh4_fmt_t *)oo->ro_reply_fh.nfs_fh4_val;
7547
7548 cs->exi = checkexport4(&fh_fmtp->fh4_fsid,
7549 (fid_t *)&fh_fmtp->fh4_xlen, NULL);
7550
7551 if (cs->exi == NULL) {
7552 resp->status = NFS4ERR_STALE;
7553 goto finish;
7554 }
7555
7556 VN_RELE(cs->vp);
7557
7558 cs->vp = nfs4_fhtovp(&oo->ro_reply_fh, cs->exi,
7559 &resp->status);
7560
7561 if (cs->vp == NULL)
7562 goto finish;
7563
7564 nfs_fh4_copy(&oo->ro_reply_fh, &cs->fh);
7565 }
7566
7567 /*
7568 * If this was a replay, no need to update the
7569 * sequence id. If the open_owner was not created on
7570 * this pass, then update. The first use of an
7571 * open_owner will not bump the sequence id.
7572 */
7573 if (replay == FALSE && !create)
7574 rfs4_update_open_sequence(oo);
7575 /*
7576 * If the client is receiving an error and the
7577 * open_owner needs to be confirmed, there is no way
7578 * to notify the client of this fact ignoring the fact
7579 * that the server has no method of returning a
7580 * stateid to confirm. Therefore, the server needs to
7581 * mark this open_owner in a way as to avoid the
7582 * sequence id checking the next time the client uses
7583 * this open_owner.
7584 */
7585 if (resp->status != NFS4_OK && oo->ro_need_confirm)
7586 oo->ro_postpone_confirm = TRUE;
7587 /*
7588 * If OK response then clear the postpone flag and
7589 * reset the sequence id to keep in sync with the
7590 * client.
7591 */
7592 if (resp->status == NFS4_OK && oo->ro_postpone_confirm) {
7593 oo->ro_postpone_confirm = FALSE;
7594 oo->ro_open_seqid = args->seqid;
7595 }
7596 break;
7597 }
7598
7599 finish:
7600 *cs->statusp = resp->status;
7601
7602 rfs4_sw_exit(&oo->ro_sw);
7603 rfs4_openowner_rele(oo);
7604
7605 end:
7606 DTRACE_NFSV4_2(op__open__done, struct compound_state *, cs,
7607 OPEN4res *, resp);
7608 }
7609
7610 /*ARGSUSED*/
7611 void
7612 rfs4_op_open_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
7613 struct svc_req *req, struct compound_state *cs)
7614 {
7615 OPEN_CONFIRM4args *args = &argop->nfs_argop4_u.opopen_confirm;
7616 OPEN_CONFIRM4res *resp = &resop->nfs_resop4_u.opopen_confirm;
7617 rfs4_state_t *sp;
7618 nfsstat4 status;
7619
7620 DTRACE_NFSV4_2(op__open__confirm__start, struct compound_state *, cs,
7621 OPEN_CONFIRM4args *, args);
7622
7623 if (cs->vp == NULL) {
7624 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7625 goto out;
7626 }
7627
7628 if (cs->vp->v_type != VREG) {
7629 *cs->statusp = resp->status =
7630 cs->vp->v_type == VDIR ? NFS4ERR_ISDIR : NFS4ERR_INVAL;
7631 return;
7632 }
7633
7634 status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7635 if (status != NFS4_OK) {
7636 *cs->statusp = resp->status = status;
7637 goto out;
7638 }
7639
7640 /* Ensure specified filehandle matches */
7641 if (cs->vp != sp->rs_finfo->rf_vp) {
7642 rfs4_state_rele(sp);
7643 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7644 goto out;
7645 }
7646
7647 /* hold off other access to open_owner while we tinker */
7648 rfs4_sw_enter(&sp->rs_owner->ro_sw);
7649
7650 switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7651 case NFS4_CHECK_STATEID_OKAY:
7652 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7653 resop) != 0) {
7654 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7655 break;
7656 }
7657 /*
7658 * If it is the appropriate stateid and determined to
7659 * be "OKAY" then this means that the stateid does not
7660 * need to be confirmed and the client is in error for
7661 * sending an OPEN_CONFIRM.
7662 */
7663 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7664 break;
7665 case NFS4_CHECK_STATEID_OLD:
7666 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7667 break;
7668 case NFS4_CHECK_STATEID_BAD:
7669 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7670 break;
7671 case NFS4_CHECK_STATEID_EXPIRED:
7672 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7673 break;
7674 case NFS4_CHECK_STATEID_CLOSED:
7675 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7676 break;
7677 case NFS4_CHECK_STATEID_REPLAY:
7678 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7679 resop)) {
7680 case NFS4_CHKSEQ_OKAY:
7681 /*
7682 * This is replayed stateid; if seqid matches
7683 * next expected, then client is using wrong seqid.
7684 */
7685 /* fall through */
7686 case NFS4_CHKSEQ_BAD:
7687 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7688 break;
7689 case NFS4_CHKSEQ_REPLAY:
7690 /*
7691 * Note this case is the duplicate case so
7692 * resp->status is already set.
7693 */
7694 *cs->statusp = resp->status;
7695 rfs4_update_lease(sp->rs_owner->ro_client);
7696 break;
7697 }
7698 break;
7699 case NFS4_CHECK_STATEID_UNCONFIRMED:
7700 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7701 resop) != NFS4_CHKSEQ_OKAY) {
7702 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7703 break;
7704 }
7705 *cs->statusp = resp->status = NFS4_OK;
7706
7707 next_stateid(&sp->rs_stateid);
7708 resp->open_stateid = sp->rs_stateid.stateid;
7709 sp->rs_owner->ro_need_confirm = FALSE;
7710 rfs4_update_lease(sp->rs_owner->ro_client);
7711 rfs4_update_open_sequence(sp->rs_owner);
7712 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7713 break;
7714 default:
7715 ASSERT(FALSE);
7716 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7717 break;
7718 }
7719 rfs4_sw_exit(&sp->rs_owner->ro_sw);
7720 rfs4_state_rele(sp);
7721
7722 out:
7723 DTRACE_NFSV4_2(op__open__confirm__done, struct compound_state *, cs,
7724 OPEN_CONFIRM4res *, resp);
7725 }
7726
7727 /*ARGSUSED*/
7728 void
7729 rfs4_op_open_downgrade(nfs_argop4 *argop, nfs_resop4 *resop,
7730 struct svc_req *req, struct compound_state *cs)
7731 {
7732 OPEN_DOWNGRADE4args *args = &argop->nfs_argop4_u.opopen_downgrade;
7733 OPEN_DOWNGRADE4res *resp = &resop->nfs_resop4_u.opopen_downgrade;
7734 uint32_t access = args->share_access;
7735 uint32_t deny = args->share_deny;
7736 nfsstat4 status;
7737 rfs4_state_t *sp;
7738 rfs4_file_t *fp;
7739 int fflags = 0;
7740
7741 DTRACE_NFSV4_2(op__open__downgrade__start, struct compound_state *, cs,
7742 OPEN_DOWNGRADE4args *, args);
7743
7744 if (cs->vp == NULL) {
7745 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7746 goto out;
7747 }
7748
7749 if (cs->vp->v_type != VREG) {
7750 *cs->statusp = resp->status = NFS4ERR_INVAL;
7751 return;
7752 }
7753
7754 status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7755 if (status != NFS4_OK) {
7756 *cs->statusp = resp->status = status;
7757 goto out;
7758 }
7759
7760 /* Ensure specified filehandle matches */
7761 if (cs->vp != sp->rs_finfo->rf_vp) {
7762 rfs4_state_rele(sp);
7763 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7764 goto out;
7765 }
7766
7767 /* hold off other access to open_owner while we tinker */
7768 rfs4_sw_enter(&sp->rs_owner->ro_sw);
7769
7770 switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7771 case NFS4_CHECK_STATEID_OKAY:
7772 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7773 resop) != NFS4_CHKSEQ_OKAY) {
7774 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7775 goto end;
7776 }
7777 break;
7778 case NFS4_CHECK_STATEID_OLD:
7779 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7780 goto end;
7781 case NFS4_CHECK_STATEID_BAD:
7782 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7783 goto end;
7784 case NFS4_CHECK_STATEID_EXPIRED:
7785 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7786 goto end;
7787 case NFS4_CHECK_STATEID_CLOSED:
7788 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7789 goto end;
7790 case NFS4_CHECK_STATEID_UNCONFIRMED:
7791 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7792 goto end;
7793 case NFS4_CHECK_STATEID_REPLAY:
7794 /* Check the sequence id for the open owner */
7795 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7796 resop)) {
7797 case NFS4_CHKSEQ_OKAY:
7798 /*
7799 * This is replayed stateid; if seqid matches
7800 * next expected, then client is using wrong seqid.
7801 */
7802 /* fall through */
7803 case NFS4_CHKSEQ_BAD:
7804 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7805 goto end;
7806 case NFS4_CHKSEQ_REPLAY:
7807 /*
7808 * Note this case is the duplicate case so
7809 * resp->status is already set.
7810 */
7811 *cs->statusp = resp->status;
7812 rfs4_update_lease(sp->rs_owner->ro_client);
7813 goto end;
7814 }
7815 break;
7816 default:
7817 ASSERT(FALSE);
7818 break;
7819 }
7820
7821 rfs4_dbe_lock(sp->rs_dbe);
7822 /*
7823 * Check that the new access modes and deny modes are valid.
7824 * Check that no invalid bits are set.
7825 */
7826 if ((access & ~(OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) ||
7827 (deny & ~(OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE))) {
7828 *cs->statusp = resp->status = NFS4ERR_INVAL;
7829 rfs4_update_open_sequence(sp->rs_owner);
7830 rfs4_dbe_unlock(sp->rs_dbe);
7831 goto end;
7832 }
7833
7834 /*
7835 * The new modes must be a subset of the current modes and
7836 * the access must specify at least one mode. To test that
7837 * the new mode is a subset of the current modes we bitwise
7838 * AND them together and check that the result equals the new
7839 * mode. For example:
7840 * New mode, access == R and current mode, sp->rs_open_access == RW
7841 * access & sp->rs_open_access == R == access, so the new access mode
7842 * is valid. Consider access == RW, sp->rs_open_access = R
7843 * access & sp->rs_open_access == R != access, so the new access mode
7844 * is invalid.
7845 */
7846 if ((access & sp->rs_open_access) != access ||
7847 (deny & sp->rs_open_deny) != deny ||
7848 (access &
7849 (OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) == 0) {
7850 *cs->statusp = resp->status = NFS4ERR_INVAL;
7851 rfs4_update_open_sequence(sp->rs_owner);
7852 rfs4_dbe_unlock(sp->rs_dbe);
7853 goto end;
7854 }
7855
7856 /*
7857 * Release any share locks associated with this stateID.
7858 * Strictly speaking, this violates the spec because the
7859 * spec effectively requires that open downgrade be atomic.
7860 * At present, fs_shrlock does not have this capability.
7861 */
7862 (void) rfs4_unshare(sp);
7863
7864 status = rfs4_share(sp, access, deny);
7865 if (status != NFS4_OK) {
7866 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7867 rfs4_update_open_sequence(sp->rs_owner);
7868 rfs4_dbe_unlock(sp->rs_dbe);
7869 goto end;
7870 }
7871
7872 fp = sp->rs_finfo;
7873 rfs4_dbe_lock(fp->rf_dbe);
7874
7875 /*
7876 * If the current mode has deny read and the new mode
7877 * does not, decrement the number of deny read mode bits
7878 * and if it goes to zero turn off the deny read bit
7879 * on the file.
7880 */
7881 if ((sp->rs_open_deny & OPEN4_SHARE_DENY_READ) &&
7882 (deny & OPEN4_SHARE_DENY_READ) == 0) {
7883 fp->rf_deny_read--;
7884 if (fp->rf_deny_read == 0)
7885 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
7886 }
7887
7888 /*
7889 * If the current mode has deny write and the new mode
7890 * does not, decrement the number of deny write mode bits
7891 * and if it goes to zero turn off the deny write bit
7892 * on the file.
7893 */
7894 if ((sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) &&
7895 (deny & OPEN4_SHARE_DENY_WRITE) == 0) {
7896 fp->rf_deny_write--;
7897 if (fp->rf_deny_write == 0)
7898 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
7899 }
7900
7901 /*
7902 * If the current mode has access read and the new mode
7903 * does not, decrement the number of access read mode bits
7904 * and if it goes to zero turn off the access read bit
7905 * on the file. set fflags to FREAD for the call to
7906 * vn_open_downgrade().
7907 */
7908 if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) &&
7909 (access & OPEN4_SHARE_ACCESS_READ) == 0) {
7910 fp->rf_access_read--;
7911 if (fp->rf_access_read == 0)
7912 fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
7913 fflags |= FREAD;
7914 }
7915
7916 /*
7917 * If the current mode has access write and the new mode
7918 * does not, decrement the number of access write mode bits
7919 * and if it goes to zero turn off the access write bit
7920 * on the file. set fflags to FWRITE for the call to
7921 * vn_open_downgrade().
7922 */
7923 if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) &&
7924 (access & OPEN4_SHARE_ACCESS_WRITE) == 0) {
7925 fp->rf_access_write--;
7926 if (fp->rf_access_write == 0)
7927 fp->rf_share_deny &= ~OPEN4_SHARE_ACCESS_WRITE;
7928 fflags |= FWRITE;
7929 }
7930
7931 /* Check that the file is still accessible */
7932 ASSERT(fp->rf_share_access);
7933
7934 rfs4_dbe_unlock(fp->rf_dbe);
7935
7936 /* now set the new open access and deny modes */
7937 sp->rs_open_access = access;
7938 sp->rs_open_deny = deny;
7939
7940 /*
7941 * we successfully downgraded the share lock, now we need to downgrade
7942 * the open. it is possible that the downgrade was only for a deny
7943 * mode and we have nothing else to do.
7944 */
7945 if ((fflags & (FREAD|FWRITE)) != 0)
7946 vn_open_downgrade(cs->vp, fflags);
7947
7948 /* Update the stateid */
7949 next_stateid(&sp->rs_stateid);
7950 resp->open_stateid = sp->rs_stateid.stateid;
7951
7952 rfs4_dbe_unlock(sp->rs_dbe);
7953
7954 *cs->statusp = resp->status = NFS4_OK;
7955 /* Update the lease */
7956 rfs4_update_lease(sp->rs_owner->ro_client);
7957 /* And the sequence */
7958 rfs4_update_open_sequence(sp->rs_owner);
7959 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7960
7961 end:
7962 rfs4_sw_exit(&sp->rs_owner->ro_sw);
7963 rfs4_state_rele(sp);
7964 out:
7965 DTRACE_NFSV4_2(op__open__downgrade__done, struct compound_state *, cs,
7966 OPEN_DOWNGRADE4res *, resp);
7967 }
7968
/*
 * Search the first n bytes of the buffer s1 for the NUL-terminated
 * string s2 (the terminator itself is not part of the match).
 *
 * Returns a pointer to the first occurrence inside s1, or NULL when
 * s2 does not occur within those n bytes.  The buffer need not be
 * NUL-terminated; only n bounds the search.
 */
static void *
memstr(const void *s1, const char *s2, size_t n)
{
	size_t patlen = strlen(s2);
	size_t remaining;
	char *cursor;

	/*
	 * Slide a patlen-sized window over the buffer for as long as a
	 * whole pattern still fits in what is left.
	 */
	for (cursor = (char *)s1, remaining = n; remaining >= patlen;
	    cursor++, remaining--) {
		if (bcmp(cursor, s2, patlen) == 0)
			return (cursor);
	}

	return (NULL);
}
7984
7985 /*
7986 * The logic behind this function is detailed in the NFSv4 RFC in the
7987 * SETCLIENTID operation description under IMPLEMENTATION. Refer to
7988 * that section for explicit guidance to server behavior for
7989 * SETCLIENTID.
7990 */
7991 void
7992 rfs4_op_setclientid(nfs_argop4 *argop, nfs_resop4 *resop,
7993 struct svc_req *req, struct compound_state *cs)
7994 {
7995 SETCLIENTID4args *args = &argop->nfs_argop4_u.opsetclientid;
7996 SETCLIENTID4res *res = &resop->nfs_resop4_u.opsetclientid;
7997 rfs4_client_t *cp, *newcp, *cp_confirmed, *cp_unconfirmed;
7998 rfs4_clntip_t *ci;
7999 bool_t create;
8000 char *addr, *netid;
8001 int len;
8002
8003 DTRACE_NFSV4_2(op__setclientid__start, struct compound_state *, cs,
8004 SETCLIENTID4args *, args);
8005 retry:
8006 newcp = cp_confirmed = cp_unconfirmed = NULL;
8007
8008 /*
8009 * Save the caller's IP address
8010 */
8011 args->client.cl_addr =
8012 (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
8013
8014 /*
8015 * Record if it is a Solaris client that cannot handle referrals.
8016 */
8017 if (memstr(args->client.id_val, "Solaris", args->client.id_len) &&
8018 !memstr(args->client.id_val, "+referrals", args->client.id_len)) {
8019 /* Add a "yes, it's downrev" record */
8020 create = TRUE;
8021 ci = rfs4_find_clntip(args->client.cl_addr, &create);
8022 ASSERT(ci != NULL);
8023 rfs4_dbe_rele(ci->ri_dbe);
8024 } else {
8025 /* Remove any previous record */
8026 rfs4_invalidate_clntip(args->client.cl_addr);
8027 }
8028
8029 /*
8030 * In search of an EXISTING client matching the incoming
8031 * request to establish a new client identifier at the server
8032 */
8033 create = TRUE;
8034 cp = rfs4_findclient(&args->client, &create, NULL);
8035
8036 /* Should never happen */
8037 ASSERT(cp != NULL);
8038
8039 if (cp == NULL) {
8040 *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8041 goto out;
8042 }
8043
8044 /*
8045 * Easiest case. Client identifier is newly created and is
8046 * unconfirmed. Also note that for this case, no other
8047 * entries exist for the client identifier. Nothing else to
8048 * check. Just setup the response and respond.
8049 */
8050 if (create) {
8051 *cs->statusp = res->status = NFS4_OK;
8052 res->SETCLIENTID4res_u.resok4.clientid = cp->rc_clientid;
8053 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8054 cp->rc_confirm_verf;
8055 /* Setup callback information; CB_NULL confirmation later */
8056 rfs4_client_setcb(cp, &args->callback, args->callback_ident);
8057
8058 rfs4_client_rele(cp);
8059 goto out;
8060 }
8061
8062 /*
8063 * An existing, confirmed client may exist but it may not have
8064 * been active for at least one lease period. If so, then
8065 * "close" the client and create a new client identifier
8066 */
8067 if (rfs4_lease_expired(cp)) {
8068 rfs4_client_close(cp);
8069 goto retry;
8070 }
8071
8072 if (cp->rc_need_confirm == TRUE)
8073 cp_unconfirmed = cp;
8074 else
8075 cp_confirmed = cp;
8076
8077 cp = NULL;
8078
8079 /*
8080 * We have a confirmed client, now check for an
8081 * unconfimred entry
8082 */
8083 if (cp_confirmed) {
8084 /* If creds don't match then client identifier is inuse */
8085 if (!creds_ok(cp_confirmed->rc_cr_set, req, cs)) {
8086 rfs4_cbinfo_t *cbp;
8087 /*
8088 * Some one else has established this client
8089 * id. Try and say * who they are. We will use
8090 * the call back address supplied by * the
8091 * first client.
8092 */
8093 *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8094
8095 addr = netid = NULL;
8096
8097 cbp = &cp_confirmed->rc_cbinfo;
8098 if (cbp->cb_callback.cb_location.r_addr &&
8099 cbp->cb_callback.cb_location.r_netid) {
8100 cb_client4 *cbcp = &cbp->cb_callback;
8101
8102 len = strlen(cbcp->cb_location.r_addr)+1;
8103 addr = kmem_alloc(len, KM_SLEEP);
8104 bcopy(cbcp->cb_location.r_addr, addr, len);
8105 len = strlen(cbcp->cb_location.r_netid)+1;
8106 netid = kmem_alloc(len, KM_SLEEP);
8107 bcopy(cbcp->cb_location.r_netid, netid, len);
8108 }
8109
8110 res->SETCLIENTID4res_u.client_using.r_addr = addr;
8111 res->SETCLIENTID4res_u.client_using.r_netid = netid;
8112
8113 rfs4_client_rele(cp_confirmed);
8114 }
8115
8116 /*
8117 * Confirmed, creds match, and verifier matches; must
8118 * be an update of the callback info
8119 */
8120 if (cp_confirmed->rc_nfs_client.verifier ==
8121 args->client.verifier) {
8122 /* Setup callback information */
8123 rfs4_client_setcb(cp_confirmed, &args->callback,
8124 args->callback_ident);
8125
8126 /* everything okay -- move ahead */
8127 *cs->statusp = res->status = NFS4_OK;
8128 res->SETCLIENTID4res_u.resok4.clientid =
8129 cp_confirmed->rc_clientid;
8130
8131 /* update the confirm_verifier and return it */
8132 rfs4_client_scv_next(cp_confirmed);
8133 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8134 cp_confirmed->rc_confirm_verf;
8135
8136 rfs4_client_rele(cp_confirmed);
8137 goto out;
8138 }
8139
8140 /*
8141 * Creds match but the verifier doesn't. Must search
8142 * for an unconfirmed client that would be replaced by
8143 * this request.
8144 */
8145 create = FALSE;
8146 cp_unconfirmed = rfs4_findclient(&args->client, &create,
8147 cp_confirmed);
8148 }
8149
8150 /*
8151 * At this point, we have taken care of the brand new client
8152 * struct, INUSE case, update of an existing, and confirmed
8153 * client struct.
8154 */
8155
8156 /*
8157 * check to see if things have changed while we originally
8158 * picked up the client struct. If they have, then return and
8159 * retry the processing of this SETCLIENTID request.
8160 */
8161 if (cp_unconfirmed) {
8162 rfs4_dbe_lock(cp_unconfirmed->rc_dbe);
8163 if (!cp_unconfirmed->rc_need_confirm) {
8164 rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8165 rfs4_client_rele(cp_unconfirmed);
8166 if (cp_confirmed)
8167 rfs4_client_rele(cp_confirmed);
8168 goto retry;
8169 }
8170 /* do away with the old unconfirmed one */
8171 rfs4_dbe_invalidate(cp_unconfirmed->rc_dbe);
8172 rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8173 rfs4_client_rele(cp_unconfirmed);
8174 cp_unconfirmed = NULL;
8175 }
8176
8177 /*
8178 * This search will temporarily hide the confirmed client
8179 * struct while a new client struct is created as the
8180 * unconfirmed one.
8181 */
8182 create = TRUE;
8183 newcp = rfs4_findclient(&args->client, &create, cp_confirmed);
8184
8185 ASSERT(newcp != NULL);
8186
8187 if (newcp == NULL) {
8188 *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8189 rfs4_client_rele(cp_confirmed);
8190 goto out;
8191 }
8192
8193 /*
8194 * If one was not created, then a similar request must be in
8195 * process so release and start over with this one
8196 */
8197 if (create != TRUE) {
8198 rfs4_client_rele(newcp);
8199 if (cp_confirmed)
8200 rfs4_client_rele(cp_confirmed);
8201 goto retry;
8202 }
8203
8204 *cs->statusp = res->status = NFS4_OK;
8205 res->SETCLIENTID4res_u.resok4.clientid = newcp->rc_clientid;
8206 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8207 newcp->rc_confirm_verf;
8208 /* Setup callback information; CB_NULL confirmation later */
8209 rfs4_client_setcb(newcp, &args->callback, args->callback_ident);
8210
8211 newcp->rc_cp_confirmed = cp_confirmed;
8212
8213 rfs4_client_rele(newcp);
8214
8215 out:
8216 DTRACE_NFSV4_2(op__setclientid__done, struct compound_state *, cs,
8217 SETCLIENTID4res *, res);
8218 }
8219
8220 /*ARGSUSED*/
8221 void
8222 rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
8223 struct svc_req *req, struct compound_state *cs)
8224 {
8225 SETCLIENTID_CONFIRM4args *args =
8226 &argop->nfs_argop4_u.opsetclientid_confirm;
8227 SETCLIENTID_CONFIRM4res *res =
8228 &resop->nfs_resop4_u.opsetclientid_confirm;
8229 rfs4_client_t *cp, *cptoclose = NULL;
8230 nfs4_srv_t *nsrv4;
8231
8232 DTRACE_NFSV4_2(op__setclientid__confirm__start,
8233 struct compound_state *, cs,
8234 SETCLIENTID_CONFIRM4args *, args);
8235
8236 nsrv4 = zone_getspecific(rfs4_zone_key, curzone);
8237 *cs->statusp = res->status = NFS4_OK;
8238
8239 cp = rfs4_findclient_by_id(args->clientid, TRUE);
8240
8241 if (cp == NULL) {
8242 *cs->statusp = res->status =
8243 rfs4_check_clientid(&args->clientid, 1);
8244 goto out;
8245 }
8246
8247 if (!creds_ok(cp, req, cs)) {
8248 *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8249 rfs4_client_rele(cp);
8250 goto out;
8251 }
8252
8253 /* If the verifier doesn't match, the record doesn't match */
8254 if (cp->rc_confirm_verf != args->setclientid_confirm) {
8255 *cs->statusp = res->status = NFS4ERR_STALE_CLIENTID;
8256 rfs4_client_rele(cp);
8257 goto out;
8258 }
8259
8260 rfs4_dbe_lock(cp->rc_dbe);
8261 cp->rc_need_confirm = FALSE;
8262 if (cp->rc_cp_confirmed) {
8263 cptoclose = cp->rc_cp_confirmed;
8264 cptoclose->rc_ss_remove = 1;
8265 cp->rc_cp_confirmed = NULL;
8266 }
8267
8268 /*
8269 * Update the client's associated server instance, if it's changed
8270 * since the client was created.
8271 */
8272 if (rfs4_servinst(cp) != nsrv4->nfs4_cur_servinst)
8273 rfs4_servinst_assign(nsrv4, cp, nsrv4->nfs4_cur_servinst);
8274
8275 /*
8276 * Record clientid in stable storage.
8277 * Must be done after server instance has been assigned.
8278 */
8279 rfs4_ss_clid(nsrv4, cp);
8280
8281 rfs4_dbe_unlock(cp->rc_dbe);
8282
8283 if (cptoclose)
8284 /* don't need to rele, client_close does it */
8285 rfs4_client_close(cptoclose);
8286
8287 /* If needed, initiate CB_NULL call for callback path */
8288 rfs4_deleg_cb_check(cp);
8289 rfs4_update_lease(cp);
8290
8291 /*
8292 * Check to see if client can perform reclaims
8293 */
8294 rfs4_ss_chkclid(nsrv4, cp);
8295
8296 rfs4_client_rele(cp);
8297
8298 out:
8299 DTRACE_NFSV4_2(op__setclientid__confirm__done,
8300 struct compound_state *, cs,
8301 SETCLIENTID_CONFIRM4 *, res);
8302 }
8303
8304
8305 /*ARGSUSED*/
8306 void
8307 rfs4_op_close(nfs_argop4 *argop, nfs_resop4 *resop,
8308 struct svc_req *req, struct compound_state *cs)
8309 {
8310 CLOSE4args *args = &argop->nfs_argop4_u.opclose;
8311 CLOSE4res *resp = &resop->nfs_resop4_u.opclose;
8312 rfs4_state_t *sp;
8313 nfsstat4 status;
8314
8315 DTRACE_NFSV4_2(op__close__start, struct compound_state *, cs,
8316 CLOSE4args *, args);
8317
8318 if (cs->vp == NULL) {
8319 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8320 goto out;
8321 }
8322
8323 status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_INVALID);
8324 if (status != NFS4_OK) {
8325 *cs->statusp = resp->status = status;
8326 goto out;
8327 }
8328
8329 /* Ensure specified filehandle matches */
8330 if (cs->vp != sp->rs_finfo->rf_vp) {
8331 rfs4_state_rele(sp);
8332 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8333 goto out;
8334 }
8335
8336 /* hold off other access to open_owner while we tinker */
8337 rfs4_sw_enter(&sp->rs_owner->ro_sw);
8338
8339 switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
8340 case NFS4_CHECK_STATEID_OKAY:
8341 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8342 resop) != NFS4_CHKSEQ_OKAY) {
8343 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8344 goto end;
8345 }
8346 break;
8347 case NFS4_CHECK_STATEID_OLD:
8348 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8349 goto end;
8350 case NFS4_CHECK_STATEID_BAD:
8351 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8352 goto end;
8353 case NFS4_CHECK_STATEID_EXPIRED:
8354 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8355 goto end;
8356 case NFS4_CHECK_STATEID_CLOSED:
8357 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8358 goto end;
8359 case NFS4_CHECK_STATEID_UNCONFIRMED:
8360 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8361 goto end;
8362 case NFS4_CHECK_STATEID_REPLAY:
8363 /* Check the sequence id for the open owner */
8364 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8365 resop)) {
8366 case NFS4_CHKSEQ_OKAY:
8367 /*
8368 * This is replayed stateid; if seqid matches
8369 * next expected, then client is using wrong seqid.
8370 */
8371 /* FALL THROUGH */
8372 case NFS4_CHKSEQ_BAD:
8373 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8374 goto end;
8375 case NFS4_CHKSEQ_REPLAY:
8376 /*
8377 * Note this case is the duplicate case so
8378 * resp->status is already set.
8379 */
8380 *cs->statusp = resp->status;
8381 rfs4_update_lease(sp->rs_owner->ro_client);
8382 goto end;
8383 }
8384 break;
8385 default:
8386 ASSERT(FALSE);
8387 break;
8388 }
8389
8390 rfs4_dbe_lock(sp->rs_dbe);
8391
8392 /* Update the stateid. */
8393 next_stateid(&sp->rs_stateid);
8394 resp->open_stateid = sp->rs_stateid.stateid;
8395
8396 rfs4_dbe_unlock(sp->rs_dbe);
8397
8398 rfs4_update_lease(sp->rs_owner->ro_client);
8399 rfs4_update_open_sequence(sp->rs_owner);
8400 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8401
8402 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
8403
8404 *cs->statusp = resp->status = status;
8405
8406 end:
8407 rfs4_sw_exit(&sp->rs_owner->ro_sw);
8408 rfs4_state_rele(sp);
8409 out:
8410 DTRACE_NFSV4_2(op__close__done, struct compound_state *, cs,
8411 CLOSE4res *, resp);
8412 }
8413
8414 /*
8415 * Manage the counts on the file struct and close all file locks
8416 */
8417 /*ARGSUSED*/
8418 void
8419 rfs4_release_share_lock_state(rfs4_state_t *sp, cred_t *cr,
8420 bool_t close_of_client)
8421 {
8422 rfs4_file_t *fp = sp->rs_finfo;
8423 rfs4_lo_state_t *lsp;
8424 int fflags = 0;
8425
8426 /*
8427 * If this call is part of the larger closing down of client
8428 * state then it is just easier to release all locks
8429 * associated with this client instead of going through each
8430 * individual file and cleaning locks there.
8431 */
8432 if (close_of_client) {
8433 if (sp->rs_owner->ro_client->rc_unlksys_completed == FALSE &&
8434 !list_is_empty(&sp->rs_lostatelist) &&
8435 sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID) {
8436 /* Is the PxFS kernel module loaded? */
8437 if (lm_remove_file_locks != NULL) {
8438 int new_sysid;
8439
8440 /* Encode the cluster nodeid in new sysid */
8441 new_sysid = sp->rs_owner->ro_client->rc_sysidt;
8442 lm_set_nlmid_flk(&new_sysid);
8443
8444 /*
8445 * This PxFS routine removes file locks for a
8446 * client over all nodes of a cluster.
8447 */
8448 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
8449 "lm_remove_file_locks(sysid=0x%x)\n",
8450 new_sysid));
8451 (*lm_remove_file_locks)(new_sysid);
8452 } else {
8453 struct flock64 flk;
8454
8455 /* Release all locks for this client */
8456 flk.l_type = F_UNLKSYS;
8457 flk.l_whence = 0;
8458 flk.l_start = 0;
8459 flk.l_len = 0;
8460 flk.l_sysid =
8461 sp->rs_owner->ro_client->rc_sysidt;
8462 flk.l_pid = 0;
8463 (void) VOP_FRLOCK(sp->rs_finfo->rf_vp, F_SETLK,
8464 &flk, F_REMOTELOCK | FREAD | FWRITE,
8465 (u_offset_t)0, NULL, CRED(), NULL);
8466 }
8467
8468 sp->rs_owner->ro_client->rc_unlksys_completed = TRUE;
8469 }
8470 }
8471
8472 /*
8473 * Release all locks on this file by this lock owner or at
8474 * least mark the locks as having been released
8475 */
8476 for (lsp = list_head(&sp->rs_lostatelist); lsp != NULL;
8477 lsp = list_next(&sp->rs_lostatelist, lsp)) {
8478 lsp->rls_locks_cleaned = TRUE;
8479
8480 /* Was this already taken care of above? */
8481 if (!close_of_client &&
8482 sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8483 (void) cleanlocks(sp->rs_finfo->rf_vp,
8484 lsp->rls_locker->rl_pid,
8485 lsp->rls_locker->rl_client->rc_sysidt);
8486 }
8487
8488 /*
8489 * Release any shrlocks associated with this open state ID.
8490 * This must be done before the rfs4_state gets marked closed.
8491 */
8492 if (sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8493 (void) rfs4_unshare(sp);
8494
8495 if (sp->rs_open_access) {
8496 rfs4_dbe_lock(fp->rf_dbe);
8497
8498 /*
8499 * Decrement the count for each access and deny bit that this
8500 * state has contributed to the file.
8501 * If the file counts go to zero
8502 * clear the appropriate bit in the appropriate mask.
8503 */
8504 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) {
8505 fp->rf_access_read--;
8506 fflags |= FREAD;
8507 if (fp->rf_access_read == 0)
8508 fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
8509 }
8510 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) {
8511 fp->rf_access_write--;
8512 fflags |= FWRITE;
8513 if (fp->rf_access_write == 0)
8514 fp->rf_share_access &=
8515 ~OPEN4_SHARE_ACCESS_WRITE;
8516 }
8517 if (sp->rs_open_deny & OPEN4_SHARE_DENY_READ) {
8518 fp->rf_deny_read--;
8519 if (fp->rf_deny_read == 0)
8520 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
8521 }
8522 if (sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) {
8523 fp->rf_deny_write--;
8524 if (fp->rf_deny_write == 0)
8525 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
8526 }
8527
8528 (void) VOP_CLOSE(fp->rf_vp, fflags, 1, (offset_t)0, cr, NULL);
8529
8530 rfs4_dbe_unlock(fp->rf_dbe);
8531
8532 sp->rs_open_access = 0;
8533 sp->rs_open_deny = 0;
8534 }
8535 }
8536
8537 /*
8538 * lock_denied: Fill in a LOCK4deneid structure given an flock64 structure.
8539 */
8540 static nfsstat4
8541 lock_denied(LOCK4denied *dp, struct flock64 *flk)
8542 {
8543 rfs4_lockowner_t *lo;
8544 rfs4_client_t *cp;
8545 uint32_t len;
8546
8547 lo = rfs4_findlockowner_by_pid(flk->l_pid);
8548 if (lo != NULL) {
8549 cp = lo->rl_client;
8550 if (rfs4_lease_expired(cp)) {
8551 rfs4_lockowner_rele(lo);
8552 rfs4_dbe_hold(cp->rc_dbe);
8553 rfs4_client_close(cp);
8554 return (NFS4ERR_EXPIRED);
8555 }
8556 dp->owner.clientid = lo->rl_owner.clientid;
8557 len = lo->rl_owner.owner_len;
8558 dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8559 bcopy(lo->rl_owner.owner_val, dp->owner.owner_val, len);
8560 dp->owner.owner_len = len;
8561 rfs4_lockowner_rele(lo);
8562 goto finish;
8563 }
8564
8565 /*
8566 * Its not a NFS4 lock. We take advantage that the upper 32 bits
8567 * of the client id contain the boot time for a NFS4 lock. So we
8568 * fabricate and identity by setting clientid to the sysid, and
8569 * the lock owner to the pid.
8570 */
8571 dp->owner.clientid = flk->l_sysid;
8572 len = sizeof (pid_t);
8573 dp->owner.owner_len = len;
8574 dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8575 bcopy(&flk->l_pid, dp->owner.owner_val, len);
8576 finish:
8577 dp->offset = flk->l_start;
8578 dp->length = flk->l_len;
8579
8580 if (flk->l_type == F_RDLCK)
8581 dp->locktype = READ_LT;
8582 else if (flk->l_type == F_WRLCK)
8583 dp->locktype = WRITE_LT;
8584 else
8585 return (NFS4ERR_INVAL); /* no mapping from POSIX ltype to v4 */
8586
8587 return (NFS4_OK);
8588 }
8589
8590 /*
8591 * The NFSv4.0 LOCK operation does not support the blocking lock (at the
8592 * NFSv4.0 protocol level) so the client needs to resend the LOCK request in a
8593 * case the lock is denied by the NFSv4.0 server. NFSv4.0 clients are prepared
8594 * for that (obviously); they are sending the LOCK requests with some delays
8595 * between the attempts. See nfs4frlock() and nfs4_block_and_wait() for the
8596 * locking and delay implementation at the client side.
8597 *
8598 * To make the life of the clients easier, the NFSv4.0 server tries to do some
8599 * fast retries on its own (the for loop below) in a hope the lock will be
8600 * available soon. And if not, the client won't need to resend the LOCK
8601 * requests so fast to check the lock availability. This basically saves some
8602 * network traffic and tries to make sure the client gets the lock ASAP.
8603 */
8604 static int
8605 setlock(vnode_t *vp, struct flock64 *flock, int flag, cred_t *cred)
8606 {
8607 int error;
8608 struct flock64 flk;
8609 int i;
8610 clock_t delaytime;
8611 int cmd;
8612 int spin_cnt = 0;
8613
8614 cmd = nbl_need_check(vp) ? F_SETLK_NBMAND : F_SETLK;
8615 retry:
8616 delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
8617
8618 for (i = 0; i < rfs4_maxlock_tries; i++) {
8619 LOCK_PRINT(rfs4_debug, "setlock", cmd, flock);
8620 error = VOP_FRLOCK(vp, cmd,
8621 flock, flag, (u_offset_t)0, NULL, cred, NULL);
8622
8623 if (error != EAGAIN && error != EACCES)
8624 break;
8625
8626 if (i < rfs4_maxlock_tries - 1) {
8627 delay(delaytime);
8628 delaytime *= 2;
8629 }
8630 }
8631
8632 if (error == EAGAIN || error == EACCES) {
8633 /* Get the owner of the lock */
8634 flk = *flock;
8635 LOCK_PRINT(rfs4_debug, "setlock", F_GETLK, &flk);
8636 if (VOP_FRLOCK(vp, F_GETLK, &flk, flag, 0, NULL, cred,
8637 NULL) == 0) {
8638 /*
8639 * There's a race inherent in the current VOP_FRLOCK
8640 * design where:
8641 * a: "other guy" takes a lock that conflicts with a
8642 * lock we want
8643 * b: we attempt to take our lock (non-blocking) and
8644 * the attempt fails.
8645 * c: "other guy" releases the conflicting lock
8646 * d: we ask what lock conflicts with the lock we want,
8647 * getting F_UNLCK (no lock blocks us)
8648 *
8649 * If we retry the non-blocking lock attempt in this
8650 * case (restart at step 'b') there's some possibility
8651 * that many such attempts might fail. However a test
8652 * designed to actually provoke this race shows that
8653 * the vast majority of cases require no retry, and
8654 * only a few took as many as three retries. Here's
8655 * the test outcome:
8656 *
8657 * number of retries how many times we needed
8658 * that many retries
8659 * 0 79461
8660 * 1 862
8661 * 2 49
8662 * 3 5
8663 *
8664 * Given those empirical results, we arbitrarily limit
8665 * the retry count to ten.
8666 *
8667 * If we actually make to ten retries and give up,
8668 * nothing catastrophic happens, but we're unable to
8669 * return the information about the conflicting lock to
8670 * the NFS client. That's an acceptable trade off vs.
8671 * letting this retry loop run forever.
8672 */
8673 if (flk.l_type == F_UNLCK) {
8674 if (spin_cnt++ < 10) {
8675 /* No longer locked, retry */
8676 goto retry;
8677 }
8678 } else {
8679 *flock = flk;
8680 LOCK_PRINT(rfs4_debug, "setlock(blocking lock)",
8681 F_GETLK, &flk);
8682 }
8683 }
8684 }
8685
8686 return (error);
8687 }
8688
8689 /*ARGSUSED*/
8690 static nfsstat4
8691 rfs4_do_lock(rfs4_lo_state_t *lsp, nfs_lock_type4 locktype,
8692 offset4 offset, length4 length, cred_t *cred, nfs_resop4 *resop)
8693 {
8694 nfsstat4 status;
8695 rfs4_lockowner_t *lo = lsp->rls_locker;
8696 rfs4_state_t *sp = lsp->rls_state;
8697 struct flock64 flock;
8698 int16_t ltype;
8699 int flag;
8700 int error;
8701 sysid_t sysid;
8702 LOCK4res *lres;
8703 vnode_t *vp;
8704
8705 if (rfs4_lease_expired(lo->rl_client)) {
8706 return (NFS4ERR_EXPIRED);
8707 }
8708
8709 if ((status = rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
8710 return (status);
8711
8712 /* Check for zero length. To lock to end of file use all ones for V4 */
8713 if (length == 0)
8714 return (NFS4ERR_INVAL);
8715 else if (length == (length4)(~0))
8716 length = 0; /* Posix to end of file */
8717
8718 retry:
8719 rfs4_dbe_lock(sp->rs_dbe);
8720 if (sp->rs_closed == TRUE) {
8721 rfs4_dbe_unlock(sp->rs_dbe);
8722 return (NFS4ERR_OLD_STATEID);
8723 }
8724
8725 if (resop->resop != OP_LOCKU) {
8726 switch (locktype) {
8727 case READ_LT:
8728 case READW_LT:
8729 if ((sp->rs_share_access
8730 & OPEN4_SHARE_ACCESS_READ) == 0) {
8731 rfs4_dbe_unlock(sp->rs_dbe);
8732
8733 return (NFS4ERR_OPENMODE);
8734 }
8735 ltype = F_RDLCK;
8736 break;
8737 case WRITE_LT:
8738 case WRITEW_LT:
8739 if ((sp->rs_share_access
8740 & OPEN4_SHARE_ACCESS_WRITE) == 0) {
8741 rfs4_dbe_unlock(sp->rs_dbe);
8742
8743 return (NFS4ERR_OPENMODE);
8744 }
8745 ltype = F_WRLCK;
8746 break;
8747 }
8748 } else
8749 ltype = F_UNLCK;
8750
8751 flock.l_type = ltype;
8752 flock.l_whence = 0; /* SEEK_SET */
8753 flock.l_start = offset;
8754 flock.l_len = length;
8755 flock.l_sysid = sysid;
8756 flock.l_pid = lsp->rls_locker->rl_pid;
8757
8758 /* Note that length4 is uint64_t but l_len and l_start are off64_t */
8759 if (flock.l_len < 0 || flock.l_start < 0) {
8760 rfs4_dbe_unlock(sp->rs_dbe);
8761 return (NFS4ERR_INVAL);
8762 }
8763
8764 /*
8765 * N.B. FREAD has the same value as OPEN4_SHARE_ACCESS_READ and
8766 * FWRITE has the same value as OPEN4_SHARE_ACCESS_WRITE.
8767 */
8768 flag = (int)sp->rs_share_access | F_REMOTELOCK;
8769
8770 vp = sp->rs_finfo->rf_vp;
8771 VN_HOLD(vp);
8772
8773 /*
8774 * We need to unlock sp before we call the underlying filesystem to
8775 * acquire the file lock.
8776 */
8777 rfs4_dbe_unlock(sp->rs_dbe);
8778
8779 error = setlock(vp, &flock, flag, cred);
8780
8781 /*
8782 * Make sure the file is still open. In a case the file was closed in
8783 * the meantime, clean the lock we acquired using the setlock() call
8784 * above, and return the appropriate error.
8785 */
8786 rfs4_dbe_lock(sp->rs_dbe);
8787 if (sp->rs_closed == TRUE) {
8788 cleanlocks(vp, lsp->rls_locker->rl_pid, sysid);
8789 rfs4_dbe_unlock(sp->rs_dbe);
8790
8791 VN_RELE(vp);
8792
8793 return (NFS4ERR_OLD_STATEID);
8794 }
8795 rfs4_dbe_unlock(sp->rs_dbe);
8796
8797 VN_RELE(vp);
8798
8799 if (error == 0) {
8800 rfs4_dbe_lock(lsp->rls_dbe);
8801 next_stateid(&lsp->rls_lockid);
8802 rfs4_dbe_unlock(lsp->rls_dbe);
8803 }
8804
8805 /*
8806 * N.B. We map error values to nfsv4 errors. This is differrent
8807 * than puterrno4 routine.
8808 */
8809 switch (error) {
8810 case 0:
8811 status = NFS4_OK;
8812 break;
8813 case EAGAIN:
8814 case EACCES: /* Old value */
8815 /* Can only get here if op is OP_LOCK */
8816 ASSERT(resop->resop == OP_LOCK);
8817 lres = &resop->nfs_resop4_u.oplock;
8818 status = NFS4ERR_DENIED;
8819 if (lock_denied(&lres->LOCK4res_u.denied, &flock)
8820 == NFS4ERR_EXPIRED)
8821 goto retry;
8822 break;
8823 case ENOLCK:
8824 status = NFS4ERR_DELAY;
8825 break;
8826 case EOVERFLOW:
8827 status = NFS4ERR_INVAL;
8828 break;
8829 case EINVAL:
8830 status = NFS4ERR_NOTSUPP;
8831 break;
8832 default:
8833 status = NFS4ERR_SERVERFAULT;
8834 break;
8835 }
8836
8837 return (status);
8838 }
8839
8840 /*ARGSUSED*/
8841 void
8842 rfs4_op_lock(nfs_argop4 *argop, nfs_resop4 *resop,
8843 struct svc_req *req, struct compound_state *cs)
8844 {
8845 LOCK4args *args = &argop->nfs_argop4_u.oplock;
8846 LOCK4res *resp = &resop->nfs_resop4_u.oplock;
8847 nfsstat4 status;
8848 stateid4 *stateid;
8849 rfs4_lockowner_t *lo;
8850 rfs4_client_t *cp;
8851 rfs4_state_t *sp = NULL;
8852 rfs4_lo_state_t *lsp = NULL;
8853 bool_t ls_sw_held = FALSE;
8854 bool_t create = TRUE;
8855 bool_t lcreate = TRUE;
8856 bool_t dup_lock = FALSE;
8857 int rc;
8858
8859 DTRACE_NFSV4_2(op__lock__start, struct compound_state *, cs,
8860 LOCK4args *, args);
8861
8862 if (cs->vp == NULL) {
8863 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8864 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8865 cs, LOCK4res *, resp);
8866 return;
8867 }
8868
8869 if (args->locker.new_lock_owner) {
8870 /* Create a new lockowner for this instance */
8871 open_to_lock_owner4 *olo = &args->locker.locker4_u.open_owner;
8872
8873 NFS4_DEBUG(rfs4_debug, (CE_NOTE, "Creating new lock owner"));
8874
8875 stateid = &olo->open_stateid;
8876 status = rfs4_get_state(stateid, &sp, RFS4_DBS_VALID);
8877 if (status != NFS4_OK) {
8878 NFS4_DEBUG(rfs4_debug,
8879 (CE_NOTE, "Get state failed in lock %d", status));
8880 *cs->statusp = resp->status = status;
8881 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8882 cs, LOCK4res *, resp);
8883 return;
8884 }
8885
8886 /* Ensure specified filehandle matches */
8887 if (cs->vp != sp->rs_finfo->rf_vp) {
8888 rfs4_state_rele(sp);
8889 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8890 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8891 cs, LOCK4res *, resp);
8892 return;
8893 }
8894
8895 /* hold off other access to open_owner while we tinker */
8896 rfs4_sw_enter(&sp->rs_owner->ro_sw);
8897
8898 switch (rc = rfs4_check_stateid_seqid(sp, stateid)) {
8899 case NFS4_CHECK_STATEID_OLD:
8900 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8901 goto end;
8902 case NFS4_CHECK_STATEID_BAD:
8903 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8904 goto end;
8905 case NFS4_CHECK_STATEID_EXPIRED:
8906 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8907 goto end;
8908 case NFS4_CHECK_STATEID_UNCONFIRMED:
8909 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8910 goto end;
8911 case NFS4_CHECK_STATEID_CLOSED:
8912 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8913 goto end;
8914 case NFS4_CHECK_STATEID_OKAY:
8915 case NFS4_CHECK_STATEID_REPLAY:
8916 switch (rfs4_check_olo_seqid(olo->open_seqid,
8917 sp->rs_owner, resop)) {
8918 case NFS4_CHKSEQ_OKAY:
8919 if (rc == NFS4_CHECK_STATEID_OKAY)
8920 break;
8921 /*
8922 * This is replayed stateid; if seqid
8923 * matches next expected, then client
8924 * is using wrong seqid.
8925 */
8926 /* FALLTHROUGH */
8927 case NFS4_CHKSEQ_BAD:
8928 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8929 goto end;
8930 case NFS4_CHKSEQ_REPLAY:
8931 /* This is a duplicate LOCK request */
8932 dup_lock = TRUE;
8933
8934 /*
8935 * For a duplicate we do not want to
8936 * create a new lockowner as it should
8937 * already exist.
8938 * Turn off the lockowner create flag.
8939 */
8940 lcreate = FALSE;
8941 }
8942 break;
8943 }
8944
8945 lo = rfs4_findlockowner(&olo->lock_owner, &lcreate);
8946 if (lo == NULL) {
8947 NFS4_DEBUG(rfs4_debug,
8948 (CE_NOTE, "rfs4_op_lock: no lock owner"));
8949 *cs->statusp = resp->status = NFS4ERR_RESOURCE;
8950 goto end;
8951 }
8952
8953 lsp = rfs4_findlo_state_by_owner(lo, sp, &create);
8954 if (lsp == NULL) {
8955 rfs4_update_lease(sp->rs_owner->ro_client);
8956 /*
8957 * Only update theh open_seqid if this is not
8958 * a duplicate request
8959 */
8960 if (dup_lock == FALSE) {
8961 rfs4_update_open_sequence(sp->rs_owner);
8962 }
8963
8964 NFS4_DEBUG(rfs4_debug,
8965 (CE_NOTE, "rfs4_op_lock: no state"));
8966 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
8967 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8968 rfs4_lockowner_rele(lo);
8969 goto end;
8970 }
8971
8972 /*
8973 * This is the new_lock_owner branch and the client is
8974 * supposed to be associating a new lock_owner with
8975 * the open file at this point. If we find that a
8976 * lock_owner/state association already exists and a
8977 * successful LOCK request was returned to the client,
8978 * an error is returned to the client since this is
8979 * not appropriate. The client should be using the
8980 * existing lock_owner branch.
8981 */
8982 if (dup_lock == FALSE && create == FALSE) {
8983 if (lsp->rls_lock_completed == TRUE) {
8984 *cs->statusp =
8985 resp->status = NFS4ERR_BAD_SEQID;
8986 rfs4_lockowner_rele(lo);
8987 goto end;
8988 }
8989 }
8990
8991 rfs4_update_lease(sp->rs_owner->ro_client);
8992
8993 /*
8994 * Only update theh open_seqid if this is not
8995 * a duplicate request
8996 */
8997 if (dup_lock == FALSE) {
8998 rfs4_update_open_sequence(sp->rs_owner);
8999 }
9000
9001 /*
9002 * If this is a duplicate lock request, just copy the
9003 * previously saved reply and return.
9004 */
9005 if (dup_lock == TRUE) {
9006 /* verify that lock_seqid's match */
9007 if (lsp->rls_seqid != olo->lock_seqid) {
9008 NFS4_DEBUG(rfs4_debug,
9009 (CE_NOTE, "rfs4_op_lock: Dup-Lock seqid bad"
9010 "lsp->seqid=%d old->seqid=%d",
9011 lsp->rls_seqid, olo->lock_seqid));
9012 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9013 } else {
9014 rfs4_copy_reply(resop, &lsp->rls_reply);
9015 /*
9016 * Make sure to copy the just
9017 * retrieved reply status into the
9018 * overall compound status
9019 */
9020 *cs->statusp = resp->status;
9021 }
9022 rfs4_lockowner_rele(lo);
9023 goto end;
9024 }
9025
9026 rfs4_dbe_lock(lsp->rls_dbe);
9027
9028 /* Make sure to update the lock sequence id */
9029 lsp->rls_seqid = olo->lock_seqid;
9030
9031 NFS4_DEBUG(rfs4_debug,
9032 (CE_NOTE, "Lock seqid established as %d", lsp->rls_seqid));
9033
9034 /*
9035 * This is used to signify the newly created lockowner
9036 * stateid and its sequence number. The checks for
9037 * sequence number and increment don't occur on the
9038 * very first lock request for a lockowner.
9039 */
9040 lsp->rls_skip_seqid_check = TRUE;
9041
9042 /* hold off other access to lsp while we tinker */
9043 rfs4_sw_enter(&lsp->rls_sw);
9044 ls_sw_held = TRUE;
9045
9046 rfs4_dbe_unlock(lsp->rls_dbe);
9047
9048 rfs4_lockowner_rele(lo);
9049 } else {
9050 stateid = &args->locker.locker4_u.lock_owner.lock_stateid;
9051 /* get lsp and hold the lock on the underlying file struct */
9052 if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE))
9053 != NFS4_OK) {
9054 *cs->statusp = resp->status = status;
9055 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9056 cs, LOCK4res *, resp);
9057 return;
9058 }
9059 create = FALSE; /* We didn't create lsp */
9060
9061 /* Ensure specified filehandle matches */
9062 if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
9063 rfs4_lo_state_rele(lsp, TRUE);
9064 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9065 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9066 cs, LOCK4res *, resp);
9067 return;
9068 }
9069
9070 /* hold off other access to lsp while we tinker */
9071 rfs4_sw_enter(&lsp->rls_sw);
9072 ls_sw_held = TRUE;
9073
9074 switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
9075 /*
9076 * The stateid looks like it was okay (expected to be
9077 * the next one)
9078 */
9079 case NFS4_CHECK_STATEID_OKAY:
9080 /*
9081 * The sequence id is now checked. Determine
9082 * if this is a replay or if it is in the
9083 * expected (next) sequence. In the case of a
9084 * replay, there are two replay conditions
9085 * that may occur. The first is the normal
9086 * condition where a LOCK is done with a
9087 * NFS4_OK response and the stateid is
9088 * updated. That case is handled below when
9089 * the stateid is identified as a REPLAY. The
9090 * second is the case where an error is
9091 * returned, like NFS4ERR_DENIED, and the
9092 * sequence number is updated but the stateid
9093 * is not updated. This second case is dealt
9094 * with here. So it may seem odd that the
9095 * stateid is okay but the sequence id is a
9096 * replay but it is okay.
9097 */
9098 switch (rfs4_check_lock_seqid(
9099 args->locker.locker4_u.lock_owner.lock_seqid,
9100 lsp, resop)) {
9101 case NFS4_CHKSEQ_REPLAY:
9102 if (resp->status != NFS4_OK) {
9103 /*
9104 * Here is our replay and need
9105 * to verify that the last
9106 * response was an error.
9107 */
9108 *cs->statusp = resp->status;
9109 goto end;
9110 }
9111 /*
9112 * This is done since the sequence id
9113 * looked like a replay but it didn't
9114 * pass our check so a BAD_SEQID is
9115 * returned as a result.
9116 */
9117 /*FALLTHROUGH*/
9118 case NFS4_CHKSEQ_BAD:
9119 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9120 goto end;
9121 case NFS4_CHKSEQ_OKAY:
9122 /* Everything looks okay move ahead */
9123 break;
9124 }
9125 break;
9126 case NFS4_CHECK_STATEID_OLD:
9127 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9128 goto end;
9129 case NFS4_CHECK_STATEID_BAD:
9130 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9131 goto end;
9132 case NFS4_CHECK_STATEID_EXPIRED:
9133 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9134 goto end;
9135 case NFS4_CHECK_STATEID_CLOSED:
9136 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9137 goto end;
9138 case NFS4_CHECK_STATEID_REPLAY:
9139 switch (rfs4_check_lock_seqid(
9140 args->locker.locker4_u.lock_owner.lock_seqid,
9141 lsp, resop)) {
9142 case NFS4_CHKSEQ_OKAY:
9143 /*
9144 * This is a replayed stateid; if
9145 * seqid matches the next expected,
9146 * then client is using wrong seqid.
9147 */
9148 case NFS4_CHKSEQ_BAD:
9149 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9150 goto end;
9151 case NFS4_CHKSEQ_REPLAY:
9152 rfs4_update_lease(lsp->rls_locker->rl_client);
9153 *cs->statusp = status = resp->status;
9154 goto end;
9155 }
9156 break;
9157 default:
9158 ASSERT(FALSE);
9159 break;
9160 }
9161
9162 rfs4_update_lock_sequence(lsp);
9163 rfs4_update_lease(lsp->rls_locker->rl_client);
9164 }
9165
9166 /*
9167 * NFS4 only allows locking on regular files, so
9168 * verify type of object.
9169 */
9170 if (cs->vp->v_type != VREG) {
9171 if (cs->vp->v_type == VDIR)
9172 status = NFS4ERR_ISDIR;
9173 else
9174 status = NFS4ERR_INVAL;
9175 goto out;
9176 }
9177
9178 cp = lsp->rls_state->rs_owner->ro_client;
9179
9180 if (rfs4_clnt_in_grace(cp) && !args->reclaim) {
9181 status = NFS4ERR_GRACE;
9182 goto out;
9183 }
9184
9185 if (rfs4_clnt_in_grace(cp) && args->reclaim && !cp->rc_can_reclaim) {
9186 status = NFS4ERR_NO_GRACE;
9187 goto out;
9188 }
9189
9190 if (!rfs4_clnt_in_grace(cp) && args->reclaim) {
9191 status = NFS4ERR_NO_GRACE;
9192 goto out;
9193 }
9194
9195 if (lsp->rls_state->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE)
9196 cs->deleg = TRUE;
9197
9198 status = rfs4_do_lock(lsp, args->locktype,
9199 args->offset, args->length, cs->cr, resop);
9200
9201 out:
9202 lsp->rls_skip_seqid_check = FALSE;
9203
9204 *cs->statusp = resp->status = status;
9205
9206 if (status == NFS4_OK) {
9207 resp->LOCK4res_u.lock_stateid = lsp->rls_lockid.stateid;
9208 lsp->rls_lock_completed = TRUE;
9209 }
9210 /*
9211 * Only update the "OPEN" response here if this was a new
9212 * lock_owner
9213 */
9214 if (sp)
9215 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
9216
9217 rfs4_update_lock_resp(lsp, resop);
9218
9219 end:
9220 if (lsp) {
9221 if (ls_sw_held)
9222 rfs4_sw_exit(&lsp->rls_sw);
9223 /*
9224 * If an sp obtained, then the lsp does not represent
9225 * a lock on the file struct.
9226 */
9227 if (sp != NULL)
9228 rfs4_lo_state_rele(lsp, FALSE);
9229 else
9230 rfs4_lo_state_rele(lsp, TRUE);
9231 }
9232 if (sp) {
9233 rfs4_sw_exit(&sp->rs_owner->ro_sw);
9234 rfs4_state_rele(sp);
9235 }
9236
9237 DTRACE_NFSV4_2(op__lock__done, struct compound_state *, cs,
9238 LOCK4res *, resp);
9239 }
9240
9241 /* free function for LOCK/LOCKT */
9242 static void
9243 lock_denied_free(nfs_resop4 *resop)
9244 {
9245 LOCK4denied *dp = NULL;
9246
9247 switch (resop->resop) {
9248 case OP_LOCK:
9249 if (resop->nfs_resop4_u.oplock.status == NFS4ERR_DENIED)
9250 dp = &resop->nfs_resop4_u.oplock.LOCK4res_u.denied;
9251 break;
9252 case OP_LOCKT:
9253 if (resop->nfs_resop4_u.oplockt.status == NFS4ERR_DENIED)
9254 dp = &resop->nfs_resop4_u.oplockt.denied;
9255 break;
9256 default:
9257 break;
9258 }
9259
9260 if (dp)
9261 kmem_free(dp->owner.owner_val, dp->owner.owner_len);
9262 }
9263
9264 /*ARGSUSED*/
9265 void
9266 rfs4_op_locku(nfs_argop4 *argop, nfs_resop4 *resop,
9267 struct svc_req *req, struct compound_state *cs)
9268 {
9269 LOCKU4args *args = &argop->nfs_argop4_u.oplocku;
9270 LOCKU4res *resp = &resop->nfs_resop4_u.oplocku;
9271 nfsstat4 status;
9272 stateid4 *stateid = &args->lock_stateid;
9273 rfs4_lo_state_t *lsp;
9274
9275 DTRACE_NFSV4_2(op__locku__start, struct compound_state *, cs,
9276 LOCKU4args *, args);
9277
9278 if (cs->vp == NULL) {
9279 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9280 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9281 LOCKU4res *, resp);
9282 return;
9283 }
9284
9285 if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE)) != NFS4_OK) {
9286 *cs->statusp = resp->status = status;
9287 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9288 LOCKU4res *, resp);
9289 return;
9290 }
9291
9292 /* Ensure specified filehandle matches */
9293 if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
9294 rfs4_lo_state_rele(lsp, TRUE);
9295 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9296 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9297 LOCKU4res *, resp);
9298 return;
9299 }
9300
9301 /* hold off other access to lsp while we tinker */
9302 rfs4_sw_enter(&lsp->rls_sw);
9303
9304 switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
9305 case NFS4_CHECK_STATEID_OKAY:
9306 if (rfs4_check_lock_seqid(args->seqid, lsp, resop)
9307 != NFS4_CHKSEQ_OKAY) {
9308 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9309 goto end;
9310 }
9311 break;
9312 case NFS4_CHECK_STATEID_OLD:
9313 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9314 goto end;
9315 case NFS4_CHECK_STATEID_BAD:
9316 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9317 goto end;
9318 case NFS4_CHECK_STATEID_EXPIRED:
9319 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9320 goto end;
9321 case NFS4_CHECK_STATEID_CLOSED:
9322 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9323 goto end;
9324 case NFS4_CHECK_STATEID_REPLAY:
9325 switch (rfs4_check_lock_seqid(args->seqid, lsp, resop)) {
9326 case NFS4_CHKSEQ_OKAY:
9327 /*
9328 * This is a replayed stateid; if
9329 * seqid matches the next expected,
9330 * then client is using wrong seqid.
9331 */
9332 case NFS4_CHKSEQ_BAD:
9333 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9334 goto end;
9335 case NFS4_CHKSEQ_REPLAY:
9336 rfs4_update_lease(lsp->rls_locker->rl_client);
9337 *cs->statusp = status = resp->status;
9338 goto end;
9339 }
9340 break;
9341 default:
9342 ASSERT(FALSE);
9343 break;
9344 }
9345
9346 rfs4_update_lock_sequence(lsp);
9347 rfs4_update_lease(lsp->rls_locker->rl_client);
9348
9349 /*
9350 * NFS4 only allows locking on regular files, so
9351 * verify type of object.
9352 */
9353 if (cs->vp->v_type != VREG) {
9354 if (cs->vp->v_type == VDIR)
9355 status = NFS4ERR_ISDIR;
9356 else
9357 status = NFS4ERR_INVAL;
9358 goto out;
9359 }
9360
9361 if (rfs4_clnt_in_grace(lsp->rls_state->rs_owner->ro_client)) {
9362 status = NFS4ERR_GRACE;
9363 goto out;
9364 }
9365
9366 status = rfs4_do_lock(lsp, args->locktype,
9367 args->offset, args->length, cs->cr, resop);
9368
9369 out:
9370 *cs->statusp = resp->status = status;
9371
9372 if (status == NFS4_OK)
9373 resp->lock_stateid = lsp->rls_lockid.stateid;
9374
9375 rfs4_update_lock_resp(lsp, resop);
9376
9377 end:
9378 rfs4_sw_exit(&lsp->rls_sw);
9379 rfs4_lo_state_rele(lsp, TRUE);
9380
9381 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9382 LOCKU4res *, resp);
9383 }
9384
9385 /*
9386 * LOCKT is a best effort routine, the client can not be guaranteed that
9387 * the status return is still in effect by the time the reply is received.
9388 * They are numerous race conditions in this routine, but we are not required
9389 * and can not be accurate.
9390 */
9391 /*ARGSUSED*/
9392 void
9393 rfs4_op_lockt(nfs_argop4 *argop, nfs_resop4 *resop,
9394 struct svc_req *req, struct compound_state *cs)
9395 {
9396 LOCKT4args *args = &argop->nfs_argop4_u.oplockt;
9397 LOCKT4res *resp = &resop->nfs_resop4_u.oplockt;
9398 rfs4_lockowner_t *lo;
9399 rfs4_client_t *cp;
9400 bool_t create = FALSE;
9401 struct flock64 flk;
9402 int error;
9403 int flag = FREAD | FWRITE;
9404 int ltype;
9405 length4 posix_length;
9406 sysid_t sysid;
9407 pid_t pid;
9408
9409 DTRACE_NFSV4_2(op__lockt__start, struct compound_state *, cs,
9410 LOCKT4args *, args);
9411
9412 if (cs->vp == NULL) {
9413 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9414 goto out;
9415 }
9416
9417 /*
9418 * NFS4 only allows locking on regular files, so
9419 * verify type of object.
9420 */
9421 if (cs->vp->v_type != VREG) {
9422 if (cs->vp->v_type == VDIR)
9423 *cs->statusp = resp->status = NFS4ERR_ISDIR;
9424 else
9425 *cs->statusp = resp->status = NFS4ERR_INVAL;
9426 goto out;
9427 }
9428
9429 /*
9430 * Check out the clientid to ensure the server knows about it
9431 * so that we correctly inform the client of a server reboot.
9432 */
9433 if ((cp = rfs4_findclient_by_id(args->owner.clientid, FALSE))
9434 == NULL) {
9435 *cs->statusp = resp->status =
9436 rfs4_check_clientid(&args->owner.clientid, 0);
9437 goto out;
9438 }
9439 if (rfs4_lease_expired(cp)) {
9440 rfs4_client_close(cp);
9441 /*
9442 * Protocol doesn't allow returning NFS4ERR_STALE as
9443 * other operations do on this check so STALE_CLIENTID
9444 * is returned instead
9445 */
9446 *cs->statusp = resp->status = NFS4ERR_STALE_CLIENTID;
9447 goto out;
9448 }
9449
9450 if (rfs4_clnt_in_grace(cp) && !(cp->rc_can_reclaim)) {
9451 *cs->statusp = resp->status = NFS4ERR_GRACE;
9452 rfs4_client_rele(cp);
9453 goto out;
9454 }
9455 rfs4_client_rele(cp);
9456
9457 resp->status = NFS4_OK;
9458
9459 switch (args->locktype) {
9460 case READ_LT:
9461 case READW_LT:
9462 ltype = F_RDLCK;
9463 break;
9464 case WRITE_LT:
9465 case WRITEW_LT:
9466 ltype = F_WRLCK;
9467 break;
9468 }
9469
9470 posix_length = args->length;
9471 /* Check for zero length. To lock to end of file use all ones for V4 */
9472 if (posix_length == 0) {
9473 *cs->statusp = resp->status = NFS4ERR_INVAL;
9474 goto out;
9475 } else if (posix_length == (length4)(~0)) {
9476 posix_length = 0; /* Posix to end of file */
9477 }
9478
9479 /* Find or create a lockowner */
9480 lo = rfs4_findlockowner(&args->owner, &create);
9481
9482 if (lo) {
9483 pid = lo->rl_pid;
9484 if ((resp->status =
9485 rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
9486 goto err;
9487 } else {
9488 pid = 0;
9489 sysid = lockt_sysid;
9490 }
9491 retry:
9492 flk.l_type = ltype;
9493 flk.l_whence = 0; /* SEEK_SET */
9494 flk.l_start = args->offset;
9495 flk.l_len = posix_length;
9496 flk.l_sysid = sysid;
9497 flk.l_pid = pid;
9498 flag |= F_REMOTELOCK;
9499
9500 LOCK_PRINT(rfs4_debug, "rfs4_op_lockt", F_GETLK, &flk);
9501
9502 /* Note that length4 is uint64_t but l_len and l_start are off64_t */
9503 if (flk.l_len < 0 || flk.l_start < 0) {
9504 resp->status = NFS4ERR_INVAL;
9505 goto err;
9506 }
9507 error = VOP_FRLOCK(cs->vp, F_GETLK, &flk, flag, (u_offset_t)0,
9508 NULL, cs->cr, NULL);
9509
9510 /*
9511 * N.B. We map error values to nfsv4 errors. This is differrent
9512 * than puterrno4 routine.
9513 */
9514 switch (error) {
9515 case 0:
9516 if (flk.l_type == F_UNLCK)
9517 resp->status = NFS4_OK;
9518 else {
9519 if (lock_denied(&resp->denied, &flk) == NFS4ERR_EXPIRED)
9520 goto retry;
9521 resp->status = NFS4ERR_DENIED;
9522 }
9523 break;
9524 case EOVERFLOW:
9525 resp->status = NFS4ERR_INVAL;
9526 break;
9527 case EINVAL:
9528 resp->status = NFS4ERR_NOTSUPP;
9529 break;
9530 default:
9531 cmn_err(CE_WARN, "rfs4_op_lockt: unexpected errno (%d)",
9532 error);
9533 resp->status = NFS4ERR_SERVERFAULT;
9534 break;
9535 }
9536
9537 err:
9538 if (lo)
9539 rfs4_lockowner_rele(lo);
9540 *cs->statusp = resp->status;
9541 out:
9542 DTRACE_NFSV4_2(op__lockt__done, struct compound_state *, cs,
9543 LOCKT4res *, resp);
9544 }
9545
9546 int
9547 rfs4_share(rfs4_state_t *sp, uint32_t access, uint32_t deny)
9548 {
9549 int err;
9550 int cmd;
9551 vnode_t *vp;
9552 struct shrlock shr;
9553 struct shr_locowner shr_loco;
9554 int fflags = 0;
9555
9556 ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9557 ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9558
9559 if (sp->rs_closed)
9560 return (NFS4ERR_OLD_STATEID);
9561
9562 vp = sp->rs_finfo->rf_vp;
9563 ASSERT(vp);
9564
9565 shr.s_access = shr.s_deny = 0;
9566
9567 if (access & OPEN4_SHARE_ACCESS_READ) {
9568 fflags |= FREAD;
9569 shr.s_access |= F_RDACC;
9570 }
9571 if (access & OPEN4_SHARE_ACCESS_WRITE) {
9572 fflags |= FWRITE;
9573 shr.s_access |= F_WRACC;
9574 }
9575 ASSERT(shr.s_access);
9576
9577 if (deny & OPEN4_SHARE_DENY_READ)
9578 shr.s_deny |= F_RDDNY;
9579 if (deny & OPEN4_SHARE_DENY_WRITE)
9580 shr.s_deny |= F_WRDNY;
9581
9582 shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9583 shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9584 shr_loco.sl_pid = shr.s_pid;
9585 shr_loco.sl_id = shr.s_sysid;
9586 shr.s_owner = (caddr_t)&shr_loco;
9587 shr.s_own_len = sizeof (shr_loco);
9588
9589 cmd = nbl_need_check(vp) ? F_SHARE_NBMAND : F_SHARE;
9590
9591 err = VOP_SHRLOCK(vp, cmd, &shr, fflags, CRED(), NULL);
9592 if (err != 0) {
9593 if (err == EAGAIN)
9594 err = NFS4ERR_SHARE_DENIED;
9595 else
9596 err = puterrno4(err);
9597 return (err);
9598 }
9599
9600 sp->rs_share_access |= access;
9601 sp->rs_share_deny |= deny;
9602
9603 return (0);
9604 }
9605
9606 int
9607 rfs4_unshare(rfs4_state_t *sp)
9608 {
9609 int err;
9610 struct shrlock shr;
9611 struct shr_locowner shr_loco;
9612
9613 ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9614
9615 if (sp->rs_closed || sp->rs_share_access == 0)
9616 return (0);
9617
9618 ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9619 ASSERT(sp->rs_finfo->rf_vp);
9620
9621 shr.s_access = shr.s_deny = 0;
9622 shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9623 shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9624 shr_loco.sl_pid = shr.s_pid;
9625 shr_loco.sl_id = shr.s_sysid;
9626 shr.s_owner = (caddr_t)&shr_loco;
9627 shr.s_own_len = sizeof (shr_loco);
9628
9629 err = VOP_SHRLOCK(sp->rs_finfo->rf_vp, F_UNSHARE, &shr, 0, CRED(),
9630 NULL);
9631 if (err != 0) {
9632 err = puterrno4(err);
9633 return (err);
9634 }
9635
9636 sp->rs_share_access = 0;
9637 sp->rs_share_deny = 0;
9638
9639 return (0);
9640
9641 }
9642
9643 static int
9644 rdma_setup_read_data4(READ4args *args, READ4res *rok)
9645 {
9646 struct clist *wcl;
9647 count4 count = rok->data_len;
9648 int wlist_len;
9649
9650 wcl = args->wlist;
9651 if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
9652 return (FALSE);
9653 }
9654 wcl = args->wlist;
9655 rok->wlist_len = wlist_len;
9656 rok->wlist = wcl;
9657 return (TRUE);
9658 }
9659
/*
 * Tunable to disable server referrals.  While it is non-zero,
 * vn_is_nfs_reparse() reports B_FALSE for every vnode.  Defaults to 0
 * (referrals enabled).
 */
int rfs4_no_referrals = 0;
9662
9663 /*
9664 * Find an NFS record in reparse point data.
9665 * Returns 0 for success and <0 or an errno value on failure.
9666 */
9667 int
9668 vn_find_nfs_record(vnode_t *vp, nvlist_t **nvlp, char **svcp, char **datap)
9669 {
9670 int err;
9671 char *stype, *val;
9672 nvlist_t *nvl;
9673 nvpair_t *curr;
9674
9675 if ((nvl = reparse_init()) == NULL)
9676 return (-1);
9677
9678 if ((err = reparse_vnode_parse(vp, nvl)) != 0) {
9679 reparse_free(nvl);
9680 return (err);
9681 }
9682
9683 curr = NULL;
9684 while ((curr = nvlist_next_nvpair(nvl, curr)) != NULL) {
9685 if ((stype = nvpair_name(curr)) == NULL) {
9686 reparse_free(nvl);
9687 return (-2);
9688 }
9689 if (strncasecmp(stype, "NFS", 3) == 0)
9690 break;
9691 }
9692
9693 if ((curr == NULL) ||
9694 (nvpair_value_string(curr, &val))) {
9695 reparse_free(nvl);
9696 return (-3);
9697 }
9698 *nvlp = nvl;
9699 *svcp = stype;
9700 *datap = val;
9701 return (0);
9702 }
9703
9704 int
9705 vn_is_nfs_reparse(vnode_t *vp, cred_t *cr)
9706 {
9707 nvlist_t *nvl;
9708 char *s, *d;
9709
9710 if (rfs4_no_referrals != 0)
9711 return (B_FALSE);
9712
9713 if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9714 return (B_FALSE);
9715
9716 if (vn_find_nfs_record(vp, &nvl, &s, &d) != 0)
9717 return (B_FALSE);
9718
9719 reparse_free(nvl);
9720
9721 return (B_TRUE);
9722 }
9723
9724 /*
9725 * There is a user-level copy of this routine in ref_subr.c.
9726 * Changes should be kept in sync.
9727 */
9728 static int
9729 nfs4_create_components(char *path, component4 *comp4)
9730 {
9731 int slen, plen, ncomp;
9732 char *ori_path, *nxtc, buf[MAXNAMELEN];
9733
9734 if (path == NULL)
9735 return (0);
9736
9737 plen = strlen(path) + 1; /* include the terminator */
9738 ori_path = path;
9739 ncomp = 0;
9740
9741 /* count number of components in the path */
9742 for (nxtc = path; nxtc < ori_path + plen; nxtc++) {
9743 if (*nxtc == '/' || *nxtc == '\0' || *nxtc == '\n') {
9744 if ((slen = nxtc - path) == 0) {
9745 path = nxtc + 1;
9746 continue;
9747 }
9748
9749 if (comp4 != NULL) {
9750 bcopy(path, buf, slen);
9751 buf[slen] = '\0';
9752 (void) str_to_utf8(buf, &comp4[ncomp]);
9753 }
9754
9755 ncomp++; /* 1 valid component */
9756 path = nxtc + 1;
9757 }
9758 if (*nxtc == '\0' || *nxtc == '\n')
9759 break;
9760 }
9761
9762 return (ncomp);
9763 }
9764
9765 /*
9766 * There is a user-level copy of this routine in ref_subr.c.
9767 * Changes should be kept in sync.
9768 */
9769 static int
9770 make_pathname4(char *path, pathname4 *pathname)
9771 {
9772 int ncomp;
9773 component4 *comp4;
9774
9775 if (pathname == NULL)
9776 return (0);
9777
9778 if (path == NULL) {
9779 pathname->pathname4_val = NULL;
9780 pathname->pathname4_len = 0;
9781 return (0);
9782 }
9783
9784 /* count number of components to alloc buffer */
9785 if ((ncomp = nfs4_create_components(path, NULL)) == 0) {
9786 pathname->pathname4_val = NULL;
9787 pathname->pathname4_len = 0;
9788 return (0);
9789 }
9790 comp4 = kmem_zalloc(ncomp * sizeof (component4), KM_SLEEP);
9791
9792 /* copy components into allocated buffer */
9793 ncomp = nfs4_create_components(path, comp4);
9794
9795 pathname->pathname4_val = comp4;
9796 pathname->pathname4_len = ncomp;
9797
9798 return (ncomp);
9799 }
9800
9801 #define xdr_fs_locations4 xdr_fattr4_fs_locations
9802
9803 fs_locations4 *
9804 fetch_referral(vnode_t *vp, cred_t *cr)
9805 {
9806 nvlist_t *nvl;
9807 char *stype, *sdata;
9808 fs_locations4 *result;
9809 char buf[1024];
9810 size_t bufsize;
9811 XDR xdr;
9812 int err;
9813
9814 /*
9815 * Check attrs to ensure it's a reparse point
9816 */
9817 if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9818 return (NULL);
9819
9820 /*
9821 * Look for an NFS record and get the type and data
9822 */
9823 if (vn_find_nfs_record(vp, &nvl, &stype, &sdata) != 0)
9824 return (NULL);
9825
9826 /*
9827 * With the type and data, upcall to get the referral
9828 */
9829 bufsize = sizeof (buf);
9830 bzero(buf, sizeof (buf));
9831 err = reparse_kderef((const char *)stype, (const char *)sdata,
9832 buf, &bufsize);
9833 reparse_free(nvl);
9834
9835 DTRACE_PROBE4(nfs4serv__func__referral__upcall,
9836 char *, stype, char *, sdata, char *, buf, int, err);
9837 if (err) {
9838 cmn_err(CE_NOTE,
9839 "reparsed daemon not running: unable to get referral (%d)",
9840 err);
9841 return (NULL);
9842 }
9843
9844 /*
9845 * We get an XDR'ed record back from the kderef call
9846 */
9847 xdrmem_create(&xdr, buf, bufsize, XDR_DECODE);
9848 result = kmem_alloc(sizeof (fs_locations4), KM_SLEEP);
9849 err = xdr_fs_locations4(&xdr, result);
9850 XDR_DESTROY(&xdr);
9851 if (err != TRUE) {
9852 DTRACE_PROBE1(nfs4serv__func__referral__upcall__xdrfail,
9853 int, err);
9854 return (NULL);
9855 }
9856
9857 /*
9858 * Look at path to recover fs_root, ignoring the leading '/'
9859 */
9860 (void) make_pathname4(vp->v_path, &result->fs_root);
9861
9862 return (result);
9863 }
9864
9865 char *
9866 build_symlink(vnode_t *vp, cred_t *cr, size_t *strsz)
9867 {
9868 fs_locations4 *fsl;
9869 fs_location4 *fs;
9870 char *server, *path, *symbuf;
9871 static char *prefix = "/net/";
9872 int i, size, npaths;
9873 uint_t len;
9874
9875 /* Get the referral */
9876 if ((fsl = fetch_referral(vp, cr)) == NULL)
9877 return (NULL);
9878
9879 /* Deal with only the first location and first server */
9880 fs = &fsl->locations_val[0];
9881 server = utf8_to_str(&fs->server_val[0], &len, NULL);
9882 if (server == NULL) {
9883 rfs4_free_fs_locations4(fsl);
9884 kmem_free(fsl, sizeof (fs_locations4));
9885 return (NULL);
9886 }
9887
9888 /* Figure out size for "/net/" + host + /path/path/path + NULL */
9889 size = strlen(prefix) + len;
9890 for (i = 0; i < fs->rootpath.pathname4_len; i++)
9891 size += fs->rootpath.pathname4_val[i].utf8string_len + 1;
9892
9893 /* Allocate the symlink buffer and fill it */
9894 symbuf = kmem_zalloc(size, KM_SLEEP);
9895 (void) strcat(symbuf, prefix);
9896 (void) strcat(symbuf, server);
9897 kmem_free(server, len);
9898
9899 npaths = 0;
9900 for (i = 0; i < fs->rootpath.pathname4_len; i++) {
9901 path = utf8_to_str(&fs->rootpath.pathname4_val[i], &len, NULL);
9902 if (path == NULL)
9903 continue;
9904 (void) strcat(symbuf, "/");
9905 (void) strcat(symbuf, path);
9906 npaths++;
9907 kmem_free(path, len);
9908 }
9909
9910 rfs4_free_fs_locations4(fsl);
9911 kmem_free(fsl, sizeof (fs_locations4));
9912
9913 if (strsz != NULL)
9914 *strsz = size;
9915 return (symbuf);
9916 }
9917
9918 /*
9919 * Check to see if we have a downrev Solaris client, so that we
9920 * can send it a symlink instead of a referral.
9921 */
9922 int
9923 client_is_downrev(struct svc_req *req)
9924 {
9925 struct sockaddr *ca;
9926 rfs4_clntip_t *ci;
9927 bool_t create = FALSE;
9928 int is_downrev;
9929
9930 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
9931 ASSERT(ca);
9932 ci = rfs4_find_clntip(ca, &create);
9933 if (ci == NULL)
9934 return (0);
9935 is_downrev = ci->ri_no_referrals;
9936 rfs4_dbe_rele(ci->ri_dbe);
9937 return (is_downrev);
9938 }
9939
9940 /*
9941 * Do the main work of handling HA-NFSv4 Resource Group failover on
9942 * Sun Cluster.
9943 * We need to detect whether any RG admin paths have been added or removed,
9944 * and adjust resources accordingly.
9945 * Currently we're using a very inefficient algorithm, ~ 2 * O(n**2). In
9946 * order to scale, the list and array of paths need to be held in more
9947 * suitable data structures.
9948 */
9949 static void
9950 hanfsv4_failover(nfs4_srv_t *nsrv4)
9951 {
9952 int i, start_grace, numadded_paths = 0;
9953 char **added_paths = NULL;
9954 rfs4_dss_path_t *dss_path;
9955
9956 /*
9957 * Note: currently, dss_pathlist cannot be NULL, since
9958 * it will always include an entry for NFS4_DSS_VAR_DIR. If we
9959 * make the latter dynamically specified too, the following will
9960 * need to be adjusted.
9961 */
9962
9963 /*
9964 * First, look for removed paths: RGs that have been failed-over
9965 * away from this node.
9966 * Walk the "currently-serving" dss_pathlist and, for each
9967 * path, check if it is on the "passed-in" rfs4_dss_newpaths array
9968 * from nfsd. If not, that RG path has been removed.
9969 *
9970 * Note that nfsd has sorted rfs4_dss_newpaths for us, and removed
9971 * any duplicates.
9972 */
9973 dss_path = nsrv4->dss_pathlist;
9974 do {
9975 int found = 0;
9976 char *path = dss_path->path;
9977
9978 /* used only for non-HA so may not be removed */
9979 if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
9980 dss_path = dss_path->next;
9981 continue;
9982 }
9983
9984 for (i = 0; i < rfs4_dss_numnewpaths; i++) {
9985 int cmpret;
9986 char *newpath = rfs4_dss_newpaths[i];
9987
9988 /*
9989 * Since nfsd has sorted rfs4_dss_newpaths for us,
9990 * once the return from strcmp is negative we know
9991 * we've passed the point where "path" should be,
9992 * and can stop searching: "path" has been removed.
9993 */
9994 cmpret = strcmp(path, newpath);
9995 if (cmpret < 0)
9996 break;
9997 if (cmpret == 0) {
9998 found = 1;
9999 break;
10000 }
10001 }
10002
10003 if (found == 0) {
10004 unsigned index = dss_path->index;
10005 rfs4_servinst_t *sip = dss_path->sip;
10006 rfs4_dss_path_t *path_next = dss_path->next;
10007
10008 /*
10009 * This path has been removed.
10010 * We must clear out the servinst reference to
10011 * it, since it's now owned by another
10012 * node: we should not attempt to touch it.
10013 */
10014 ASSERT(dss_path == sip->dss_paths[index]);
10015 sip->dss_paths[index] = NULL;
10016
10017 /* remove from "currently-serving" list, and destroy */
10018 remque(dss_path);
10019 /* allow for NUL */
10020 kmem_free(dss_path->path, strlen(dss_path->path) + 1);
10021 kmem_free(dss_path, sizeof (rfs4_dss_path_t));
10022
10023 dss_path = path_next;
10024 } else {
10025 /* path was found; not removed */
10026 dss_path = dss_path->next;
10027 }
10028 } while (dss_path != nsrv4->dss_pathlist);
10029
10030 /*
10031 * Now, look for added paths: RGs that have been failed-over
10032 * to this node.
10033 * Walk the "passed-in" rfs4_dss_newpaths array from nfsd and,
10034 * for each path, check if it is on the "currently-serving"
10035 * dss_pathlist. If not, that RG path has been added.
10036 *
10037 * Note: we don't do duplicate detection here; nfsd does that for us.
10038 *
10039 * Note: numadded_paths <= rfs4_dss_numnewpaths, which gives us
10040 * an upper bound for the size needed for added_paths[numadded_paths].
10041 */
10042
10043 /* probably more space than we need, but guaranteed to be enough */
10044 if (rfs4_dss_numnewpaths > 0) {
10045 size_t sz = rfs4_dss_numnewpaths * sizeof (char *);
10046 added_paths = kmem_zalloc(sz, KM_SLEEP);
10047 }
10048
10049 /* walk the "passed-in" rfs4_dss_newpaths array from nfsd */
10050 for (i = 0; i < rfs4_dss_numnewpaths; i++) {
10051 int found = 0;
10052 char *newpath = rfs4_dss_newpaths[i];
10053
10054 dss_path = nsrv4->dss_pathlist;
10055 do {
10056 char *path = dss_path->path;
10057
10058 /* used only for non-HA */
10059 if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
10060 dss_path = dss_path->next;
10061 continue;
10062 }
10063
10064 if (strncmp(path, newpath, strlen(path)) == 0) {
10065 found = 1;
10066 break;
10067 }
10068
10069 dss_path = dss_path->next;
10070 } while (dss_path != nsrv4->dss_pathlist);
10071
10072 if (found == 0) {
10073 added_paths[numadded_paths] = newpath;
10074 numadded_paths++;
10075 }
10076 }
10077
10078 /* did we find any added paths? */
10079 if (numadded_paths > 0) {
10080
10081 /* create a new server instance, and start its grace period */
10082 start_grace = 1;
10083 /* CSTYLED */
10084 rfs4_servinst_create(nsrv4, start_grace, numadded_paths, added_paths);
10085
10086 /* read in the stable storage state from these paths */
10087 rfs4_dss_readstate(nsrv4, numadded_paths, added_paths);
10088
10089 /*
10090 * Multiple failovers during a grace period will cause
10091 * clients of the same resource group to be partitioned
10092 * into different server instances, with different
10093 * grace periods. Since clients of the same resource
10094 * group must be subject to the same grace period,
10095 * we need to reset all currently active grace periods.
10096 */
10097 rfs4_grace_reset_all(nsrv4);
10098 }
10099
10100 if (rfs4_dss_numnewpaths > 0)
10101 kmem_free(added_paths, rfs4_dss_numnewpaths * sizeof (char *));
10102 }