1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
24 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
25 * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
26 */
27
28 /*
29 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
30 * All Rights Reserved
31 */
32
33 #include <sys/param.h>
34 #include <sys/types.h>
35 #include <sys/systm.h>
36 #include <sys/cred.h>
37 #include <sys/buf.h>
38 #include <sys/vfs.h>
39 #include <sys/vfs_opreg.h>
40 #include <sys/vnode.h>
41 #include <sys/uio.h>
42 #include <sys/errno.h>
43 #include <sys/sysmacros.h>
44 #include <sys/statvfs.h>
45 #include <sys/kmem.h>
46 #include <sys/dirent.h>
47 #include <sys/cmn_err.h>
48 #include <sys/debug.h>
49 #include <sys/systeminfo.h>
50 #include <sys/flock.h>
51 #include <sys/pathname.h>
52 #include <sys/nbmlock.h>
53 #include <sys/share.h>
54 #include <sys/atomic.h>
55 #include <sys/policy.h>
56 #include <sys/fem.h>
57 #include <sys/sdt.h>
58 #include <sys/ddi.h>
59 #include <sys/zone.h>
60
61 #include <fs/fs_reparse.h>
62
63 #include <rpc/types.h>
64 #include <rpc/auth.h>
65 #include <rpc/rpcsec_gss.h>
66 #include <rpc/svc.h>
67
68 #include <nfs/nfs.h>
69 #include <nfs/export.h>
70 #include <nfs/nfs_cmd.h>
71 #include <nfs/lm.h>
72 #include <nfs/nfs4.h>
73
74 #include <sys/strsubr.h>
75 #include <sys/strsun.h>
76
77 #include <inet/common.h>
78 #include <inet/ip.h>
79 #include <inet/ip6.h>
80
81 #include <sys/tsol/label.h>
82 #include <sys/tsol/tndb.h>
83
/*
 * Tunables.  The two static variables start out at the compile-time
 * defaults given by the macros next to them.
 */
#define RFS4_MAXLOCK_TRIES 4 /* Try to get the lock this many times */
static int rfs4_maxlock_tries = RFS4_MAXLOCK_TRIES;
#define RFS4_LOCK_DELAY 10 /* Milliseconds */
static clock_t rfs4_lock_delay = RFS4_LOCK_DELAY;
/* The next two objects are defined outside this file. */
extern struct svc_ops rdma_svc_ops;
extern int nfs_loaned_buffers;
/* End of Tunables */
91
92 static int rdma_setup_read_data4(READ4args *, READ4res *);
93
/*
 * Used to bump the stateid4.seqid value and show changes in the stateid.
 * Pre-increments (sp)->bits.chgseq, so the expression evaluates to the
 * new sequence value.
 */
#define next_stateid(sp) (++(sp)->bits.chgseq)
98
99 /*
100 * RFS4_MINLEN_ENTRY4: XDR-encoded size of smallest possible dirent.
101 * This is used to return NFS4ERR_TOOSMALL when clients specify
102 * maxcount that isn't large enough to hold the smallest possible
103 * XDR encoded dirent.
104 *
105 * sizeof cookie (8 bytes) +
106 * sizeof name_len (4 bytes) +
107 * sizeof smallest (padded) name (4 bytes) +
108 * sizeof bitmap4_len (12 bytes) + NOTE: we always encode len=2 bm4
109 * sizeof attrlist4_len (4 bytes) +
110 * sizeof next boolean (4 bytes)
111 *
112 * RFS4_MINLEN_RDDIR4: XDR-encoded size of READDIR op reply containing
113 * the smallest possible entry4 (assumes no attrs requested).
114 * sizeof nfsstat4 (4 bytes) +
115 * sizeof verifier4 (8 bytes) +
116 * sizeof entry4list bool (4 bytes) +
117 * sizeof entry4 (36 bytes) +
118 * sizeof eof bool (4 bytes)
119 *
120 * RFS4_MINLEN_RDDIR_BUF: minimum length of buffer server will provide to
121 * VOP_READDIR. Its value is the size of the maximum possible dirent
122 * for solaris. The DIRENT64_RECLEN macro returns the size of dirent
123 * required for a given name length. MAXNAMELEN is the maximum
124 * filename length allowed in Solaris. The first two DIRENT64_RECLEN()
125 * macros are to allow for . and .. entries -- just a minor tweak to try
126 * and guarantee that buffer we give to VOP_READDIR will be large enough
127 * to hold ., .., and the largest possible solaris dirent64.
128 */
129 #define RFS4_MINLEN_ENTRY4 36
130 #define RFS4_MINLEN_RDDIR4 (4 + NFS4_VERIFIER_SIZE + 4 + RFS4_MINLEN_ENTRY4 + 4)
131 #define RFS4_MINLEN_RDDIR_BUF \
132 (DIRENT64_RECLEN(1) + DIRENT64_RECLEN(2) + DIRENT64_RECLEN(MAXNAMELEN))
133
134 /*
135 * It would be better to pad to 4 bytes since that's what XDR would do,
136 * but the dirents UFS gives us are already padded to 8, so just take
137 * what we're given. Dircount is only a hint anyway. Currently the
138 * solaris kernel is ASCII only, so there's no point in calling the
139 * UTF8 functions.
140 *
 * dirent64: name padded to provide 8 byte struct alignment
142 * d_ino(8) + d_off(8) + d_reclen(2) + d_name(namelen + null(1) + pad)
143 *
144 * cookie: uint64_t + utf8namelen: uint_t + utf8name padded to 8 bytes
145 *
146 */
147 #define DIRENT64_TO_DIRCOUNT(dp) \
148 (3 * BYTES_PER_XDR_UNIT + DIRENT64_NAMELEN((dp)->d_reclen))
149
/*
 * NOTE(review): this was annotated "Initialized in rfs4_srvrinit", but
 * rfs4_srvrinit() in this file never assigns it -- confirm where it is
 * actually set.
 */
time_t rfs4_start_time;

static sysid_t lockt_sysid; /* dummy sysid for all LOCKT calls */

/* Set from fs_new_caller_id() in rfs4_srvrinit(). */
u_longlong_t nfs4_srv_caller_id;
/* Key handed to vsd_create() in rfs4_srvrinit(). */
uint_t nfs4_srv_vkey = 0;

/* Write verifier; computed once in rfs4_srvrinit(). */
verifier4 Write4verf;
/* Not assigned anywhere in rfs4_srvrinit(); presumably set elsewhere. */
verifier4 Readdir4verf;
159
160 void rfs4_init_compound_state(struct compound_state *);
161
162 static void nullfree(caddr_t);
163 static void rfs4_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
164 struct compound_state *);
165 static void rfs4_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
166 struct compound_state *);
167 static void rfs4_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
168 struct compound_state *);
169 static void rfs4_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
170 struct compound_state *);
171 static void rfs4_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
172 struct compound_state *);
173 static void rfs4_op_create_free(nfs_resop4 *resop);
174 static void rfs4_op_delegreturn(nfs_argop4 *, nfs_resop4 *,
175 struct svc_req *, struct compound_state *);
176 static void rfs4_op_delegpurge(nfs_argop4 *, nfs_resop4 *,
177 struct svc_req *, struct compound_state *);
178 static void rfs4_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
179 struct compound_state *);
180 static void rfs4_op_getattr_free(nfs_resop4 *);
181 static void rfs4_op_getfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
182 struct compound_state *);
183 static void rfs4_op_getfh_free(nfs_resop4 *);
184 static void rfs4_op_illegal(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
185 struct compound_state *);
186 static void rfs4_op_link(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
187 struct compound_state *);
188 static void rfs4_op_lock(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
189 struct compound_state *);
190 static void lock_denied_free(nfs_resop4 *);
191 static void rfs4_op_locku(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
192 struct compound_state *);
193 static void rfs4_op_lockt(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
194 struct compound_state *);
195 static void rfs4_op_lookup(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
196 struct compound_state *);
197 static void rfs4_op_lookupp(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
198 struct compound_state *);
199 static void rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop,
200 struct svc_req *req, struct compound_state *cs);
201 static void rfs4_op_nverify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
202 struct compound_state *);
203 static void rfs4_op_open(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
204 struct compound_state *);
205 static void rfs4_op_open_confirm(nfs_argop4 *, nfs_resop4 *,
206 struct svc_req *, struct compound_state *);
207 static void rfs4_op_open_downgrade(nfs_argop4 *, nfs_resop4 *,
208 struct svc_req *, struct compound_state *);
209 static void rfs4_op_putfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
210 struct compound_state *);
211 static void rfs4_op_putpubfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
212 struct compound_state *);
213 static void rfs4_op_putrootfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
214 struct compound_state *);
215 static void rfs4_op_read(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
216 struct compound_state *);
217 static void rfs4_op_read_free(nfs_resop4 *);
218 static void rfs4_op_readdir_free(nfs_resop4 *resop);
219 static void rfs4_op_readlink(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
220 struct compound_state *);
221 static void rfs4_op_readlink_free(nfs_resop4 *);
222 static void rfs4_op_release_lockowner(nfs_argop4 *, nfs_resop4 *,
223 struct svc_req *, struct compound_state *);
224 static void rfs4_op_remove(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
225 struct compound_state *);
226 static void rfs4_op_rename(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
227 struct compound_state *);
228 static void rfs4_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
229 struct compound_state *);
230 static void rfs4_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
231 struct compound_state *);
232 static void rfs4_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
233 struct compound_state *);
234 static void rfs4_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
235 struct compound_state *);
236 static void rfs4_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
237 struct compound_state *);
238 static void rfs4_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
239 struct compound_state *);
240 static void rfs4_op_setclientid(nfs_argop4 *, nfs_resop4 *,
241 struct svc_req *, struct compound_state *);
242 static void rfs4_op_setclientid_confirm(nfs_argop4 *, nfs_resop4 *,
243 struct svc_req *req, struct compound_state *);
244 static void rfs4_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
245 struct compound_state *);
246 static void rfs4_op_secinfo_free(nfs_resop4 *);
247
248 static nfsstat4 check_open_access(uint32_t,
249 struct compound_state *, struct svc_req *);
250 nfsstat4 rfs4_client_sysid(rfs4_client_t *, sysid_t *);
251 void rfs4_ss_clid(rfs4_client_t *);
252
/*
 * translation table for attrs
 *
 * Set up with nfs4_ntov_table_init() and released with
 * nfs4_ntov_table_free().  How the fields are used is not visible in this
 * part of the file; the per-field notes are inferred from the names and
 * should be confirmed against those routines.
 */
struct nfs4_ntov_table {
	union nfs4_attr_u *na;	/* presumably the attribute values -- confirm */
	uint8_t amap[NFS4_MAXNUM_ATTRS]; /* at most one slot per attribute */
	int attrcnt;		/* presumably number of entries in use */
	bool_t vfsstat;		/* NOTE(review): meaning not shown here */
};
262
263 static void nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp);
264 static void nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
265 struct nfs4_svgetit_arg *sargp);
266
267 static nfsstat4 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp,
268 struct compound_state *cs, struct nfs4_svgetit_arg *sargp,
269 struct nfs4_ntov_table *ntovp, nfs4_attr_cmd_t cmd);
270
/*
 * Delegation monitor templates, built by fem_create() in rfs4_srvrinit()
 * and released with fem_free() in rfs4_srvrfini().
 */
fem_t *deleg_rdops;
fem_t *deleg_wrops;

/*
 * Server instances are chained through their next/prev pointers;
 * rfs4_cur_servinst is the most recently created one and older instances
 * are reached by following prev.
 */
rfs4_servinst_t *rfs4_cur_servinst = NULL; /* current server instance */
kmutex_t rfs4_servinst_lock; /* protects linked list */
int rfs4_seen_first_compound; /* set first time we see one */
277
/*
 * NFS4 op dispatch table
 */

/*
 * One dispatch table entry.  Both function pointers are declared without
 * a parameter list, so the compiler does not check the handlers'
 * signatures when the table is initialized.
 */
struct rfsv4disp {
	void (*dis_proc)(); /* proc to call */
	void (*dis_resfree)(); /* frees space allocated by proc */
	int dis_flags; /* RPC_IDEMPOTENT, etc... */
};
287
/*
 * Handlers indexed by NFSv4 operation number.  Slots 0 through 2 do not
 * correspond to valid operations and dispatch to rfs4_op_illegal.
 * Operations whose results own no extra memory use nullfree as their
 * result-free routine.
 */
static struct rfsv4disp rfsv4disptab[] = {
	/*
	 * NFS VERSION 4
	 */

	/* RFS_NULL = 0 */
	{rfs4_op_illegal, nullfree, 0},

	/* UNUSED = 1 */
	{rfs4_op_illegal, nullfree, 0},

	/* UNUSED = 2 */
	{rfs4_op_illegal, nullfree, 0},

	/* OP_ACCESS = 3 */
	{rfs4_op_access, nullfree, RPC_IDEMPOTENT},

	/* OP_CLOSE = 4 */
	{rfs4_op_close, nullfree, 0},

	/* OP_COMMIT = 5 */
	{rfs4_op_commit, nullfree, RPC_IDEMPOTENT},

	/* OP_CREATE = 6 */
	{rfs4_op_create, nullfree, 0},

	/* OP_DELEGPURGE = 7 */
	{rfs4_op_delegpurge, nullfree, 0},

	/* OP_DELEGRETURN = 8 */
	{rfs4_op_delegreturn, nullfree, 0},

	/* OP_GETATTR = 9 */
	{rfs4_op_getattr, rfs4_op_getattr_free, RPC_IDEMPOTENT},

	/* OP_GETFH = 10 */
	{rfs4_op_getfh, rfs4_op_getfh_free, RPC_ALL},

	/* OP_LINK = 11 */
	{rfs4_op_link, nullfree, 0},

	/* OP_LOCK = 12 */
	{rfs4_op_lock, lock_denied_free, 0},

	/* OP_LOCKT = 13 */
	{rfs4_op_lockt, lock_denied_free, 0},

	/* OP_LOCKU = 14 */
	{rfs4_op_locku, nullfree, 0},

	/* OP_LOOKUP = 15 */
	{rfs4_op_lookup, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},

	/* OP_LOOKUPP = 16 */
	{rfs4_op_lookupp, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},

	/* OP_NVERIFY = 17 */
	{rfs4_op_nverify, nullfree, RPC_IDEMPOTENT},

	/* OP_OPEN = 18 */
	{rfs4_op_open, rfs4_free_reply, 0},

	/* OP_OPENATTR = 19 */
	{rfs4_op_openattr, nullfree, 0},

	/* OP_OPEN_CONFIRM = 20 */
	{rfs4_op_open_confirm, nullfree, 0},

	/* OP_OPEN_DOWNGRADE = 21 */
	{rfs4_op_open_downgrade, nullfree, 0},

	/* OP_PUTFH = 22 */
	{rfs4_op_putfh, nullfree, RPC_ALL},

	/* OP_PUTPUBFH = 23 */
	{rfs4_op_putpubfh, nullfree, RPC_ALL},

	/* OP_PUTROOTFH = 24 */
	{rfs4_op_putrootfh, nullfree, RPC_ALL},

	/* OP_READ = 25 */
	{rfs4_op_read, rfs4_op_read_free, RPC_IDEMPOTENT},

	/* OP_READDIR = 26 */
	{rfs4_op_readdir, rfs4_op_readdir_free, RPC_IDEMPOTENT},

	/* OP_READLINK = 27 */
	{rfs4_op_readlink, rfs4_op_readlink_free, RPC_IDEMPOTENT},

	/* OP_REMOVE = 28 */
	{rfs4_op_remove, nullfree, 0},

	/* OP_RENAME = 29 */
	{rfs4_op_rename, nullfree, 0},

	/* OP_RENEW = 30 */
	{rfs4_op_renew, nullfree, 0},

	/* OP_RESTOREFH = 31 */
	{rfs4_op_restorefh, nullfree, RPC_ALL},

	/* OP_SAVEFH = 32 */
	{rfs4_op_savefh, nullfree, RPC_ALL},

	/* OP_SECINFO = 33 */
	{rfs4_op_secinfo, rfs4_op_secinfo_free, 0},

	/* OP_SETATTR = 34 */
	{rfs4_op_setattr, nullfree, 0},

	/* OP_SETCLIENTID = 35 */
	{rfs4_op_setclientid, nullfree, 0},

	/* OP_SETCLIENTID_CONFIRM = 36 */
	{rfs4_op_setclientid_confirm, nullfree, 0},

	/* OP_VERIFY = 37 */
	{rfs4_op_verify, nullfree, RPC_IDEMPOTENT},

	/* OP_WRITE = 38 */
	{rfs4_op_write, nullfree, 0},

	/* OP_RELEASE_LOCKOWNER = 39 */
	{rfs4_op_release_lockowner, nullfree, 0},
};

/* Number of entries in rfsv4disptab[]. */
static uint_t rfsv4disp_cnt = sizeof (rfsv4disptab) / sizeof (rfsv4disptab[0]);

/*
 * One past the last rfsv4disptab[] slot, so it must never be used to index
 * that table.  NOTE(review): presumably meant for per-op arrays that carry
 * one extra trailing "illegal" slot -- confirm against the users.
 */
#define OP_ILLEGAL_IDX (rfsv4disp_cnt)
417
#ifdef DEBUG

/* Debug-only switches; their consumers are outside this part of the file. */
int rfs4_fillone_debug = 0;
int rfs4_no_stub_access = 1;
int rfs4_rddir_debug = 0;

/*
 * Printable operation names, in the same order as rfsv4disptab[].  There is
 * one additional trailing entry, "rfs4_op_illegal", which lines up with
 * OP_ILLEGAL_IDX.
 */
static char *rfs4_op_string[] = {
	"rfs4_op_null",
	"rfs4_op_1 unused",
	"rfs4_op_2 unused",
	"rfs4_op_access",
	"rfs4_op_close",
	"rfs4_op_commit",
	"rfs4_op_create",
	"rfs4_op_delegpurge",
	"rfs4_op_delegreturn",
	"rfs4_op_getattr",
	"rfs4_op_getfh",
	"rfs4_op_link",
	"rfs4_op_lock",
	"rfs4_op_lockt",
	"rfs4_op_locku",
	"rfs4_op_lookup",
	"rfs4_op_lookupp",
	"rfs4_op_nverify",
	"rfs4_op_open",
	"rfs4_op_openattr",
	"rfs4_op_open_confirm",
	"rfs4_op_open_downgrade",
	"rfs4_op_putfh",
	"rfs4_op_putpubfh",
	"rfs4_op_putrootfh",
	"rfs4_op_read",
	"rfs4_op_readdir",
	"rfs4_op_readlink",
	"rfs4_op_remove",
	"rfs4_op_rename",
	"rfs4_op_renew",
	"rfs4_op_restorefh",
	"rfs4_op_savefh",
	"rfs4_op_secinfo",
	"rfs4_op_setattr",
	"rfs4_op_setclientid",
	"rfs4_op_setclient_confirm",
	"rfs4_op_verify",
	"rfs4_op_write",
	"rfs4_op_release_lockowner",
	"rfs4_op_illegal"
};
#endif
468
469 void rfs4_ss_chkclid(rfs4_client_t *);
470
471 extern size_t strlcpy(char *dst, const char *src, size_t dstsize);
472
473 extern void rfs4_free_fs_locations4(fs_locations4 *);
474
/*
 * Replace any earlier definition of nextdp with one for dirent64 records:
 * the next record starts d_reclen bytes after the start of dp.
 */
#ifdef nextdp
#undef nextdp
#endif
#define nextdp(dp) ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
479
/*
 * Vnode operations intercepted for read delegations.  rfs4_srvrinit()
 * passes this template to fem_create() to build deleg_rdops.  The list is
 * terminated by a NULL/NULL pair.
 */
static const fs_operation_def_t nfs4_rd_deleg_tmpl[] = {
	VOPNAME_OPEN, { .femop_open = deleg_rd_open },
	VOPNAME_WRITE, { .femop_write = deleg_rd_write },
	VOPNAME_SETATTR, { .femop_setattr = deleg_rd_setattr },
	VOPNAME_RWLOCK, { .femop_rwlock = deleg_rd_rwlock },
	VOPNAME_SPACE, { .femop_space = deleg_rd_space },
	VOPNAME_SETSECATTR, { .femop_setsecattr = deleg_rd_setsecattr },
	VOPNAME_VNEVENT, { .femop_vnevent = deleg_rd_vnevent },
	NULL, NULL
};
/*
 * Vnode operations intercepted for write delegations; used to build
 * deleg_wrops.  Same set as the read template plus VOPNAME_READ.
 */
static const fs_operation_def_t nfs4_wr_deleg_tmpl[] = {
	VOPNAME_OPEN, { .femop_open = deleg_wr_open },
	VOPNAME_READ, { .femop_read = deleg_wr_read },
	VOPNAME_WRITE, { .femop_write = deleg_wr_write },
	VOPNAME_SETATTR, { .femop_setattr = deleg_wr_setattr },
	VOPNAME_RWLOCK, { .femop_rwlock = deleg_wr_rwlock },
	VOPNAME_SPACE, { .femop_space = deleg_wr_space },
	VOPNAME_SETSECATTR, { .femop_setsecattr = deleg_wr_setsecattr },
	VOPNAME_VNEVENT, { .femop_vnevent = deleg_wr_vnevent },
	NULL, NULL
};
501
502 int
503 rfs4_srvrinit(void)
504 {
505 timespec32_t verf;
506 int error;
507 extern void rfs4_attr_init();
508 extern krwlock_t rfs4_deleg_policy_lock;
509
510 /*
511 * The following algorithm attempts to find a unique verifier
512 * to be used as the write verifier returned from the server
513 * to the client. It is important that this verifier change
514 * whenever the server reboots. Of secondary importance, it
515 * is important for the verifier to be unique between two
516 * different servers.
517 *
518 * Thus, an attempt is made to use the system hostid and the
519 * current time in seconds when the nfssrv kernel module is
520 * loaded. It is assumed that an NFS server will not be able
521 * to boot and then to reboot in less than a second. If the
522 * hostid has not been set, then the current high resolution
523 * time is used. This will ensure different verifiers each
524 * time the server reboots and minimize the chances that two
525 * different servers will have the same verifier.
526 * XXX - this is broken on LP64 kernels.
527 */
528 verf.tv_sec = (time_t)zone_get_hostid(NULL);
529 if (verf.tv_sec != 0) {
530 verf.tv_nsec = gethrestime_sec();
531 } else {
532 timespec_t tverf;
533
534 gethrestime(&tverf);
535 verf.tv_sec = (time_t)tverf.tv_sec;
536 verf.tv_nsec = tverf.tv_nsec;
537 }
538
539 Write4verf = *(uint64_t *)&verf;
540
541 rfs4_attr_init();
542 mutex_init(&rfs4_deleg_lock, NULL, MUTEX_DEFAULT, NULL);
543
544 /* Used to manage create/destroy of server state */
545 mutex_init(&rfs4_state_lock, NULL, MUTEX_DEFAULT, NULL);
546
547 /* Used to manage access to server instance linked list */
548 mutex_init(&rfs4_servinst_lock, NULL, MUTEX_DEFAULT, NULL);
549
550 /* Used to manage access to rfs4_deleg_policy */
551 rw_init(&rfs4_deleg_policy_lock, NULL, RW_DEFAULT, NULL);
552
553 error = fem_create("deleg_rdops", nfs4_rd_deleg_tmpl, &deleg_rdops);
554 if (error != 0) {
555 rfs4_disable_delegation();
556 } else {
557 error = fem_create("deleg_wrops", nfs4_wr_deleg_tmpl,
558 &deleg_wrops);
559 if (error != 0) {
560 rfs4_disable_delegation();
561 fem_free(deleg_rdops);
562 }
563 }
564
565 nfs4_srv_caller_id = fs_new_caller_id();
566
567 lockt_sysid = lm_alloc_sysidt();
568
569 vsd_create(&nfs4_srv_vkey, NULL);
570
571 return (0);
572 }
573
574 void
575 rfs4_srvrfini(void)
576 {
577 extern krwlock_t rfs4_deleg_policy_lock;
578
579 if (lockt_sysid != LM_NOSYSID) {
580 lm_free_sysidt(lockt_sysid);
581 lockt_sysid = LM_NOSYSID;
582 }
583
584 mutex_destroy(&rfs4_deleg_lock);
585 mutex_destroy(&rfs4_state_lock);
586 rw_destroy(&rfs4_deleg_policy_lock);
587
588 fem_free(deleg_rdops);
589 fem_free(deleg_wrops);
590 }
591
592 void
593 rfs4_init_compound_state(struct compound_state *cs)
594 {
595 bzero(cs, sizeof (*cs));
596 cs->cont = TRUE;
597 cs->access = CS_ACCESS_DENIED;
598 cs->deleg = FALSE;
599 cs->mandlock = FALSE;
600 cs->fh.nfs_fh4_val = cs->fhbuf;
601 }
602
603 void
604 rfs4_grace_start(rfs4_servinst_t *sip)
605 {
606 rw_enter(&sip->rwlock, RW_WRITER);
607 sip->start_time = (time_t)TICK_TO_SEC(ddi_get_lbolt());
608 sip->grace_period = rfs4_grace_period;
609 rw_exit(&sip->rwlock);
610 }
611
612 /*
613 * returns true if the instance's grace period has never been started
614 */
615 int
616 rfs4_servinst_grace_new(rfs4_servinst_t *sip)
617 {
618 time_t start_time;
619
620 rw_enter(&sip->rwlock, RW_READER);
621 start_time = sip->start_time;
622 rw_exit(&sip->rwlock);
623
624 return (start_time == 0);
625 }
626
627 /*
628 * Indicates if server instance is within the
629 * grace period.
630 */
631 int
632 rfs4_servinst_in_grace(rfs4_servinst_t *sip)
633 {
634 time_t grace_expiry;
635
636 rw_enter(&sip->rwlock, RW_READER);
637 grace_expiry = sip->start_time + sip->grace_period;
638 rw_exit(&sip->rwlock);
639
640 return (((time_t)TICK_TO_SEC(ddi_get_lbolt())) < grace_expiry);
641 }
642
643 int
644 rfs4_clnt_in_grace(rfs4_client_t *cp)
645 {
646 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
647
648 return (rfs4_servinst_in_grace(cp->rc_server_instance));
649 }
650
651 /*
652 * reset all currently active grace periods
653 */
654 void
655 rfs4_grace_reset_all(void)
656 {
657 rfs4_servinst_t *sip;
658
659 mutex_enter(&rfs4_servinst_lock);
660 for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev)
661 if (rfs4_servinst_in_grace(sip))
662 rfs4_grace_start(sip);
663 mutex_exit(&rfs4_servinst_lock);
664 }
665
666 /*
667 * start any new instances' grace periods
668 */
669 void
670 rfs4_grace_start_new(void)
671 {
672 rfs4_servinst_t *sip;
673
674 mutex_enter(&rfs4_servinst_lock);
675 for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev)
676 if (rfs4_servinst_grace_new(sip))
677 rfs4_grace_start(sip);
678 mutex_exit(&rfs4_servinst_lock);
679 }
680
681 static rfs4_dss_path_t *
682 rfs4_dss_newpath(rfs4_servinst_t *sip, char *path, unsigned index)
683 {
684 size_t len;
685 rfs4_dss_path_t *dss_path;
686
687 dss_path = kmem_alloc(sizeof (rfs4_dss_path_t), KM_SLEEP);
688
689 /*
690 * Take a copy of the string, since the original may be overwritten.
691 * Sadly, no strdup() in the kernel.
692 */
693 /* allow for NUL */
694 len = strlen(path) + 1;
695 dss_path->path = kmem_alloc(len, KM_SLEEP);
696 (void) strlcpy(dss_path->path, path, len);
697
698 /* associate with servinst */
699 dss_path->sip = sip;
700 dss_path->index = index;
701
702 /*
703 * Add to list of served paths.
704 * No locking required, as we're only ever called at startup.
705 */
706 if (rfs4_dss_pathlist == NULL) {
707 /* this is the first dss_path_t */
708
709 /* needed for insque/remque */
710 dss_path->next = dss_path->prev = dss_path;
711
712 rfs4_dss_pathlist = dss_path;
713 } else {
714 insque(dss_path, rfs4_dss_pathlist);
715 }
716
717 return (dss_path);
718 }
719
720 /*
721 * Create a new server instance, and make it the currently active instance.
722 * Note that starting the grace period too early will reduce the clients'
723 * recovery window.
724 */
725 void
726 rfs4_servinst_create(int start_grace, int dss_npaths, char **dss_paths)
727 {
728 unsigned i;
729 rfs4_servinst_t *sip;
730 rfs4_oldstate_t *oldstate;
731
732 sip = kmem_alloc(sizeof (rfs4_servinst_t), KM_SLEEP);
733 rw_init(&sip->rwlock, NULL, RW_DEFAULT, NULL);
734
735 sip->start_time = (time_t)0;
736 sip->grace_period = (time_t)0;
737 sip->next = NULL;
738 sip->prev = NULL;
739
740 rw_init(&sip->oldstate_lock, NULL, RW_DEFAULT, NULL);
741 /*
742 * This initial dummy entry is required to setup for insque/remque.
743 * It must be skipped over whenever the list is traversed.
744 */
745 oldstate = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP);
746 /* insque/remque require initial list entry to be self-terminated */
747 oldstate->next = oldstate;
748 oldstate->prev = oldstate;
749 sip->oldstate = oldstate;
750
751
752 sip->dss_npaths = dss_npaths;
753 sip->dss_paths = kmem_alloc(dss_npaths *
754 sizeof (rfs4_dss_path_t *), KM_SLEEP);
755
756 for (i = 0; i < dss_npaths; i++) {
757 sip->dss_paths[i] = rfs4_dss_newpath(sip, dss_paths[i], i);
758 }
759
760 mutex_enter(&rfs4_servinst_lock);
761 if (rfs4_cur_servinst != NULL) {
762 /* add to linked list */
763 sip->prev = rfs4_cur_servinst;
764 rfs4_cur_servinst->next = sip;
765 }
766 if (start_grace)
767 rfs4_grace_start(sip);
768 /* make the new instance "current" */
769 rfs4_cur_servinst = sip;
770
771 mutex_exit(&rfs4_servinst_lock);
772 }
773
774 /*
775 * In future, we might add a rfs4_servinst_destroy(sip) but, for now, destroy
776 * all instances directly.
777 */
778 void
779 rfs4_servinst_destroy_all(void)
780 {
781 rfs4_servinst_t *sip, *prev, *current;
782 #ifdef DEBUG
783 int n = 0;
784 #endif
785
786 mutex_enter(&rfs4_servinst_lock);
787 ASSERT(rfs4_cur_servinst != NULL);
788 current = rfs4_cur_servinst;
789 rfs4_cur_servinst = NULL;
790 for (sip = current; sip != NULL; sip = prev) {
791 prev = sip->prev;
792 rw_destroy(&sip->rwlock);
793 if (sip->oldstate)
794 kmem_free(sip->oldstate, sizeof (rfs4_oldstate_t));
795 if (sip->dss_paths)
796 kmem_free(sip->dss_paths,
797 sip->dss_npaths * sizeof (rfs4_dss_path_t *));
798 kmem_free(sip, sizeof (rfs4_servinst_t));
799 #ifdef DEBUG
800 n++;
801 #endif
802 }
803 mutex_exit(&rfs4_servinst_lock);
804 }
805
806 /*
807 * Assign the current server instance to a client_t.
808 * Should be called with cp->rc_dbe held.
809 */
810 void
811 rfs4_servinst_assign(rfs4_client_t *cp, rfs4_servinst_t *sip)
812 {
813 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
814
815 /*
816 * The lock ensures that if the current instance is in the process
817 * of changing, we will see the new one.
818 */
819 mutex_enter(&rfs4_servinst_lock);
820 cp->rc_server_instance = sip;
821 mutex_exit(&rfs4_servinst_lock);
822 }
823
824 rfs4_servinst_t *
825 rfs4_servinst(rfs4_client_t *cp)
826 {
827 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
828
829 return (cp->rc_server_instance);
830 }
831
/*
 * Result-free routine that does nothing.  rfsv4disptab[] uses it for
 * every operation that has no dedicated free routine.
 */
/* ARGSUSED */
static void
nullfree(caddr_t resop)
{
}
837
838 /*
839 * This is a fall-through for invalid or not implemented (yet) ops
840 */
841 /* ARGSUSED */
842 static void
843 rfs4_op_inval(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
844 struct compound_state *cs)
845 {
846 *cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_INVAL;
847 }
848
849 /*
850 * Check if the security flavor, nfsnum, is in the flavor_list.
851 */
852 bool_t
853 in_flavor_list(int nfsnum, int *flavor_list, int count)
854 {
855 int i;
856
857 for (i = 0; i < count; i++) {
858 if (nfsnum == flavor_list[i])
859 return (TRUE);
860 }
861 return (FALSE);
862 }
863
864 /*
865 * Used by rfs4_op_secinfo to get the security information from the
866 * export structure associated with the component.
867 */
868 /* ARGSUSED */
869 static nfsstat4
870 do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
871 {
872 int error, different_export = 0;
873 vnode_t *dvp, *vp;
874 struct exportinfo *exi = NULL;
875 fid_t fid;
876 uint_t count, i;
877 secinfo4 *resok_val;
878 struct secinfo *secp;
879 seconfig_t *si;
880 bool_t did_traverse = FALSE;
881 int dotdot, walk;
882
883 dvp = cs->vp;
884 dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
885
886 /*
887 * If dotdotting, then need to check whether it's above the
888 * root of a filesystem, or above an export point.
889 */
890 if (dotdot) {
891
892 /*
893 * If dotdotting at the root of a filesystem, then
894 * need to traverse back to the mounted-on filesystem
895 * and do the dotdot lookup there.
896 */
897 if (cs->vp->v_flag & VROOT) {
898
899 /*
900 * If at the system root, then can
901 * go up no further.
902 */
903 if (VN_CMP(dvp, rootdir))
904 return (puterrno4(ENOENT));
905
906 /*
907 * Traverse back to the mounted-on filesystem
908 */
909 dvp = untraverse(cs->vp);
910
911 /*
912 * Set the different_export flag so we remember
913 * to pick up a new exportinfo entry for
914 * this new filesystem.
915 */
916 different_export = 1;
917 } else {
918
919 /*
920 * If dotdotting above an export point then set
921 * the different_export to get new export info.
922 */
923 different_export = nfs_exported(cs->exi, cs->vp);
924 }
925 }
926
927 /*
928 * Get the vnode for the component "nm".
929 */
930 error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cs->cr,
931 NULL, NULL, NULL);
932 if (error)
933 return (puterrno4(error));
934
935 /*
936 * If the vnode is in a pseudo filesystem, or if the security flavor
937 * used in the request is valid but not an explicitly shared flavor,
938 * or the access bit indicates that this is a limited access,
939 * check whether this vnode is visible.
940 */
941 if (!different_export &&
942 (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
943 cs->access & CS_ACCESS_LIMITED)) {
944 if (! nfs_visible(cs->exi, vp, &different_export)) {
945 VN_RELE(vp);
946 return (puterrno4(ENOENT));
947 }
948 }
949
950 /*
951 * If it's a mountpoint, then traverse it.
952 */
953 if (vn_ismntpt(vp)) {
954 if ((error = traverse(&vp)) != 0) {
955 VN_RELE(vp);
956 return (puterrno4(error));
957 }
958 /* remember that we had to traverse mountpoint */
959 did_traverse = TRUE;
960 different_export = 1;
961 } else if (vp->v_vfsp != dvp->v_vfsp) {
962 /*
963 * If vp isn't a mountpoint and the vfs ptrs aren't the same,
964 * then vp is probably an LOFS object. We don't need the
965 * realvp, we just need to know that we might have crossed
966 * a server fs boundary and need to call checkexport4.
967 * (LOFS lookup hides server fs mountpoints, and actually calls
968 * traverse)
969 */
970 different_export = 1;
971 }
972
973 /*
974 * Get the export information for it.
975 */
976 if (different_export) {
977
978 bzero(&fid, sizeof (fid));
979 fid.fid_len = MAXFIDSZ;
980 error = vop_fid_pseudo(vp, &fid);
981 if (error) {
982 VN_RELE(vp);
983 return (puterrno4(error));
984 }
985
986 if (dotdot)
987 exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
988 else
989 exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
990
991 if (exi == NULL) {
992 if (did_traverse == TRUE) {
993 /*
994 * If this vnode is a mounted-on vnode,
995 * but the mounted-on file system is not
996 * exported, send back the secinfo for
997 * the exported node that the mounted-on
998 * vnode lives in.
999 */
1000 exi = cs->exi;
1001 } else {
1002 VN_RELE(vp);
1003 return (puterrno4(EACCES));
1004 }
1005 }
1006 } else {
1007 exi = cs->exi;
1008 }
1009 ASSERT(exi != NULL);
1010
1011
1012 /*
1013 * Create the secinfo result based on the security information
1014 * from the exportinfo structure (exi).
1015 *
1016 * Return all flavors for a pseudo node.
1017 * For a real export node, return the flavor that the client
1018 * has access with.
1019 */
1020 ASSERT(RW_LOCK_HELD(&exported_lock));
1021 if (PSEUDO(exi)) {
1022 count = exi->exi_export.ex_seccnt; /* total sec count */
1023 resok_val = kmem_alloc(count * sizeof (secinfo4), KM_SLEEP);
1024 secp = exi->exi_export.ex_secinfo;
1025
1026 for (i = 0; i < count; i++) {
1027 si = &secp[i].s_secinfo;
1028 resok_val[i].flavor = si->sc_rpcnum;
1029 if (resok_val[i].flavor == RPCSEC_GSS) {
1030 rpcsec_gss_info *info;
1031
1032 info = &resok_val[i].flavor_info;
1033 info->qop = si->sc_qop;
1034 info->service = (rpc_gss_svc_t)si->sc_service;
1035
1036 /* get oid opaque data */
1037 info->oid.sec_oid4_len =
1038 si->sc_gss_mech_type->length;
1039 info->oid.sec_oid4_val = kmem_alloc(
1040 si->sc_gss_mech_type->length, KM_SLEEP);
1041 bcopy(
1042 si->sc_gss_mech_type->elements,
1043 info->oid.sec_oid4_val,
1044 info->oid.sec_oid4_len);
1045 }
1046 }
1047 resp->SECINFO4resok_len = count;
1048 resp->SECINFO4resok_val = resok_val;
1049 } else {
1050 int ret_cnt = 0, k = 0;
1051 int *flavor_list;
1052
1053 count = exi->exi_export.ex_seccnt; /* total sec count */
1054 secp = exi->exi_export.ex_secinfo;
1055
1056 flavor_list = kmem_alloc(count * sizeof (int), KM_SLEEP);
1057 /* find out which flavors to return */
1058 for (i = 0; i < count; i ++) {
1059 int access, flavor, perm;
1060
1061 flavor = secp[i].s_secinfo.sc_nfsnum;
1062 perm = secp[i].s_flags;
1063
1064 access = nfsauth4_secinfo_access(exi, cs->req,
1065 flavor, perm, cs->basecr);
1066
1067 if (! (access & NFSAUTH_DENIED) &&
1068 ! (access & NFSAUTH_WRONGSEC)) {
1069 flavor_list[ret_cnt] = flavor;
1070 ret_cnt++;
1071 }
1072 }
1073
1074 /* Create the returning SECINFO value */
1075 resok_val = kmem_alloc(ret_cnt * sizeof (secinfo4), KM_SLEEP);
1076
1077 for (i = 0; i < count; i++) {
1078 /*
1079 * If the flavor is in the flavor list,
1080 * fill in resok_val.
1081 */
1082 si = &secp[i].s_secinfo;
1083 if (in_flavor_list(si->sc_nfsnum,
1084 flavor_list, ret_cnt)) {
1085 resok_val[k].flavor = si->sc_rpcnum;
1086 if (resok_val[k].flavor == RPCSEC_GSS) {
1087 rpcsec_gss_info *info;
1088
1089 info = &resok_val[k].flavor_info;
1090 info->qop = si->sc_qop;
1091 info->service = (rpc_gss_svc_t)
1092 si->sc_service;
1093
1094 /* get oid opaque data */
1095 info->oid.sec_oid4_len =
1096 si->sc_gss_mech_type->length;
1097 info->oid.sec_oid4_val = kmem_alloc(
1098 si->sc_gss_mech_type->length,
1099 KM_SLEEP);
1100 bcopy(si->sc_gss_mech_type->elements,
1101 info->oid.sec_oid4_val,
1102 info->oid.sec_oid4_len);
1103 }
1104 k++;
1105 }
1106 if (k >= ret_cnt)
1107 break;
1108 }
1109 resp->SECINFO4resok_len = ret_cnt;
1110 resp->SECINFO4resok_val = resok_val;
1111 kmem_free(flavor_list, count * sizeof (int));
1112 }
1113
1114 VN_RELE(vp);
1115 return (NFS4_OK);
1116 }
1117
1118 /*
1119 * SECINFO (Operation 33): Obtain required security information on
1120 * the component name in the format of (security-mechanism-oid, qop, service)
1121 * triplets.
1122 */
1123 /* ARGSUSED */
1124 static void
1125 rfs4_op_secinfo(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1126 struct compound_state *cs)
1127 {
1128 SECINFO4args *args = &argop->nfs_argop4_u.opsecinfo;
1129 SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1130 utf8string *utfnm = &args->name;
1131 uint_t len;
1132 char *nm;
1133 struct sockaddr *ca;
1134 char *name = NULL;
1135 nfsstat4 status = NFS4_OK;
1136
1137 DTRACE_NFSV4_2(op__secinfo__start, struct compound_state *, cs,
1138 SECINFO4args *, args);
1139
1140 /*
1141 * Current file handle (cfh) should have been set before getting
1142 * into this function. If not, return error.
1143 */
1144 if (cs->vp == NULL) {
1145 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1146 goto out;
1147 }
1148
1149 if (cs->vp->v_type != VDIR) {
1150 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1151 goto out;
1152 }
1153
1154 /*
1155 * Verify the component name. If failed, error out, but
1156 * do not error out if the component name is a "..".
1157 * SECINFO will return its parents secinfo data for SECINFO "..".
1158 */
1159 status = utf8_dir_verify(utfnm);
1160 if (status != NFS4_OK) {
1161 if (utfnm->utf8string_len != 2 ||
1162 utfnm->utf8string_val[0] != '.' ||
1163 utfnm->utf8string_val[1] != '.') {
1164 *cs->statusp = resp->status = status;
1165 goto out;
1166 }
1167 }
1168
1169 nm = utf8_to_str(utfnm, &len, NULL);
1170 if (nm == NULL) {
1171 *cs->statusp = resp->status = NFS4ERR_INVAL;
1172 goto out;
1173 }
1174
1175 if (len > MAXNAMELEN) {
1176 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1177 kmem_free(nm, len);
1178 goto out;
1179 }
1180
1181 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1182 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1183 MAXPATHLEN + 1);
1184
1185 if (name == NULL) {
1186 *cs->statusp = resp->status = NFS4ERR_INVAL;
1187 kmem_free(nm, len);
1188 goto out;
1189 }
1190
1191
1192 *cs->statusp = resp->status = do_rfs4_op_secinfo(cs, name, resp);
1193
1194 if (name != nm)
1195 kmem_free(name, MAXPATHLEN + 1);
1196 kmem_free(nm, len);
1197
1198 out:
1199 DTRACE_NFSV4_2(op__secinfo__done, struct compound_state *, cs,
1200 SECINFO4res *, resp);
1201 }
1202
1203 /*
1204 * Free SECINFO result.
1205 */
1206 /* ARGSUSED */
1207 static void
1208 rfs4_op_secinfo_free(nfs_resop4 *resop)
1209 {
1210 SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1211 int count, i;
1212 secinfo4 *resok_val;
1213
1214 /* If this is not an Ok result, nothing to free. */
1215 if (resp->status != NFS4_OK) {
1216 return;
1217 }
1218
1219 count = resp->SECINFO4resok_len;
1220 resok_val = resp->SECINFO4resok_val;
1221
1222 for (i = 0; i < count; i++) {
1223 if (resok_val[i].flavor == RPCSEC_GSS) {
1224 rpcsec_gss_info *info;
1225
1226 info = &resok_val[i].flavor_info;
1227 kmem_free(info->oid.sec_oid4_val,
1228 info->oid.sec_oid4_len);
1229 }
1230 }
1231 kmem_free(resok_val, count * sizeof (secinfo4));
1232 resp->SECINFO4resok_len = 0;
1233 resp->SECINFO4resok_val = NULL;
1234 }
1235
1236 /* ARGSUSED */
1237 static void
1238 rfs4_op_access(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1239 struct compound_state *cs)
1240 {
1241 ACCESS4args *args = &argop->nfs_argop4_u.opaccess;
1242 ACCESS4res *resp = &resop->nfs_resop4_u.opaccess;
1243 int error;
1244 vnode_t *vp;
1245 struct vattr va;
1246 int checkwriteperm;
1247 cred_t *cr = cs->cr;
1248 bslabel_t *clabel, *slabel;
1249 ts_label_t *tslabel;
1250 boolean_t admin_low_client;
1251
1252 DTRACE_NFSV4_2(op__access__start, struct compound_state *, cs,
1253 ACCESS4args *, args);
1254
1255 #if 0 /* XXX allow access even if !cs->access. Eventually only pseudo fs */
1256 if (cs->access == CS_ACCESS_DENIED) {
1257 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1258 goto out;
1259 }
1260 #endif
1261 if (cs->vp == NULL) {
1262 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1263 goto out;
1264 }
1265
1266 ASSERT(cr != NULL);
1267
1268 vp = cs->vp;
1269
1270 /*
1271 * If the file system is exported read only, it is not appropriate
1272 * to check write permissions for regular files and directories.
1273 * Special files are interpreted by the client, so the underlying
1274 * permissions are sent back to the client for interpretation.
1275 */
1276 if (rdonly4(req, cs) &&
1277 (vp->v_type == VREG || vp->v_type == VDIR))
1278 checkwriteperm = 0;
1279 else
1280 checkwriteperm = 1;
1281
1282 /*
1283 * XXX
1284 * We need the mode so that we can correctly determine access
1285 * permissions relative to a mandatory lock file. Access to
1286 * mandatory lock files is denied on the server, so it might
1287 * as well be reflected to the server during the open.
1288 */
1289 va.va_mask = AT_MODE;
1290 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1291 if (error) {
1292 *cs->statusp = resp->status = puterrno4(error);
1293 goto out;
1294 }
1295 resp->access = 0;
1296 resp->supported = 0;
1297
1298 if (is_system_labeled()) {
1299 ASSERT(req->rq_label != NULL);
1300 clabel = req->rq_label;
1301 DTRACE_PROBE2(tx__rfs4__log__info__opaccess__clabel, char *,
1302 "got client label from request(1)",
1303 struct svc_req *, req);
1304 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1305 if ((tslabel = nfs_getflabel(vp, cs->exi)) == NULL) {
1306 *cs->statusp = resp->status = puterrno4(EACCES);
1307 goto out;
1308 }
1309 slabel = label2bslabel(tslabel);
1310 DTRACE_PROBE3(tx__rfs4__log__info__opaccess__slabel,
1311 char *, "got server label(1) for vp(2)",
1312 bslabel_t *, slabel, vnode_t *, vp);
1313
1314 admin_low_client = B_FALSE;
1315 } else
1316 admin_low_client = B_TRUE;
1317 }
1318
1319 if (args->access & ACCESS4_READ) {
1320 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
1321 if (!error && !MANDLOCK(vp, va.va_mode) &&
1322 (!is_system_labeled() || admin_low_client ||
1323 bldominates(clabel, slabel)))
1324 resp->access |= ACCESS4_READ;
1325 resp->supported |= ACCESS4_READ;
1326 }
1327 if ((args->access & ACCESS4_LOOKUP) && vp->v_type == VDIR) {
1328 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1329 if (!error && (!is_system_labeled() || admin_low_client ||
1330 bldominates(clabel, slabel)))
1331 resp->access |= ACCESS4_LOOKUP;
1332 resp->supported |= ACCESS4_LOOKUP;
1333 }
1334 if (checkwriteperm &&
1335 (args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND))) {
1336 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1337 if (!error && !MANDLOCK(vp, va.va_mode) &&
1338 (!is_system_labeled() || admin_low_client ||
1339 blequal(clabel, slabel)))
1340 resp->access |=
1341 (args->access & (ACCESS4_MODIFY | ACCESS4_EXTEND));
1342 resp->supported |=
1343 resp->access & (ACCESS4_MODIFY | ACCESS4_EXTEND);
1344 }
1345
1346 if (checkwriteperm &&
1347 (args->access & ACCESS4_DELETE) && vp->v_type == VDIR) {
1348 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1349 if (!error && (!is_system_labeled() || admin_low_client ||
1350 blequal(clabel, slabel)))
1351 resp->access |= ACCESS4_DELETE;
1352 resp->supported |= ACCESS4_DELETE;
1353 }
1354 if (args->access & ACCESS4_EXECUTE && vp->v_type != VDIR) {
1355 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1356 if (!error && !MANDLOCK(vp, va.va_mode) &&
1357 (!is_system_labeled() || admin_low_client ||
1358 bldominates(clabel, slabel)))
1359 resp->access |= ACCESS4_EXECUTE;
1360 resp->supported |= ACCESS4_EXECUTE;
1361 }
1362
1363 if (is_system_labeled() && !admin_low_client)
1364 label_rele(tslabel);
1365
1366 *cs->statusp = resp->status = NFS4_OK;
1367 out:
1368 DTRACE_NFSV4_2(op__access__done, struct compound_state *, cs,
1369 ACCESS4res *, resp);
1370 }
1371
1372 /* ARGSUSED */
1373 static void
1374 rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1375 struct compound_state *cs)
1376 {
1377 COMMIT4args *args = &argop->nfs_argop4_u.opcommit;
1378 COMMIT4res *resp = &resop->nfs_resop4_u.opcommit;
1379 int error;
1380 vnode_t *vp = cs->vp;
1381 cred_t *cr = cs->cr;
1382 vattr_t va;
1383
1384 DTRACE_NFSV4_2(op__commit__start, struct compound_state *, cs,
1385 COMMIT4args *, args);
1386
1387 if (vp == NULL) {
1388 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1389 goto out;
1390 }
1391 if (cs->access == CS_ACCESS_DENIED) {
1392 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1393 goto out;
1394 }
1395
1396 if (args->offset + args->count < args->offset) {
1397 *cs->statusp = resp->status = NFS4ERR_INVAL;
1398 goto out;
1399 }
1400
1401 va.va_mask = AT_UID;
1402 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1403
1404 /*
1405 * If we can't get the attributes, then we can't do the
1406 * right access checking. So, we'll fail the request.
1407 */
1408 if (error) {
1409 *cs->statusp = resp->status = puterrno4(error);
1410 goto out;
1411 }
1412 if (rdonly4(req, cs)) {
1413 *cs->statusp = resp->status = NFS4ERR_ROFS;
1414 goto out;
1415 }
1416
1417 if (vp->v_type != VREG) {
1418 if (vp->v_type == VDIR)
1419 resp->status = NFS4ERR_ISDIR;
1420 else
1421 resp->status = NFS4ERR_INVAL;
1422 *cs->statusp = resp->status;
1423 goto out;
1424 }
1425
1426 if (crgetuid(cr) != va.va_uid &&
1427 (error = VOP_ACCESS(vp, VWRITE, 0, cs->cr, NULL))) {
1428 *cs->statusp = resp->status = puterrno4(error);
1429 goto out;
1430 }
1431
1432 error = VOP_FSYNC(vp, FSYNC, cr, NULL);
1433
1434 if (error) {
1435 *cs->statusp = resp->status = puterrno4(error);
1436 goto out;
1437 }
1438
1439 *cs->statusp = resp->status = NFS4_OK;
1440 resp->writeverf = Write4verf;
1441 out:
1442 DTRACE_NFSV4_2(op__commit__done, struct compound_state *, cs,
1443 COMMIT4res *, resp);
1444 }
1445
1446 /*
1447 * rfs4_op_mknod is called from rfs4_op_create after all initial verification
1448 * was completed. It does the nfsv4 create for special files.
1449 */
1450 /* ARGSUSED */
1451 static vnode_t *
1452 do_rfs4_op_mknod(CREATE4args *args, CREATE4res *resp, struct svc_req *req,
1453 struct compound_state *cs, vattr_t *vap, char *nm)
1454 {
1455 int error;
1456 cred_t *cr = cs->cr;
1457 vnode_t *dvp = cs->vp;
1458 vnode_t *vp = NULL;
1459 int mode;
1460 enum vcexcl excl;
1461
1462 switch (args->type) {
1463 case NF4CHR:
1464 case NF4BLK:
1465 if (secpolicy_sys_devices(cr) != 0) {
1466 *cs->statusp = resp->status = NFS4ERR_PERM;
1467 return (NULL);
1468 }
1469 if (args->type == NF4CHR)
1470 vap->va_type = VCHR;
1471 else
1472 vap->va_type = VBLK;
1473 vap->va_rdev = makedevice(args->ftype4_u.devdata.specdata1,
1474 args->ftype4_u.devdata.specdata2);
1475 vap->va_mask |= AT_RDEV;
1476 break;
1477 case NF4SOCK:
1478 vap->va_type = VSOCK;
1479 break;
1480 case NF4FIFO:
1481 vap->va_type = VFIFO;
1482 break;
1483 default:
1484 *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1485 return (NULL);
1486 }
1487
1488 /*
1489 * Must specify the mode.
1490 */
1491 if (!(vap->va_mask & AT_MODE)) {
1492 *cs->statusp = resp->status = NFS4ERR_INVAL;
1493 return (NULL);
1494 }
1495
1496 excl = EXCL;
1497
1498 mode = 0;
1499
1500 error = VOP_CREATE(dvp, nm, vap, excl, mode, &vp, cr, 0, NULL, NULL);
1501 if (error) {
1502 *cs->statusp = resp->status = puterrno4(error);
1503 return (NULL);
1504 }
1505 return (vp);
1506 }
1507
/*
 * nfsv4 create is used to create non-regular files. For regular files,
 * use nfsv4 open.
 *
 * Creates a directory (VOP_MKDIR), a symbolic link (VOP_SYMLINK followed
 * by VOP_LOOKUP) or a special file (do_rfs4_op_mknod) named
 * args->objname in the directory identified by the current filehandle.
 *
 * On success the new object becomes the current filehandle (cs->fh and
 * cs->vp are replaced), resp->cinfo describes the change to the parent
 * directory and resp->attrset reports which of the requested createattrs
 * were set.  The outcome is stored in both resp->status and
 * *cs->statusp.
 *
 * Buffers owned by this function and released on every exit path once
 * allocated: nm (len bytes), name (MAXPATHLEN + 1 bytes, only when it
 * differs from nm), lnm/lname for symlinks (same convention), and the
 * ntov table.
 */
/* ARGSUSED */
static void
rfs4_op_create(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
    struct compound_state *cs)
{
	CREATE4args *args = &argop->nfs_argop4_u.opcreate;
	CREATE4res *resp = &resop->nfs_resop4_u.opcreate;
	int error;
	/*
	 * bva: parent attrs before the create; iva: parent sequence right
	 * after the create; iva2: re-sample after the symlink lookup;
	 * ava: parent attrs after the final directory fsync.
	 */
	struct vattr bva, iva, iva2, ava, *vap;
	cred_t *cr = cs->cr;
	vnode_t *dvp = cs->vp;
	vnode_t *vp = NULL;
	vnode_t *realvp;
	char *nm, *lnm;
	uint_t len, llen;
	int syncval = 0;
	struct nfs4_svgetit_arg sarg;
	struct nfs4_ntov_table ntov;
	struct statvfs64 sb;
	nfsstat4 status;
	struct sockaddr *ca;
	char *name = NULL;
	char *lname = NULL;

	DTRACE_NFSV4_2(op__create__start, struct compound_state *, cs,
	    CREATE4args *, args);

	resp->attrset = 0;

	if (dvp == NULL) {
		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
		goto out;
	}

	/*
	 * If there is an unshared filesystem mounted on this vnode,
	 * do not allow to create an object in this directory.
	 */
	if (vn_ismntpt(dvp)) {
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		goto out;
	}

	/* Verify that type is correct */
	switch (args->type) {
	case NF4LNK:
	case NF4BLK:
	case NF4CHR:
	case NF4SOCK:
	case NF4FIFO:
	case NF4DIR:
		break;
	default:
		*cs->statusp = resp->status = NFS4ERR_BADTYPE;
		goto out;
	};

	if (cs->access == CS_ACCESS_DENIED) {
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		goto out;
	}
	if (dvp->v_type != VDIR) {
		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
		goto out;
	}
	status = utf8_dir_verify(&args->objname);
	if (status != NFS4_OK) {
		*cs->statusp = resp->status = status;
		goto out;
	}

	if (rdonly4(req, cs)) {
		*cs->statusp = resp->status = NFS4ERR_ROFS;
		goto out;
	}

	/*
	 * Name of newly created object
	 */
	nm = utf8_to_fn(&args->objname, &len, NULL);
	if (nm == NULL) {
		*cs->statusp = resp->status = NFS4ERR_INVAL;
		goto out;
	}

	if (len > MAXNAMELEN) {
		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
		kmem_free(nm, len);
		goto out;
	}

	/*
	 * Convert the name for this client.  The result may be nm itself,
	 * which is why every cleanup path frees name only when it differs
	 * from nm.
	 */
	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
	name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
	    MAXPATHLEN + 1);

	if (name == NULL) {
		*cs->statusp = resp->status = NFS4ERR_INVAL;
		kmem_free(nm, len);
		goto out;
	}

	resp->attrset = 0;

	/* Decode createattrs into sarg.vap; ntov must be freed from here on. */
	sarg.sbp = &sb;
	sarg.is_referral = B_FALSE;
	nfs4_ntov_table_init(&ntov);

	status = do_rfs4_set_attrs(&resp->attrset,
	    &args->createattrs, cs, &sarg, &ntov, NFS4ATTR_SETIT);

	/* A create that ends up with an empty vattr mask is rejected. */
	if (sarg.vap->va_mask == 0 && status == NFS4_OK)
		status = NFS4ERR_INVAL;

	if (status != NFS4_OK) {
		*cs->statusp = resp->status = status;
		if (name != nm)
			kmem_free(name, MAXPATHLEN + 1);
		kmem_free(nm, len);
		nfs4_ntov_table_free(&ntov, &sarg);
		resp->attrset = 0;
		goto out;
	}

	/* Get "before" change value */
	bva.va_mask = AT_CTIME|AT_SEQ|AT_MODE;
	error = VOP_GETATTR(dvp, &bva, 0, cr, NULL);
	if (error) {
		*cs->statusp = resp->status = puterrno4(error);
		if (name != nm)
			kmem_free(name, MAXPATHLEN + 1);
		kmem_free(nm, len);
		nfs4_ntov_table_free(&ntov, &sarg);
		resp->attrset = 0;
		goto out;
	}
	NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bva.va_ctime)

	vap = sarg.vap;

	/*
	 * Set the default initial values for attributes when the parent
	 * directory does not have the VSUID/VSGID bit set and they have
	 * not been specified in createattrs.
	 */
	if (!(bva.va_mode & VSUID) && (vap->va_mask & AT_UID) == 0) {
		vap->va_uid = crgetuid(cr);
		vap->va_mask |= AT_UID;
	}
	if (!(bva.va_mode & VSGID) && (vap->va_mask & AT_GID) == 0) {
		vap->va_gid = crgetgid(cr);
		vap->va_mask |= AT_GID;
	}

	/*
	 * error is zero on entry to the switch (the getattr above
	 * succeeded); each case leaves it describing its own VOP result.
	 */
	vap->va_mask |= AT_TYPE;
	switch (args->type) {
	case NF4DIR:
		vap->va_type = VDIR;
		if ((vap->va_mask & AT_MODE) == 0) {
			vap->va_mode = 0700;	/* default: owner rwx only */
			vap->va_mask |= AT_MODE;
		}
		error = VOP_MKDIR(dvp, name, vap, &vp, cr, NULL, 0, NULL);
		if (error)
			break;

		/*
		 * Get the initial "after" sequence number, if it fails,
		 * set to zero
		 */
		iva.va_mask = AT_SEQ;
		if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
			iva.va_seq = 0;
		break;
	case NF4LNK:
		vap->va_type = VLNK;
		if ((vap->va_mask & AT_MODE) == 0) {
			vap->va_mode = 0700;	/* default: owner rwx only */
			vap->va_mask |= AT_MODE;
		}

		/*
		 * symlink names must be treated as data
		 */
		lnm = utf8_to_str((utf8string *)&args->ftype4_u.linkdata,
		    &llen, NULL);

		if (lnm == NULL) {
			*cs->statusp = resp->status = NFS4ERR_INVAL;
			if (name != nm)
				kmem_free(name, MAXPATHLEN + 1);
			kmem_free(nm, len);
			nfs4_ntov_table_free(&ntov, &sarg);
			resp->attrset = 0;
			goto out;
		}

		if (llen > MAXPATHLEN) {
			*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
			if (name != nm)
				kmem_free(name, MAXPATHLEN + 1);
			kmem_free(nm, len);
			kmem_free(lnm, llen);
			nfs4_ntov_table_free(&ntov, &sarg);
			resp->attrset = 0;
			goto out;
		}

		/* Same conversion and ownership convention as nm/name. */
		lname = nfscmd_convname(ca, cs->exi, lnm,
		    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);

		if (lname == NULL) {
			*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
			if (name != nm)
				kmem_free(name, MAXPATHLEN + 1);
			kmem_free(nm, len);
			kmem_free(lnm, llen);
			nfs4_ntov_table_free(&ntov, &sarg);
			resp->attrset = 0;
			goto out;
		}

		error = VOP_SYMLINK(dvp, name, vap, lname, cr, NULL, 0);
		/* The link text is not needed past this point. */
		if (lname != lnm)
			kmem_free(lname, MAXPATHLEN + 1);
		kmem_free(lnm, llen);
		if (error)
			break;

		/*
		 * Get the initial "after" sequence number, if it fails,
		 * set to zero
		 */
		iva.va_mask = AT_SEQ;
		if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
			iva.va_seq = 0;

		/* VOP_SYMLINK does not return a vnode; look the link up. */
		error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
		    NULL, NULL, NULL);
		if (error)
			break;

		/*
		 * va_seq is not safe over VOP calls, check it again
		 * if it has changed zero out iva to force atomic = FALSE.
		 */
		iva2.va_mask = AT_SEQ;
		if (VOP_GETATTR(dvp, &iva2, 0, cs->cr, NULL) ||
		    iva2.va_seq != iva.va_seq)
			iva.va_seq = 0;
		break;
	default:
		/*
		 * probably a special file.
		 */
		if ((vap->va_mask & AT_MODE) == 0) {
			vap->va_mode = 0600;	/* default: owner rw only */
			vap->va_mask |= AT_MODE;
		}
		syncval = FNODSYNC;
		/*
		 * We know this will only generate one VOP call
		 */
		vp = do_rfs4_op_mknod(args, resp, req, cs, vap, name);

		/* do_rfs4_op_mknod has already stored the failure status. */
		if (vp == NULL) {
			if (name != nm)
				kmem_free(name, MAXPATHLEN + 1);
			kmem_free(nm, len);
			nfs4_ntov_table_free(&ntov, &sarg);
			resp->attrset = 0;
			goto out;
		}

		/*
		 * Get the initial "after" sequence number, if it fails,
		 * set to zero
		 */
		iva.va_mask = AT_SEQ;
		if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
			iva.va_seq = 0;

		break;
	}
	/* The names are no longer needed whatever the outcome was. */
	if (name != nm)
		kmem_free(name, MAXPATHLEN + 1);
	kmem_free(nm, len);

	if (error) {
		*cs->statusp = resp->status = puterrno4(error);
	}

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(dvp, 0, cr, NULL);

	/*
	 * NOTE(review): on the success path nothing in this function has
	 * stored resp->status yet, so this test appears to rely on the
	 * caller handing in a result whose status already reads as
	 * NFS4_OK -- confirm against the compound dispatcher.
	 */
	if (resp->status != NFS4_OK) {
		/* vp can be NULL here, e.g. when the create itself failed. */
		if (vp != NULL)
			VN_RELE(vp);
		nfs4_ntov_table_free(&ntov, &sarg);
		resp->attrset = 0;
		goto out;
	}

	/*
	 * Finish setup of cinfo response, "before" value already set.
	 * Get "after" change value, if it fails, simply return the
	 * before value.
	 */
	ava.va_mask = AT_CTIME|AT_SEQ;
	if (VOP_GETATTR(dvp, &ava, 0, cr, NULL)) {
		ava.va_ctime = bva.va_ctime;
		ava.va_seq = 0;
	}
	NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, ava.va_ctime);

	/*
	 * True verification that object was created with correct
	 * attrs is impossible.  The attrs could have been changed
	 * immediately after object creation.  If attributes did
	 * not verify, the only recourse for the server is to
	 * destroy the object.  Maybe if some attrs (like gid)
	 * are set incorrectly, the object should be destroyed;
	 * however, seems bad as a default policy.  Do we really
	 * want to destroy an object over one of the times not
	 * verifying correctly?  For these reasons, the server
	 * currently sets bits in attrset for createattrs
	 * that were set; however, no verification is done.
	 *
	 * vmask_to_nmask accounts for vattr bits set on create
	 *	[do_rfs4_set_attrs() only sets resp bits for
	 *	 non-vattr/vfs bits.]
	 * Mask off any bits set by default so as not to return
	 * more attrset bits than were requested in createattrs
	 */
	nfs4_vmask_to_nmask(sarg.vap->va_mask, &resp->attrset);
	resp->attrset &= args->createattrs.attrmask;
	nfs4_ntov_table_free(&ntov, &sarg);

	/* Build the filehandle for the new object into cs->fh. */
	error = makefh4(&cs->fh, vp, cs->exi);
	if (error) {
		*cs->statusp = resp->status = puterrno4(error);
	}

	/*
	 * The cinfo.atomic = TRUE only if we got no errors, we have
	 * non-zero va_seq's, and it has incremented by exactly one
	 * during the creation and it didn't change during the VOP_LOOKUP
	 * or VOP_FSYNC.
	 */
	if (!error && bva.va_seq && iva.va_seq && ava.va_seq &&
	    iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
		resp->cinfo.atomic = TRUE;
	else
		resp->cinfo.atomic = FALSE;

	/*
	 * Force modified metadata out to stable storage.
	 *
	 * if a underlying vp exists, pass it to VOP_FSYNC
	 */
	if (VOP_REALVP(vp, &realvp, NULL) == 0)
		(void) VOP_FSYNC(realvp, syncval, cr, NULL);
	else
		(void) VOP_FSYNC(vp, syncval, cr, NULL);

	/* Only a makefh4() failure can have set an error status here. */
	if (resp->status != NFS4_OK) {
		VN_RELE(vp);
		goto out;
	}
	/* The new object replaces the directory as the current vnode. */
	if (cs->vp)
		VN_RELE(cs->vp);

	cs->vp = vp;
	*cs->statusp = resp->status = NFS4_OK;
out:
	DTRACE_NFSV4_2(op__create__done, struct compound_state *, cs,
	    CREATE4res *, resp);
}
1891
1892 /*ARGSUSED*/
1893 static void
1894 rfs4_op_delegpurge(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1895 struct compound_state *cs)
1896 {
1897 DTRACE_NFSV4_2(op__delegpurge__start, struct compound_state *, cs,
1898 DELEGPURGE4args *, &argop->nfs_argop4_u.opdelegpurge);
1899
1900 rfs4_op_inval(argop, resop, req, cs);
1901
1902 DTRACE_NFSV4_2(op__delegpurge__done, struct compound_state *, cs,
1903 DELEGPURGE4res *, &resop->nfs_resop4_u.opdelegpurge);
1904 }
1905
1906 /*ARGSUSED*/
1907 static void
1908 rfs4_op_delegreturn(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1909 struct compound_state *cs)
1910 {
1911 DELEGRETURN4args *args = &argop->nfs_argop4_u.opdelegreturn;
1912 DELEGRETURN4res *resp = &resop->nfs_resop4_u.opdelegreturn;
1913 rfs4_deleg_state_t *dsp;
1914 nfsstat4 status;
1915
1916 DTRACE_NFSV4_2(op__delegreturn__start, struct compound_state *, cs,
1917 DELEGRETURN4args *, args);
1918
1919 status = rfs4_get_deleg_state(&args->deleg_stateid, &dsp);
1920 resp->status = *cs->statusp = status;
1921 if (status != NFS4_OK)
1922 goto out;
1923
1924 /* Ensure specified filehandle matches */
1925 if (cs->vp != dsp->rds_finfo->rf_vp) {
1926 resp->status = *cs->statusp = NFS4ERR_BAD_STATEID;
1927 } else
1928 rfs4_return_deleg(dsp, FALSE);
1929
1930 rfs4_update_lease(dsp->rds_client);
1931
1932 rfs4_deleg_state_rele(dsp);
1933 out:
1934 DTRACE_NFSV4_2(op__delegreturn__done, struct compound_state *, cs,
1935 DELEGRETURN4res *, resp);
1936 }
1937
1938 /*
1939 * Check to see if a given "flavor" is an explicitly shared flavor.
1940 * The assumption of this routine is the "flavor" is already a valid
1941 * flavor in the secinfo list of "exi".
1942 *
1943 * e.g.
1944 * # share -o sec=flavor1 /export
1945 * # share -o sec=flavor2 /export/home
1946 *
1947 * flavor2 is not an explicitly shared flavor for /export,
1948 * however it is in the secinfo list for /export thru the
1949 * server namespace setup.
1950 */
1951 int
1952 is_exported_sec(int flavor, struct exportinfo *exi)
1953 {
1954 int i;
1955 struct secinfo *sp;
1956
1957 sp = exi->exi_export.ex_secinfo;
1958 for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
1959 if (flavor == sp[i].s_secinfo.sc_nfsnum ||
1960 sp[i].s_secinfo.sc_nfsnum == AUTH_NONE) {
1961 return (SEC_REF_EXPORTED(&sp[i]));
1962 }
1963 }
1964
1965 /* Should not reach this point based on the assumption */
1966 return (0);
1967 }
1968
1969 /*
1970 * Check if the security flavor used in the request matches what is
1971 * required at the export point or at the root pseudo node (exi_root).
1972 *
1973 * returns 1 if there's a match or if exported with AUTH_NONE; 0 otherwise.
1974 *
1975 */
1976 static int
1977 secinfo_match_or_authnone(struct compound_state *cs)
1978 {
1979 int i;
1980 struct secinfo *sp;
1981
1982 /*
1983 * Check cs->nfsflavor (from the request) against
1984 * the current export data in cs->exi.
1985 */
1986 sp = cs->exi->exi_export.ex_secinfo;
1987 for (i = 0; i < cs->exi->exi_export.ex_seccnt; i++) {
1988 if (cs->nfsflavor == sp[i].s_secinfo.sc_nfsnum ||
1989 sp[i].s_secinfo.sc_nfsnum == AUTH_NONE)
1990 return (1);
1991 }
1992
1993 return (0);
1994 }
1995
1996 /*
1997 * Check the access authority for the client and return the correct error.
1998 */
1999 nfsstat4
2000 call_checkauth4(struct compound_state *cs, struct svc_req *req)
2001 {
2002 int authres;
2003
2004 /*
2005 * First, check if the security flavor used in the request
2006 * are among the flavors set in the server namespace.
2007 */
2008 if (!secinfo_match_or_authnone(cs)) {
2009 *cs->statusp = NFS4ERR_WRONGSEC;
2010 return (*cs->statusp);
2011 }
2012
2013 authres = checkauth4(cs, req);
2014
2015 if (authres > 0) {
2016 *cs->statusp = NFS4_OK;
2017 if (! (cs->access & CS_ACCESS_LIMITED))
2018 cs->access = CS_ACCESS_OK;
2019 } else if (authres == 0) {
2020 *cs->statusp = NFS4ERR_ACCESS;
2021 } else if (authres == -2) {
2022 *cs->statusp = NFS4ERR_WRONGSEC;
2023 } else {
2024 *cs->statusp = NFS4ERR_DELAY;
2025 }
2026 return (*cs->statusp);
2027 }
2028
2029 /*
2030 * bitmap4_to_attrmask is called by getattr and readdir.
2031 * It sets up the vattr mask and determines whether vfsstat call is needed
2032 * based on the input bitmap.
2033 * Returns nfsv4 status.
2034 */
2035 static nfsstat4
2036 bitmap4_to_attrmask(bitmap4 breq, struct nfs4_svgetit_arg *sargp)
2037 {
2038 int i;
2039 uint_t va_mask;
2040 struct statvfs64 *sbp = sargp->sbp;
2041
2042 sargp->sbp = NULL;
2043 sargp->flag = 0;
2044 sargp->rdattr_error = NFS4_OK;
2045 sargp->mntdfid_set = FALSE;
2046 if (sargp->cs->vp)
2047 sargp->xattr = get_fh4_flag(&sargp->cs->fh,
2048 FH4_ATTRDIR | FH4_NAMEDATTR);
2049 else
2050 sargp->xattr = 0;
2051
2052 /*
2053 * Set rdattr_error_req to true if return error per
2054 * failed entry rather than fail the readdir.
2055 */
2056 if (breq & FATTR4_RDATTR_ERROR_MASK)
2057 sargp->rdattr_error_req = 1;
2058 else
2059 sargp->rdattr_error_req = 0;
2060
2061 /*
2062 * generate the va_mask
2063 * Handle the easy cases first
2064 */
2065 switch (breq) {
2066 case NFS4_NTOV_ATTR_MASK:
2067 sargp->vap->va_mask = NFS4_NTOV_ATTR_AT_MASK;
2068 return (NFS4_OK);
2069
2070 case NFS4_FS_ATTR_MASK:
2071 sargp->vap->va_mask = NFS4_FS_ATTR_AT_MASK;
2072 sargp->sbp = sbp;
2073 return (NFS4_OK);
2074
2075 case NFS4_NTOV_ATTR_CACHE_MASK:
2076 sargp->vap->va_mask = NFS4_NTOV_ATTR_CACHE_AT_MASK;
2077 return (NFS4_OK);
2078
2079 case FATTR4_LEASE_TIME_MASK:
2080 sargp->vap->va_mask = 0;
2081 return (NFS4_OK);
2082
2083 default:
2084 va_mask = 0;
2085 for (i = 0; i < nfs4_ntov_map_size; i++) {
2086 if ((breq & nfs4_ntov_map[i].fbit) &&
2087 nfs4_ntov_map[i].vbit)
2088 va_mask |= nfs4_ntov_map[i].vbit;
2089 }
2090
2091 /*
2092 * Check is vfsstat is needed
2093 */
2094 if (breq & NFS4_FS_ATTR_MASK)
2095 sargp->sbp = sbp;
2096
2097 sargp->vap->va_mask = va_mask;
2098 return (NFS4_OK);
2099 }
2100 /* NOTREACHED */
2101 }
2102
2103 /*
2104 * bitmap4_get_sysattrs is called by getattr and readdir.
2105 * It calls both VOP_GETATTR and VFS_STATVFS calls to get the attrs.
2106 * Returns nfsv4 status.
2107 */
2108 static nfsstat4
2109 bitmap4_get_sysattrs(struct nfs4_svgetit_arg *sargp)
2110 {
2111 int error;
2112 struct compound_state *cs = sargp->cs;
2113 vnode_t *vp = cs->vp;
2114
2115 if (sargp->sbp != NULL) {
2116 if (error = VFS_STATVFS(vp->v_vfsp, sargp->sbp)) {
2117 sargp->sbp = NULL; /* to identify error */
2118 return (puterrno4(error));
2119 }
2120 }
2121
2122 return (rfs4_vop_getattr(vp, sargp->vap, 0, cs->cr));
2123 }
2124
2125 static void
2126 nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp)
2127 {
2128 ntovp->na = kmem_zalloc(sizeof (union nfs4_attr_u) * nfs4_ntov_map_size,
2129 KM_SLEEP);
2130 ntovp->attrcnt = 0;
2131 ntovp->vfsstat = FALSE;
2132 }
2133
2134 static void
2135 nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
2136 struct nfs4_svgetit_arg *sargp)
2137 {
2138 int i;
2139 union nfs4_attr_u *na;
2140 uint8_t *amap;
2141
2142 /*
2143 * XXX Should do the same checks for whether the bit is set
2144 */
2145 for (i = 0, na = ntovp->na, amap = ntovp->amap;
2146 i < ntovp->attrcnt; i++, na++, amap++) {
2147 (void) (*nfs4_ntov_map[*amap].sv_getit)(
2148 NFS4ATTR_FREEIT, sargp, na);
2149 }
2150 if ((sargp->op == NFS4ATTR_SETIT) || (sargp->op == NFS4ATTR_VERIT)) {
2151 /*
2152 * xdr_free for getattr will be done later
2153 */
2154 for (i = 0, na = ntovp->na, amap = ntovp->amap;
2155 i < ntovp->attrcnt; i++, na++, amap++) {
2156 xdr_free(nfs4_ntov_map[*amap].xfunc, (caddr_t)na);
2157 }
2158 }
2159 kmem_free(ntovp->na, sizeof (union nfs4_attr_u) * nfs4_ntov_map_size);
2160 }
2161
2162 /*
2163 * do_rfs4_op_getattr gets the system attrs and converts into fattr4.
2164 */
2165 static nfsstat4
2166 do_rfs4_op_getattr(bitmap4 breq, fattr4 *fattrp,
2167 struct nfs4_svgetit_arg *sargp)
2168 {
2169 int error = 0;
2170 int i, k;
2171 struct nfs4_ntov_table ntov;
2172 XDR xdr;
2173 ulong_t xdr_size;
2174 char *xdr_attrs;
2175 nfsstat4 status = NFS4_OK;
2176 nfsstat4 prev_rdattr_error = sargp->rdattr_error;
2177 union nfs4_attr_u *na;
2178 uint8_t *amap;
2179
2180 sargp->op = NFS4ATTR_GETIT;
2181 sargp->flag = 0;
2182
2183 fattrp->attrmask = 0;
2184 /* if no bits requested, then return empty fattr4 */
2185 if (breq == 0) {
2186 fattrp->attrlist4_len = 0;
2187 fattrp->attrlist4 = NULL;
2188 return (NFS4_OK);
2189 }
2190
2191 /*
2192 * return NFS4ERR_INVAL when client requests write-only attrs
2193 */
2194 if (breq & (FATTR4_TIME_ACCESS_SET_MASK | FATTR4_TIME_MODIFY_SET_MASK))
2195 return (NFS4ERR_INVAL);
2196
2197 nfs4_ntov_table_init(&ntov);
2198 na = ntov.na;
2199 amap = ntov.amap;
2200
2201 /*
2202 * Now loop to get or verify the attrs
2203 */
2204 for (i = 0; i < nfs4_ntov_map_size; i++) {
2205 if (breq & nfs4_ntov_map[i].fbit) {
2206 if ((*nfs4_ntov_map[i].sv_getit)(
2207 NFS4ATTR_SUPPORTED, sargp, NULL) == 0) {
2208
2209 error = (*nfs4_ntov_map[i].sv_getit)(
2210 NFS4ATTR_GETIT, sargp, na);
2211
2212 /*
2213 * Possible error values:
2214 * >0 if sv_getit failed to
2215 * get the attr; 0 if succeeded;
2216 * <0 if rdattr_error and the
2217 * attribute cannot be returned.
2218 */
2219 if (error && !(sargp->rdattr_error_req))
2220 goto done;
2221 /*
2222 * If error then just for entry
2223 */
2224 if (error == 0) {
2225 fattrp->attrmask |=
2226 nfs4_ntov_map[i].fbit;
2227 *amap++ =
2228 (uint8_t)nfs4_ntov_map[i].nval;
2229 na++;
2230 (ntov.attrcnt)++;
2231 } else if ((error > 0) &&
2232 (sargp->rdattr_error == NFS4_OK)) {
2233 sargp->rdattr_error = puterrno4(error);
2234 }
2235 error = 0;
2236 }
2237 }
2238 }
2239
2240 /*
2241 * If rdattr_error was set after the return value for it was assigned,
2242 * update it.
2243 */
2244 if (prev_rdattr_error != sargp->rdattr_error) {
2245 na = ntov.na;
2246 amap = ntov.amap;
2247 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2248 k = *amap;
2249 if (k < FATTR4_RDATTR_ERROR) {
2250 continue;
2251 }
2252 if ((k == FATTR4_RDATTR_ERROR) &&
2253 ((*nfs4_ntov_map[k].sv_getit)(
2254 NFS4ATTR_SUPPORTED, sargp, NULL) == 0)) {
2255
2256 (void) (*nfs4_ntov_map[k].sv_getit)(
2257 NFS4ATTR_GETIT, sargp, na);
2258 }
2259 break;
2260 }
2261 }
2262
2263 xdr_size = 0;
2264 na = ntov.na;
2265 amap = ntov.amap;
2266 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2267 xdr_size += xdr_sizeof(nfs4_ntov_map[*amap].xfunc, na);
2268 }
2269
2270 fattrp->attrlist4_len = xdr_size;
2271 if (xdr_size) {
2272 /* freed by rfs4_op_getattr_free() */
2273 fattrp->attrlist4 = xdr_attrs = kmem_zalloc(xdr_size, KM_SLEEP);
2274
2275 xdrmem_create(&xdr, xdr_attrs, xdr_size, XDR_ENCODE);
2276
2277 na = ntov.na;
2278 amap = ntov.amap;
2279 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2280 if (!(*nfs4_ntov_map[*amap].xfunc)(&xdr, na)) {
2281 DTRACE_PROBE1(nfss__e__getattr4_encfail,
2282 int, *amap);
2283 status = NFS4ERR_SERVERFAULT;
2284 break;
2285 }
2286 }
2287 /* xdrmem_destroy(&xdrs); */ /* NO-OP */
2288 } else {
2289 fattrp->attrlist4 = NULL;
2290 }
2291 done:
2292
2293 nfs4_ntov_table_free(&ntov, sargp);
2294
2295 if (error != 0)
2296 status = puterrno4(error);
2297
2298 return (status);
2299 }
2300
2301 /* ARGSUSED */
2302 static void
2303 rfs4_op_getattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2304 struct compound_state *cs)
2305 {
2306 GETATTR4args *args = &argop->nfs_argop4_u.opgetattr;
2307 GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2308 struct nfs4_svgetit_arg sarg;
2309 struct statvfs64 sb;
2310 nfsstat4 status;
2311
2312 DTRACE_NFSV4_2(op__getattr__start, struct compound_state *, cs,
2313 GETATTR4args *, args);
2314
2315 if (cs->vp == NULL) {
2316 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2317 goto out;
2318 }
2319
2320 if (cs->access == CS_ACCESS_DENIED) {
2321 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2322 goto out;
2323 }
2324
2325 sarg.sbp = &sb;
2326 sarg.cs = cs;
2327 sarg.is_referral = B_FALSE;
2328
2329 status = bitmap4_to_attrmask(args->attr_request, &sarg);
2330 if (status == NFS4_OK) {
2331
2332 status = bitmap4_get_sysattrs(&sarg);
2333 if (status == NFS4_OK) {
2334
2335 /* Is this a referral? */
2336 if (vn_is_nfs_reparse(cs->vp, cs->cr)) {
2337 /* Older V4 Solaris client sees a link */
2338 if (client_is_downrev(req))
2339 sarg.vap->va_type = VLNK;
2340 else
2341 sarg.is_referral = B_TRUE;
2342 }
2343
2344 status = do_rfs4_op_getattr(args->attr_request,
2345 &resp->obj_attributes, &sarg);
2346 }
2347 }
2348 *cs->statusp = resp->status = status;
2349 out:
2350 DTRACE_NFSV4_2(op__getattr__done, struct compound_state *, cs,
2351 GETATTR4res *, resp);
2352 }
2353
2354 static void
2355 rfs4_op_getattr_free(nfs_resop4 *resop)
2356 {
2357 GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2358
2359 nfs4_fattr4_free(&resp->obj_attributes);
2360 }
2361
2362 /* ARGSUSED */
2363 static void
2364 rfs4_op_getfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2365 struct compound_state *cs)
2366 {
2367 GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2368
2369 DTRACE_NFSV4_1(op__getfh__start, struct compound_state *, cs);
2370
2371 if (cs->vp == NULL) {
2372 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2373 goto out;
2374 }
2375 if (cs->access == CS_ACCESS_DENIED) {
2376 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2377 goto out;
2378 }
2379
2380 /* check for reparse point at the share point */
2381 if (cs->exi->exi_moved || vn_is_nfs_reparse(cs->exi->exi_vp, cs->cr)) {
2382 /* it's all bad */
2383 cs->exi->exi_moved = 1;
2384 *cs->statusp = resp->status = NFS4ERR_MOVED;
2385 DTRACE_PROBE2(nfs4serv__func__referral__shared__moved,
2386 vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2387 return;
2388 }
2389
2390 /* check for reparse point at vp */
2391 if (vn_is_nfs_reparse(cs->vp, cs->cr) && !client_is_downrev(req)) {
2392 /* it's not all bad */
2393 *cs->statusp = resp->status = NFS4ERR_MOVED;
2394 DTRACE_PROBE2(nfs4serv__func__referral__moved,
2395 vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2396 return;
2397 }
2398
2399 resp->object.nfs_fh4_val =
2400 kmem_alloc(cs->fh.nfs_fh4_len, KM_SLEEP);
2401 nfs_fh4_copy(&cs->fh, &resp->object);
2402 *cs->statusp = resp->status = NFS4_OK;
2403 out:
2404 DTRACE_NFSV4_2(op__getfh__done, struct compound_state *, cs,
2405 GETFH4res *, resp);
2406 }
2407
2408 static void
2409 rfs4_op_getfh_free(nfs_resop4 *resop)
2410 {
2411 GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2412
2413 if (resp->status == NFS4_OK &&
2414 resp->object.nfs_fh4_val != NULL) {
2415 kmem_free(resp->object.nfs_fh4_val, resp->object.nfs_fh4_len);
2416 resp->object.nfs_fh4_val = NULL;
2417 resp->object.nfs_fh4_len = 0;
2418 }
2419 }
2420
2421 /*
2422 * illegal: args: void
2423 * res : status (NFS4ERR_OP_ILLEGAL)
2424 */
2425 /* ARGSUSED */
2426 static void
2427 rfs4_op_illegal(nfs_argop4 *argop, nfs_resop4 *resop,
2428 struct svc_req *req, struct compound_state *cs)
2429 {
2430 ILLEGAL4res *resp = &resop->nfs_resop4_u.opillegal;
2431
2432 resop->resop = OP_ILLEGAL;
2433 *cs->statusp = resp->status = NFS4ERR_OP_ILLEGAL;
2434 }
2435
2436 /*
2437 * link: args: SAVED_FH: file, CURRENT_FH: target directory
2438 * res: status. If success - CURRENT_FH unchanged, return change_info
2439 */
2440 /* ARGSUSED */
2441 static void
2442 rfs4_op_link(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2443 struct compound_state *cs)
2444 {
2445 LINK4args *args = &argop->nfs_argop4_u.oplink;
2446 LINK4res *resp = &resop->nfs_resop4_u.oplink;
2447 int error;
2448 vnode_t *vp;
2449 vnode_t *dvp;
2450 struct vattr bdva, idva, adva;
2451 char *nm;
2452 uint_t len;
2453 struct sockaddr *ca;
2454 char *name = NULL;
2455 nfsstat4 status;
2456
2457 DTRACE_NFSV4_2(op__link__start, struct compound_state *, cs,
2458 LINK4args *, args);
2459
2460 /* SAVED_FH: source object */
2461 vp = cs->saved_vp;
2462 if (vp == NULL) {
2463 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2464 goto out;
2465 }
2466
2467 /* CURRENT_FH: target directory */
2468 dvp = cs->vp;
2469 if (dvp == NULL) {
2470 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2471 goto out;
2472 }
2473
2474 /*
2475 * If there is a non-shared filesystem mounted on this vnode,
2476 * do not allow to link any file in this directory.
2477 */
2478 if (vn_ismntpt(dvp)) {
2479 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2480 goto out;
2481 }
2482
2483 if (cs->access == CS_ACCESS_DENIED) {
2484 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2485 goto out;
2486 }
2487
2488 /* Check source object's type validity */
2489 if (vp->v_type == VDIR) {
2490 *cs->statusp = resp->status = NFS4ERR_ISDIR;
2491 goto out;
2492 }
2493
2494 /* Check target directory's type */
2495 if (dvp->v_type != VDIR) {
2496 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2497 goto out;
2498 }
2499
2500 if (cs->saved_exi != cs->exi) {
2501 *cs->statusp = resp->status = NFS4ERR_XDEV;
2502 goto out;
2503 }
2504
2505 status = utf8_dir_verify(&args->newname);
2506 if (status != NFS4_OK) {
2507 *cs->statusp = resp->status = status;
2508 goto out;
2509 }
2510
2511 nm = utf8_to_fn(&args->newname, &len, NULL);
2512 if (nm == NULL) {
2513 *cs->statusp = resp->status = NFS4ERR_INVAL;
2514 goto out;
2515 }
2516
2517 if (len > MAXNAMELEN) {
2518 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2519 kmem_free(nm, len);
2520 goto out;
2521 }
2522
2523 if (rdonly4(req, cs)) {
2524 *cs->statusp = resp->status = NFS4ERR_ROFS;
2525 kmem_free(nm, len);
2526 goto out;
2527 }
2528
2529 /* Get "before" change value */
2530 bdva.va_mask = AT_CTIME|AT_SEQ;
2531 error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
2532 if (error) {
2533 *cs->statusp = resp->status = puterrno4(error);
2534 kmem_free(nm, len);
2535 goto out;
2536 }
2537
2538 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2539 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
2540 MAXPATHLEN + 1);
2541
2542 if (name == NULL) {
2543 *cs->statusp = resp->status = NFS4ERR_INVAL;
2544 kmem_free(nm, len);
2545 goto out;
2546 }
2547
2548 NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
2549
2550 error = VOP_LINK(dvp, vp, name, cs->cr, NULL, 0);
2551
2552 if (nm != name)
2553 kmem_free(name, MAXPATHLEN + 1);
2554 kmem_free(nm, len);
2555
2556 /*
2557 * Get the initial "after" sequence number, if it fails, set to zero
2558 */
2559 idva.va_mask = AT_SEQ;
2560 if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
2561 idva.va_seq = 0;
2562
2563 /*
2564 * Force modified data and metadata out to stable storage.
2565 */
2566 (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
2567 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
2568
2569 if (error) {
2570 *cs->statusp = resp->status = puterrno4(error);
2571 goto out;
2572 }
2573
2574 /*
2575 * Get "after" change value, if it fails, simply return the
2576 * before value.
2577 */
2578 adva.va_mask = AT_CTIME|AT_SEQ;
2579 if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
2580 adva.va_ctime = bdva.va_ctime;
2581 adva.va_seq = 0;
2582 }
2583
2584 NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
2585
2586 /*
2587 * The cinfo.atomic = TRUE only if we have
2588 * non-zero va_seq's, and it has incremented by exactly one
2589 * during the VOP_LINK and it didn't change during the VOP_FSYNC.
2590 */
2591 if (bdva.va_seq && idva.va_seq && adva.va_seq &&
2592 idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
2593 resp->cinfo.atomic = TRUE;
2594 else
2595 resp->cinfo.atomic = FALSE;
2596
2597 *cs->statusp = resp->status = NFS4_OK;
2598 out:
2599 DTRACE_NFSV4_2(op__link__done, struct compound_state *, cs,
2600 LINK4res *, resp);
2601 }
2602
2603 /*
2604 * Used by rfs4_op_lookup and rfs4_op_lookupp to do the actual work.
2605 */
2606
2607 /* ARGSUSED */
2608 static nfsstat4
2609 do_rfs4_op_lookup(char *nm, struct svc_req *req, struct compound_state *cs)
2610 {
2611 int error;
2612 int different_export = 0;
2613 vnode_t *vp, *pre_tvp = NULL, *oldvp = NULL;
2614 struct exportinfo *exi = NULL, *pre_exi = NULL;
2615 nfsstat4 stat;
2616 fid_t fid;
2617 int attrdir, dotdot, walk;
2618 bool_t is_newvp = FALSE;
2619
2620 if (cs->vp->v_flag & V_XATTRDIR) {
2621 attrdir = 1;
2622 ASSERT(get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2623 } else {
2624 attrdir = 0;
2625 ASSERT(! get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2626 }
2627
2628 dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
2629
2630 /*
2631 * If dotdotting, then need to check whether it's
2632 * above the root of a filesystem, or above an
2633 * export point.
2634 */
2635 if (dotdot) {
2636
2637 /*
2638 * If dotdotting at the root of a filesystem, then
2639 * need to traverse back to the mounted-on filesystem
2640 * and do the dotdot lookup there.
2641 */
2642 if (cs->vp->v_flag & VROOT) {
2643
2644 /*
2645 * If at the system root, then can
2646 * go up no further.
2647 */
2648 if (VN_CMP(cs->vp, rootdir))
2649 return (puterrno4(ENOENT));
2650
2651 /*
2652 * Traverse back to the mounted-on filesystem
2653 */
2654 cs->vp = untraverse(cs->vp);
2655
2656 /*
2657 * Set the different_export flag so we remember
2658 * to pick up a new exportinfo entry for
2659 * this new filesystem.
2660 */
2661 different_export = 1;
2662 } else {
2663
2664 /*
2665 * If dotdotting above an export point then set
2666 * the different_export to get new export info.
2667 */
2668 different_export = nfs_exported(cs->exi, cs->vp);
2669 }
2670 }
2671
2672 error = VOP_LOOKUP(cs->vp, nm, &vp, NULL, 0, NULL, cs->cr,
2673 NULL, NULL, NULL);
2674 if (error)
2675 return (puterrno4(error));
2676
2677 /*
2678 * If the vnode is in a pseudo filesystem, check whether it is visible.
2679 *
2680 * XXX if the vnode is a symlink and it is not visible in
2681 * a pseudo filesystem, return ENOENT (not following symlink).
2682 * V4 client can not mount such symlink. This is a regression
2683 * from V2/V3.
2684 *
2685 * In the same exported filesystem, if the security flavor used
2686 * is not an explicitly shared flavor, limit the view to the visible
2687 * list entries only. This is not a WRONGSEC case because it's already
2688 * checked via PUTROOTFH/PUTPUBFH or PUTFH.
2689 */
2690 if (!different_export &&
2691 (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
2692 cs->access & CS_ACCESS_LIMITED)) {
2693 if (! nfs_visible(cs->exi, vp, &different_export)) {
2694 VN_RELE(vp);
2695 return (puterrno4(ENOENT));
2696 }
2697 }
2698
2699 /*
2700 * If it's a mountpoint, then traverse it.
2701 */
2702 if (vn_ismntpt(vp)) {
2703 pre_exi = cs->exi; /* save pre-traversed exportinfo */
2704 pre_tvp = vp; /* save pre-traversed vnode */
2705
2706 /*
2707 * hold pre_tvp to counteract rele by traverse. We will
2708 * need pre_tvp below if checkexport4 fails
2709 */
2710 VN_HOLD(pre_tvp);
2711 if ((error = traverse(&vp)) != 0) {
2712 VN_RELE(vp);
2713 VN_RELE(pre_tvp);
2714 return (puterrno4(error));
2715 }
2716 different_export = 1;
2717 } else if (vp->v_vfsp != cs->vp->v_vfsp) {
2718 /*
2719 * The vfsp comparison is to handle the case where
2720 * a LOFS mount is shared. lo_lookup traverses mount points,
2721 * and NFS is unaware of local fs transistions because
2722 * v_vfsmountedhere isn't set. For this special LOFS case,
2723 * the dir and the obj returned by lookup will have different
2724 * vfs ptrs.
2725 */
2726 different_export = 1;
2727 }
2728
2729 if (different_export) {
2730
2731 bzero(&fid, sizeof (fid));
2732 fid.fid_len = MAXFIDSZ;
2733 error = vop_fid_pseudo(vp, &fid);
2734 if (error) {
2735 VN_RELE(vp);
2736 if (pre_tvp)
2737 VN_RELE(pre_tvp);
2738 return (puterrno4(error));
2739 }
2740
2741 if (dotdot)
2742 exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
2743 else
2744 exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
2745
2746 if (exi == NULL) {
2747 if (pre_tvp) {
2748 /*
2749 * If this vnode is a mounted-on vnode,
2750 * but the mounted-on file system is not
2751 * exported, send back the filehandle for
2752 * the mounted-on vnode, not the root of
2753 * the mounted-on file system.
2754 */
2755 VN_RELE(vp);
2756 vp = pre_tvp;
2757 exi = pre_exi;
2758 } else {
2759 VN_RELE(vp);
2760 return (puterrno4(EACCES));
2761 }
2762 } else if (pre_tvp) {
2763 /* we're done with pre_tvp now. release extra hold */
2764 VN_RELE(pre_tvp);
2765 }
2766
2767 cs->exi = exi;
2768
2769 /*
2770 * Now we do a checkauth4. The reason is that
2771 * this client/user may not have access to the new
2772 * exported file system, and if they do,
2773 * the client/user may be mapped to a different uid.
2774 *
2775 * We start with a new cr, because the checkauth4 done
2776 * in the PUT*FH operation over wrote the cred's uid,
2777 * gid, etc, and we want the real thing before calling
2778 * checkauth4()
2779 */
2780 crfree(cs->cr);
2781 cs->cr = crdup(cs->basecr);
2782
2783 oldvp = cs->vp;
2784 cs->vp = vp;
2785 is_newvp = TRUE;
2786
2787 stat = call_checkauth4(cs, req);
2788 if (stat != NFS4_OK) {
2789 VN_RELE(cs->vp);
2790 cs->vp = oldvp;
2791 return (stat);
2792 }
2793 }
2794
2795 /*
2796 * After various NFS checks, do a label check on the path
2797 * component. The label on this path should either be the
2798 * global zone's label or a zone's label. We are only
2799 * interested in the zone's label because exported files
2800 * in global zone is accessible (though read-only) to
2801 * clients. The exportability/visibility check is already
2802 * done before reaching this code.
2803 */
2804 if (is_system_labeled()) {
2805 bslabel_t *clabel;
2806
2807 ASSERT(req->rq_label != NULL);
2808 clabel = req->rq_label;
2809 DTRACE_PROBE2(tx__rfs4__log__info__oplookup__clabel, char *,
2810 "got client label from request(1)", struct svc_req *, req);
2811
2812 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2813 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
2814 cs->exi)) {
2815 error = EACCES;
2816 goto err_out;
2817 }
2818 } else {
2819 /*
2820 * We grant access to admin_low label clients
2821 * only if the client is trusted, i.e. also
2822 * running Solaris Trusted Extension.
2823 */
2824 struct sockaddr *ca;
2825 int addr_type;
2826 void *ipaddr;
2827 tsol_tpc_t *tp;
2828
2829 ca = (struct sockaddr *)svc_getrpccaller(
2830 req->rq_xprt)->buf;
2831 if (ca->sa_family == AF_INET) {
2832 addr_type = IPV4_VERSION;
2833 ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
2834 } else if (ca->sa_family == AF_INET6) {
2835 addr_type = IPV6_VERSION;
2836 ipaddr = &((struct sockaddr_in6 *)
2837 ca)->sin6_addr;
2838 }
2839 tp = find_tpc(ipaddr, addr_type, B_FALSE);
2840 if (tp == NULL || tp->tpc_tp.tp_doi !=
2841 l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
2842 SUN_CIPSO) {
2843 if (tp != NULL)
2844 TPC_RELE(tp);
2845 error = EACCES;
2846 goto err_out;
2847 }
2848 TPC_RELE(tp);
2849 }
2850 }
2851
2852 error = makefh4(&cs->fh, vp, cs->exi);
2853
2854 err_out:
2855 if (error) {
2856 if (is_newvp) {
2857 VN_RELE(cs->vp);
2858 cs->vp = oldvp;
2859 } else
2860 VN_RELE(vp);
2861 return (puterrno4(error));
2862 }
2863
2864 if (!is_newvp) {
2865 if (cs->vp)
2866 VN_RELE(cs->vp);
2867 cs->vp = vp;
2868 } else if (oldvp)
2869 VN_RELE(oldvp);
2870
2871 /*
2872 * if did lookup on attrdir and didn't lookup .., set named
2873 * attr fh flag
2874 */
2875 if (attrdir && ! dotdot)
2876 set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
2877
2878 /* Assume false for now, open proc will set this */
2879 cs->mandlock = FALSE;
2880
2881 return (NFS4_OK);
2882 }
2883
2884 /* ARGSUSED */
2885 static void
2886 rfs4_op_lookup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2887 struct compound_state *cs)
2888 {
2889 LOOKUP4args *args = &argop->nfs_argop4_u.oplookup;
2890 LOOKUP4res *resp = &resop->nfs_resop4_u.oplookup;
2891 char *nm;
2892 uint_t len;
2893 struct sockaddr *ca;
2894 char *name = NULL;
2895 nfsstat4 status;
2896
2897 DTRACE_NFSV4_2(op__lookup__start, struct compound_state *, cs,
2898 LOOKUP4args *, args);
2899
2900 if (cs->vp == NULL) {
2901 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2902 goto out;
2903 }
2904
2905 if (cs->vp->v_type == VLNK) {
2906 *cs->statusp = resp->status = NFS4ERR_SYMLINK;
2907 goto out;
2908 }
2909
2910 if (cs->vp->v_type != VDIR) {
2911 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2912 goto out;
2913 }
2914
2915 status = utf8_dir_verify(&args->objname);
2916 if (status != NFS4_OK) {
2917 *cs->statusp = resp->status = status;
2918 goto out;
2919 }
2920
2921 nm = utf8_to_str(&args->objname, &len, NULL);
2922 if (nm == NULL) {
2923 *cs->statusp = resp->status = NFS4ERR_INVAL;
2924 goto out;
2925 }
2926
2927 if (len > MAXNAMELEN) {
2928 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2929 kmem_free(nm, len);
2930 goto out;
2931 }
2932
2933 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2934 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
2935 MAXPATHLEN + 1);
2936
2937 if (name == NULL) {
2938 *cs->statusp = resp->status = NFS4ERR_INVAL;
2939 kmem_free(nm, len);
2940 goto out;
2941 }
2942
2943 *cs->statusp = resp->status = do_rfs4_op_lookup(name, req, cs);
2944
2945 if (name != nm)
2946 kmem_free(name, MAXPATHLEN + 1);
2947 kmem_free(nm, len);
2948
2949 out:
2950 DTRACE_NFSV4_2(op__lookup__done, struct compound_state *, cs,
2951 LOOKUP4res *, resp);
2952 }
2953
2954 /* ARGSUSED */
2955 static void
2956 rfs4_op_lookupp(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
2957 struct compound_state *cs)
2958 {
2959 LOOKUPP4res *resp = &resop->nfs_resop4_u.oplookupp;
2960
2961 DTRACE_NFSV4_1(op__lookupp__start, struct compound_state *, cs);
2962
2963 if (cs->vp == NULL) {
2964 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2965 goto out;
2966 }
2967
2968 if (cs->vp->v_type != VDIR) {
2969 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2970 goto out;
2971 }
2972
2973 *cs->statusp = resp->status = do_rfs4_op_lookup("..", req, cs);
2974
2975 /*
2976 * From NFSV4 Specification, LOOKUPP should not check for
2977 * NFS4ERR_WRONGSEC. Retrun NFS4_OK instead.
2978 */
2979 if (resp->status == NFS4ERR_WRONGSEC) {
2980 *cs->statusp = resp->status = NFS4_OK;
2981 }
2982
2983 out:
2984 DTRACE_NFSV4_2(op__lookupp__done, struct compound_state *, cs,
2985 LOOKUPP4res *, resp);
2986 }
2987
2988
2989 /*ARGSUSED2*/
2990 static void
2991 rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2992 struct compound_state *cs)
2993 {
2994 OPENATTR4args *args = &argop->nfs_argop4_u.opopenattr;
2995 OPENATTR4res *resp = &resop->nfs_resop4_u.opopenattr;
2996 vnode_t *avp = NULL;
2997 int lookup_flags = LOOKUP_XATTR, error;
2998 int exp_ro = 0;
2999
3000 DTRACE_NFSV4_2(op__openattr__start, struct compound_state *, cs,
3001 OPENATTR4args *, args);
3002
3003 if (cs->vp == NULL) {
3004 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3005 goto out;
3006 }
3007
3008 if ((cs->vp->v_vfsp->vfs_flag & VFS_XATTR) == 0 &&
3009 !vfs_has_feature(cs->vp->v_vfsp, VFSFT_SYSATTR_VIEWS)) {
3010 *cs->statusp = resp->status = puterrno4(ENOTSUP);
3011 goto out;
3012 }
3013
3014 /*
3015 * If file system supports passing ACE mask to VOP_ACCESS then
3016 * check for ACE_READ_NAMED_ATTRS, otherwise do legacy checks
3017 */
3018
3019 if (vfs_has_feature(cs->vp->v_vfsp, VFSFT_ACEMASKONACCESS))
3020 error = VOP_ACCESS(cs->vp, ACE_READ_NAMED_ATTRS,
3021 V_ACE_MASK, cs->cr, NULL);
3022 else
3023 error = ((VOP_ACCESS(cs->vp, VREAD, 0, cs->cr, NULL) != 0) &&
3024 (VOP_ACCESS(cs->vp, VWRITE, 0, cs->cr, NULL) != 0) &&
3025 (VOP_ACCESS(cs->vp, VEXEC, 0, cs->cr, NULL) != 0));
3026
3027 if (error) {
3028 *cs->statusp = resp->status = puterrno4(EACCES);
3029 goto out;
3030 }
3031
3032 /*
3033 * The CREATE_XATTR_DIR VOP flag cannot be specified if
3034 * the file system is exported read-only -- regardless of
3035 * createdir flag. Otherwise the attrdir would be created
3036 * (assuming server fs isn't mounted readonly locally). If
3037 * VOP_LOOKUP returns ENOENT in this case, the error will
3038 * be translated into EROFS. ENOSYS is mapped to ENOTSUP
3039 * because specfs has no VOP_LOOKUP op, so the macro would
3040 * return ENOSYS. EINVAL is returned by all (current)
3041 * Solaris file system implementations when any of their
3042 * restrictions are violated (xattr(dir) can't have xattrdir).
3043 * Returning NOTSUPP is more appropriate in this case
3044 * because the object will never be able to have an attrdir.
3045 */
3046 if (args->createdir && ! (exp_ro = rdonly4(req, cs)))
3047 lookup_flags |= CREATE_XATTR_DIR;
3048
3049 error = VOP_LOOKUP(cs->vp, "", &avp, NULL, lookup_flags, NULL, cs->cr,
3050 NULL, NULL, NULL);
3051
3052 if (error) {
3053 if (error == ENOENT && args->createdir && exp_ro)
3054 *cs->statusp = resp->status = puterrno4(EROFS);
3055 else if (error == EINVAL || error == ENOSYS)
3056 *cs->statusp = resp->status = puterrno4(ENOTSUP);
3057 else
3058 *cs->statusp = resp->status = puterrno4(error);
3059 goto out;
3060 }
3061
3062 ASSERT(avp->v_flag & V_XATTRDIR);
3063
3064 error = makefh4(&cs->fh, avp, cs->exi);
3065
3066 if (error) {
3067 VN_RELE(avp);
3068 *cs->statusp = resp->status = puterrno4(error);
3069 goto out;
3070 }
3071
3072 VN_RELE(cs->vp);
3073 cs->vp = avp;
3074
3075 /*
3076 * There is no requirement for an attrdir fh flag
3077 * because the attrdir has a vnode flag to distinguish
3078 * it from regular (non-xattr) directories. The
3079 * FH4_ATTRDIR flag is set for future sanity checks.
3080 */
3081 set_fh4_flag(&cs->fh, FH4_ATTRDIR);
3082 *cs->statusp = resp->status = NFS4_OK;
3083
3084 out:
3085 DTRACE_NFSV4_2(op__openattr__done, struct compound_state *, cs,
3086 OPENATTR4res *, resp);
3087 }
3088
3089 static int
3090 do_io(int direction, vnode_t *vp, struct uio *uio, int ioflag, cred_t *cred,
3091 caller_context_t *ct)
3092 {
3093 int error;
3094 int i;
3095 clock_t delaytime;
3096
3097 delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
3098
3099 /*
3100 * Don't block on mandatory locks. If this routine returns
3101 * EAGAIN, the caller should return NFS4ERR_LOCKED.
3102 */
3103 uio->uio_fmode = FNONBLOCK;
3104
3105 for (i = 0; i < rfs4_maxlock_tries; i++) {
3106
3107
3108 if (direction == FREAD) {
3109 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, ct);
3110 error = VOP_READ(vp, uio, ioflag, cred, ct);
3111 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, ct);
3112 } else {
3113 (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, ct);
3114 error = VOP_WRITE(vp, uio, ioflag, cred, ct);
3115 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, ct);
3116 }
3117
3118 if (error != EAGAIN)
3119 break;
3120
3121 if (i < rfs4_maxlock_tries - 1) {
3122 delay(delaytime);
3123 delaytime *= 2;
3124 }
3125 }
3126
3127 return (error);
3128 }
3129
3130 /* ARGSUSED */
3131 static void
3132 rfs4_op_read(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3133 struct compound_state *cs)
3134 {
3135 READ4args *args = &argop->nfs_argop4_u.opread;
3136 READ4res *resp = &resop->nfs_resop4_u.opread;
3137 int error;
3138 int verror;
3139 vnode_t *vp;
3140 struct vattr va;
3141 struct iovec iov, *iovp = NULL;
3142 int iovcnt;
3143 struct uio uio;
3144 u_offset_t offset;
3145 bool_t *deleg = &cs->deleg;
3146 nfsstat4 stat;
3147 int in_crit = 0;
3148 mblk_t *mp = NULL;
3149 int alloc_err = 0;
3150 int rdma_used = 0;
3151 int loaned_buffers;
3152 caller_context_t ct;
3153 struct uio *uiop;
3154
3155 DTRACE_NFSV4_2(op__read__start, struct compound_state *, cs,
3156 READ4args, args);
3157
3158 vp = cs->vp;
3159 if (vp == NULL) {
3160 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3161 goto out;
3162 }
3163 if (cs->access == CS_ACCESS_DENIED) {
3164 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3165 goto out;
3166 }
3167
3168 if ((stat = rfs4_check_stateid(FREAD, vp, &args->stateid, FALSE,
3169 deleg, TRUE, &ct)) != NFS4_OK) {
3170 *cs->statusp = resp->status = stat;
3171 goto out;
3172 }
3173
3174 /*
3175 * Enter the critical region before calling VOP_RWLOCK
3176 * to avoid a deadlock with write requests.
3177 */
3178 if (nbl_need_check(vp)) {
3179 nbl_start_crit(vp, RW_READER);
3180 in_crit = 1;
3181 if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
3182 &ct)) {
3183 *cs->statusp = resp->status = NFS4ERR_LOCKED;
3184 goto out;
3185 }
3186 }
3187
3188 if (args->wlist) {
3189 if (args->count > clist_len(args->wlist)) {
3190 *cs->statusp = resp->status = NFS4ERR_INVAL;
3191 goto out;
3192 }
3193 rdma_used = 1;
3194 }
3195
3196 /* use loaned buffers for TCP */
3197 loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;
3198
3199 va.va_mask = AT_MODE|AT_SIZE|AT_UID;
3200 verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3201
3202 /*
3203 * If we can't get the attributes, then we can't do the
3204 * right access checking. So, we'll fail the request.
3205 */
3206 if (verror) {
3207 *cs->statusp = resp->status = puterrno4(verror);
3208 goto out;
3209 }
3210
3211 if (vp->v_type != VREG) {
3212 *cs->statusp = resp->status =
3213 ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
3214 goto out;
3215 }
3216
3217 if (crgetuid(cs->cr) != va.va_uid &&
3218 (error = VOP_ACCESS(vp, VREAD, 0, cs->cr, &ct)) &&
3219 (error = VOP_ACCESS(vp, VEXEC, 0, cs->cr, &ct))) {
3220 *cs->statusp = resp->status = puterrno4(error);
3221 goto out;
3222 }
3223
3224 if (MANDLOCK(vp, va.va_mode)) { /* XXX - V4 supports mand locking */
3225 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3226 goto out;
3227 }
3228
3229 offset = args->offset;
3230 if (offset >= va.va_size) {
3231 *cs->statusp = resp->status = NFS4_OK;
3232 resp->eof = TRUE;
3233 resp->data_len = 0;
3234 resp->data_val = NULL;
3235 resp->mblk = NULL;
3236 /* RDMA */
3237 resp->wlist = args->wlist;
3238 resp->wlist_len = resp->data_len;
3239 *cs->statusp = resp->status = NFS4_OK;
3240 if (resp->wlist)
3241 clist_zero_len(resp->wlist);
3242 goto out;
3243 }
3244
3245 if (args->count == 0) {
3246 *cs->statusp = resp->status = NFS4_OK;
3247 resp->eof = FALSE;
3248 resp->data_len = 0;
3249 resp->data_val = NULL;
3250 resp->mblk = NULL;
3251 /* RDMA */
3252 resp->wlist = args->wlist;
3253 resp->wlist_len = resp->data_len;
3254 if (resp->wlist)
3255 clist_zero_len(resp->wlist);
3256 goto out;
3257 }
3258
3259 /*
3260 * Do not allocate memory more than maximum allowed
3261 * transfer size
3262 */
3263 if (args->count > rfs4_tsize(req))
3264 args->count = rfs4_tsize(req);
3265
3266 if (loaned_buffers) {
3267 uiop = (uio_t *)rfs_setup_xuio(vp);
3268 ASSERT(uiop != NULL);
3269 uiop->uio_segflg = UIO_SYSSPACE;
3270 uiop->uio_loffset = args->offset;
3271 uiop->uio_resid = args->count;
3272
3273 /* Jump to do the read if successful */
3274 if (!VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cs->cr, &ct)) {
3275 /*
3276 * Need to hold the vnode until after VOP_RETZCBUF()
3277 * is called.
3278 */
3279 VN_HOLD(vp);
3280 goto doio_read;
3281 }
3282
3283 DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
3284 uiop->uio_loffset, int, uiop->uio_resid);
3285
3286 uiop->uio_extflg = 0;
3287
3288 /* failure to setup for zero copy */
3289 rfs_free_xuio((void *)uiop);
3290 loaned_buffers = 0;
3291 }
3292
3293 /*
3294 * If returning data via RDMA Write, then grab the chunk list. If we
3295 * aren't returning READ data w/RDMA_WRITE, then grab a mblk.
3296 */
3297 if (rdma_used) {
3298 mp = NULL;
3299 (void) rdma_get_wchunk(req, &iov, args->wlist);
3300 uio.uio_iov = &iov;
3301 uio.uio_iovcnt = 1;
3302 } else {
3303 /*
3304 * mp will contain the data to be sent out in the read reply.
3305 * It will be freed after the reply has been sent.
3306 */
3307 mp = rfs_read_alloc(args->count, &iovp, &iovcnt);
3308 ASSERT(mp != NULL);
3309 ASSERT(alloc_err == 0);
3310 uio.uio_iov = iovp;
3311 uio.uio_iovcnt = iovcnt;
3312 }
3313
3314 uio.uio_segflg = UIO_SYSSPACE;
3315 uio.uio_extflg = UIO_COPY_CACHED;
3316 uio.uio_loffset = args->offset;
3317 uio.uio_resid = args->count;
3318 uiop = &uio;
3319
3320 doio_read:
3321 error = do_io(FREAD, vp, uiop, 0, cs->cr, &ct);
3322
3323 va.va_mask = AT_SIZE;
3324 verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3325
3326 if (error) {
3327 if (mp)
3328 freemsg(mp);
3329 *cs->statusp = resp->status = puterrno4(error);
3330 goto out;
3331 }
3332
3333 /* make mblk using zc buffers */
3334 if (loaned_buffers) {
3335 mp = uio_to_mblk(uiop);
3336 ASSERT(mp != NULL);
3337 }
3338
3339 *cs->statusp = resp->status = NFS4_OK;
3340
3341 ASSERT(uiop->uio_resid >= 0);
3342 resp->data_len = args->count - uiop->uio_resid;
3343 if (mp) {
3344 resp->data_val = (char *)mp->b_datap->db_base;
3345 rfs_rndup_mblks(mp, resp->data_len, loaned_buffers);
3346 } else {
3347 resp->data_val = (caddr_t)iov.iov_base;
3348 }
3349
3350 resp->mblk = mp;
3351
3352 if (!verror && offset + resp->data_len == va.va_size)
3353 resp->eof = TRUE;
3354 else
3355 resp->eof = FALSE;
3356
3357 if (rdma_used) {
3358 if (!rdma_setup_read_data4(args, resp)) {
3359 *cs->statusp = resp->status = NFS4ERR_INVAL;
3360 }
3361 } else {
3362 resp->wlist = NULL;
3363 }
3364
3365 out:
3366 if (in_crit)
3367 nbl_end_crit(vp);
3368
3369 if (iovp != NULL)
3370 kmem_free(iovp, iovcnt * sizeof (struct iovec));
3371
3372 DTRACE_NFSV4_2(op__read__done, struct compound_state *, cs,
3373 READ4res *, resp);
3374 }
3375
3376 static void
3377 rfs4_op_read_free(nfs_resop4 *resop)
3378 {
3379 READ4res *resp = &resop->nfs_resop4_u.opread;
3380
3381 if (resp->status == NFS4_OK && resp->mblk != NULL) {
3382 freemsg(resp->mblk);
3383 resp->mblk = NULL;
3384 resp->data_val = NULL;
3385 resp->data_len = 0;
3386 }
3387 }
3388
3389 static void
3390 rfs4_op_readdir_free(nfs_resop4 * resop)
3391 {
3392 READDIR4res *resp = &resop->nfs_resop4_u.opreaddir;
3393
3394 if (resp->status == NFS4_OK && resp->mblk != NULL) {
3395 freeb(resp->mblk);
3396 resp->mblk = NULL;
3397 resp->data_len = 0;
3398 }
3399 }
3400
3401
3402 /* ARGSUSED */
3403 static void
3404 rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3405 struct compound_state *cs)
3406 {
3407 PUTPUBFH4res *resp = &resop->nfs_resop4_u.opputpubfh;
3408 int error;
3409 vnode_t *vp;
3410 struct exportinfo *exi, *sav_exi;
3411 nfs_fh4_fmt_t *fh_fmtp;
3412
3413 DTRACE_NFSV4_1(op__putpubfh__start, struct compound_state *, cs);
3414
3415 if (cs->vp) {
3416 VN_RELE(cs->vp);
3417 cs->vp = NULL;
3418 }
3419
3420 if (cs->cr)
3421 crfree(cs->cr);
3422
3423 cs->cr = crdup(cs->basecr);
3424
3425 vp = exi_public->exi_vp;
3426 if (vp == NULL) {
3427 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3428 goto out;
3429 }
3430
3431 error = makefh4(&cs->fh, vp, exi_public);
3432 if (error != 0) {
3433 *cs->statusp = resp->status = puterrno4(error);
3434 goto out;
3435 }
3436 sav_exi = cs->exi;
3437 if (exi_public == exi_root) {
3438 /*
3439 * No filesystem is actually shared public, so we default
3440 * to exi_root. In this case, we must check whether root
3441 * is exported.
3442 */
3443 fh_fmtp = (nfs_fh4_fmt_t *)cs->fh.nfs_fh4_val;
3444
3445 /*
3446 * if root filesystem is exported, the exportinfo struct that we
3447 * should use is what checkexport4 returns, because root_exi is
3448 * actually a mostly empty struct.
3449 */
3450 exi = checkexport4(&fh_fmtp->fh4_fsid,
3451 (fid_t *)&fh_fmtp->fh4_xlen, NULL);
3452 cs->exi = ((exi != NULL) ? exi : exi_public);
3453 } else {
3454 /*
3455 * it's a properly shared filesystem
3456 */
3457 cs->exi = exi_public;
3458 }
3459
3460 if (is_system_labeled()) {
3461 bslabel_t *clabel;
3462
3463 ASSERT(req->rq_label != NULL);
3464 clabel = req->rq_label;
3465 DTRACE_PROBE2(tx__rfs4__log__info__opputpubfh__clabel, char *,
3466 "got client label from request(1)",
3467 struct svc_req *, req);
3468 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3469 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3470 cs->exi)) {
3471 *cs->statusp = resp->status =
3472 NFS4ERR_SERVERFAULT;
3473 goto out;
3474 }
3475 }
3476 }
3477
3478 VN_HOLD(vp);
3479 cs->vp = vp;
3480
3481 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3482 VN_RELE(cs->vp);
3483 cs->vp = NULL;
3484 cs->exi = sav_exi;
3485 goto out;
3486 }
3487
3488 *cs->statusp = resp->status = NFS4_OK;
3489 out:
3490 DTRACE_NFSV4_2(op__putpubfh__done, struct compound_state *, cs,
3491 PUTPUBFH4res *, resp);
3492 }
3493
/*
 * XXX - issue with put*fh operations. Suppose /export/home is exported.
 * Suppose an NFS client goes to mount /export/home/joe. If /export, home,
 * or joe have restrictive search permissions, then we shouldn't let
 * the client get a file handle. This is easy to enforce. However, we
 * don't know what security flavor should be used until we resolve the
 * path name. Another complication is uid mapping. If root is
 * the user, then it will be mapped to the anonymous user by default,
 * but we won't know that till we've resolved the path name. And we won't
 * know what the anonymous user is.
 * Luckily, SECINFO is specified to take a full filename.
 * So what we will have to do in rfs4_op_lookup is check that the flavor of
 * the target object matches that of the request, and if root was the
 * caller, check for the root= and anon= options, and if necessary,
 * repeat the lookup using the right cred_t. But that's not done yet.
 */
3510 /* ARGSUSED */
3511 static void
3512 rfs4_op_putfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3513 struct compound_state *cs)
3514 {
3515 PUTFH4args *args = &argop->nfs_argop4_u.opputfh;
3516 PUTFH4res *resp = &resop->nfs_resop4_u.opputfh;
3517 nfs_fh4_fmt_t *fh_fmtp;
3518
3519 DTRACE_NFSV4_2(op__putfh__start, struct compound_state *, cs,
3520 PUTFH4args *, args);
3521
3522 if (cs->vp) {
3523 VN_RELE(cs->vp);
3524 cs->vp = NULL;
3525 }
3526
3527 if (cs->cr) {
3528 crfree(cs->cr);
3529 cs->cr = NULL;
3530 }
3531
3532
3533 if (args->object.nfs_fh4_len < NFS_FH4_LEN) {
3534 *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
3535 goto out;
3536 }
3537
3538 fh_fmtp = (nfs_fh4_fmt_t *)args->object.nfs_fh4_val;
3539 cs->exi = checkexport4(&fh_fmtp->fh4_fsid, (fid_t *)&fh_fmtp->fh4_xlen,
3540 NULL);
3541
3542 if (cs->exi == NULL) {
3543 *cs->statusp = resp->status = NFS4ERR_STALE;
3544 goto out;
3545 }
3546
3547 cs->cr = crdup(cs->basecr);
3548
3549 ASSERT(cs->cr != NULL);
3550
3551 if (! (cs->vp = nfs4_fhtovp(&args->object, cs->exi, &resp->status))) {
3552 *cs->statusp = resp->status;
3553 goto out;
3554 }
3555
3556 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3557 VN_RELE(cs->vp);
3558 cs->vp = NULL;
3559 goto out;
3560 }
3561
3562 nfs_fh4_copy(&args->object, &cs->fh);
3563 *cs->statusp = resp->status = NFS4_OK;
3564 cs->deleg = FALSE;
3565
3566 out:
3567 DTRACE_NFSV4_2(op__putfh__done, struct compound_state *, cs,
3568 PUTFH4res *, resp);
3569 }
3570
3571 /* ARGSUSED */
3572 static void
3573 rfs4_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3574 struct compound_state *cs)
3575 {
3576 PUTROOTFH4res *resp = &resop->nfs_resop4_u.opputrootfh;
3577 int error;
3578 fid_t fid;
3579 struct exportinfo *exi, *sav_exi;
3580
3581 DTRACE_NFSV4_1(op__putrootfh__start, struct compound_state *, cs);
3582
3583 if (cs->vp) {
3584 VN_RELE(cs->vp);
3585 cs->vp = NULL;
3586 }
3587
3588 if (cs->cr)
3589 crfree(cs->cr);
3590
3591 cs->cr = crdup(cs->basecr);
3592
3593 /*
3594 * Using rootdir, the system root vnode,
3595 * get its fid.
3596 */
3597 bzero(&fid, sizeof (fid));
3598 fid.fid_len = MAXFIDSZ;
3599 error = vop_fid_pseudo(rootdir, &fid);
3600 if (error != 0) {
3601 *cs->statusp = resp->status = puterrno4(error);
3602 goto out;
3603 }
3604
3605 /*
3606 * Then use the root fsid & fid it to find out if it's exported
3607 *
3608 * If the server root isn't exported directly, then
3609 * it should at least be a pseudo export based on
3610 * one or more exports further down in the server's
3611 * file tree.
3612 */
3613 exi = checkexport4(&rootdir->v_vfsp->vfs_fsid, &fid, NULL);
3614 if (exi == NULL || exi->exi_export.ex_flags & EX_PUBLIC) {
3615 NFS4_DEBUG(rfs4_debug,
3616 (CE_WARN, "rfs4_op_putrootfh: export check failure"));
3617 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3618 goto out;
3619 }
3620
3621 /*
3622 * Now make a filehandle based on the root
3623 * export and root vnode.
3624 */
3625 error = makefh4(&cs->fh, rootdir, exi);
3626 if (error != 0) {
3627 *cs->statusp = resp->status = puterrno4(error);
3628 goto out;
3629 }
3630
3631 sav_exi = cs->exi;
3632 cs->exi = exi;
3633
3634 VN_HOLD(rootdir);
3635 cs->vp = rootdir;
3636
3637 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3638 VN_RELE(rootdir);
3639 cs->vp = NULL;
3640 cs->exi = sav_exi;
3641 goto out;
3642 }
3643
3644 *cs->statusp = resp->status = NFS4_OK;
3645 cs->deleg = FALSE;
3646 out:
3647 DTRACE_NFSV4_2(op__putrootfh__done, struct compound_state *, cs,
3648 PUTROOTFH4res *, resp);
3649 }
3650
3651 /*
3652 * readlink: args: CURRENT_FH.
3653 * res: status. If success - CURRENT_FH unchanged, return linktext.
3654 */
3655
3656 /* ARGSUSED */
3657 static void
3658 rfs4_op_readlink(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3659 struct compound_state *cs)
3660 {
3661 READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3662 int error;
3663 vnode_t *vp;
3664 struct iovec iov;
3665 struct vattr va;
3666 struct uio uio;
3667 char *data;
3668 struct sockaddr *ca;
3669 char *name = NULL;
3670 int is_referral;
3671
3672 DTRACE_NFSV4_1(op__readlink__start, struct compound_state *, cs);
3673
3674 /* CURRENT_FH: directory */
3675 vp = cs->vp;
3676 if (vp == NULL) {
3677 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3678 goto out;
3679 }
3680
3681 if (cs->access == CS_ACCESS_DENIED) {
3682 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3683 goto out;
3684 }
3685
3686 /* Is it a referral? */
3687 if (vn_is_nfs_reparse(vp, cs->cr) && client_is_downrev(req)) {
3688
3689 is_referral = 1;
3690
3691 } else {
3692
3693 is_referral = 0;
3694
3695 if (vp->v_type == VDIR) {
3696 *cs->statusp = resp->status = NFS4ERR_ISDIR;
3697 goto out;
3698 }
3699
3700 if (vp->v_type != VLNK) {
3701 *cs->statusp = resp->status = NFS4ERR_INVAL;
3702 goto out;
3703 }
3704
3705 }
3706
3707 va.va_mask = AT_MODE;
3708 error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
3709 if (error) {
3710 *cs->statusp = resp->status = puterrno4(error);
3711 goto out;
3712 }
3713
3714 if (MANDLOCK(vp, va.va_mode)) {
3715 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3716 goto out;
3717 }
3718
3719 data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
3720
3721 if (is_referral) {
3722 char *s;
3723 size_t strsz;
3724
3725 /* Get an artificial symlink based on a referral */
3726 s = build_symlink(vp, cs->cr, &strsz);
3727 global_svstat_ptr[4][NFS_REFERLINKS].value.ui64++;
3728 DTRACE_PROBE2(nfs4serv__func__referral__reflink,
3729 vnode_t *, vp, char *, s);
3730 if (s == NULL)
3731 error = EINVAL;
3732 else {
3733 error = 0;
3734 (void) strlcpy(data, s, MAXPATHLEN + 1);
3735 kmem_free(s, strsz);
3736 }
3737
3738 } else {
3739
3740 iov.iov_base = data;
3741 iov.iov_len = MAXPATHLEN;
3742 uio.uio_iov = &iov;
3743 uio.uio_iovcnt = 1;
3744 uio.uio_segflg = UIO_SYSSPACE;
3745 uio.uio_extflg = UIO_COPY_CACHED;
3746 uio.uio_loffset = 0;
3747 uio.uio_resid = MAXPATHLEN;
3748
3749 error = VOP_READLINK(vp, &uio, cs->cr, NULL);
3750
3751 if (!error)
3752 *(data + MAXPATHLEN - uio.uio_resid) = '\0';
3753 }
3754
3755 if (error) {
3756 kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3757 *cs->statusp = resp->status = puterrno4(error);
3758 goto out;
3759 }
3760
3761 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3762 name = nfscmd_convname(ca, cs->exi, data, NFSCMD_CONV_OUTBOUND,
3763 MAXPATHLEN + 1);
3764
3765 if (name == NULL) {
3766 /*
3767 * Even though the conversion failed, we return
3768 * something. We just don't translate it.
3769 */
3770 name = data;
3771 }
3772
3773 /*
3774 * treat link name as data
3775 */
3776 (void) str_to_utf8(name, (utf8string *)&resp->link);
3777
3778 if (name != data)
3779 kmem_free(name, MAXPATHLEN + 1);
3780 kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3781 *cs->statusp = resp->status = NFS4_OK;
3782
3783 out:
3784 DTRACE_NFSV4_2(op__readlink__done, struct compound_state *, cs,
3785 READLINK4res *, resp);
3786 }
3787
3788 static void
3789 rfs4_op_readlink_free(nfs_resop4 *resop)
3790 {
3791 READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3792 utf8string *symlink = (utf8string *)&resp->link;
3793
3794 if (symlink->utf8string_val) {
3795 UTF8STRING_FREE(*symlink)
3796 }
3797 }
3798
3799 /*
3800 * release_lockowner:
3801 * Release any state associated with the supplied
3802 * lockowner. Note if any lo_state is holding locks we will not
3803 * rele that lo_state and thus the lockowner will not be destroyed.
3804 * A client using lock after the lock owner stateid has been released
3805 * will suffer the consequence of NFS4ERR_BAD_STATEID and would have
3806 * to reissue the lock with new_lock_owner set to TRUE.
3807 * args: lock_owner
3808 * res: status
3809 */
3810 /* ARGSUSED */
3811 static void
3812 rfs4_op_release_lockowner(nfs_argop4 *argop, nfs_resop4 *resop,
3813 struct svc_req *req, struct compound_state *cs)
3814 {
3815 RELEASE_LOCKOWNER4args *ap = &argop->nfs_argop4_u.oprelease_lockowner;
3816 RELEASE_LOCKOWNER4res *resp = &resop->nfs_resop4_u.oprelease_lockowner;
3817 rfs4_lockowner_t *lo;
3818 rfs4_openowner_t *oo;
3819 rfs4_state_t *sp;
3820 rfs4_lo_state_t *lsp;
3821 rfs4_client_t *cp;
3822 bool_t create = FALSE;
3823 locklist_t *llist;
3824 sysid_t sysid;
3825
3826 DTRACE_NFSV4_2(op__release__lockowner__start, struct compound_state *,
3827 cs, RELEASE_LOCKOWNER4args *, ap);
3828
3829 /* Make sure there is a clientid around for this request */
3830 cp = rfs4_findclient_by_id(ap->lock_owner.clientid, FALSE);
3831
3832 if (cp == NULL) {
3833 *cs->statusp = resp->status =
3834 rfs4_check_clientid(&ap->lock_owner.clientid, 0);
3835 goto out;
3836 }
3837 rfs4_client_rele(cp);
3838
3839 lo = rfs4_findlockowner(&ap->lock_owner, &create);
3840 if (lo == NULL) {
3841 *cs->statusp = resp->status = NFS4_OK;
3842 goto out;
3843 }
3844 ASSERT(lo->rl_client != NULL);
3845
3846 /*
3847 * Check for EXPIRED client. If so will reap state with in a lease
3848 * period or on next set_clientid_confirm step
3849 */
3850 if (rfs4_lease_expired(lo->rl_client)) {
3851 rfs4_lockowner_rele(lo);
3852 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
3853 goto out;
3854 }
3855
3856 /*
3857 * If no sysid has been assigned, then no locks exist; just return.
3858 */
3859 rfs4_dbe_lock(lo->rl_client->rc_dbe);
3860 if (lo->rl_client->rc_sysidt == LM_NOSYSID) {
3861 rfs4_lockowner_rele(lo);
3862 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3863 goto out;
3864 }
3865
3866 sysid = lo->rl_client->rc_sysidt;
3867 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3868
3869 /*
3870 * Mark the lockowner invalid.
3871 */
3872 rfs4_dbe_hide(lo->rl_dbe);
3873
3874 /*
3875 * sysid-pid pair should now not be used since the lockowner is
3876 * invalid. If the client were to instantiate the lockowner again
3877 * it would be assigned a new pid. Thus we can get the list of
3878 * current locks.
3879 */
3880
3881 llist = flk_get_active_locks(sysid, lo->rl_pid);
3882 /* If we are still holding locks fail */
3883 if (llist != NULL) {
3884
3885 *cs->statusp = resp->status = NFS4ERR_LOCKS_HELD;
3886
3887 flk_free_locklist(llist);
3888 /*
3889 * We need to unhide the lockowner so the client can
3890 * try it again. The bad thing here is if the client
3891 * has a logic error that took it here in the first place
3892 * they probably have lost accounting of the locks that it
3893 * is holding. So we may have dangling state until the
3894 * open owner state is reaped via close. One scenario
3895 * that could possibly occur is that the client has
3896 * sent the unlock request(s) in separate threads
3897 * and has not waited for the replies before sending the
3898 * RELEASE_LOCKOWNER request. Presumably, it would expect
3899 * and deal appropriately with NFS4ERR_LOCKS_HELD, by
3900 * reissuing the request.
3901 */
3902 rfs4_dbe_unhide(lo->rl_dbe);
3903 rfs4_lockowner_rele(lo);
3904 goto out;
3905 }
3906
3907 /*
3908 * For the corresponding client we need to check each open
3909 * owner for any opens that have lockowner state associated
3910 * with this lockowner.
3911 */
3912
3913 rfs4_dbe_lock(lo->rl_client->rc_dbe);
3914 for (oo = list_head(&lo->rl_client->rc_openownerlist); oo != NULL;
3915 oo = list_next(&lo->rl_client->rc_openownerlist, oo)) {
3916
3917 rfs4_dbe_lock(oo->ro_dbe);
3918 for (sp = list_head(&oo->ro_statelist); sp != NULL;
3919 sp = list_next(&oo->ro_statelist, sp)) {
3920
3921 rfs4_dbe_lock(sp->rs_dbe);
3922 for (lsp = list_head(&sp->rs_lostatelist);
3923 lsp != NULL;
3924 lsp = list_next(&sp->rs_lostatelist, lsp)) {
3925 if (lsp->rls_locker == lo) {
3926 rfs4_dbe_lock(lsp->rls_dbe);
3927 rfs4_dbe_invalidate(lsp->rls_dbe);
3928 rfs4_dbe_unlock(lsp->rls_dbe);
3929 }
3930 }
3931 rfs4_dbe_unlock(sp->rs_dbe);
3932 }
3933 rfs4_dbe_unlock(oo->ro_dbe);
3934 }
3935 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3936
3937 rfs4_lockowner_rele(lo);
3938
3939 *cs->statusp = resp->status = NFS4_OK;
3940
3941 out:
3942 DTRACE_NFSV4_2(op__release__lockowner__done, struct compound_state *,
3943 cs, RELEASE_LOCKOWNER4res *, resp);
3944 }
3945
3946 /*
3947 * short utility function to lookup a file and recall the delegation
3948 */
3949 static rfs4_file_t *
3950 rfs4_lookup_and_findfile(vnode_t *dvp, char *nm, vnode_t **vpp,
3951 int *lkup_error, cred_t *cr)
3952 {
3953 vnode_t *vp;
3954 rfs4_file_t *fp = NULL;
3955 bool_t fcreate = FALSE;
3956 int error;
3957
3958 if (vpp)
3959 *vpp = NULL;
3960
3961 if ((error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cr, NULL, NULL,
3962 NULL)) == 0) {
3963 if (vp->v_type == VREG)
3964 fp = rfs4_findfile(vp, NULL, &fcreate);
3965 if (vpp)
3966 *vpp = vp;
3967 else
3968 VN_RELE(vp);
3969 }
3970
3971 if (lkup_error)
3972 *lkup_error = error;
3973
3974 return (fp);
3975 }
3976
3977 /*
3978 * remove: args: CURRENT_FH: directory; name.
3979 * res: status. If success - CURRENT_FH unchanged, return change_info
3980 * for directory.
3981 */
3982 /* ARGSUSED */
3983 static void
3984 rfs4_op_remove(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3985 struct compound_state *cs)
3986 {
3987 REMOVE4args *args = &argop->nfs_argop4_u.opremove;
3988 REMOVE4res *resp = &resop->nfs_resop4_u.opremove;
3989 int error;
3990 vnode_t *dvp, *vp;
3991 struct vattr bdva, idva, adva;
3992 char *nm;
3993 uint_t len;
3994 rfs4_file_t *fp;
3995 int in_crit = 0;
3996 bslabel_t *clabel;
3997 struct sockaddr *ca;
3998 char *name = NULL;
3999 nfsstat4 status;
4000
4001 DTRACE_NFSV4_2(op__remove__start, struct compound_state *, cs,
4002 REMOVE4args *, args);
4003
4004 /* CURRENT_FH: directory */
4005 dvp = cs->vp;
4006 if (dvp == NULL) {
4007 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4008 goto out;
4009 }
4010
4011 if (cs->access == CS_ACCESS_DENIED) {
4012 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4013 goto out;
4014 }
4015
4016 /*
4017 * If there is an unshared filesystem mounted on this vnode,
4018 * Do not allow to remove anything in this directory.
4019 */
4020 if (vn_ismntpt(dvp)) {
4021 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4022 goto out;
4023 }
4024
4025 if (dvp->v_type != VDIR) {
4026 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4027 goto out;
4028 }
4029
4030 status = utf8_dir_verify(&args->target);
4031 if (status != NFS4_OK) {
4032 *cs->statusp = resp->status = status;
4033 goto out;
4034 }
4035
4036 /*
4037 * Lookup the file so that we can check if it's a directory
4038 */
4039 nm = utf8_to_fn(&args->target, &len, NULL);
4040 if (nm == NULL) {
4041 *cs->statusp = resp->status = NFS4ERR_INVAL;
4042 goto out;
4043 }
4044
4045 if (len > MAXNAMELEN) {
4046 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4047 kmem_free(nm, len);
4048 goto out;
4049 }
4050
4051 if (rdonly4(req, cs)) {
4052 *cs->statusp = resp->status = NFS4ERR_ROFS;
4053 kmem_free(nm, len);
4054 goto out;
4055 }
4056
4057 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4058 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
4059 MAXPATHLEN + 1);
4060
4061 if (name == NULL) {
4062 *cs->statusp = resp->status = NFS4ERR_INVAL;
4063 kmem_free(nm, len);
4064 goto out;
4065 }
4066
4067 /*
4068 * Lookup the file to determine type and while we are see if
4069 * there is a file struct around and check for delegation.
4070 * We don't need to acquire va_seq before this lookup, if
4071 * it causes an update, cinfo.before will not match, which will
4072 * trigger a cache flush even if atomic is TRUE.
4073 */
4074 if (fp = rfs4_lookup_and_findfile(dvp, name, &vp, &error, cs->cr)) {
4075 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4076 NULL)) {
4077 VN_RELE(vp);
4078 rfs4_file_rele(fp);
4079 *cs->statusp = resp->status = NFS4ERR_DELAY;
4080 if (nm != name)
4081 kmem_free(name, MAXPATHLEN + 1);
4082 kmem_free(nm, len);
4083 goto out;
4084 }
4085 }
4086
4087 /* Didn't find anything to remove */
4088 if (vp == NULL) {
4089 *cs->statusp = resp->status = error;
4090 if (nm != name)
4091 kmem_free(name, MAXPATHLEN + 1);
4092 kmem_free(nm, len);
4093 goto out;
4094 }
4095
4096 if (nbl_need_check(vp)) {
4097 nbl_start_crit(vp, RW_READER);
4098 in_crit = 1;
4099 if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0, NULL)) {
4100 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4101 if (nm != name)
4102 kmem_free(name, MAXPATHLEN + 1);
4103 kmem_free(nm, len);
4104 nbl_end_crit(vp);
4105 VN_RELE(vp);
4106 if (fp) {
4107 rfs4_clear_dont_grant(fp);
4108 rfs4_file_rele(fp);
4109 }
4110 goto out;
4111 }
4112 }
4113
4114 /* check label before allowing removal */
4115 if (is_system_labeled()) {
4116 ASSERT(req->rq_label != NULL);
4117 clabel = req->rq_label;
4118 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
4119 "got client label from request(1)",
4120 struct svc_req *, req);
4121 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4122 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4123 cs->exi)) {
4124 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4125 if (name != nm)
4126 kmem_free(name, MAXPATHLEN + 1);
4127 kmem_free(nm, len);
4128 if (in_crit)
4129 nbl_end_crit(vp);
4130 VN_RELE(vp);
4131 if (fp) {
4132 rfs4_clear_dont_grant(fp);
4133 rfs4_file_rele(fp);
4134 }
4135 goto out;
4136 }
4137 }
4138 }
4139
4140 /* Get dir "before" change value */
4141 bdva.va_mask = AT_CTIME|AT_SEQ;
4142 error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
4143 if (error) {
4144 *cs->statusp = resp->status = puterrno4(error);
4145 if (nm != name)
4146 kmem_free(name, MAXPATHLEN + 1);
4147 kmem_free(nm, len);
4148 if (in_crit)
4149 nbl_end_crit(vp);
4150 VN_RELE(vp);
4151 if (fp) {
4152 rfs4_clear_dont_grant(fp);
4153 rfs4_file_rele(fp);
4154 }
4155 goto out;
4156 }
4157 NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
4158
4159 /* Actually do the REMOVE operation */
4160 if (vp->v_type == VDIR) {
4161 /*
4162 * Can't remove a directory that has a mounted-on filesystem.
4163 */
4164 if (vn_ismntpt(vp)) {
4165 error = EACCES;
4166 } else {
4167 /*
4168 * System V defines rmdir to return EEXIST,
4169 * not ENOTEMPTY, if the directory is not
4170 * empty. A System V NFS server needs to map
4171 * NFS4ERR_EXIST to NFS4ERR_NOTEMPTY to
4172 * transmit over the wire.
4173 */
4174 if ((error = VOP_RMDIR(dvp, name, rootdir, cs->cr,
4175 NULL, 0)) == EEXIST)
4176 error = ENOTEMPTY;
4177 }
4178 } else {
4179 if ((error = VOP_REMOVE(dvp, name, cs->cr, NULL, 0)) == 0 &&
4180 fp != NULL) {
4181 struct vattr va;
4182 vnode_t *tvp;
4183
4184 rfs4_dbe_lock(fp->rf_dbe);
4185 tvp = fp->rf_vp;
4186 if (tvp)
4187 VN_HOLD(tvp);
4188 rfs4_dbe_unlock(fp->rf_dbe);
4189
4190 if (tvp) {
4191 /*
4192 * This is va_seq safe because we are not
4193 * manipulating dvp.
4194 */
4195 va.va_mask = AT_NLINK;
4196 if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4197 va.va_nlink == 0) {
4198 /* Remove state on file remove */
4199 if (in_crit) {
4200 nbl_end_crit(vp);
4201 in_crit = 0;
4202 }
4203 rfs4_close_all_state(fp);
4204 }
4205 VN_RELE(tvp);
4206 }
4207 }
4208 }
4209
4210 if (in_crit)
4211 nbl_end_crit(vp);
4212 VN_RELE(vp);
4213
4214 if (fp) {
4215 rfs4_clear_dont_grant(fp);
4216 rfs4_file_rele(fp);
4217 }
4218 if (nm != name)
4219 kmem_free(name, MAXPATHLEN + 1);
4220 kmem_free(nm, len);
4221
4222 if (error) {
4223 *cs->statusp = resp->status = puterrno4(error);
4224 goto out;
4225 }
4226
4227 /*
4228 * Get the initial "after" sequence number, if it fails, set to zero
4229 */
4230 idva.va_mask = AT_SEQ;
4231 if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
4232 idva.va_seq = 0;
4233
4234 /*
4235 * Force modified data and metadata out to stable storage.
4236 */
4237 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
4238
4239 /*
4240 * Get "after" change value, if it fails, simply return the
4241 * before value.
4242 */
4243 adva.va_mask = AT_CTIME|AT_SEQ;
4244 if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
4245 adva.va_ctime = bdva.va_ctime;
4246 adva.va_seq = 0;
4247 }
4248
4249 NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
4250
4251 /*
4252 * The cinfo.atomic = TRUE only if we have
4253 * non-zero va_seq's, and it has incremented by exactly one
4254 * during the VOP_REMOVE/RMDIR and it didn't change during
4255 * the VOP_FSYNC.
4256 */
4257 if (bdva.va_seq && idva.va_seq && adva.va_seq &&
4258 idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
4259 resp->cinfo.atomic = TRUE;
4260 else
4261 resp->cinfo.atomic = FALSE;
4262
4263 *cs->statusp = resp->status = NFS4_OK;
4264
4265 out:
4266 DTRACE_NFSV4_2(op__remove__done, struct compound_state *, cs,
4267 REMOVE4res *, resp);
4268 }
4269
4270 /*
4271 * rename: args: SAVED_FH: from directory, CURRENT_FH: target directory,
4272 * oldname and newname.
4273 * res: status. If success - CURRENT_FH unchanged, return change_info
4274 * for both from and target directories.
4275 */
4276 /* ARGSUSED */
4277 static void
4278 rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4279 struct compound_state *cs)
4280 {
4281 RENAME4args *args = &argop->nfs_argop4_u.oprename;
4282 RENAME4res *resp = &resop->nfs_resop4_u.oprename;
4283 int error;
4284 vnode_t *odvp;
4285 vnode_t *ndvp;
4286 vnode_t *srcvp, *targvp;
4287 struct vattr obdva, oidva, oadva;
4288 struct vattr nbdva, nidva, nadva;
4289 char *onm, *nnm;
4290 uint_t olen, nlen;
4291 rfs4_file_t *fp, *sfp;
4292 int in_crit_src, in_crit_targ;
4293 int fp_rele_grant_hold, sfp_rele_grant_hold;
4294 bslabel_t *clabel;
4295 struct sockaddr *ca;
4296 char *converted_onm = NULL;
4297 char *converted_nnm = NULL;
4298 nfsstat4 status;
4299
4300 DTRACE_NFSV4_2(op__rename__start, struct compound_state *, cs,
4301 RENAME4args *, args);
4302
4303 fp = sfp = NULL;
4304 srcvp = targvp = NULL;
4305 in_crit_src = in_crit_targ = 0;
4306 fp_rele_grant_hold = sfp_rele_grant_hold = 0;
4307
4308 /* CURRENT_FH: target directory */
4309 ndvp = cs->vp;
4310 if (ndvp == NULL) {
4311 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4312 goto out;
4313 }
4314
4315 /* SAVED_FH: from directory */
4316 odvp = cs->saved_vp;
4317 if (odvp == NULL) {
4318 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4319 goto out;
4320 }
4321
4322 if (cs->access == CS_ACCESS_DENIED) {
4323 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4324 goto out;
4325 }
4326
4327 /*
4328 * If there is an unshared filesystem mounted on this vnode,
4329 * do not allow to rename objects in this directory.
4330 */
4331 if (vn_ismntpt(odvp)) {
4332 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4333 goto out;
4334 }
4335
4336 /*
4337 * If there is an unshared filesystem mounted on this vnode,
4338 * do not allow to rename to this directory.
4339 */
4340 if (vn_ismntpt(ndvp)) {
4341 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4342 goto out;
4343 }
4344
4345 if (odvp->v_type != VDIR || ndvp->v_type != VDIR) {
4346 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4347 goto out;
4348 }
4349
4350 if (cs->saved_exi != cs->exi) {
4351 *cs->statusp = resp->status = NFS4ERR_XDEV;
4352 goto out;
4353 }
4354
4355 status = utf8_dir_verify(&args->oldname);
4356 if (status != NFS4_OK) {
4357 *cs->statusp = resp->status = status;
4358 goto out;
4359 }
4360
4361 status = utf8_dir_verify(&args->newname);
4362 if (status != NFS4_OK) {
4363 *cs->statusp = resp->status = status;
4364 goto out;
4365 }
4366
4367 onm = utf8_to_fn(&args->oldname, &olen, NULL);
4368 if (onm == NULL) {
4369 *cs->statusp = resp->status = NFS4ERR_INVAL;
4370 goto out;
4371 }
4372 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4373 nlen = MAXPATHLEN + 1;
4374 converted_onm = nfscmd_convname(ca, cs->exi, onm, NFSCMD_CONV_INBOUND,
4375 nlen);
4376
4377 if (converted_onm == NULL) {
4378 *cs->statusp = resp->status = NFS4ERR_INVAL;
4379 kmem_free(onm, olen);
4380 goto out;
4381 }
4382
4383 nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4384 if (nnm == NULL) {
4385 *cs->statusp = resp->status = NFS4ERR_INVAL;
4386 if (onm != converted_onm)
4387 kmem_free(converted_onm, MAXPATHLEN + 1);
4388 kmem_free(onm, olen);
4389 goto out;
4390 }
4391 converted_nnm = nfscmd_convname(ca, cs->exi, nnm, NFSCMD_CONV_INBOUND,
4392 MAXPATHLEN + 1);
4393
4394 if (converted_nnm == NULL) {
4395 *cs->statusp = resp->status = NFS4ERR_INVAL;
4396 kmem_free(nnm, nlen);
4397 nnm = NULL;
4398 if (onm != converted_onm)
4399 kmem_free(converted_onm, MAXPATHLEN + 1);
4400 kmem_free(onm, olen);
4401 goto out;
4402 }
4403
4404
4405 if (olen > MAXNAMELEN || nlen > MAXNAMELEN) {
4406 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4407 kmem_free(onm, olen);
4408 kmem_free(nnm, nlen);
4409 goto out;
4410 }
4411
4412
4413 if (rdonly4(req, cs)) {
4414 *cs->statusp = resp->status = NFS4ERR_ROFS;
4415 if (onm != converted_onm)
4416 kmem_free(converted_onm, MAXPATHLEN + 1);
4417 kmem_free(onm, olen);
4418 if (nnm != converted_nnm)
4419 kmem_free(converted_nnm, MAXPATHLEN + 1);
4420 kmem_free(nnm, nlen);
4421 goto out;
4422 }
4423
4424 /* check label of the target dir */
4425 if (is_system_labeled()) {
4426 ASSERT(req->rq_label != NULL);
4427 clabel = req->rq_label;
4428 DTRACE_PROBE2(tx__rfs4__log__info__oprename__clabel, char *,
4429 "got client label from request(1)",
4430 struct svc_req *, req);
4431 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4432 if (!do_rfs_label_check(clabel, ndvp,
4433 EQUALITY_CHECK, cs->exi)) {
4434 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4435 goto err_out;
4436 }
4437 }
4438 }
4439
4440 /*
4441 * Is the source a file and have a delegation?
4442 * We don't need to acquire va_seq before these lookups, if
4443 * it causes an update, cinfo.before will not match, which will
4444 * trigger a cache flush even if atomic is TRUE.
4445 */
4446 if (sfp = rfs4_lookup_and_findfile(odvp, converted_onm, &srcvp,
4447 &error, cs->cr)) {
4448 if (rfs4_check_delegated_byfp(FWRITE, sfp, TRUE, TRUE, TRUE,
4449 NULL)) {
4450 *cs->statusp = resp->status = NFS4ERR_DELAY;
4451 goto err_out;
4452 }
4453 }
4454
4455 if (srcvp == NULL) {
4456 *cs->statusp = resp->status = puterrno4(error);
4457 if (onm != converted_onm)
4458 kmem_free(converted_onm, MAXPATHLEN + 1);
4459 kmem_free(onm, olen);
4460 if (nnm != converted_nnm)
4461 kmem_free(converted_nnm, MAXPATHLEN + 1);
4462 kmem_free(nnm, nlen);
4463 goto out;
4464 }
4465
4466 sfp_rele_grant_hold = 1;
4467
4468 /* Does the destination exist and a file and have a delegation? */
4469 if (fp = rfs4_lookup_and_findfile(ndvp, converted_nnm, &targvp,
4470 NULL, cs->cr)) {
4471 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4472 NULL)) {
4473 *cs->statusp = resp->status = NFS4ERR_DELAY;
4474 goto err_out;
4475 }
4476 }
4477 fp_rele_grant_hold = 1;
4478
4479
4480 /* Check for NBMAND lock on both source and target */
4481 if (nbl_need_check(srcvp)) {
4482 nbl_start_crit(srcvp, RW_READER);
4483 in_crit_src = 1;
4484 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
4485 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4486 goto err_out;
4487 }
4488 }
4489
4490 if (targvp && nbl_need_check(targvp)) {
4491 nbl_start_crit(targvp, RW_READER);
4492 in_crit_targ = 1;
4493 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
4494 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4495 goto err_out;
4496 }
4497 }
4498
4499 /* Get source "before" change value */
4500 obdva.va_mask = AT_CTIME|AT_SEQ;
4501 error = VOP_GETATTR(odvp, &obdva, 0, cs->cr, NULL);
4502 if (!error) {
4503 nbdva.va_mask = AT_CTIME|AT_SEQ;
4504 error = VOP_GETATTR(ndvp, &nbdva, 0, cs->cr, NULL);
4505 }
4506 if (error) {
4507 *cs->statusp = resp->status = puterrno4(error);
4508 goto err_out;
4509 }
4510
4511 NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.before, obdva.va_ctime)
4512 NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.before, nbdva.va_ctime)
4513
4514 if ((error = VOP_RENAME(odvp, converted_onm, ndvp, converted_nnm,
4515 cs->cr, NULL, 0)) == 0 && fp != NULL) {
4516 struct vattr va;
4517 vnode_t *tvp;
4518
4519 rfs4_dbe_lock(fp->rf_dbe);
4520 tvp = fp->rf_vp;
4521 if (tvp)
4522 VN_HOLD(tvp);
4523 rfs4_dbe_unlock(fp->rf_dbe);
4524
4525 if (tvp) {
4526 va.va_mask = AT_NLINK;
4527 if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4528 va.va_nlink == 0) {
4529 /* The file is gone and so should the state */
4530 if (in_crit_targ) {
4531 nbl_end_crit(targvp);
4532 in_crit_targ = 0;
4533 }
4534 rfs4_close_all_state(fp);
4535 }
4536 VN_RELE(tvp);
4537 }
4538 }
4539 if (error == 0)
4540 vn_renamepath(ndvp, srcvp, nnm, nlen - 1);
4541
4542 if (in_crit_src)
4543 nbl_end_crit(srcvp);
4544 if (srcvp)
4545 VN_RELE(srcvp);
4546 if (in_crit_targ)
4547 nbl_end_crit(targvp);
4548 if (targvp)
4549 VN_RELE(targvp);
4550
4551 if (sfp) {
4552 rfs4_clear_dont_grant(sfp);
4553 rfs4_file_rele(sfp);
4554 }
4555 if (fp) {
4556 rfs4_clear_dont_grant(fp);
4557 rfs4_file_rele(fp);
4558 }
4559
4560 if (converted_onm != onm)
4561 kmem_free(converted_onm, MAXPATHLEN + 1);
4562 kmem_free(onm, olen);
4563 if (converted_nnm != nnm)
4564 kmem_free(converted_nnm, MAXPATHLEN + 1);
4565 kmem_free(nnm, nlen);
4566
4567 /*
4568 * Get the initial "after" sequence number, if it fails, set to zero
4569 */
4570 oidva.va_mask = AT_SEQ;
4571 if (VOP_GETATTR(odvp, &oidva, 0, cs->cr, NULL))
4572 oidva.va_seq = 0;
4573
4574 nidva.va_mask = AT_SEQ;
4575 if (VOP_GETATTR(ndvp, &nidva, 0, cs->cr, NULL))
4576 nidva.va_seq = 0;
4577
4578 /*
4579 * Force modified data and metadata out to stable storage.
4580 */
4581 (void) VOP_FSYNC(odvp, 0, cs->cr, NULL);
4582 (void) VOP_FSYNC(ndvp, 0, cs->cr, NULL);
4583
4584 if (error) {
4585 *cs->statusp = resp->status = puterrno4(error);
4586 goto out;
4587 }
4588
4589 /*
4590 * Get "after" change values, if it fails, simply return the
4591 * before value.
4592 */
4593 oadva.va_mask = AT_CTIME|AT_SEQ;
4594 if (VOP_GETATTR(odvp, &oadva, 0, cs->cr, NULL)) {
4595 oadva.va_ctime = obdva.va_ctime;
4596 oadva.va_seq = 0;
4597 }
4598
4599 nadva.va_mask = AT_CTIME|AT_SEQ;
4600 if (VOP_GETATTR(odvp, &nadva, 0, cs->cr, NULL)) {
4601 nadva.va_ctime = nbdva.va_ctime;
4602 nadva.va_seq = 0;
4603 }
4604
4605 NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.after, oadva.va_ctime)
4606 NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.after, nadva.va_ctime)
4607
4608 /*
4609 * The cinfo.atomic = TRUE only if we have
4610 * non-zero va_seq's, and it has incremented by exactly one
4611 * during the VOP_RENAME and it didn't change during the VOP_FSYNC.
4612 */
4613 if (obdva.va_seq && oidva.va_seq && oadva.va_seq &&
4614 oidva.va_seq == (obdva.va_seq + 1) && oidva.va_seq == oadva.va_seq)
4615 resp->source_cinfo.atomic = TRUE;
4616 else
4617 resp->source_cinfo.atomic = FALSE;
4618
4619 if (nbdva.va_seq && nidva.va_seq && nadva.va_seq &&
4620 nidva.va_seq == (nbdva.va_seq + 1) && nidva.va_seq == nadva.va_seq)
4621 resp->target_cinfo.atomic = TRUE;
4622 else
4623 resp->target_cinfo.atomic = FALSE;
4624
4625 #ifdef VOLATILE_FH_TEST
4626 {
4627 extern void add_volrnm_fh(struct exportinfo *, vnode_t *);
4628
4629 /*
4630 * Add the renamed file handle to the volatile rename list
4631 */
4632 if (cs->exi->exi_export.ex_flags & EX_VOLRNM) {
4633 /* file handles may expire on rename */
4634 vnode_t *vp;
4635
4636 nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4637 /*
4638 * Already know that nnm will be a valid string
4639 */
4640 error = VOP_LOOKUP(ndvp, nnm, &vp, NULL, 0, NULL, cs->cr,
4641 NULL, NULL, NULL);
4642 kmem_free(nnm, nlen);
4643 if (!error) {
4644 add_volrnm_fh(cs->exi, vp);
4645 VN_RELE(vp);
4646 }
4647 }
4648 }
4649 #endif /* VOLATILE_FH_TEST */
4650
4651 *cs->statusp = resp->status = NFS4_OK;
4652 out:
4653 DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4654 RENAME4res *, resp);
4655 return;
4656
4657 err_out:
4658 if (onm != converted_onm)
4659 kmem_free(converted_onm, MAXPATHLEN + 1);
4660 if (onm != NULL)
4661 kmem_free(onm, olen);
4662 if (nnm != converted_nnm)
4663 kmem_free(converted_nnm, MAXPATHLEN + 1);
4664 if (nnm != NULL)
4665 kmem_free(nnm, nlen);
4666
4667 if (in_crit_src) nbl_end_crit(srcvp);
4668 if (in_crit_targ) nbl_end_crit(targvp);
4669 if (targvp) VN_RELE(targvp);
4670 if (srcvp) VN_RELE(srcvp);
4671 if (sfp) {
4672 if (sfp_rele_grant_hold) rfs4_clear_dont_grant(sfp);
4673 rfs4_file_rele(sfp);
4674 }
4675 if (fp) {
4676 if (fp_rele_grant_hold) rfs4_clear_dont_grant(fp);
4677 rfs4_file_rele(fp);
4678 }
4679
4680 DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4681 RENAME4res *, resp);
4682 }
4683
4684 /* ARGSUSED */
4685 static void
4686 rfs4_op_renew(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4687 struct compound_state *cs)
4688 {
4689 RENEW4args *args = &argop->nfs_argop4_u.oprenew;
4690 RENEW4res *resp = &resop->nfs_resop4_u.oprenew;
4691 rfs4_client_t *cp;
4692
4693 DTRACE_NFSV4_2(op__renew__start, struct compound_state *, cs,
4694 RENEW4args *, args);
4695
4696 if ((cp = rfs4_findclient_by_id(args->clientid, FALSE)) == NULL) {
4697 *cs->statusp = resp->status =
4698 rfs4_check_clientid(&args->clientid, 0);
4699 goto out;
4700 }
4701
4702 if (rfs4_lease_expired(cp)) {
4703 rfs4_client_rele(cp);
4704 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
4705 goto out;
4706 }
4707
4708 rfs4_update_lease(cp);
4709
4710 mutex_enter(cp->rc_cbinfo.cb_lock);
4711 if (cp->rc_cbinfo.cb_notified_of_cb_path_down == FALSE) {
4712 cp->rc_cbinfo.cb_notified_of_cb_path_down = TRUE;
4713 *cs->statusp = resp->status = NFS4ERR_CB_PATH_DOWN;
4714 } else {
4715 *cs->statusp = resp->status = NFS4_OK;
4716 }
4717 mutex_exit(cp->rc_cbinfo.cb_lock);
4718
4719 rfs4_client_rele(cp);
4720
4721 out:
4722 DTRACE_NFSV4_2(op__renew__done, struct compound_state *, cs,
4723 RENEW4res *, resp);
4724 }
4725
4726 /* ARGSUSED */
4727 static void
4728 rfs4_op_restorefh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
4729 struct compound_state *cs)
4730 {
4731 RESTOREFH4res *resp = &resop->nfs_resop4_u.oprestorefh;
4732
4733 DTRACE_NFSV4_1(op__restorefh__start, struct compound_state *, cs);
4734
4735 /* No need to check cs->access - we are not accessing any object */
4736 if ((cs->saved_vp == NULL) || (cs->saved_fh.nfs_fh4_val == NULL)) {
4737 *cs->statusp = resp->status = NFS4ERR_RESTOREFH;
4738 goto out;
4739 }
4740 if (cs->vp != NULL) {
4741 VN_RELE(cs->vp);
4742 }
4743 cs->vp = cs->saved_vp;
4744 cs->saved_vp = NULL;
4745 cs->exi = cs->saved_exi;
4746 nfs_fh4_copy(&cs->saved_fh, &cs->fh);
4747 *cs->statusp = resp->status = NFS4_OK;
4748 cs->deleg = FALSE;
4749
4750 out:
4751 DTRACE_NFSV4_2(op__restorefh__done, struct compound_state *, cs,
4752 RESTOREFH4res *, resp);
4753 }
4754
4755 /* ARGSUSED */
4756 static void
4757 rfs4_op_savefh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4758 struct compound_state *cs)
4759 {
4760 SAVEFH4res *resp = &resop->nfs_resop4_u.opsavefh;
4761
4762 DTRACE_NFSV4_1(op__savefh__start, struct compound_state *, cs);
4763
4764 /* No need to check cs->access - we are not accessing any object */
4765 if (cs->vp == NULL) {
4766 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4767 goto out;
4768 }
4769 if (cs->saved_vp != NULL) {
4770 VN_RELE(cs->saved_vp);
4771 }
4772 cs->saved_vp = cs->vp;
4773 VN_HOLD(cs->saved_vp);
4774 cs->saved_exi = cs->exi;
4775 /*
4776 * since SAVEFH is fairly rare, don't alloc space for its fh
4777 * unless necessary.
4778 */
4779 if (cs->saved_fh.nfs_fh4_val == NULL) {
4780 cs->saved_fh.nfs_fh4_val = kmem_alloc(NFS4_FHSIZE, KM_SLEEP);
4781 }
4782 nfs_fh4_copy(&cs->fh, &cs->saved_fh);
4783 *cs->statusp = resp->status = NFS4_OK;
4784
4785 out:
4786 DTRACE_NFSV4_2(op__savefh__done, struct compound_state *, cs,
4787 SAVEFH4res *, resp);
4788 }
4789
4790 /*
4791 * rfs4_verify_attr is called when nfsv4 Setattr failed, but we wish to
4792 * return the bitmap of attrs that were set successfully. It is also
4793 * called by Verify/Nverify to test the vattr/vfsstat attrs. It should
4794 * always be called only after rfs4_do_set_attrs().
4795 *
4796 * Verify that the attributes are same as the expected ones. sargp->vap
4797 * and sargp->sbp contain the input attributes as translated from fattr4.
4798 *
4799 * This function verifies only the attrs that correspond to a vattr or
4800 * vfsstat struct. That is because of the extra step needed to get the
4801 * corresponding system structs. Other attributes have already been set or
4802 * verified by do_rfs4_set_attrs.
4803 *
4804 * Return 0 if all attrs match, -1 if some don't, error if error processing.
4805 */
4806 static int
4807 rfs4_verify_attr(struct nfs4_svgetit_arg *sargp,
4808 bitmap4 *resp, struct nfs4_ntov_table *ntovp)
4809 {
4810 int error, ret_error = 0;
4811 int i, k;
4812 uint_t sva_mask = sargp->vap->va_mask;
4813 uint_t vbit;
4814 union nfs4_attr_u *na;
4815 uint8_t *amap;
4816 bool_t getsb = ntovp->vfsstat;
4817
4818 if (sva_mask != 0) {
4819 /*
4820 * Okay to overwrite sargp->vap because we verify based
4821 * on the incoming values.
4822 */
4823 ret_error = VOP_GETATTR(sargp->cs->vp, sargp->vap, 0,
4824 sargp->cs->cr, NULL);
4825 if (ret_error) {
4826 if (resp == NULL)
4827 return (ret_error);
4828 /*
4829 * Must return bitmap of successful attrs
4830 */
4831 sva_mask = 0; /* to prevent checking vap later */
4832 } else {
4833 /*
4834 * Some file systems clobber va_mask. it is probably
4835 * wrong of them to do so, nonethless we practice
4836 * defensive coding.
4837 * See bug id 4276830.
4838 */
4839 sargp->vap->va_mask = sva_mask;
4840 }
4841 }
4842
4843 if (getsb) {
4844 /*
4845 * Now get the superblock and loop on the bitmap, as there is
4846 * no simple way of translating from superblock to bitmap4.
4847 */
4848 ret_error = VFS_STATVFS(sargp->cs->vp->v_vfsp, sargp->sbp);
4849 if (ret_error) {
4850 if (resp == NULL)
4851 goto errout;
4852 getsb = FALSE;
4853 }
4854 }
4855
4856 /*
4857 * Now loop and verify each attribute which getattr returned
4858 * whether it's the same as the input.
4859 */
4860 if (resp == NULL && !getsb && (sva_mask == 0))
4861 goto errout;
4862
4863 na = ntovp->na;
4864 amap = ntovp->amap;
4865 k = 0;
4866 for (i = 0; i < ntovp->attrcnt; i++, na++, amap++) {
4867 k = *amap;
4868 ASSERT(nfs4_ntov_map[k].nval == k);
4869 vbit = nfs4_ntov_map[k].vbit;
4870
4871 /*
4872 * If vattr attribute but VOP_GETATTR failed, or it's
4873 * superblock attribute but VFS_STATVFS failed, skip
4874 */
4875 if (vbit) {
4876 if ((vbit & sva_mask) == 0)
4877 continue;
4878 } else if (!(getsb && nfs4_ntov_map[k].vfsstat)) {
4879 continue;
4880 }
4881 error = (*nfs4_ntov_map[k].sv_getit)(NFS4ATTR_VERIT, sargp, na);
4882 if (resp != NULL) {
4883 if (error)
4884 ret_error = -1; /* not all match */
4885 else /* update response bitmap */
4886 *resp |= nfs4_ntov_map[k].fbit;
4887 continue;
4888 }
4889 if (error) {
4890 ret_error = -1; /* not all match */
4891 break;
4892 }
4893 }
4894 errout:
4895 return (ret_error);
4896 }
4897
4898 /*
4899 * Decode the attribute to be set/verified. If the attr requires a sys op
4900 * (VOP_GETATTR, VFS_VFSSTAT), and the request is to verify, then don't
4901 * call the sv_getit function for it, because the sys op hasn't yet been done.
4902 * Return 0 for success, error code if failed.
4903 *
4904 * Note: the decoded arg is not freed here but in nfs4_ntov_table_free.
4905 */
4906 static int
4907 decode_fattr4_attr(nfs4_attr_cmd_t cmd, struct nfs4_svgetit_arg *sargp,
4908 int k, XDR *xdrp, bitmap4 *resp_bval, union nfs4_attr_u *nap)
4909 {
4910 int error = 0;
4911 bool_t set_later;
4912
4913 sargp->vap->va_mask |= nfs4_ntov_map[k].vbit;
4914
4915 if ((*nfs4_ntov_map[k].xfunc)(xdrp, nap)) {
4916 set_later = nfs4_ntov_map[k].vbit || nfs4_ntov_map[k].vfsstat;
4917 /*
4918 * don't verify yet if a vattr or sb dependent attr,
4919 * because we don't have their sys values yet.
4920 * Will be done later.
4921 */
4922 if (! (set_later && (cmd == NFS4ATTR_VERIT))) {
4923 /*
4924 * ACLs are a special case, since setting the MODE
4925 * conflicts with setting the ACL. We delay setting
4926 * the ACL until all other attributes have been set.
4927 * The ACL gets set in do_rfs4_op_setattr().
4928 */
4929 if (nfs4_ntov_map[k].fbit != FATTR4_ACL_MASK) {
4930 error = (*nfs4_ntov_map[k].sv_getit)(cmd,
4931 sargp, nap);
4932 if (error) {
4933 xdr_free(nfs4_ntov_map[k].xfunc,
4934 (caddr_t)nap);
4935 }
4936 }
4937 }
4938 } else {
4939 #ifdef DEBUG
4940 cmn_err(CE_NOTE, "decode_fattr4_attr: error "
4941 "decoding attribute %d\n", k);
4942 #endif
4943 error = EINVAL;
4944 }
4945 if (!error && resp_bval && !set_later) {
4946 *resp_bval |= nfs4_ntov_map[k].fbit;
4947 }
4948
4949 return (error);
4950 }
4951
4952 /*
4953 * Set vattr based on incoming fattr4 attrs - used by setattr.
4954 * Set response mask. Ignore any values that are not writable vattr attrs.
4955 */
4956 static nfsstat4
4957 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
4958 struct nfs4_svgetit_arg *sargp, struct nfs4_ntov_table *ntovp,
4959 nfs4_attr_cmd_t cmd)
4960 {
4961 int error = 0;
4962 int i;
4963 char *attrs = fattrp->attrlist4;
4964 uint32_t attrslen = fattrp->attrlist4_len;
4965 XDR xdr;
4966 nfsstat4 status = NFS4_OK;
4967 vnode_t *vp = cs->vp;
4968 union nfs4_attr_u *na;
4969 uint8_t *amap;
4970
4971 #ifndef lint
4972 /*
4973 * Make sure that maximum attribute number can be expressed as an
4974 * 8 bit quantity.
4975 */
4976 ASSERT(NFS4_MAXNUM_ATTRS <= (UINT8_MAX + 1));
4977 #endif
4978
4979 if (vp == NULL) {
4980 if (resp)
4981 *resp = 0;
4982 return (NFS4ERR_NOFILEHANDLE);
4983 }
4984 if (cs->access == CS_ACCESS_DENIED) {
4985 if (resp)
4986 *resp = 0;
4987 return (NFS4ERR_ACCESS);
4988 }
4989
4990 sargp->op = cmd;
4991 sargp->cs = cs;
4992 sargp->flag = 0; /* may be set later */
4993 sargp->vap->va_mask = 0;
4994 sargp->rdattr_error = NFS4_OK;
4995 sargp->rdattr_error_req = FALSE;
4996 /* sargp->sbp is set by the caller */
4997
4998 xdrmem_create(&xdr, attrs, attrslen, XDR_DECODE);
4999
5000 na = ntovp->na;
5001 amap = ntovp->amap;
5002
5003 /*
5004 * The following loop iterates on the nfs4_ntov_map checking
5005 * if the fbit is set in the requested bitmap.
5006 * If set then we process the arguments using the
5007 * rfs4_fattr4 conversion functions to populate the setattr
5008 * vattr and va_mask. Any settable attrs that are not using vattr
5009 * will be set in this loop.
5010 */
5011 for (i = 0; i < nfs4_ntov_map_size; i++) {
5012 if (!(fattrp->attrmask & nfs4_ntov_map[i].fbit)) {
5013 continue;
5014 }
5015 /*
5016 * If setattr, must be a writable attr.
5017 * If verify/nverify, must be a readable attr.
5018 */
5019 if ((error = (*nfs4_ntov_map[i].sv_getit)(
5020 NFS4ATTR_SUPPORTED, sargp, NULL)) != 0) {
5021 /*
5022 * Client tries to set/verify an
5023 * unsupported attribute, tries to set
5024 * a read only attr or verify a write
5025 * only one - error!
5026 */
5027 break;
5028 }
5029 /*
5030 * Decode the attribute to set/verify
5031 */
5032 error = decode_fattr4_attr(cmd, sargp, nfs4_ntov_map[i].nval,
5033 &xdr, resp ? resp : NULL, na);
5034 if (error)
5035 break;
5036 *amap++ = (uint8_t)nfs4_ntov_map[i].nval;
5037 na++;
5038 (ntovp->attrcnt)++;
5039 if (nfs4_ntov_map[i].vfsstat)
5040 ntovp->vfsstat = TRUE;
5041 }
5042
5043 if (error != 0)
5044 status = (error == ENOTSUP ? NFS4ERR_ATTRNOTSUPP :
5045 puterrno4(error));
5046 /* xdrmem_destroy(&xdrs); */ /* NO-OP */
5047 return (status);
5048 }
5049
5050 static nfsstat4
5051 do_rfs4_op_setattr(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5052 stateid4 *stateid)
5053 {
5054 int error = 0;
5055 struct nfs4_svgetit_arg sarg;
5056 bool_t trunc;
5057
5058 nfsstat4 status = NFS4_OK;
5059 cred_t *cr = cs->cr;
5060 vnode_t *vp = cs->vp;
5061 struct nfs4_ntov_table ntov;
5062 struct statvfs64 sb;
5063 struct vattr bva;
5064 struct flock64 bf;
5065 int in_crit = 0;
5066 uint_t saved_mask = 0;
5067 caller_context_t ct;
5068
5069 *resp = 0;
5070 sarg.sbp = &sb;
5071 sarg.is_referral = B_FALSE;
5072 nfs4_ntov_table_init(&ntov);
5073 status = do_rfs4_set_attrs(resp, fattrp, cs, &sarg, &ntov,
5074 NFS4ATTR_SETIT);
5075 if (status != NFS4_OK) {
5076 /*
5077 * failed set attrs
5078 */
5079 goto done;
5080 }
5081 if ((sarg.vap->va_mask == 0) &&
5082 (! (fattrp->attrmask & FATTR4_ACL_MASK))) {
5083 /*
5084 * no further work to be done
5085 */
5086 goto done;
5087 }
5088
5089 /*
5090 * If we got a request to set the ACL and the MODE, only
5091 * allow changing VSUID, VSGID, and VSVTX. Attempting
5092 * to change any other bits, along with setting an ACL,
5093 * gives NFS4ERR_INVAL.
5094 */
5095 if ((fattrp->attrmask & FATTR4_ACL_MASK) &&
5096 (fattrp->attrmask & FATTR4_MODE_MASK)) {
5097 vattr_t va;
5098
5099 va.va_mask = AT_MODE;
5100 error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
5101 if (error) {
5102 status = puterrno4(error);
5103 goto done;
5104 }
5105 if ((sarg.vap->va_mode ^ va.va_mode) &
5106 ~(VSUID | VSGID | VSVTX)) {
5107 status = NFS4ERR_INVAL;
5108 goto done;
5109 }
5110 }
5111
5112 /* Check stateid only if size has been set */
5113 if (sarg.vap->va_mask & AT_SIZE) {
5114 trunc = (sarg.vap->va_size == 0);
5115 status = rfs4_check_stateid(FWRITE, cs->vp, stateid,
5116 trunc, &cs->deleg, sarg.vap->va_mask & AT_SIZE, &ct);
5117 if (status != NFS4_OK)
5118 goto done;
5119 } else {
5120 ct.cc_sysid = 0;
5121 ct.cc_pid = 0;
5122 ct.cc_caller_id = nfs4_srv_caller_id;
5123 ct.cc_flags = CC_DONTBLOCK;
5124 }
5125
5126 /* XXX start of possible race with delegations */
5127
5128 /*
5129 * We need to specially handle size changes because it is
5130 * possible for the client to create a file with read-only
5131 * modes, but with the file opened for writing. If the client
5132 * then tries to set the file size, e.g. ftruncate(3C),
5133 * fcntl(F_FREESP), the normal access checking done in
5134 * VOP_SETATTR would prevent the client from doing it even though
5135 * it should be allowed to do so. To get around this, we do the
5136 * access checking for ourselves and use VOP_SPACE which doesn't
5137 * do the access checking.
5138 * Also the client should not be allowed to change the file
5139 * size if there is a conflicting non-blocking mandatory lock in
5140 * the region of the change.
5141 */
5142 if (vp->v_type == VREG && (sarg.vap->va_mask & AT_SIZE)) {
5143 u_offset_t offset;
5144 ssize_t length;
5145
5146 /*
5147 * ufs_setattr clears AT_SIZE from vap->va_mask, but
5148 * before returning, sarg.vap->va_mask is used to
5149 * generate the setattr reply bitmap. We also clear
5150 * AT_SIZE below before calling VOP_SPACE. For both
5151 * of these cases, the va_mask needs to be saved here
5152 * and restored after calling VOP_SETATTR.
5153 */
5154 saved_mask = sarg.vap->va_mask;
5155
5156 /*
5157 * Check any possible conflict due to NBMAND locks.
5158 * Get into critical region before VOP_GETATTR, so the
5159 * size attribute is valid when checking conflicts.
5160 */
5161 if (nbl_need_check(vp)) {
5162 nbl_start_crit(vp, RW_READER);
5163 in_crit = 1;
5164 }
5165
5166 bva.va_mask = AT_UID|AT_SIZE;
5167 if (error = VOP_GETATTR(vp, &bva, 0, cr, &ct)) {
5168 status = puterrno4(error);
5169 goto done;
5170 }
5171
5172 if (in_crit) {
5173 if (sarg.vap->va_size < bva.va_size) {
5174 offset = sarg.vap->va_size;
5175 length = bva.va_size - sarg.vap->va_size;
5176 } else {
5177 offset = bva.va_size;
5178 length = sarg.vap->va_size - bva.va_size;
5179 }
5180 if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
5181 &ct)) {
5182 status = NFS4ERR_LOCKED;
5183 goto done;
5184 }
5185 }
5186
5187 if (crgetuid(cr) == bva.va_uid) {
5188 sarg.vap->va_mask &= ~AT_SIZE;
5189 bf.l_type = F_WRLCK;
5190 bf.l_whence = 0;
5191 bf.l_start = (off64_t)sarg.vap->va_size;
5192 bf.l_len = 0;
5193 bf.l_sysid = 0;
5194 bf.l_pid = 0;
5195 error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
5196 (offset_t)sarg.vap->va_size, cr, &ct);
5197 }
5198 }
5199
5200 if (!error && sarg.vap->va_mask != 0)
5201 error = VOP_SETATTR(vp, sarg.vap, sarg.flag, cr, &ct);
5202
5203 /* restore va_mask -- ufs_setattr clears AT_SIZE */
5204 if (saved_mask & AT_SIZE)
5205 sarg.vap->va_mask |= AT_SIZE;
5206
5207 /*
5208 * If an ACL was being set, it has been delayed until now,
5209 * in order to set the mode (via the VOP_SETATTR() above) first.
5210 */
5211 if ((! error) && (fattrp->attrmask & FATTR4_ACL_MASK)) {
5212 int i;
5213
5214 for (i = 0; i < NFS4_MAXNUM_ATTRS; i++)
5215 if (ntov.amap[i] == FATTR4_ACL)
5216 break;
5217 if (i < NFS4_MAXNUM_ATTRS) {
5218 error = (*nfs4_ntov_map[FATTR4_ACL].sv_getit)(
5219 NFS4ATTR_SETIT, &sarg, &ntov.na[i]);
5220 if (error == 0) {
5221 *resp |= FATTR4_ACL_MASK;
5222 } else if (error == ENOTSUP) {
5223 (void) rfs4_verify_attr(&sarg, resp, &ntov);
5224 status = NFS4ERR_ATTRNOTSUPP;
5225 goto done;
5226 }
5227 } else {
5228 NFS4_DEBUG(rfs4_debug,
5229 (CE_NOTE, "do_rfs4_op_setattr: "
5230 "unable to find ACL in fattr4"));
5231 error = EINVAL;
5232 }
5233 }
5234
5235 if (error) {
5236 /* check if a monitor detected a delegation conflict */
5237 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
5238 status = NFS4ERR_DELAY;
5239 else
5240 status = puterrno4(error);
5241
5242 /*
5243 * Set the response bitmap when setattr failed.
5244 * If VOP_SETATTR partially succeeded, test by doing a
5245 * VOP_GETATTR on the object and comparing the data
5246 * to the setattr arguments.
5247 */
5248 (void) rfs4_verify_attr(&sarg, resp, &ntov);
5249 } else {
5250 /*
5251 * Force modified metadata out to stable storage.
5252 */
5253 (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
5254 /*
5255 * Set response bitmap
5256 */
5257 nfs4_vmask_to_nmask_set(sarg.vap->va_mask, resp);
5258 }
5259
5260 /* Return early and already have a NFSv4 error */
5261 done:
5262 /*
5263 * Except for nfs4_vmask_to_nmask_set(), vattr --> fattr
5264 * conversion sets both readable and writeable NFS4 attrs
5265 * for AT_MTIME and AT_ATIME. The line below masks out
5266 * unrequested attrs from the setattr result bitmap. This
5267 * is placed after the done: label to catch the ATTRNOTSUP
5268 * case.
5269 */
5270 *resp &= fattrp->attrmask;
5271
5272 if (in_crit)
5273 nbl_end_crit(vp);
5274
5275 nfs4_ntov_table_free(&ntov, &sarg);
5276
5277 return (status);
5278 }
5279
5280 /* ARGSUSED */
5281 static void
5282 rfs4_op_setattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5283 struct compound_state *cs)
5284 {
5285 SETATTR4args *args = &argop->nfs_argop4_u.opsetattr;
5286 SETATTR4res *resp = &resop->nfs_resop4_u.opsetattr;
5287 bslabel_t *clabel;
5288
5289 DTRACE_NFSV4_2(op__setattr__start, struct compound_state *, cs,
5290 SETATTR4args *, args);
5291
5292 if (cs->vp == NULL) {
5293 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5294 goto out;
5295 }
5296
5297 /*
5298 * If there is an unshared filesystem mounted on this vnode,
5299 * do not allow to setattr on this vnode.
5300 */
5301 if (vn_ismntpt(cs->vp)) {
5302 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5303 goto out;
5304 }
5305
5306 resp->attrsset = 0;
5307
5308 if (rdonly4(req, cs)) {
5309 *cs->statusp = resp->status = NFS4ERR_ROFS;
5310 goto out;
5311 }
5312
5313 /* check label before setting attributes */
5314 if (is_system_labeled()) {
5315 ASSERT(req->rq_label != NULL);
5316 clabel = req->rq_label;
5317 DTRACE_PROBE2(tx__rfs4__log__info__opsetattr__clabel, char *,
5318 "got client label from request(1)",
5319 struct svc_req *, req);
5320 if (!blequal(&l_admin_low->tsl_label, clabel)) {
5321 if (!do_rfs_label_check(clabel, cs->vp,
5322 EQUALITY_CHECK, cs->exi)) {
5323 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5324 goto out;
5325 }
5326 }
5327 }
5328
5329 *cs->statusp = resp->status =
5330 do_rfs4_op_setattr(&resp->attrsset, &args->obj_attributes, cs,
5331 &args->stateid);
5332
5333 out:
5334 DTRACE_NFSV4_2(op__setattr__done, struct compound_state *, cs,
5335 SETATTR4res *, resp);
5336 }
5337
5338 /* ARGSUSED */
5339 static void
5340 rfs4_op_verify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5341 struct compound_state *cs)
5342 {
5343 /*
5344 * verify and nverify are exactly the same, except that nverify
5345 * succeeds when some argument changed, and verify succeeds when
5346 * when none changed.
5347 */
5348
5349 VERIFY4args *args = &argop->nfs_argop4_u.opverify;
5350 VERIFY4res *resp = &resop->nfs_resop4_u.opverify;
5351
5352 int error;
5353 struct nfs4_svgetit_arg sarg;
5354 struct statvfs64 sb;
5355 struct nfs4_ntov_table ntov;
5356
5357 DTRACE_NFSV4_2(op__verify__start, struct compound_state *, cs,
5358 VERIFY4args *, args);
5359
5360 if (cs->vp == NULL) {
5361 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5362 goto out;
5363 }
5364
5365 sarg.sbp = &sb;
5366 sarg.is_referral = B_FALSE;
5367 nfs4_ntov_table_init(&ntov);
5368 resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5369 &sarg, &ntov, NFS4ATTR_VERIT);
5370 if (resp->status != NFS4_OK) {
5371 /*
5372 * do_rfs4_set_attrs will try to verify systemwide attrs,
5373 * so could return -1 for "no match".
5374 */
5375 if (resp->status == -1)
5376 resp->status = NFS4ERR_NOT_SAME;
5377 goto done;
5378 }
5379 error = rfs4_verify_attr(&sarg, NULL, &ntov);
5380 switch (error) {
5381 case 0:
5382 resp->status = NFS4_OK;
5383 break;
5384 case -1:
5385 resp->status = NFS4ERR_NOT_SAME;
5386 break;
5387 default:
5388 resp->status = puterrno4(error);
5389 break;
5390 }
5391 done:
5392 *cs->statusp = resp->status;
5393 nfs4_ntov_table_free(&ntov, &sarg);
5394 out:
5395 DTRACE_NFSV4_2(op__verify__done, struct compound_state *, cs,
5396 VERIFY4res *, resp);
5397 }
5398
5399 /* ARGSUSED */
5400 static void
5401 rfs4_op_nverify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5402 struct compound_state *cs)
5403 {
5404 /*
5405 * verify and nverify are exactly the same, except that nverify
5406 * succeeds when some argument changed, and verify succeeds when
5407 * when none changed.
5408 */
5409
5410 NVERIFY4args *args = &argop->nfs_argop4_u.opnverify;
5411 NVERIFY4res *resp = &resop->nfs_resop4_u.opnverify;
5412
5413 int error;
5414 struct nfs4_svgetit_arg sarg;
5415 struct statvfs64 sb;
5416 struct nfs4_ntov_table ntov;
5417
5418 DTRACE_NFSV4_2(op__nverify__start, struct compound_state *, cs,
5419 NVERIFY4args *, args);
5420
5421 if (cs->vp == NULL) {
5422 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5423 DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5424 NVERIFY4res *, resp);
5425 return;
5426 }
5427 sarg.sbp = &sb;
5428 sarg.is_referral = B_FALSE;
5429 nfs4_ntov_table_init(&ntov);
5430 resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5431 &sarg, &ntov, NFS4ATTR_VERIT);
5432 if (resp->status != NFS4_OK) {
5433 /*
5434 * do_rfs4_set_attrs will try to verify systemwide attrs,
5435 * so could return -1 for "no match".
5436 */
5437 if (resp->status == -1)
5438 resp->status = NFS4_OK;
5439 goto done;
5440 }
5441 error = rfs4_verify_attr(&sarg, NULL, &ntov);
5442 switch (error) {
5443 case 0:
5444 resp->status = NFS4ERR_SAME;
5445 break;
5446 case -1:
5447 resp->status = NFS4_OK;
5448 break;
5449 default:
5450 resp->status = puterrno4(error);
5451 break;
5452 }
5453 done:
5454 *cs->statusp = resp->status;
5455 nfs4_ntov_table_free(&ntov, &sarg);
5456
5457 DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5458 NVERIFY4res *, resp);
5459 }
5460
5461 /*
5462 * XXX - This should live in an NFS header file.
5463 */
5464 #define MAX_IOVECS 12
5465
5466 /* ARGSUSED */
5467 static void
5468 rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5469 struct compound_state *cs)
5470 {
5471 WRITE4args *args = &argop->nfs_argop4_u.opwrite;
5472 WRITE4res *resp = &resop->nfs_resop4_u.opwrite;
5473 int error;
5474 vnode_t *vp;
5475 struct vattr bva;
5476 u_offset_t rlimit;
5477 struct uio uio;
5478 struct iovec iov[MAX_IOVECS];
5479 struct iovec *iovp;
5480 int iovcnt;
5481 int ioflag;
5482 cred_t *savecred, *cr;
5483 bool_t *deleg = &cs->deleg;
5484 nfsstat4 stat;
5485 int in_crit = 0;
5486 caller_context_t ct;
5487
5488 DTRACE_NFSV4_2(op__write__start, struct compound_state *, cs,
5489 WRITE4args *, args);
5490
5491 vp = cs->vp;
5492 if (vp == NULL) {
5493 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5494 goto out;
5495 }
5496 if (cs->access == CS_ACCESS_DENIED) {
5497 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5498 goto out;
5499 }
5500
5501 cr = cs->cr;
5502
5503 if ((stat = rfs4_check_stateid(FWRITE, vp, &args->stateid, FALSE,
5504 deleg, TRUE, &ct)) != NFS4_OK) {
5505 *cs->statusp = resp->status = stat;
5506 goto out;
5507 }
5508
5509 /*
5510 * We have to enter the critical region before calling VOP_RWLOCK
5511 * to avoid a deadlock with ufs.
5512 */
5513 if (nbl_need_check(vp)) {
5514 nbl_start_crit(vp, RW_READER);
5515 in_crit = 1;
5516 if (nbl_conflict(vp, NBL_WRITE,
5517 args->offset, args->data_len, 0, &ct)) {
5518 *cs->statusp = resp->status = NFS4ERR_LOCKED;
5519 goto out;
5520 }
5521 }
5522
5523 bva.va_mask = AT_MODE | AT_UID;
5524 error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
5525
5526 /*
5527 * If we can't get the attributes, then we can't do the
5528 * right access checking. So, we'll fail the request.
5529 */
5530 if (error) {
5531 *cs->statusp = resp->status = puterrno4(error);
5532 goto out;
5533 }
5534
5535 if (rdonly4(req, cs)) {
5536 *cs->statusp = resp->status = NFS4ERR_ROFS;
5537 goto out;
5538 }
5539
5540 if (vp->v_type != VREG) {
5541 *cs->statusp = resp->status =
5542 ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
5543 goto out;
5544 }
5545
5546 if (crgetuid(cr) != bva.va_uid &&
5547 (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct))) {
5548 *cs->statusp = resp->status = puterrno4(error);
5549 goto out;
5550 }
5551
5552 if (MANDLOCK(vp, bva.va_mode)) {
5553 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5554 goto out;
5555 }
5556
5557 if (args->data_len == 0) {
5558 *cs->statusp = resp->status = NFS4_OK;
5559 resp->count = 0;
5560 resp->committed = args->stable;
5561 resp->writeverf = Write4verf;
5562 goto out;
5563 }
5564
5565 if (args->mblk != NULL) {
5566 mblk_t *m;
5567 uint_t bytes, round_len;
5568
5569 iovcnt = 0;
5570 bytes = 0;
5571 round_len = roundup(args->data_len, BYTES_PER_XDR_UNIT);
5572 for (m = args->mblk;
5573 m != NULL && bytes < round_len;
5574 m = m->b_cont) {
5575 iovcnt++;
5576 bytes += MBLKL(m);
5577 }
5578 #ifdef DEBUG
5579 /* should have ended on an mblk boundary */
5580 if (bytes != round_len) {
5581 printf("bytes=0x%x, round_len=0x%x, req len=0x%x\n",
5582 bytes, round_len, args->data_len);
5583 printf("args=%p, args->mblk=%p, m=%p", (void *)args,
5584 (void *)args->mblk, (void *)m);
5585 ASSERT(bytes == round_len);
5586 }
5587 #endif
5588 if (iovcnt <= MAX_IOVECS) {
5589 iovp = iov;
5590 } else {
5591 iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
5592 }
5593 mblk_to_iov(args->mblk, iovcnt, iovp);
5594 } else if (args->rlist != NULL) {
5595 iovcnt = 1;
5596 iovp = iov;
5597 iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
5598 iovp->iov_len = args->data_len;
5599 } else {
5600 iovcnt = 1;
5601 iovp = iov;
5602 iovp->iov_base = args->data_val;
5603 iovp->iov_len = args->data_len;
5604 }
5605
5606 uio.uio_iov = iovp;
5607 uio.uio_iovcnt = iovcnt;
5608
5609 uio.uio_segflg = UIO_SYSSPACE;
5610 uio.uio_extflg = UIO_COPY_DEFAULT;
5611 uio.uio_loffset = args->offset;
5612 uio.uio_resid = args->data_len;
5613 uio.uio_llimit = curproc->p_fsz_ctl;
5614 rlimit = uio.uio_llimit - args->offset;
5615 if (rlimit < (u_offset_t)uio.uio_resid)
5616 uio.uio_resid = (int)rlimit;
5617
5618 if (args->stable == UNSTABLE4)
5619 ioflag = 0;
5620 else if (args->stable == FILE_SYNC4)
5621 ioflag = FSYNC;
5622 else if (args->stable == DATA_SYNC4)
5623 ioflag = FDSYNC;
5624 else {
5625 if (iovp != iov)
5626 kmem_free(iovp, sizeof (*iovp) * iovcnt);
5627 *cs->statusp = resp->status = NFS4ERR_INVAL;
5628 goto out;
5629 }
5630
5631 /*
5632 * We're changing creds because VM may fault and we need
5633 * the cred of the current thread to be used if quota
5634 * checking is enabled.
5635 */
5636 savecred = curthread->t_cred;
5637 curthread->t_cred = cr;
5638 error = do_io(FWRITE, vp, &uio, ioflag, cr, &ct);
5639 curthread->t_cred = savecred;
5640
5641 if (iovp != iov)
5642 kmem_free(iovp, sizeof (*iovp) * iovcnt);
5643
5644 if (error) {
5645 *cs->statusp = resp->status = puterrno4(error);
5646 goto out;
5647 }
5648
5649 *cs->statusp = resp->status = NFS4_OK;
5650 resp->count = args->data_len - uio.uio_resid;
5651
5652 if (ioflag == 0)
5653 resp->committed = UNSTABLE4;
5654 else
5655 resp->committed = FILE_SYNC4;
5656
5657 resp->writeverf = Write4verf;
5658
5659 out:
5660 if (in_crit)
5661 nbl_end_crit(vp);
5662
5663 DTRACE_NFSV4_2(op__write__done, struct compound_state *, cs,
5664 WRITE4res *, resp);
5665 }
5666
5667
5668 /* XXX put in a header file */
5669 extern int sec_svc_getcred(struct svc_req *, cred_t *, caddr_t *, int *);
5670
5671 void
5672 rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi,
5673 struct svc_req *req, cred_t *cr, int *rv)
5674 {
5675 uint_t i;
5676 struct compound_state cs;
5677
5678 if (rv != NULL)
5679 *rv = 0;
5680 rfs4_init_compound_state(&cs);
5681 /*
5682 * Form a reply tag by copying over the reqeuest tag.
5683 */
5684 resp->tag.utf8string_val =
5685 kmem_alloc(args->tag.utf8string_len, KM_SLEEP);
5686 resp->tag.utf8string_len = args->tag.utf8string_len;
5687 bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
5688 resp->tag.utf8string_len);
5689
5690 cs.statusp = &resp->status;
5691 cs.req = req;
5692 resp->array = NULL;
5693 resp->array_len = 0;
5694
5695 /*
5696 * XXX for now, minorversion should be zero
5697 */
5698 if (args->minorversion != NFS4_MINORVERSION) {
5699 DTRACE_NFSV4_2(compound__start, struct compound_state *,
5700 &cs, COMPOUND4args *, args);
5701 resp->status = NFS4ERR_MINOR_VERS_MISMATCH;
5702 DTRACE_NFSV4_2(compound__done, struct compound_state *,
5703 &cs, COMPOUND4res *, resp);
5704 return;
5705 }
5706
5707 if (args->array_len == 0) {
5708 resp->status = NFS4_OK;
5709 return;
5710 }
5711
5712 ASSERT(exi == NULL);
5713 ASSERT(cr == NULL);
5714
5715 cr = crget();
5716 ASSERT(cr != NULL);
5717
5718 if (sec_svc_getcred(req, cr, &cs.principal, &cs.nfsflavor) == 0) {
5719 DTRACE_NFSV4_2(compound__start, struct compound_state *,
5720 &cs, COMPOUND4args *, args);
5721 crfree(cr);
5722 DTRACE_NFSV4_2(compound__done, struct compound_state *,
5723 &cs, COMPOUND4res *, resp);
5724 svcerr_badcred(req->rq_xprt);
5725 if (rv != NULL)
5726 *rv = 1;
5727 return;
5728 }
5729 resp->array_len = args->array_len;
5730 resp->array = kmem_zalloc(args->array_len * sizeof (nfs_resop4),
5731 KM_SLEEP);
5732
5733 cs.basecr = cr;
5734
5735 DTRACE_NFSV4_2(compound__start, struct compound_state *, &cs,
5736 COMPOUND4args *, args);
5737
5738 /*
5739 * For now, NFS4 compound processing must be protected by
5740 * exported_lock because it can access more than one exportinfo
5741 * per compound and share/unshare can now change multiple
5742 * exinfo structs. The NFS2/3 code only refs 1 exportinfo
5743 * per proc (excluding public exinfo), and exi_count design
5744 * is sufficient to protect concurrent execution of NFS2/3
5745 * ops along with unexport. This lock will be removed as
5746 * part of the NFSv4 phase 2 namespace redesign work.
5747 */
5748 rw_enter(&exported_lock, RW_READER);
5749
5750 /*
5751 * If this is the first compound we've seen, we need to start all
5752 * new instances' grace periods.
5753 */
5754 if (rfs4_seen_first_compound == 0) {
5755 rfs4_grace_start_new();
5756 /*
5757 * This must be set after rfs4_grace_start_new(), otherwise
5758 * another thread could proceed past here before the former
5759 * is finished.
5760 */
5761 rfs4_seen_first_compound = 1;
5762 }
5763
5764 for (i = 0; i < args->array_len && cs.cont; i++) {
5765 nfs_argop4 *argop;
5766 nfs_resop4 *resop;
5767 uint_t op;
5768
5769 argop = &args->array[i];
5770 resop = &resp->array[i];
5771 resop->resop = argop->argop;
5772 op = (uint_t)resop->resop;
5773
5774 if (op < rfsv4disp_cnt) {
5775 /*
5776 * Count the individual ops here; NULL and COMPOUND
5777 * are counted in common_dispatch()
5778 */
5779 rfsproccnt_v4_ptr[op].value.ui64++;
5780
5781 NFS4_DEBUG(rfs4_debug > 1,
5782 (CE_NOTE, "Executing %s", rfs4_op_string[op]));
5783 (*rfsv4disptab[op].dis_proc)(argop, resop, req, &cs);
5784 NFS4_DEBUG(rfs4_debug > 1, (CE_NOTE, "%s returned %d",
5785 rfs4_op_string[op], *cs.statusp));
5786 if (*cs.statusp != NFS4_OK)
5787 cs.cont = FALSE;
5788 } else {
5789 /*
5790 * This is effectively dead code since XDR code
5791 * will have already returned BADXDR if op doesn't
5792 * decode to legal value. This only done for a
5793 * day when XDR code doesn't verify v4 opcodes.
5794 */
5795 op = OP_ILLEGAL;
5796 rfsproccnt_v4_ptr[OP_ILLEGAL_IDX].value.ui64++;
5797
5798 rfs4_op_illegal(argop, resop, req, &cs);
5799 cs.cont = FALSE;
5800 }
5801
5802 /*
5803 * If not at last op, and if we are to stop, then
5804 * compact the results array.
5805 */
5806 if ((i + 1) < args->array_len && !cs.cont) {
5807 nfs_resop4 *new_res = kmem_alloc(
5808 (i+1) * sizeof (nfs_resop4), KM_SLEEP);
5809 bcopy(resp->array,
5810 new_res, (i+1) * sizeof (nfs_resop4));
5811 kmem_free(resp->array,
5812 args->array_len * sizeof (nfs_resop4));
5813
5814 resp->array_len = i + 1;
5815 resp->array = new_res;
5816 }
5817 }
5818
5819 rw_exit(&exported_lock);
5820
5821 DTRACE_NFSV4_2(compound__done, struct compound_state *, &cs,
5822 COMPOUND4res *, resp);
5823
5824 if (cs.vp)
5825 VN_RELE(cs.vp);
5826 if (cs.saved_vp)
5827 VN_RELE(cs.saved_vp);
5828 if (cs.saved_fh.nfs_fh4_val)
5829 kmem_free(cs.saved_fh.nfs_fh4_val, NFS4_FHSIZE);
5830
5831 if (cs.basecr)
5832 crfree(cs.basecr);
5833 if (cs.cr)
5834 crfree(cs.cr);
5835 /*
5836 * done with this compound request, free the label
5837 */
5838
5839 if (req->rq_label != NULL) {
5840 kmem_free(req->rq_label, sizeof (bslabel_t));
5841 req->rq_label = NULL;
5842 }
5843 }
5844
5845 /*
5846 * XXX because of what appears to be duplicate calls to rfs4_compound_free
5847 * XXX zero out the tag and array values. Need to investigate why the
5848 * XXX calls occur, but at least prevent the panic for now.
5849 */
5850 void
5851 rfs4_compound_free(COMPOUND4res *resp)
5852 {
5853 uint_t i;
5854
5855 if (resp->tag.utf8string_val) {
5856 UTF8STRING_FREE(resp->tag)
5857 }
5858
5859 for (i = 0; i < resp->array_len; i++) {
5860 nfs_resop4 *resop;
5861 uint_t op;
5862
5863 resop = &resp->array[i];
5864 op = (uint_t)resop->resop;
5865 if (op < rfsv4disp_cnt) {
5866 (*rfsv4disptab[op].dis_resfree)(resop);
5867 }
5868 }
5869 if (resp->array != NULL) {
5870 kmem_free(resp->array, resp->array_len * sizeof (nfs_resop4));
5871 }
5872 }
5873
5874 /*
5875 * Process the value of the compound request rpc flags, as a bit-AND
5876 * of the individual per-op flags (idempotent, allowork, publicfh_ok)
5877 */
5878 void
5879 rfs4_compound_flagproc(COMPOUND4args *args, int *flagp)
5880 {
5881 int i;
5882 int flag = RPC_ALL;
5883
5884 for (i = 0; flag && i < args->array_len; i++) {
5885 uint_t op;
5886
5887 op = (uint_t)args->array[i].argop;
5888
5889 if (op < rfsv4disp_cnt)
5890 flag &= rfsv4disptab[op].dis_flags;
5891 else
5892 flag = 0;
5893 }
5894 *flagp = flag;
5895 }
5896
5897 nfsstat4
5898 rfs4_client_sysid(rfs4_client_t *cp, sysid_t *sp)
5899 {
5900 nfsstat4 e;
5901
5902 rfs4_dbe_lock(cp->rc_dbe);
5903
5904 if (cp->rc_sysidt != LM_NOSYSID) {
5905 *sp = cp->rc_sysidt;
5906 e = NFS4_OK;
5907
5908 } else if ((cp->rc_sysidt = lm_alloc_sysidt()) != LM_NOSYSID) {
5909 *sp = cp->rc_sysidt;
5910 e = NFS4_OK;
5911
5912 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
5913 "rfs4_client_sysid: allocated 0x%x\n", *sp));
5914 } else
5915 e = NFS4ERR_DELAY;
5916
5917 rfs4_dbe_unlock(cp->rc_dbe);
5918 return (e);
5919 }
5920
#if defined(DEBUG) && ! defined(lint)
/*
 * Debug aid: log a one-line description of a record lock request.
 *
 *	str		caller supplied prefix for the message
 *	operation	the fcntl-style command (F_GETLK, F_SETLK, ...)
 *	flk		the lock being described; l_whence must be 0
 *
 * A zero l_len is printed as all ones.
 */
static void
lock_print(char *str, int operation, struct flock64 *flk)
{
	char *op, *type;

	switch (operation) {
	case F_GETLK:
		op = "F_GETLK";
		break;
	case F_SETLK:
		op = "F_SETLK";
		break;
	case F_SETLK_NBMAND:
		op = "F_SETLK_NBMAND";
		break;
	default:
		op = "F_UNKNOWN";
		break;
	}

	switch (flk->l_type) {
	case F_UNLCK:
		type = "F_UNLCK";
		break;
	case F_RDLCK:
		type = "F_RDLCK";
		break;
	case F_WRLCK:
		type = "F_WRLCK";
		break;
	default:
		type = "F_UNKNOWN";
		break;
	}

	ASSERT(flk->l_whence == 0);
	cmn_err(CE_NOTE, "%s: %s, type = %s, off = %llx len = %llx pid = %d",
	    str, op, type, (longlong_t)flk->l_start,
	    flk->l_len ? (longlong_t)flk->l_len : ~0LL, flk->l_pid);
}

/*
 * Wrapped in do/while so the macro is a single statement and cannot
 * capture an "else" that follows it at the call site.
 */
#define	LOCK_PRINT(d, s, t, f)			\
	do {					\
		if (d)				\
			lock_print(s, t, f);	\
	} while (0)
#else
#define	LOCK_PRINT(d, s, t, f)
#endif
5957
5958 /*ARGSUSED*/
5959 static bool_t
5960 creds_ok(cred_set_t cr_set, struct svc_req *req, struct compound_state *cs)
5961 {
5962 return (TRUE);
5963 }
5964
5965 /*
5966 * Look up the pathname using the vp in cs as the directory vnode.
5967 * cs->vp will be the vnode for the file on success
5968 */
5969
5970 static nfsstat4
5971 rfs4_lookup(component4 *component, struct svc_req *req,
5972 struct compound_state *cs)
5973 {
5974 char *nm;
5975 uint32_t len;
5976 nfsstat4 status;
5977 struct sockaddr *ca;
5978 char *name;
5979
5980 if (cs->vp == NULL) {
5981 return (NFS4ERR_NOFILEHANDLE);
5982 }
5983 if (cs->vp->v_type != VDIR) {
5984 return (NFS4ERR_NOTDIR);
5985 }
5986
5987 status = utf8_dir_verify(component);
5988 if (status != NFS4_OK)
5989 return (status);
5990
5991 nm = utf8_to_fn(component, &len, NULL);
5992 if (nm == NULL) {
5993 return (NFS4ERR_INVAL);
5994 }
5995
5996 if (len > MAXNAMELEN) {
5997 kmem_free(nm, len);
5998 return (NFS4ERR_NAMETOOLONG);
5999 }
6000
6001 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6002 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6003 MAXPATHLEN + 1);
6004
6005 if (name == NULL) {
6006 kmem_free(nm, len);
6007 return (NFS4ERR_INVAL);
6008 }
6009
6010 status = do_rfs4_op_lookup(name, req, cs);
6011
6012 if (name != nm)
6013 kmem_free(name, MAXPATHLEN + 1);
6014
6015 kmem_free(nm, len);
6016
6017 return (status);
6018 }
6019
6020 static nfsstat4
6021 rfs4_lookupfile(component4 *component, struct svc_req *req,
6022 struct compound_state *cs, uint32_t access, change_info4 *cinfo)
6023 {
6024 nfsstat4 status;
6025 vnode_t *dvp = cs->vp;
6026 vattr_t bva, ava, fva;
6027 int error;
6028
6029 /* Get "before" change value */
6030 bva.va_mask = AT_CTIME|AT_SEQ;
6031 error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6032 if (error)
6033 return (puterrno4(error));
6034
6035 /* rfs4_lookup may VN_RELE directory */
6036 VN_HOLD(dvp);
6037
6038 status = rfs4_lookup(component, req, cs);
6039 if (status != NFS4_OK) {
6040 VN_RELE(dvp);
6041 return (status);
6042 }
6043
6044 /*
6045 * Get "after" change value, if it fails, simply return the
6046 * before value.
6047 */
6048 ava.va_mask = AT_CTIME|AT_SEQ;
6049 if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6050 ava.va_ctime = bva.va_ctime;
6051 ava.va_seq = 0;
6052 }
6053 VN_RELE(dvp);
6054
6055 /*
6056 * Validate the file is a file
6057 */
6058 fva.va_mask = AT_TYPE|AT_MODE;
6059 error = VOP_GETATTR(cs->vp, &fva, 0, cs->cr, NULL);
6060 if (error)
6061 return (puterrno4(error));
6062
6063 if (fva.va_type != VREG) {
6064 if (fva.va_type == VDIR)
6065 return (NFS4ERR_ISDIR);
6066 if (fva.va_type == VLNK)
6067 return (NFS4ERR_SYMLINK);
6068 return (NFS4ERR_INVAL);
6069 }
6070
6071 NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime);
6072 NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6073
6074 /*
6075 * It is undefined if VOP_LOOKUP will change va_seq, so
6076 * cinfo.atomic = TRUE only if we have
6077 * non-zero va_seq's, and they have not changed.
6078 */
6079 if (bva.va_seq && ava.va_seq && ava.va_seq == bva.va_seq)
6080 cinfo->atomic = TRUE;
6081 else
6082 cinfo->atomic = FALSE;
6083
6084 /* Check for mandatory locking */
6085 cs->mandlock = MANDLOCK(cs->vp, fva.va_mode);
6086 return (check_open_access(access, cs, req));
6087 }
6088
6089 static nfsstat4
6090 create_vnode(vnode_t *dvp, char *nm, vattr_t *vap, createmode4 mode,
6091 cred_t *cr, vnode_t **vpp, bool_t *created)
6092 {
6093 int error;
6094 nfsstat4 status = NFS4_OK;
6095 vattr_t va;
6096
6097 tryagain:
6098
6099 /*
6100 * The file open mode used is VWRITE. If the client needs
6101 * some other semantic, then it should do the access checking
6102 * itself. It would have been nice to have the file open mode
6103 * passed as part of the arguments.
6104 */
6105
6106 *created = TRUE;
6107 error = VOP_CREATE(dvp, nm, vap, EXCL, VWRITE, vpp, cr, 0, NULL, NULL);
6108
6109 if (error) {
6110 *created = FALSE;
6111
6112 /*
6113 * If we got something other than file already exists
6114 * then just return this error. Otherwise, we got
6115 * EEXIST. If we were doing a GUARDED create, then
6116 * just return this error. Otherwise, we need to
6117 * make sure that this wasn't a duplicate of an
6118 * exclusive create request.
6119 *
6120 * The assumption is made that a non-exclusive create
6121 * request will never return EEXIST.
6122 */
6123
6124 if (error != EEXIST || mode == GUARDED4) {
6125 status = puterrno4(error);
6126 return (status);
6127 }
6128 error = VOP_LOOKUP(dvp, nm, vpp, NULL, 0, NULL, cr,
6129 NULL, NULL, NULL);
6130
6131 if (error) {
6132 /*
6133 * We couldn't find the file that we thought that
6134 * we just created. So, we'll just try creating
6135 * it again.
6136 */
6137 if (error == ENOENT)
6138 goto tryagain;
6139
6140 status = puterrno4(error);
6141 return (status);
6142 }
6143
6144 if (mode == UNCHECKED4) {
6145 /* existing object must be regular file */
6146 if ((*vpp)->v_type != VREG) {
6147 if ((*vpp)->v_type == VDIR)
6148 status = NFS4ERR_ISDIR;
6149 else if ((*vpp)->v_type == VLNK)
6150 status = NFS4ERR_SYMLINK;
6151 else
6152 status = NFS4ERR_INVAL;
6153 VN_RELE(*vpp);
6154 return (status);
6155 }
6156
6157 return (NFS4_OK);
6158 }
6159
6160 /* Check for duplicate request */
6161 va.va_mask = AT_MTIME;
6162 error = VOP_GETATTR(*vpp, &va, 0, cr, NULL);
6163 if (!error) {
6164 /* We found the file */
6165 const timestruc_t *mtime = &vap->va_mtime;
6166
6167 if (va.va_mtime.tv_sec != mtime->tv_sec ||
6168 va.va_mtime.tv_nsec != mtime->tv_nsec) {
6169 /* but its not our creation */
6170 VN_RELE(*vpp);
6171 return (NFS4ERR_EXIST);
6172 }
6173 *created = TRUE; /* retrans of create == created */
6174 return (NFS4_OK);
6175 }
6176 VN_RELE(*vpp);
6177 return (NFS4ERR_EXIST);
6178 }
6179
6180 return (NFS4_OK);
6181 }
6182
6183 static nfsstat4
6184 check_open_access(uint32_t access, struct compound_state *cs,
6185 struct svc_req *req)
6186 {
6187 int error;
6188 vnode_t *vp;
6189 bool_t readonly;
6190 cred_t *cr = cs->cr;
6191
6192 /* For now we don't allow mandatory locking as per V2/V3 */
6193 if (cs->access == CS_ACCESS_DENIED || cs->mandlock) {
6194 return (NFS4ERR_ACCESS);
6195 }
6196
6197 vp = cs->vp;
6198 ASSERT(cr != NULL && vp->v_type == VREG);
6199
6200 /*
6201 * If the file system is exported read only and we are trying
6202 * to open for write, then return NFS4ERR_ROFS
6203 */
6204
6205 readonly = rdonly4(req, cs);
6206
6207 if ((access & OPEN4_SHARE_ACCESS_WRITE) && readonly)
6208 return (NFS4ERR_ROFS);
6209
6210 if (access & OPEN4_SHARE_ACCESS_READ) {
6211 if ((VOP_ACCESS(vp, VREAD, 0, cr, NULL) != 0) &&
6212 (VOP_ACCESS(vp, VEXEC, 0, cr, NULL) != 0)) {
6213 return (NFS4ERR_ACCESS);
6214 }
6215 }
6216
6217 if (access & OPEN4_SHARE_ACCESS_WRITE) {
6218 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
6219 if (error)
6220 return (NFS4ERR_ACCESS);
6221 }
6222
6223 return (NFS4_OK);
6224 }
6225
6226 static nfsstat4
6227 rfs4_createfile(OPEN4args *args, struct svc_req *req, struct compound_state *cs,
6228 change_info4 *cinfo, bitmap4 *attrset, clientid4 clientid)
6229 {
6230 struct nfs4_svgetit_arg sarg;
6231 struct nfs4_ntov_table ntov;
6232
6233 bool_t ntov_table_init = FALSE;
6234 struct statvfs64 sb;
6235 nfsstat4 status;
6236 vnode_t *vp;
6237 vattr_t bva, ava, iva, cva, *vap;
6238 vnode_t *dvp;
6239 timespec32_t *mtime;
6240 char *nm = NULL;
6241 uint_t buflen;
6242 bool_t created;
6243 bool_t setsize = FALSE;
6244 len_t reqsize;
6245 int error;
6246 bool_t trunc;
6247 caller_context_t ct;
6248 component4 *component;
6249 bslabel_t *clabel;
6250 struct sockaddr *ca;
6251 char *name = NULL;
6252
6253 sarg.sbp = &sb;
6254 sarg.is_referral = B_FALSE;
6255
6256 dvp = cs->vp;
6257
6258 /* Check if the file system is read only */
6259 if (rdonly4(req, cs))
6260 return (NFS4ERR_ROFS);
6261
6262 /* check the label of including directory */
6263 if (is_system_labeled()) {
6264 ASSERT(req->rq_label != NULL);
6265 clabel = req->rq_label;
6266 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
6267 "got client label from request(1)",
6268 struct svc_req *, req);
6269 if (!blequal(&l_admin_low->tsl_label, clabel)) {
6270 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
6271 cs->exi)) {
6272 return (NFS4ERR_ACCESS);
6273 }
6274 }
6275 }
6276
6277 /*
6278 * Get the last component of path name in nm. cs will reference
6279 * the including directory on success.
6280 */
6281 component = &args->open_claim4_u.file;
6282 status = utf8_dir_verify(component);
6283 if (status != NFS4_OK)
6284 return (status);
6285
6286 nm = utf8_to_fn(component, &buflen, NULL);
6287
6288 if (nm == NULL)
6289 return (NFS4ERR_RESOURCE);
6290
6291 if (buflen > MAXNAMELEN) {
6292 kmem_free(nm, buflen);
6293 return (NFS4ERR_NAMETOOLONG);
6294 }
6295
6296 bva.va_mask = AT_TYPE|AT_CTIME|AT_SEQ;
6297 error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6298 if (error) {
6299 kmem_free(nm, buflen);
6300 return (puterrno4(error));
6301 }
6302
6303 if (bva.va_type != VDIR) {
6304 kmem_free(nm, buflen);
6305 return (NFS4ERR_NOTDIR);
6306 }
6307
6308 NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime)
6309
6310 switch (args->mode) {
6311 case GUARDED4:
6312 /*FALLTHROUGH*/
6313 case UNCHECKED4:
6314 nfs4_ntov_table_init(&ntov);
6315 ntov_table_init = TRUE;
6316
6317 *attrset = 0;
6318 status = do_rfs4_set_attrs(attrset,
6319 &args->createhow4_u.createattrs,
6320 cs, &sarg, &ntov, NFS4ATTR_SETIT);
6321
6322 if (status == NFS4_OK && (sarg.vap->va_mask & AT_TYPE) &&
6323 sarg.vap->va_type != VREG) {
6324 if (sarg.vap->va_type == VDIR)
6325 status = NFS4ERR_ISDIR;
6326 else if (sarg.vap->va_type == VLNK)
6327 status = NFS4ERR_SYMLINK;
6328 else
6329 status = NFS4ERR_INVAL;
6330 }
6331
6332 if (status != NFS4_OK) {
6333 kmem_free(nm, buflen);
6334 nfs4_ntov_table_free(&ntov, &sarg);
6335 *attrset = 0;
6336 return (status);
6337 }
6338
6339 vap = sarg.vap;
6340 vap->va_type = VREG;
6341 vap->va_mask |= AT_TYPE;
6342
6343 if ((vap->va_mask & AT_MODE) == 0) {
6344 vap->va_mask |= AT_MODE;
6345 vap->va_mode = (mode_t)0600;
6346 }
6347
6348 if (vap->va_mask & AT_SIZE) {
6349
6350 /* Disallow create with a non-zero size */
6351
6352 if ((reqsize = sarg.vap->va_size) != 0) {
6353 kmem_free(nm, buflen);
6354 nfs4_ntov_table_free(&ntov, &sarg);
6355 *attrset = 0;
6356 return (NFS4ERR_INVAL);
6357 }
6358 setsize = TRUE;
6359 }
6360 break;
6361
6362 case EXCLUSIVE4:
6363 /* prohibit EXCL create of named attributes */
6364 if (dvp->v_flag & V_XATTRDIR) {
6365 kmem_free(nm, buflen);
6366 *attrset = 0;
6367 return (NFS4ERR_INVAL);
6368 }
6369
6370 cva.va_mask = AT_TYPE | AT_MTIME | AT_MODE;
6371 cva.va_type = VREG;
6372 /*
6373 * Ensure no time overflows. Assumes underlying
6374 * filesystem supports at least 32 bits.
6375 * Truncate nsec to usec resolution to allow valid
6376 * compares even if the underlying filesystem truncates.
6377 */
6378 mtime = (timespec32_t *)&args->createhow4_u.createverf;
6379 cva.va_mtime.tv_sec = mtime->tv_sec % TIME32_MAX;
6380 cva.va_mtime.tv_nsec = (mtime->tv_nsec / 1000) * 1000;
6381 cva.va_mode = (mode_t)0;
6382 vap = &cva;
6383
6384 /*
6385 * For EXCL create, attrset is set to the server attr
6386 * used to cache the client's verifier.
6387 */
6388 *attrset = FATTR4_TIME_MODIFY_MASK;
6389 break;
6390 }
6391
6392 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6393 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6394 MAXPATHLEN + 1);
6395
6396 if (name == NULL) {
6397 kmem_free(nm, buflen);
6398 return (NFS4ERR_SERVERFAULT);
6399 }
6400
6401 status = create_vnode(dvp, name, vap, args->mode,
6402 cs->cr, &vp, &created);
6403 if (nm != name)
6404 kmem_free(name, MAXPATHLEN + 1);
6405 kmem_free(nm, buflen);
6406
6407 if (status != NFS4_OK) {
6408 if (ntov_table_init)
6409 nfs4_ntov_table_free(&ntov, &sarg);
6410 *attrset = 0;
6411 return (status);
6412 }
6413
6414 trunc = (setsize && !created);
6415
6416 if (args->mode != EXCLUSIVE4) {
6417 bitmap4 createmask = args->createhow4_u.createattrs.attrmask;
6418
6419 /*
6420 * True verification that object was created with correct
6421 * attrs is impossible. The attrs could have been changed
6422 * immediately after object creation. If attributes did
6423 * not verify, the only recourse for the server is to
6424 * destroy the object. Maybe if some attrs (like gid)
6425 * are set incorrectly, the object should be destroyed;
6426 * however, seems bad as a default policy. Do we really
6427 * want to destroy an object over one of the times not
6428 * verifying correctly? For these reasons, the server
6429 * currently sets bits in attrset for createattrs
6430 * that were set; however, no verification is done.
6431 *
6432 * vmask_to_nmask accounts for vattr bits set on create
6433 * [do_rfs4_set_attrs() only sets resp bits for
6434 * non-vattr/vfs bits.]
6435 * Mask off any bits we set by default so as not to return
6436 * more attrset bits than were requested in createattrs
6437 */
6438 if (created) {
6439 nfs4_vmask_to_nmask(sarg.vap->va_mask, attrset);
6440 *attrset &= createmask;
6441 } else {
6442 /*
6443 * We did not create the vnode (we tried but it
6444 * already existed). In this case, the only createattr
6445 * that the spec allows the server to set is size,
6446 * and even then, it can only be set if it is 0.
6447 */
6448 *attrset = 0;
6449 if (trunc)
6450 *attrset = FATTR4_SIZE_MASK;
6451 }
6452 }
6453 if (ntov_table_init)
6454 nfs4_ntov_table_free(&ntov, &sarg);
6455
6456 /*
6457 * Get the initial "after" sequence number, if it fails,
6458 * set to zero, time to before.
6459 */
6460 iva.va_mask = AT_CTIME|AT_SEQ;
6461 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL)) {
6462 iva.va_seq = 0;
6463 iva.va_ctime = bva.va_ctime;
6464 }
6465
6466 /*
6467 * create_vnode attempts to create the file exclusive,
6468 * if it already exists the VOP_CREATE will fail and
6469 * may not increase va_seq. It is atomic if
6470 * we haven't changed the directory, but if it has changed
6471 * we don't know what changed it.
6472 */
6473 if (!created) {
6474 if (bva.va_seq && iva.va_seq &&
6475 bva.va_seq == iva.va_seq)
6476 cinfo->atomic = TRUE;
6477 else
6478 cinfo->atomic = FALSE;
6479 NFS4_SET_FATTR4_CHANGE(cinfo->after, iva.va_ctime);
6480 } else {
6481 /*
6482 * The entry was created, we need to sync the
6483 * directory metadata.
6484 */
6485 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
6486
6487 /*
6488 * Get "after" change value, if it fails, simply return the
6489 * before value.
6490 */
6491 ava.va_mask = AT_CTIME|AT_SEQ;
6492 if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6493 ava.va_ctime = bva.va_ctime;
6494 ava.va_seq = 0;
6495 }
6496
6497 NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6498
6499 /*
6500 * The cinfo->atomic = TRUE only if we have
6501 * non-zero va_seq's, and it has incremented by exactly one
6502 * during the create_vnode and it didn't
6503 * change during the VOP_FSYNC.
6504 */
6505 if (bva.va_seq && iva.va_seq && ava.va_seq &&
6506 iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
6507 cinfo->atomic = TRUE;
6508 else
6509 cinfo->atomic = FALSE;
6510 }
6511
6512 /* Check for mandatory locking and that the size gets set. */
6513 cva.va_mask = AT_MODE;
6514 if (setsize)
6515 cva.va_mask |= AT_SIZE;
6516
6517 /* Assume the worst */
6518 cs->mandlock = TRUE;
6519
6520 if (VOP_GETATTR(vp, &cva, 0, cs->cr, NULL) == 0) {
6521 cs->mandlock = MANDLOCK(cs->vp, cva.va_mode);
6522
6523 /*
6524 * Truncate the file if necessary; this would be
6525 * the case for create over an existing file.
6526 */
6527
6528 if (trunc) {
6529 int in_crit = 0;
6530 rfs4_file_t *fp;
6531 bool_t create = FALSE;
6532
6533 /*
6534 * We are writing over an existing file.
6535 * Check to see if we need to recall a delegation.
6536 */
6537 rfs4_hold_deleg_policy();
6538 if ((fp = rfs4_findfile(vp, NULL, &create)) != NULL) {
6539 if (rfs4_check_delegated_byfp(FWRITE, fp,
6540 (reqsize == 0), FALSE, FALSE, &clientid)) {
6541 rfs4_file_rele(fp);
6542 rfs4_rele_deleg_policy();
6543 VN_RELE(vp);
6544 *attrset = 0;
6545 return (NFS4ERR_DELAY);
6546 }
6547 rfs4_file_rele(fp);
6548 }
6549 rfs4_rele_deleg_policy();
6550
6551 if (nbl_need_check(vp)) {
6552 in_crit = 1;
6553
6554 ASSERT(reqsize == 0);
6555
6556 nbl_start_crit(vp, RW_READER);
6557 if (nbl_conflict(vp, NBL_WRITE, 0,
6558 cva.va_size, 0, NULL)) {
6559 in_crit = 0;
6560 nbl_end_crit(vp);
6561 VN_RELE(vp);
6562 *attrset = 0;
6563 return (NFS4ERR_ACCESS);
6564 }
6565 }
6566 ct.cc_sysid = 0;
6567 ct.cc_pid = 0;
6568 ct.cc_caller_id = nfs4_srv_caller_id;
6569 ct.cc_flags = CC_DONTBLOCK;
6570
6571 cva.va_mask = AT_SIZE;
6572 cva.va_size = reqsize;
6573 (void) VOP_SETATTR(vp, &cva, 0, cs->cr, &ct);
6574 if (in_crit)
6575 nbl_end_crit(vp);
6576 }
6577 }
6578
6579 error = makefh4(&cs->fh, vp, cs->exi);
6580
6581 /*
6582 * Force modified data and metadata out to stable storage.
6583 */
6584 (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
6585
6586 if (error) {
6587 VN_RELE(vp);
6588 *attrset = 0;
6589 return (puterrno4(error));
6590 }
6591
6592 /* if parent dir is attrdir, set namedattr fh flag */
6593 if (dvp->v_flag & V_XATTRDIR)
6594 set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
6595
6596 if (cs->vp)
6597 VN_RELE(cs->vp);
6598
6599 cs->vp = vp;
6600
6601 /*
6602 * if we did not create the file, we will need to check
6603 * the access bits on the file
6604 */
6605
6606 if (!created) {
6607 if (setsize)
6608 args->share_access |= OPEN4_SHARE_ACCESS_WRITE;
6609 status = check_open_access(args->share_access, cs, req);
6610 if (status != NFS4_OK)
6611 *attrset = 0;
6612 }
6613 return (status);
6614 }
6615
6616 /*ARGSUSED*/
6617 static void
6618 rfs4_do_open(struct compound_state *cs, struct svc_req *req,
6619 rfs4_openowner_t *oo, delegreq_t deleg,
6620 uint32_t access, uint32_t deny,
6621 OPEN4res *resp, int deleg_cur)
6622 {
6623 /* XXX Currently not using req */
6624 rfs4_state_t *sp;
6625 rfs4_file_t *fp;
6626 bool_t screate = TRUE;
6627 bool_t fcreate = TRUE;
6628 uint32_t open_a, share_a;
6629 uint32_t open_d, share_d;
6630 rfs4_deleg_state_t *dsp;
6631 sysid_t sysid;
6632 nfsstat4 status;
6633 caller_context_t ct;
6634 int fflags = 0;
6635 int recall = 0;
6636 int err;
6637 int first_open;
6638
6639 /* get the file struct and hold a lock on it during initial open */
6640 fp = rfs4_findfile_withlock(cs->vp, &cs->fh, &fcreate);
6641 if (fp == NULL) {
6642 resp->status = NFS4ERR_RESOURCE;
6643 DTRACE_PROBE1(nfss__e__do__open1, nfsstat4, resp->status);
6644 return;
6645 }
6646
6647 sp = rfs4_findstate_by_owner_file(oo, fp, &screate);
6648 if (sp == NULL) {
6649 resp->status = NFS4ERR_RESOURCE;
6650 DTRACE_PROBE1(nfss__e__do__open2, nfsstat4, resp->status);
6651 /* No need to keep any reference */
6652 rw_exit(&fp->rf_file_rwlock);
6653 rfs4_file_rele(fp);
6654 return;
6655 }
6656
6657 /* try to get the sysid before continuing */
6658 if ((status = rfs4_client_sysid(oo->ro_client, &sysid)) != NFS4_OK) {
6659 resp->status = status;
6660 rfs4_file_rele(fp);
6661 /* Not a fully formed open; "close" it */
6662 if (screate == TRUE)
6663 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6664 rfs4_state_rele(sp);
6665 return;
6666 }
6667
6668 /* Calculate the fflags for this OPEN. */
6669 if (access & OPEN4_SHARE_ACCESS_READ)
6670 fflags |= FREAD;
6671 if (access & OPEN4_SHARE_ACCESS_WRITE)
6672 fflags |= FWRITE;
6673
6674 rfs4_dbe_lock(sp->rs_dbe);
6675
6676 /*
6677 * Calculate the new deny and access mode that this open is adding to
6678 * the file for this open owner;
6679 */
6680 open_d = (deny & ~sp->rs_open_deny);
6681 open_a = (access & ~sp->rs_open_access);
6682
6683 /*
6684 * Calculate the new share access and share deny modes that this open
6685 * is adding to the file for this open owner;
6686 */
6687 share_a = (access & ~sp->rs_share_access);
6688 share_d = (deny & ~sp->rs_share_deny);
6689
6690 first_open = (sp->rs_open_access & OPEN4_SHARE_ACCESS_BOTH) == 0;
6691
6692 /*
6693 * Check to see the client has already sent an open for this
6694 * open owner on this file with the same share/deny modes.
6695 * If so, we don't need to check for a conflict and we don't
6696 * need to add another shrlock. If not, then we need to
6697 * check for conflicts in deny and access before checking for
6698 * conflicts in delegation. We don't want to recall a
6699 * delegation based on an open that will eventually fail based
6700 * on shares modes.
6701 */
6702
6703 if (share_a || share_d) {
6704 if ((err = rfs4_share(sp, access, deny)) != 0) {
6705 rfs4_dbe_unlock(sp->rs_dbe);
6706 resp->status = err;
6707
6708 rfs4_file_rele(fp);
6709 /* Not a fully formed open; "close" it */
6710 if (screate == TRUE)
6711 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6712 rfs4_state_rele(sp);
6713 return;
6714 }
6715 }
6716
6717 rfs4_dbe_lock(fp->rf_dbe);
6718
6719 /*
6720 * Check to see if this file is delegated and if so, if a
6721 * recall needs to be done.
6722 */
6723 if (rfs4_check_recall(sp, access)) {
6724 rfs4_dbe_unlock(fp->rf_dbe);
6725 rfs4_dbe_unlock(sp->rs_dbe);
6726 rfs4_recall_deleg(fp, FALSE, sp->rs_owner->ro_client);
6727 delay(NFS4_DELEGATION_CONFLICT_DELAY);
6728 rfs4_dbe_lock(sp->rs_dbe);
6729
6730 /* if state closed while lock was dropped */
6731 if (sp->rs_closed) {
6732 if (share_a || share_d)
6733 (void) rfs4_unshare(sp);
6734 rfs4_dbe_unlock(sp->rs_dbe);
6735 rfs4_file_rele(fp);
6736 /* Not a fully formed open; "close" it */
6737 if (screate == TRUE)
6738 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6739 rfs4_state_rele(sp);
6740 resp->status = NFS4ERR_OLD_STATEID;
6741 return;
6742 }
6743
6744 rfs4_dbe_lock(fp->rf_dbe);
6745 /* Let's see if the delegation was returned */
6746 if (rfs4_check_recall(sp, access)) {
6747 rfs4_dbe_unlock(fp->rf_dbe);
6748 if (share_a || share_d)
6749 (void) rfs4_unshare(sp);
6750 rfs4_dbe_unlock(sp->rs_dbe);
6751 rfs4_file_rele(fp);
6752 rfs4_update_lease(sp->rs_owner->ro_client);
6753
6754 /* Not a fully formed open; "close" it */
6755 if (screate == TRUE)
6756 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6757 rfs4_state_rele(sp);
6758 resp->status = NFS4ERR_DELAY;
6759 return;
6760 }
6761 }
6762 /*
6763 * the share check passed and any delegation conflict has been
6764 * taken care of, now call vop_open.
6765 * if this is the first open then call vop_open with fflags.
6766 * if not, call vn_open_upgrade with just the upgrade flags.
6767 *
6768 * if the file has been opened already, it will have the current
6769 * access mode in the state struct. if it has no share access, then
6770 * this is a new open.
6771 *
6772 * However, if this is open with CLAIM_DLEGATE_CUR, then don't
6773 * call VOP_OPEN(), just do the open upgrade.
6774 */
6775 if (first_open && !deleg_cur) {
6776 ct.cc_sysid = sysid;
6777 ct.cc_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
6778 ct.cc_caller_id = nfs4_srv_caller_id;
6779 ct.cc_flags = CC_DONTBLOCK;
6780 err = VOP_OPEN(&cs->vp, fflags, cs->cr, &ct);
6781 if (err) {
6782 rfs4_dbe_unlock(fp->rf_dbe);
6783 if (share_a || share_d)
6784 (void) rfs4_unshare(sp);
6785 rfs4_dbe_unlock(sp->rs_dbe);
6786 rfs4_file_rele(fp);
6787
6788 /* Not a fully formed open; "close" it */
6789 if (screate == TRUE)
6790 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6791 rfs4_state_rele(sp);
6792 /* check if a monitor detected a delegation conflict */
6793 if (err == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
6794 resp->status = NFS4ERR_DELAY;
6795 else
6796 resp->status = NFS4ERR_SERVERFAULT;
6797 return;
6798 }
6799 } else { /* open upgrade */
6800 /*
6801 * calculate the fflags for the new mode that is being added
6802 * by this upgrade.
6803 */
6804 fflags = 0;
6805 if (open_a & OPEN4_SHARE_ACCESS_READ)
6806 fflags |= FREAD;
6807 if (open_a & OPEN4_SHARE_ACCESS_WRITE)
6808 fflags |= FWRITE;
6809 vn_open_upgrade(cs->vp, fflags);
6810 }
6811 sp->rs_open_access |= access;
6812 sp->rs_open_deny |= deny;
6813
6814 if (open_d & OPEN4_SHARE_DENY_READ)
6815 fp->rf_deny_read++;
6816 if (open_d & OPEN4_SHARE_DENY_WRITE)
6817 fp->rf_deny_write++;
6818 fp->rf_share_deny |= deny;
6819
6820 if (open_a & OPEN4_SHARE_ACCESS_READ)
6821 fp->rf_access_read++;
6822 if (open_a & OPEN4_SHARE_ACCESS_WRITE)
6823 fp->rf_access_write++;
6824 fp->rf_share_access |= access;
6825
6826 /*
6827 * Check for delegation here. if the deleg argument is not
6828 * DELEG_ANY, then this is a reclaim from a client and
6829 * we must honor the delegation requested. If necessary we can
6830 * set the recall flag.
6831 */
6832
6833 dsp = rfs4_grant_delegation(deleg, sp, &recall);
6834
6835 cs->deleg = (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE);
6836
6837 next_stateid(&sp->rs_stateid);
6838
6839 resp->stateid = sp->rs_stateid.stateid;
6840
6841 rfs4_dbe_unlock(fp->rf_dbe);
6842 rfs4_dbe_unlock(sp->rs_dbe);
6843
6844 if (dsp) {
6845 rfs4_set_deleg_response(dsp, &resp->delegation, NULL, recall);
6846 rfs4_deleg_state_rele(dsp);
6847 }
6848
6849 rfs4_file_rele(fp);
6850 rfs4_state_rele(sp);
6851
6852 resp->status = NFS4_OK;
6853 }
6854
6855 /*ARGSUSED*/
6856 static void
6857 rfs4_do_opennull(struct compound_state *cs, struct svc_req *req,
6858 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6859 {
6860 change_info4 *cinfo = &resp->cinfo;
6861 bitmap4 *attrset = &resp->attrset;
6862
6863 if (args->opentype == OPEN4_NOCREATE)
6864 resp->status = rfs4_lookupfile(&args->open_claim4_u.file,
6865 req, cs, args->share_access, cinfo);
6866 else {
6867 /* inhibit delegation grants during exclusive create */
6868
6869 if (args->mode == EXCLUSIVE4)
6870 rfs4_disable_delegation();
6871
6872 resp->status = rfs4_createfile(args, req, cs, cinfo, attrset,
6873 oo->ro_client->rc_clientid);
6874 }
6875
6876 if (resp->status == NFS4_OK) {
6877
6878 /* cs->vp cs->fh now reference the desired file */
6879
6880 rfs4_do_open(cs, req, oo,
6881 oo->ro_need_confirm ? DELEG_NONE : DELEG_ANY,
6882 args->share_access, args->share_deny, resp, 0);
6883
6884 /*
6885 * If rfs4_createfile set attrset, we must
6886 * clear this attrset before the response is copied.
6887 */
6888 if (resp->status != NFS4_OK && resp->attrset) {
6889 resp->attrset = 0;
6890 }
6891 }
6892 else
6893 *cs->statusp = resp->status;
6894
6895 if (args->mode == EXCLUSIVE4)
6896 rfs4_enable_delegation();
6897 }
6898
6899 /*ARGSUSED*/
6900 static void
6901 rfs4_do_openprev(struct compound_state *cs, struct svc_req *req,
6902 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6903 {
6904 change_info4 *cinfo = &resp->cinfo;
6905 vattr_t va;
6906 vtype_t v_type = cs->vp->v_type;
6907 int error = 0;
6908
6909 /* Verify that we have a regular file */
6910 if (v_type != VREG) {
6911 if (v_type == VDIR)
6912 resp->status = NFS4ERR_ISDIR;
6913 else if (v_type == VLNK)
6914 resp->status = NFS4ERR_SYMLINK;
6915 else
6916 resp->status = NFS4ERR_INVAL;
6917 return;
6918 }
6919
6920 va.va_mask = AT_MODE|AT_UID;
6921 error = VOP_GETATTR(cs->vp, &va, 0, cs->cr, NULL);
6922 if (error) {
6923 resp->status = puterrno4(error);
6924 return;
6925 }
6926
6927 cs->mandlock = MANDLOCK(cs->vp, va.va_mode);
6928
6929 /*
6930 * Check if we have access to the file, Note the the file
6931 * could have originally been open UNCHECKED or GUARDED
6932 * with mode bits that will now fail, but there is nothing
6933 * we can really do about that except in the case that the
6934 * owner of the file is the one requesting the open.
6935 */
6936 if (crgetuid(cs->cr) != va.va_uid) {
6937 resp->status = check_open_access(args->share_access, cs, req);
6938 if (resp->status != NFS4_OK) {
6939 return;
6940 }
6941 }
6942
6943 /*
6944 * cinfo on a CLAIM_PREVIOUS is undefined, initialize to zero
6945 */
6946 cinfo->before = 0;
6947 cinfo->after = 0;
6948 cinfo->atomic = FALSE;
6949
6950 rfs4_do_open(cs, req, oo,
6951 NFS4_DELEG4TYPE2REQTYPE(args->open_claim4_u.delegate_type),
6952 args->share_access, args->share_deny, resp, 0);
6953 }
6954
6955 static void
6956 rfs4_do_opendelcur(struct compound_state *cs, struct svc_req *req,
6957 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6958 {
6959 int error;
6960 nfsstat4 status;
6961 stateid4 stateid =
6962 args->open_claim4_u.delegate_cur_info.delegate_stateid;
6963 rfs4_deleg_state_t *dsp;
6964
6965 /*
6966 * Find the state info from the stateid and confirm that the
6967 * file is delegated. If the state openowner is the same as
6968 * the supplied openowner we're done. If not, get the file
6969 * info from the found state info. Use that file info to
6970 * create the state for this lock owner. Note solaris doen't
6971 * really need the pathname to find the file. We may want to
6972 * lookup the pathname and make sure that the vp exist and
6973 * matches the vp in the file structure. However it is
6974 * possible that the pathname nolonger exists (local process
6975 * unlinks the file), so this may not be that useful.
6976 */
6977
6978 status = rfs4_get_deleg_state(&stateid, &dsp);
6979 if (status != NFS4_OK) {
6980 resp->status = status;
6981 return;
6982 }
6983
6984 ASSERT(dsp->rds_finfo->rf_dinfo.rd_dtype != OPEN_DELEGATE_NONE);
6985
6986 /*
6987 * New lock owner, create state. Since this was probably called
6988 * in response to a CB_RECALL we set deleg to DELEG_NONE
6989 */
6990
6991 ASSERT(cs->vp != NULL);
6992 VN_RELE(cs->vp);
6993 VN_HOLD(dsp->rds_finfo->rf_vp);
6994 cs->vp = dsp->rds_finfo->rf_vp;
6995
6996 if (error = makefh4(&cs->fh, cs->vp, cs->exi)) {
6997 rfs4_deleg_state_rele(dsp);
6998 *cs->statusp = resp->status = puterrno4(error);
6999 return;
7000 }
7001
7002 /* Mark progress for delegation returns */
7003 dsp->rds_finfo->rf_dinfo.rd_time_lastwrite = gethrestime_sec();
7004 rfs4_deleg_state_rele(dsp);
7005 rfs4_do_open(cs, req, oo, DELEG_NONE,
7006 args->share_access, args->share_deny, resp, 1);
7007 }
7008
7009 /*ARGSUSED*/
7010 static void
7011 rfs4_do_opendelprev(struct compound_state *cs, struct svc_req *req,
7012 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7013 {
7014 /*
7015 * Lookup the pathname, it must already exist since this file
7016 * was delegated.
7017 *
7018 * Find the file and state info for this vp and open owner pair.
7019 * check that they are in fact delegated.
7020 * check that the state access and deny modes are the same.
7021 *
7022 * Return the delgation possibly seting the recall flag.
7023 */
7024 rfs4_file_t *fp;
7025 rfs4_state_t *sp;
7026 bool_t create = FALSE;
7027 bool_t dcreate = FALSE;
7028 rfs4_deleg_state_t *dsp;
7029 nfsace4 *ace;
7030
7031 /* Note we ignore oflags */
7032 resp->status = rfs4_lookupfile(&args->open_claim4_u.file_delegate_prev,
7033 req, cs, args->share_access, &resp->cinfo);
7034
7035 if (resp->status != NFS4_OK) {
7036 return;
7037 }
7038
7039 /* get the file struct and hold a lock on it during initial open */
7040 fp = rfs4_findfile_withlock(cs->vp, NULL, &create);
7041 if (fp == NULL) {
7042 resp->status = NFS4ERR_RESOURCE;
7043 DTRACE_PROBE1(nfss__e__do_opendelprev1, nfsstat4, resp->status);
7044 return;
7045 }
7046
7047 sp = rfs4_findstate_by_owner_file(oo, fp, &create);
7048 if (sp == NULL) {
7049 resp->status = NFS4ERR_SERVERFAULT;
7050 DTRACE_PROBE1(nfss__e__do_opendelprev2, nfsstat4, resp->status);
7051 rw_exit(&fp->rf_file_rwlock);
7052 rfs4_file_rele(fp);
7053 return;
7054 }
7055
7056 rfs4_dbe_lock(sp->rs_dbe);
7057 rfs4_dbe_lock(fp->rf_dbe);
7058 if (args->share_access != sp->rs_share_access ||
7059 args->share_deny != sp->rs_share_deny ||
7060 sp->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
7061 NFS4_DEBUG(rfs4_debug,
7062 (CE_NOTE, "rfs4_do_opendelprev: state mixup"));
7063 rfs4_dbe_unlock(fp->rf_dbe);
7064 rfs4_dbe_unlock(sp->rs_dbe);
7065 rfs4_file_rele(fp);
7066 rfs4_state_rele(sp);
7067 resp->status = NFS4ERR_SERVERFAULT;
7068 return;
7069 }
7070 rfs4_dbe_unlock(fp->rf_dbe);
7071 rfs4_dbe_unlock(sp->rs_dbe);
7072
7073 dsp = rfs4_finddeleg(sp, &dcreate);
7074 if (dsp == NULL) {
7075 rfs4_state_rele(sp);
7076 rfs4_file_rele(fp);
7077 resp->status = NFS4ERR_SERVERFAULT;
7078 return;
7079 }
7080
7081 next_stateid(&sp->rs_stateid);
7082
7083 resp->stateid = sp->rs_stateid.stateid;
7084
7085 resp->delegation.delegation_type = dsp->rds_dtype;
7086
7087 if (dsp->rds_dtype == OPEN_DELEGATE_READ) {
7088 open_read_delegation4 *rv =
7089 &resp->delegation.open_delegation4_u.read;
7090
7091 rv->stateid = dsp->rds_delegid.stateid;
7092 rv->recall = FALSE; /* no policy in place to set to TRUE */
7093 ace = &rv->permissions;
7094 } else {
7095 open_write_delegation4 *rv =
7096 &resp->delegation.open_delegation4_u.write;
7097
7098 rv->stateid = dsp->rds_delegid.stateid;
7099 rv->recall = FALSE; /* no policy in place to set to TRUE */
7100 ace = &rv->permissions;
7101 rv->space_limit.limitby = NFS_LIMIT_SIZE;
7102 rv->space_limit.nfs_space_limit4_u.filesize = UINT64_MAX;
7103 }
7104
7105 /* XXX For now */
7106 ace->type = ACE4_ACCESS_ALLOWED_ACE_TYPE;
7107 ace->flag = 0;
7108 ace->access_mask = 0;
7109 ace->who.utf8string_len = 0;
7110 ace->who.utf8string_val = 0;
7111
7112 rfs4_deleg_state_rele(dsp);
7113 rfs4_state_rele(sp);
7114 rfs4_file_rele(fp);
7115 }
7116
/*
 * Outcome of comparing the sequence id carried by a request with the
 * one recorded for its owner.
 */
typedef enum {
	NFS4_CHKSEQ_OKAY = 0,	/* request carries the next expected seqid */
	NFS4_CHKSEQ_REPLAY,	/* (1) same seqid and same op as last time */
	NFS4_CHKSEQ_BAD		/* (2) neither of the above */
} rfs4_chkseq_t;
7122
7123 /*
7124 * Generic function for sequence number checks.
7125 */
7126 static rfs4_chkseq_t
7127 rfs4_check_seqid(seqid4 seqid, nfs_resop4 *lastop,
7128 seqid4 rqst_seq, nfs_resop4 *resop, bool_t copyres)
7129 {
7130 /* Same sequence ids and matching operations? */
7131 if (seqid == rqst_seq && resop->resop == lastop->resop) {
7132 if (copyres == TRUE) {
7133 rfs4_free_reply(resop);
7134 rfs4_copy_reply(resop, lastop);
7135 }
7136 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
7137 "Replayed SEQID %d\n", seqid));
7138 return (NFS4_CHKSEQ_REPLAY);
7139 }
7140
7141 /* If the incoming sequence is not the next expected then it is bad */
7142 if (rqst_seq != seqid + 1) {
7143 if (rqst_seq == seqid) {
7144 NFS4_DEBUG(rfs4_debug,
7145 (CE_NOTE, "BAD SEQID: Replayed sequence id "
7146 "but last op was %d current op is %d\n",
7147 lastop->resop, resop->resop));
7148 return (NFS4_CHKSEQ_BAD);
7149 }
7150 NFS4_DEBUG(rfs4_debug,
7151 (CE_NOTE, "BAD SEQID: got %u expecting %u\n",
7152 rqst_seq, seqid));
7153 return (NFS4_CHKSEQ_BAD);
7154 }
7155
7156 /* Everything okay -- next expected */
7157 return (NFS4_CHKSEQ_OKAY);
7158 }
7159
7160
7161 static rfs4_chkseq_t
7162 rfs4_check_open_seqid(seqid4 seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7163 {
7164 rfs4_chkseq_t rc;
7165
7166 rfs4_dbe_lock(op->ro_dbe);
7167 rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply, seqid, resop,
7168 TRUE);
7169 rfs4_dbe_unlock(op->ro_dbe);
7170
7171 if (rc == NFS4_CHKSEQ_OKAY)
7172 rfs4_update_lease(op->ro_client);
7173
7174 return (rc);
7175 }
7176
7177 static rfs4_chkseq_t
7178 rfs4_check_olo_seqid(seqid4 olo_seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7179 {
7180 rfs4_chkseq_t rc;
7181
7182 rfs4_dbe_lock(op->ro_dbe);
7183 rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply,
7184 olo_seqid, resop, FALSE);
7185 rfs4_dbe_unlock(op->ro_dbe);
7186
7187 return (rc);
7188 }
7189
7190 static rfs4_chkseq_t
7191 rfs4_check_lock_seqid(seqid4 seqid, rfs4_lo_state_t *lsp, nfs_resop4 *resop)
7192 {
7193 rfs4_chkseq_t rc = NFS4_CHKSEQ_OKAY;
7194
7195 rfs4_dbe_lock(lsp->rls_dbe);
7196 if (!lsp->rls_skip_seqid_check)
7197 rc = rfs4_check_seqid(lsp->rls_seqid, &lsp->rls_reply, seqid,
7198 resop, TRUE);
7199 rfs4_dbe_unlock(lsp->rls_dbe);
7200
7201 return (rc);
7202 }
7203
7204 static void
7205 rfs4_op_open(nfs_argop4 *argop, nfs_resop4 *resop,
7206 struct svc_req *req, struct compound_state *cs)
7207 {
7208 OPEN4args *args = &argop->nfs_argop4_u.opopen;
7209 OPEN4res *resp = &resop->nfs_resop4_u.opopen;
7210 open_owner4 *owner = &args->owner;
7211 open_claim_type4 claim = args->claim;
7212 rfs4_client_t *cp;
7213 rfs4_openowner_t *oo;
7214 bool_t create;
7215 bool_t replay = FALSE;
7216 int can_reclaim;
7217
7218 DTRACE_NFSV4_2(op__open__start, struct compound_state *, cs,
7219 OPEN4args *, args);
7220
7221 if (cs->vp == NULL) {
7222 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7223 goto end;
7224 }
7225
7226 /*
7227 * Need to check clientid and lease expiration first based on
7228 * error ordering and incrementing sequence id.
7229 */
7230 cp = rfs4_findclient_by_id(owner->clientid, FALSE);
7231 if (cp == NULL) {
7232 *cs->statusp = resp->status =
7233 rfs4_check_clientid(&owner->clientid, 0);
7234 goto end;
7235 }
7236
7237 if (rfs4_lease_expired(cp)) {
7238 rfs4_client_close(cp);
7239 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7240 goto end;
7241 }
7242 can_reclaim = cp->rc_can_reclaim;
7243
7244 /*
7245 * Find the open_owner for use from this point forward. Take
7246 * care in updating the sequence id based on the type of error
7247 * being returned.
7248 */
7249 retry:
7250 create = TRUE;
7251 oo = rfs4_findopenowner(owner, &create, args->seqid);
7252 if (oo == NULL) {
7253 *cs->statusp = resp->status = NFS4ERR_RESOURCE;
7254 rfs4_client_rele(cp);
7255 goto end;
7256 }
7257
7258 /* Hold off access to the sequence space while the open is done */
7259 rfs4_sw_enter(&oo->ro_sw);
7260
7261 /*
7262 * If the open_owner existed before at the server, then check
7263 * the sequence id.
7264 */
7265 if (!create && !oo->ro_postpone_confirm) {
7266 switch (rfs4_check_open_seqid(args->seqid, oo, resop)) {
7267 case NFS4_CHKSEQ_BAD:
7268 if ((args->seqid > oo->ro_open_seqid) &&
7269 oo->ro_need_confirm) {
7270 rfs4_free_opens(oo, TRUE, FALSE);
7271 rfs4_sw_exit(&oo->ro_sw);
7272 rfs4_openowner_rele(oo);
7273 goto retry;
7274 }
7275 resp->status = NFS4ERR_BAD_SEQID;
7276 goto out;
7277 case NFS4_CHKSEQ_REPLAY: /* replay of previous request */
7278 replay = TRUE;
7279 goto out;
7280 default:
7281 break;
7282 }
7283
7284 /*
7285 * Sequence was ok and open owner exists
7286 * check to see if we have yet to see an
7287 * open_confirm.
7288 */
7289 if (oo->ro_need_confirm) {
7290 rfs4_free_opens(oo, TRUE, FALSE);
7291 rfs4_sw_exit(&oo->ro_sw);
7292 rfs4_openowner_rele(oo);
7293 goto retry;
7294 }
7295 }
7296 /* Grace only applies to regular-type OPENs */
7297 if (rfs4_clnt_in_grace(cp) &&
7298 (claim == CLAIM_NULL || claim == CLAIM_DELEGATE_CUR)) {
7299 *cs->statusp = resp->status = NFS4ERR_GRACE;
7300 goto out;
7301 }
7302
7303 /*
7304 * If previous state at the server existed then can_reclaim
7305 * will be set. If not reply NFS4ERR_NO_GRACE to the
7306 * client.
7307 */
7308 if (rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS && !can_reclaim) {
7309 *cs->statusp = resp->status = NFS4ERR_NO_GRACE;
7310 goto out;
7311 }
7312
7313
7314 /*
7315 * Reject the open if the client has missed the grace period
7316 */
7317 if (!rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS) {
7318 *cs->statusp = resp->status = NFS4ERR_NO_GRACE;
7319 goto out;
7320 }
7321
7322 /* Couple of up-front bookkeeping items */
7323 if (oo->ro_need_confirm) {
7324 /*
7325 * If this is a reclaim OPEN then we should not ask
7326 * for a confirmation of the open_owner per the
7327 * protocol specification.
7328 */
7329 if (claim == CLAIM_PREVIOUS)
7330 oo->ro_need_confirm = FALSE;
7331 else
7332 resp->rflags |= OPEN4_RESULT_CONFIRM;
7333 }
7334 resp->rflags |= OPEN4_RESULT_LOCKTYPE_POSIX;
7335
7336 /*
7337 * If there is an unshared filesystem mounted on this vnode,
7338 * do not allow to open/create in this directory.
7339 */
7340 if (vn_ismntpt(cs->vp)) {
7341 *cs->statusp = resp->status = NFS4ERR_ACCESS;
7342 goto out;
7343 }
7344
7345 /*
7346 * access must READ, WRITE, or BOTH. No access is invalid.
7347 * deny can be READ, WRITE, BOTH, or NONE.
7348 * bits not defined for access/deny are invalid.
7349 */
7350 if (! (args->share_access & OPEN4_SHARE_ACCESS_BOTH) ||
7351 (args->share_access & ~OPEN4_SHARE_ACCESS_BOTH) ||
7352 (args->share_deny & ~OPEN4_SHARE_DENY_BOTH)) {
7353 *cs->statusp = resp->status = NFS4ERR_INVAL;
7354 goto out;
7355 }
7356
7357
7358 /*
7359 * make sure attrset is zero before response is built.
7360 */
7361 resp->attrset = 0;
7362
7363 switch (claim) {
7364 case CLAIM_NULL:
7365 rfs4_do_opennull(cs, req, args, oo, resp);
7366 break;
7367 case CLAIM_PREVIOUS:
7368 rfs4_do_openprev(cs, req, args, oo, resp);
7369 break;
7370 case CLAIM_DELEGATE_CUR:
7371 rfs4_do_opendelcur(cs, req, args, oo, resp);
7372 break;
7373 case CLAIM_DELEGATE_PREV:
7374 rfs4_do_opendelprev(cs, req, args, oo, resp);
7375 break;
7376 default:
7377 resp->status = NFS4ERR_INVAL;
7378 break;
7379 }
7380
7381 out:
7382 rfs4_client_rele(cp);
7383
7384 /* Catch sequence id handling here to make it a little easier */
7385 switch (resp->status) {
7386 case NFS4ERR_BADXDR:
7387 case NFS4ERR_BAD_SEQID:
7388 case NFS4ERR_BAD_STATEID:
7389 case NFS4ERR_NOFILEHANDLE:
7390 case NFS4ERR_RESOURCE:
7391 case NFS4ERR_STALE_CLIENTID:
7392 case NFS4ERR_STALE_STATEID:
7393 /*
7394 * The protocol states that if any of these errors are
7395 * being returned, the sequence id should not be
7396 * incremented. Any other return requires an
7397 * increment.
7398 */
7399 break;
7400 default:
7401 /* Always update the lease in this case */
7402 rfs4_update_lease(oo->ro_client);
7403
7404 /* Regular response - copy the result */
7405 if (!replay)
7406 rfs4_update_open_resp(oo, resop, &cs->fh);
7407
7408 /*
7409 * REPLAY case: Only if the previous response was OK
7410 * do we copy the filehandle. If not OK, no
7411 * filehandle to copy.
7412 */
7413 if (replay == TRUE &&
7414 resp->status == NFS4_OK &&
7415 oo->ro_reply_fh.nfs_fh4_val) {
7416 /*
7417 * If this is a replay, we must restore the
7418 * current filehandle/vp to that of what was
7419 * returned originally. Try our best to do
7420 * it.
7421 */
7422 nfs_fh4_fmt_t *fh_fmtp =
7423 (nfs_fh4_fmt_t *)oo->ro_reply_fh.nfs_fh4_val;
7424
7425 cs->exi = checkexport4(&fh_fmtp->fh4_fsid,
7426 (fid_t *)&fh_fmtp->fh4_xlen, NULL);
7427
7428 if (cs->exi == NULL) {
7429 resp->status = NFS4ERR_STALE;
7430 goto finish;
7431 }
7432
7433 VN_RELE(cs->vp);
7434
7435 cs->vp = nfs4_fhtovp(&oo->ro_reply_fh, cs->exi,
7436 &resp->status);
7437
7438 if (cs->vp == NULL)
7439 goto finish;
7440
7441 nfs_fh4_copy(&oo->ro_reply_fh, &cs->fh);
7442 }
7443
7444 /*
7445 * If this was a replay, no need to update the
7446 * sequence id. If the open_owner was not created on
7447 * this pass, then update. The first use of an
7448 * open_owner will not bump the sequence id.
7449 */
7450 if (replay == FALSE && !create)
7451 rfs4_update_open_sequence(oo);
7452 /*
7453 * If the client is receiving an error and the
7454 * open_owner needs to be confirmed, there is no way
7455 * to notify the client of this fact ignoring the fact
7456 * that the server has no method of returning a
7457 * stateid to confirm. Therefore, the server needs to
7458 * mark this open_owner in a way as to avoid the
7459 * sequence id checking the next time the client uses
7460 * this open_owner.
7461 */
7462 if (resp->status != NFS4_OK && oo->ro_need_confirm)
7463 oo->ro_postpone_confirm = TRUE;
7464 /*
7465 * If OK response then clear the postpone flag and
7466 * reset the sequence id to keep in sync with the
7467 * client.
7468 */
7469 if (resp->status == NFS4_OK && oo->ro_postpone_confirm) {
7470 oo->ro_postpone_confirm = FALSE;
7471 oo->ro_open_seqid = args->seqid;
7472 }
7473 break;
7474 }
7475
7476 finish:
7477 *cs->statusp = resp->status;
7478
7479 rfs4_sw_exit(&oo->ro_sw);
7480 rfs4_openowner_rele(oo);
7481
7482 end:
7483 DTRACE_NFSV4_2(op__open__done, struct compound_state *, cs,
7484 OPEN4res *, resp);
7485 }
7486
7487 /*ARGSUSED*/
7488 void
7489 rfs4_op_open_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
7490 struct svc_req *req, struct compound_state *cs)
7491 {
7492 OPEN_CONFIRM4args *args = &argop->nfs_argop4_u.opopen_confirm;
7493 OPEN_CONFIRM4res *resp = &resop->nfs_resop4_u.opopen_confirm;
7494 rfs4_state_t *sp;
7495 nfsstat4 status;
7496
7497 DTRACE_NFSV4_2(op__open__confirm__start, struct compound_state *, cs,
7498 OPEN_CONFIRM4args *, args);
7499
7500 if (cs->vp == NULL) {
7501 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7502 goto out;
7503 }
7504
7505 if (cs->vp->v_type != VREG) {
7506 *cs->statusp = resp->status =
7507 cs->vp->v_type == VDIR ? NFS4ERR_ISDIR : NFS4ERR_INVAL;
7508 return;
7509 }
7510
7511 status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7512 if (status != NFS4_OK) {
7513 *cs->statusp = resp->status = status;
7514 goto out;
7515 }
7516
7517 /* Ensure specified filehandle matches */
7518 if (cs->vp != sp->rs_finfo->rf_vp) {
7519 rfs4_state_rele(sp);
7520 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7521 goto out;
7522 }
7523
7524 /* hold off other access to open_owner while we tinker */
7525 rfs4_sw_enter(&sp->rs_owner->ro_sw);
7526
7527 switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7528 case NFS4_CHECK_STATEID_OKAY:
7529 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7530 resop) != 0) {
7531 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7532 break;
7533 }
7534 /*
7535 * If it is the appropriate stateid and determined to
7536 * be "OKAY" then this means that the stateid does not
7537 * need to be confirmed and the client is in error for
7538 * sending an OPEN_CONFIRM.
7539 */
7540 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7541 break;
7542 case NFS4_CHECK_STATEID_OLD:
7543 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7544 break;
7545 case NFS4_CHECK_STATEID_BAD:
7546 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7547 break;
7548 case NFS4_CHECK_STATEID_EXPIRED:
7549 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7550 break;
7551 case NFS4_CHECK_STATEID_CLOSED:
7552 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7553 break;
7554 case NFS4_CHECK_STATEID_REPLAY:
7555 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7556 resop)) {
7557 case NFS4_CHKSEQ_OKAY:
7558 /*
7559 * This is replayed stateid; if seqid matches
7560 * next expected, then client is using wrong seqid.
7561 */
7562 /* fall through */
7563 case NFS4_CHKSEQ_BAD:
7564 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7565 break;
7566 case NFS4_CHKSEQ_REPLAY:
7567 /*
7568 * Note this case is the duplicate case so
7569 * resp->status is already set.
7570 */
7571 *cs->statusp = resp->status;
7572 rfs4_update_lease(sp->rs_owner->ro_client);
7573 break;
7574 }
7575 break;
7576 case NFS4_CHECK_STATEID_UNCONFIRMED:
7577 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7578 resop) != NFS4_CHKSEQ_OKAY) {
7579 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7580 break;
7581 }
7582 *cs->statusp = resp->status = NFS4_OK;
7583
7584 next_stateid(&sp->rs_stateid);
7585 resp->open_stateid = sp->rs_stateid.stateid;
7586 sp->rs_owner->ro_need_confirm = FALSE;
7587 rfs4_update_lease(sp->rs_owner->ro_client);
7588 rfs4_update_open_sequence(sp->rs_owner);
7589 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7590 break;
7591 default:
7592 ASSERT(FALSE);
7593 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7594 break;
7595 }
7596 rfs4_sw_exit(&sp->rs_owner->ro_sw);
7597 rfs4_state_rele(sp);
7598
7599 out:
7600 DTRACE_NFSV4_2(op__open__confirm__done, struct compound_state *, cs,
7601 OPEN_CONFIRM4res *, resp);
7602 }
7603
7604 /*ARGSUSED*/
7605 void
7606 rfs4_op_open_downgrade(nfs_argop4 *argop, nfs_resop4 *resop,
7607 struct svc_req *req, struct compound_state *cs)
7608 {
7609 OPEN_DOWNGRADE4args *args = &argop->nfs_argop4_u.opopen_downgrade;
7610 OPEN_DOWNGRADE4res *resp = &resop->nfs_resop4_u.opopen_downgrade;
7611 uint32_t access = args->share_access;
7612 uint32_t deny = args->share_deny;
7613 nfsstat4 status;
7614 rfs4_state_t *sp;
7615 rfs4_file_t *fp;
7616 int fflags = 0;
7617
7618 DTRACE_NFSV4_2(op__open__downgrade__start, struct compound_state *, cs,
7619 OPEN_DOWNGRADE4args *, args);
7620
7621 if (cs->vp == NULL) {
7622 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7623 goto out;
7624 }
7625
7626 if (cs->vp->v_type != VREG) {
7627 *cs->statusp = resp->status = NFS4ERR_INVAL;
7628 return;
7629 }
7630
7631 status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7632 if (status != NFS4_OK) {
7633 *cs->statusp = resp->status = status;
7634 goto out;
7635 }
7636
7637 /* Ensure specified filehandle matches */
7638 if (cs->vp != sp->rs_finfo->rf_vp) {
7639 rfs4_state_rele(sp);
7640 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7641 goto out;
7642 }
7643
7644 /* hold off other access to open_owner while we tinker */
7645 rfs4_sw_enter(&sp->rs_owner->ro_sw);
7646
7647 switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7648 case NFS4_CHECK_STATEID_OKAY:
7649 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7650 resop) != NFS4_CHKSEQ_OKAY) {
7651 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7652 goto end;
7653 }
7654 break;
7655 case NFS4_CHECK_STATEID_OLD:
7656 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7657 goto end;
7658 case NFS4_CHECK_STATEID_BAD:
7659 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7660 goto end;
7661 case NFS4_CHECK_STATEID_EXPIRED:
7662 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7663 goto end;
7664 case NFS4_CHECK_STATEID_CLOSED:
7665 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7666 goto end;
7667 case NFS4_CHECK_STATEID_UNCONFIRMED:
7668 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7669 goto end;
7670 case NFS4_CHECK_STATEID_REPLAY:
7671 /* Check the sequence id for the open owner */
7672 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7673 resop)) {
7674 case NFS4_CHKSEQ_OKAY:
7675 /*
7676 * This is replayed stateid; if seqid matches
7677 * next expected, then client is using wrong seqid.
7678 */
7679 /* fall through */
7680 case NFS4_CHKSEQ_BAD:
7681 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7682 goto end;
7683 case NFS4_CHKSEQ_REPLAY:
7684 /*
7685 * Note this case is the duplicate case so
7686 * resp->status is already set.
7687 */
7688 *cs->statusp = resp->status;
7689 rfs4_update_lease(sp->rs_owner->ro_client);
7690 goto end;
7691 }
7692 break;
7693 default:
7694 ASSERT(FALSE);
7695 break;
7696 }
7697
7698 rfs4_dbe_lock(sp->rs_dbe);
7699 /*
7700 * Check that the new access modes and deny modes are valid.
7701 * Check that no invalid bits are set.
7702 */
7703 if ((access & ~(OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) ||
7704 (deny & ~(OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE))) {
7705 *cs->statusp = resp->status = NFS4ERR_INVAL;
7706 rfs4_update_open_sequence(sp->rs_owner);
7707 rfs4_dbe_unlock(sp->rs_dbe);
7708 goto end;
7709 }
7710
7711 /*
7712 * The new modes must be a subset of the current modes and
7713 * the access must specify at least one mode. To test that
7714 * the new mode is a subset of the current modes we bitwise
7715 * AND them together and check that the result equals the new
7716 * mode. For example:
7717 * New mode, access == R and current mode, sp->rs_open_access == RW
7718 * access & sp->rs_open_access == R == access, so the new access mode
7719 * is valid. Consider access == RW, sp->rs_open_access = R
7720 * access & sp->rs_open_access == R != access, so the new access mode
7721 * is invalid.
7722 */
7723 if ((access & sp->rs_open_access) != access ||
7724 (deny & sp->rs_open_deny) != deny ||
7725 (access &
7726 (OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) == 0) {
7727 *cs->statusp = resp->status = NFS4ERR_INVAL;
7728 rfs4_update_open_sequence(sp->rs_owner);
7729 rfs4_dbe_unlock(sp->rs_dbe);
7730 goto end;
7731 }
7732
7733 /*
7734 * Release any share locks associated with this stateID.
7735 * Strictly speaking, this violates the spec because the
7736 * spec effectively requires that open downgrade be atomic.
7737 * At present, fs_shrlock does not have this capability.
7738 */
7739 (void) rfs4_unshare(sp);
7740
7741 status = rfs4_share(sp, access, deny);
7742 if (status != NFS4_OK) {
7743 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7744 rfs4_update_open_sequence(sp->rs_owner);
7745 rfs4_dbe_unlock(sp->rs_dbe);
7746 goto end;
7747 }
7748
7749 fp = sp->rs_finfo;
7750 rfs4_dbe_lock(fp->rf_dbe);
7751
7752 /*
7753 * If the current mode has deny read and the new mode
7754 * does not, decrement the number of deny read mode bits
7755 * and if it goes to zero turn off the deny read bit
7756 * on the file.
7757 */
7758 if ((sp->rs_open_deny & OPEN4_SHARE_DENY_READ) &&
7759 (deny & OPEN4_SHARE_DENY_READ) == 0) {
7760 fp->rf_deny_read--;
7761 if (fp->rf_deny_read == 0)
7762 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
7763 }
7764
7765 /*
7766 * If the current mode has deny write and the new mode
7767 * does not, decrement the number of deny write mode bits
7768 * and if it goes to zero turn off the deny write bit
7769 * on the file.
7770 */
7771 if ((sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) &&
7772 (deny & OPEN4_SHARE_DENY_WRITE) == 0) {
7773 fp->rf_deny_write--;
7774 if (fp->rf_deny_write == 0)
7775 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
7776 }
7777
7778 /*
7779 * If the current mode has access read and the new mode
7780 * does not, decrement the number of access read mode bits
7781 * and if it goes to zero turn off the access read bit
7782 * on the file. set fflags to FREAD for the call to
7783 * vn_open_downgrade().
7784 */
7785 if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) &&
7786 (access & OPEN4_SHARE_ACCESS_READ) == 0) {
7787 fp->rf_access_read--;
7788 if (fp->rf_access_read == 0)
7789 fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
7790 fflags |= FREAD;
7791 }
7792
7793 /*
7794 * If the current mode has access write and the new mode
7795 * does not, decrement the number of access write mode bits
7796 * and if it goes to zero turn off the access write bit
7797 * on the file. set fflags to FWRITE for the call to
7798 * vn_open_downgrade().
7799 */
7800 if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) &&
7801 (access & OPEN4_SHARE_ACCESS_WRITE) == 0) {
7802 fp->rf_access_write--;
7803 if (fp->rf_access_write == 0)
7804 fp->rf_share_deny &= ~OPEN4_SHARE_ACCESS_WRITE;
7805 fflags |= FWRITE;
7806 }
7807
7808 /* Check that the file is still accessible */
7809 ASSERT(fp->rf_share_access);
7810
7811 rfs4_dbe_unlock(fp->rf_dbe);
7812
7813 /* now set the new open access and deny modes */
7814 sp->rs_open_access = access;
7815 sp->rs_open_deny = deny;
7816
7817 /*
7818 * we successfully downgraded the share lock, now we need to downgrade
7819 * the open. it is possible that the downgrade was only for a deny
7820 * mode and we have nothing else to do.
7821 */
7822 if ((fflags & (FREAD|FWRITE)) != 0)
7823 vn_open_downgrade(cs->vp, fflags);
7824
7825 /* Update the stateid */
7826 next_stateid(&sp->rs_stateid);
7827 resp->open_stateid = sp->rs_stateid.stateid;
7828
7829 rfs4_dbe_unlock(sp->rs_dbe);
7830
7831 *cs->statusp = resp->status = NFS4_OK;
7832 /* Update the lease */
7833 rfs4_update_lease(sp->rs_owner->ro_client);
7834 /* And the sequence */
7835 rfs4_update_open_sequence(sp->rs_owner);
7836 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7837
7838 end:
7839 rfs4_sw_exit(&sp->rs_owner->ro_sw);
7840 rfs4_state_rele(sp);
7841 out:
7842 DTRACE_NFSV4_2(op__open__downgrade__done, struct compound_state *, cs,
7843 OPEN_DOWNGRADE4res *, resp);
7844 }
7845
/*
 * Find the first occurrence of the NUL-terminated string s2 within the
 * first n bytes of the buffer s1.  The buffer need not be
 * NUL-terminated.
 *
 * Returns a pointer to the start of the match inside s1, or NULL when
 * s2 does not occur.  An empty s2 matches at s1 itself.
 */
static void *
memstr(const void *s1, const char *s2, size_t n)
{
	const size_t patlen = strlen(s2);
	char *cursor = (char *)s1;
	size_t remaining;

	/* Stop once fewer than patlen bytes are left to compare */
	for (remaining = n; remaining >= patlen; remaining--, cursor++) {
		if (bcmp(cursor, s2, patlen) == 0)
			return (cursor);
	}

	return (NULL);
}
7861
7862 /*
7863 * The logic behind this function is detailed in the NFSv4 RFC in the
7864 * SETCLIENTID operation description under IMPLEMENTATION. Refer to
7865 * that section for explicit guidance to server behavior for
7866 * SETCLIENTID.
7867 */
7868 void
7869 rfs4_op_setclientid(nfs_argop4 *argop, nfs_resop4 *resop,
7870 struct svc_req *req, struct compound_state *cs)
7871 {
7872 SETCLIENTID4args *args = &argop->nfs_argop4_u.opsetclientid;
7873 SETCLIENTID4res *res = &resop->nfs_resop4_u.opsetclientid;
7874 rfs4_client_t *cp, *newcp, *cp_confirmed, *cp_unconfirmed;
7875 rfs4_clntip_t *ci;
7876 bool_t create;
7877 char *addr, *netid;
7878 int len;
7879
7880 DTRACE_NFSV4_2(op__setclientid__start, struct compound_state *, cs,
7881 SETCLIENTID4args *, args);
7882 retry:
7883 newcp = cp_confirmed = cp_unconfirmed = NULL;
7884
7885 /*
7886 * Save the caller's IP address
7887 */
7888 args->client.cl_addr =
7889 (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
7890
7891 /*
7892 * Record if it is a Solaris client that cannot handle referrals.
7893 */
7894 if (memstr(args->client.id_val, "Solaris", args->client.id_len) &&
7895 !memstr(args->client.id_val, "+referrals", args->client.id_len)) {
7896 /* Add a "yes, it's downrev" record */
7897 create = TRUE;
7898 ci = rfs4_find_clntip(args->client.cl_addr, &create);
7899 ASSERT(ci != NULL);
7900 rfs4_dbe_rele(ci->ri_dbe);
7901 } else {
7902 /* Remove any previous record */
7903 rfs4_invalidate_clntip(args->client.cl_addr);
7904 }
7905
7906 /*
7907 * In search of an EXISTING client matching the incoming
7908 * request to establish a new client identifier at the server
7909 */
7910 create = TRUE;
7911 cp = rfs4_findclient(&args->client, &create, NULL);
7912
7913 /* Should never happen */
7914 ASSERT(cp != NULL);
7915
7916 if (cp == NULL) {
7917 *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
7918 goto out;
7919 }
7920
7921 /*
7922 * Easiest case. Client identifier is newly created and is
7923 * unconfirmed. Also note that for this case, no other
7924 * entries exist for the client identifier. Nothing else to
7925 * check. Just setup the response and respond.
7926 */
7927 if (create) {
7928 *cs->statusp = res->status = NFS4_OK;
7929 res->SETCLIENTID4res_u.resok4.clientid = cp->rc_clientid;
7930 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
7931 cp->rc_confirm_verf;
7932 /* Setup callback information; CB_NULL confirmation later */
7933 rfs4_client_setcb(cp, &args->callback, args->callback_ident);
7934
7935 rfs4_client_rele(cp);
7936 goto out;
7937 }
7938
7939 /*
7940 * An existing, confirmed client may exist but it may not have
7941 * been active for at least one lease period. If so, then
7942 * "close" the client and create a new client identifier
7943 */
7944 if (rfs4_lease_expired(cp)) {
7945 rfs4_client_close(cp);
7946 goto retry;
7947 }
7948
7949 if (cp->rc_need_confirm == TRUE)
7950 cp_unconfirmed = cp;
7951 else
7952 cp_confirmed = cp;
7953
7954 cp = NULL;
7955
7956 /*
7957 * We have a confirmed client, now check for an
7958 * unconfimred entry
7959 */
7960 if (cp_confirmed) {
7961 /* If creds don't match then client identifier is inuse */
7962 if (!creds_ok(cp_confirmed->rc_cr_set, req, cs)) {
7963 rfs4_cbinfo_t *cbp;
7964 /*
7965 * Some one else has established this client
7966 * id. Try and say * who they are. We will use
7967 * the call back address supplied by * the
7968 * first client.
7969 */
7970 *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
7971
7972 addr = netid = NULL;
7973
7974 cbp = &cp_confirmed->rc_cbinfo;
7975 if (cbp->cb_callback.cb_location.r_addr &&
7976 cbp->cb_callback.cb_location.r_netid) {
7977 cb_client4 *cbcp = &cbp->cb_callback;
7978
7979 len = strlen(cbcp->cb_location.r_addr)+1;
7980 addr = kmem_alloc(len, KM_SLEEP);
7981 bcopy(cbcp->cb_location.r_addr, addr, len);
7982 len = strlen(cbcp->cb_location.r_netid)+1;
7983 netid = kmem_alloc(len, KM_SLEEP);
7984 bcopy(cbcp->cb_location.r_netid, netid, len);
7985 }
7986
7987 res->SETCLIENTID4res_u.client_using.r_addr = addr;
7988 res->SETCLIENTID4res_u.client_using.r_netid = netid;
7989
7990 rfs4_client_rele(cp_confirmed);
7991 }
7992
7993 /*
7994 * Confirmed, creds match, and verifier matches; must
7995 * be an update of the callback info
7996 */
7997 if (cp_confirmed->rc_nfs_client.verifier ==
7998 args->client.verifier) {
7999 /* Setup callback information */
8000 rfs4_client_setcb(cp_confirmed, &args->callback,
8001 args->callback_ident);
8002
8003 /* everything okay -- move ahead */
8004 *cs->statusp = res->status = NFS4_OK;
8005 res->SETCLIENTID4res_u.resok4.clientid =
8006 cp_confirmed->rc_clientid;
8007
8008 /* update the confirm_verifier and return it */
8009 rfs4_client_scv_next(cp_confirmed);
8010 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8011 cp_confirmed->rc_confirm_verf;
8012
8013 rfs4_client_rele(cp_confirmed);
8014 goto out;
8015 }
8016
8017 /*
8018 * Creds match but the verifier doesn't. Must search
8019 * for an unconfirmed client that would be replaced by
8020 * this request.
8021 */
8022 create = FALSE;
8023 cp_unconfirmed = rfs4_findclient(&args->client, &create,
8024 cp_confirmed);
8025 }
8026
8027 /*
8028 * At this point, we have taken care of the brand new client
8029 * struct, INUSE case, update of an existing, and confirmed
8030 * client struct.
8031 */
8032
8033 /*
8034 * check to see if things have changed while we originally
8035 * picked up the client struct. If they have, then return and
8036 * retry the processing of this SETCLIENTID request.
8037 */
8038 if (cp_unconfirmed) {
8039 rfs4_dbe_lock(cp_unconfirmed->rc_dbe);
8040 if (!cp_unconfirmed->rc_need_confirm) {
8041 rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8042 rfs4_client_rele(cp_unconfirmed);
8043 if (cp_confirmed)
8044 rfs4_client_rele(cp_confirmed);
8045 goto retry;
8046 }
8047 /* do away with the old unconfirmed one */
8048 rfs4_dbe_invalidate(cp_unconfirmed->rc_dbe);
8049 rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8050 rfs4_client_rele(cp_unconfirmed);
8051 cp_unconfirmed = NULL;
8052 }
8053
8054 /*
8055 * This search will temporarily hide the confirmed client
8056 * struct while a new client struct is created as the
8057 * unconfirmed one.
8058 */
8059 create = TRUE;
8060 newcp = rfs4_findclient(&args->client, &create, cp_confirmed);
8061
8062 ASSERT(newcp != NULL);
8063
8064 if (newcp == NULL) {
8065 *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8066 rfs4_client_rele(cp_confirmed);
8067 goto out;
8068 }
8069
8070 /*
8071 * If one was not created, then a similar request must be in
8072 * process so release and start over with this one
8073 */
8074 if (create != TRUE) {
8075 rfs4_client_rele(newcp);
8076 if (cp_confirmed)
8077 rfs4_client_rele(cp_confirmed);
8078 goto retry;
8079 }
8080
8081 *cs->statusp = res->status = NFS4_OK;
8082 res->SETCLIENTID4res_u.resok4.clientid = newcp->rc_clientid;
8083 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8084 newcp->rc_confirm_verf;
8085 /* Setup callback information; CB_NULL confirmation later */
8086 rfs4_client_setcb(newcp, &args->callback, args->callback_ident);
8087
8088 newcp->rc_cp_confirmed = cp_confirmed;
8089
8090 rfs4_client_rele(newcp);
8091
8092 out:
8093 DTRACE_NFSV4_2(op__setclientid__done, struct compound_state *, cs,
8094 SETCLIENTID4res *, res);
8095 }
8096
8097 /*ARGSUSED*/
8098 void
8099 rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
8100 struct svc_req *req, struct compound_state *cs)
8101 {
8102 SETCLIENTID_CONFIRM4args *args =
8103 &argop->nfs_argop4_u.opsetclientid_confirm;
8104 SETCLIENTID_CONFIRM4res *res =
8105 &resop->nfs_resop4_u.opsetclientid_confirm;
8106 rfs4_client_t *cp, *cptoclose = NULL;
8107
8108 DTRACE_NFSV4_2(op__setclientid__confirm__start,
8109 struct compound_state *, cs,
8110 SETCLIENTID_CONFIRM4args *, args);
8111
8112 *cs->statusp = res->status = NFS4_OK;
8113
8114 cp = rfs4_findclient_by_id(args->clientid, TRUE);
8115
8116 if (cp == NULL) {
8117 *cs->statusp = res->status =
8118 rfs4_check_clientid(&args->clientid, 1);
8119 goto out;
8120 }
8121
8122 if (!creds_ok(cp, req, cs)) {
8123 *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8124 rfs4_client_rele(cp);
8125 goto out;
8126 }
8127
8128 /* If the verifier doesn't match, the record doesn't match */
8129 if (cp->rc_confirm_verf != args->setclientid_confirm) {
8130 *cs->statusp = res->status = NFS4ERR_STALE_CLIENTID;
8131 rfs4_client_rele(cp);
8132 goto out;
8133 }
8134
8135 rfs4_dbe_lock(cp->rc_dbe);
8136 cp->rc_need_confirm = FALSE;
8137 if (cp->rc_cp_confirmed) {
8138 cptoclose = cp->rc_cp_confirmed;
8139 cptoclose->rc_ss_remove = 1;
8140 cp->rc_cp_confirmed = NULL;
8141 }
8142
8143 /*
8144 * Update the client's associated server instance, if it's changed
8145 * since the client was created.
8146 */
8147 if (rfs4_servinst(cp) != rfs4_cur_servinst)
8148 rfs4_servinst_assign(cp, rfs4_cur_servinst);
8149
8150 /*
8151 * Record clientid in stable storage.
8152 * Must be done after server instance has been assigned.
8153 */
8154 rfs4_ss_clid(cp);
8155
8156 rfs4_dbe_unlock(cp->rc_dbe);
8157
8158 if (cptoclose)
8159 /* don't need to rele, client_close does it */
8160 rfs4_client_close(cptoclose);
8161
8162 /* If needed, initiate CB_NULL call for callback path */
8163 rfs4_deleg_cb_check(cp);
8164 rfs4_update_lease(cp);
8165
8166 /*
8167 * Check to see if client can perform reclaims
8168 */
8169 rfs4_ss_chkclid(cp);
8170
8171 rfs4_client_rele(cp);
8172
8173 out:
8174 DTRACE_NFSV4_2(op__setclientid__confirm__done,
8175 struct compound_state *, cs,
8176 SETCLIENTID_CONFIRM4 *, res);
8177 }
8178
8179
/*
 * CLOSE operation handler.
 *
 * Looks up the open state named by args->open_stateid, checks that it
 * belongs to the current filehandle (cs->vp), and validates the stateid
 * and the open-owner seqid while holding the open owner's ro_sw.  On
 * success the stateid is advanced and returned in resp->open_stateid, the
 * client's lease and the open-owner sequence are updated, and the state is
 * closed through rfs4_state_close().
 *
 * The outcome is always stored in both resp->status and *cs->statusp:
 *	NFS4ERR_NOFILEHANDLE	no current filehandle
 *	NFS4ERR_BAD_STATEID	filehandle mismatch, bad or unconfirmed stateid
 *	NFS4ERR_OLD_STATEID	old or already closed stateid
 *	NFS4ERR_EXPIRED		expired stateid
 *	NFS4ERR_BAD_SEQID	seqid check failed
 * or whatever rfs4_get_state() returned when the lookup itself failed.
 */
/*ARGSUSED*/
void
rfs4_op_close(nfs_argop4 *argop, nfs_resop4 *resop,
    struct svc_req *req, struct compound_state *cs)
{
	CLOSE4args *args = &argop->nfs_argop4_u.opclose;
	CLOSE4res *resp = &resop->nfs_resop4_u.opclose;
	rfs4_state_t *sp;
	nfsstat4 status;

	DTRACE_NFSV4_2(op__close__start, struct compound_state *, cs,
	    CLOSE4args *, args);

	/* CLOSE operates on the current filehandle; there must be one */
	if (cs->vp == NULL) {
		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
		goto out;
	}

	/* On success sp is held and must be released on every path below */
	status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_INVALID);
	if (status != NFS4_OK) {
		*cs->statusp = resp->status = status;
		goto out;
	}

	/* Ensure specified filehandle matches */
	if (cs->vp != sp->rs_finfo->rf_vp) {
		rfs4_state_rele(sp);
		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
		goto out;
	}

	/* hold off other access to open_owner while we tinker */
	rfs4_sw_enter(&sp->rs_owner->ro_sw);

	/*
	 * Every failing case below leaves through "end" so that ro_sw is
	 * exited and sp is released; only a fully valid stateid/seqid pair
	 * falls out of the switch to perform the close.
	 */
	switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
	case NFS4_CHECK_STATEID_OKAY:
		if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
		    resop) != NFS4_CHKSEQ_OKAY) {
			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
			goto end;
		}
		break;
	case NFS4_CHECK_STATEID_OLD:
		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
		goto end;
	case NFS4_CHECK_STATEID_BAD:
		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
		goto end;
	case NFS4_CHECK_STATEID_EXPIRED:
		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
		goto end;
	case NFS4_CHECK_STATEID_CLOSED:
		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
		goto end;
	case NFS4_CHECK_STATEID_UNCONFIRMED:
		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
		goto end;
	case NFS4_CHECK_STATEID_REPLAY:
		/* Check the sequence id for the open owner */
		switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
		    resop)) {
		case NFS4_CHKSEQ_OKAY:
			/*
			 * This is replayed stateid; if seqid matches
			 * next expected, then client is using wrong seqid.
			 */
			/* FALL THROUGH */
		case NFS4_CHKSEQ_BAD:
			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
			goto end;
		case NFS4_CHKSEQ_REPLAY:
			/*
			 * Note this case is the duplicate case so
			 * resp->status is already set.
			 */
			*cs->statusp = resp->status;
			rfs4_update_lease(sp->rs_owner->ro_client);
			goto end;
		}
		/*
		 * All three inner cases above leave via "end"; this break
		 * is only reached for a value outside those cases.
		 */
		break;
	default:
		ASSERT(FALSE);
		break;
	}

	rfs4_dbe_lock(sp->rs_dbe);

	/* Update the stateid. */
	next_stateid(&sp->rs_stateid);
	resp->open_stateid = sp->rs_stateid.stateid;

	rfs4_dbe_unlock(sp->rs_dbe);

	/*
	 * Bookkeeping for a successful request: lease, open-owner sequence,
	 * and the open owner's saved response (presumably kept so that a
	 * retransmission can be answered as a replay -- confirm against
	 * rfs4_update_open_resp()).
	 */
	rfs4_update_lease(sp->rs_owner->ro_client);
	rfs4_update_open_sequence(sp->rs_owner);
	rfs4_update_open_resp(sp->rs_owner, resop, NULL);

	rfs4_state_close(sp, FALSE, FALSE, cs->cr);

	/* status still holds the NFS4_OK returned by rfs4_get_state() */
	*cs->statusp = resp->status = status;

end:
	rfs4_sw_exit(&sp->rs_owner->ro_sw);
	rfs4_state_rele(sp);
out:
	DTRACE_NFSV4_2(op__close__done, struct compound_state *, cs,
	    CLOSE4res *, resp);
}
8288
8289 /*
8290 * Manage the counts on the file struct and close all file locks
8291 */
8292 /*ARGSUSED*/
8293 void
8294 rfs4_release_share_lock_state(rfs4_state_t *sp, cred_t *cr,
8295 bool_t close_of_client)
8296 {
8297 rfs4_file_t *fp = sp->rs_finfo;
8298 rfs4_lo_state_t *lsp;
8299 int fflags = 0;
8300
8301 /*
8302 * If this call is part of the larger closing down of client
8303 * state then it is just easier to release all locks
8304 * associated with this client instead of going through each
8305 * individual file and cleaning locks there.
8306 */
8307 if (close_of_client) {
8308 if (sp->rs_owner->ro_client->rc_unlksys_completed == FALSE &&
8309 !list_is_empty(&sp->rs_lostatelist) &&
8310 sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID) {
8311 /* Is the PxFS kernel module loaded? */
8312 if (lm_remove_file_locks != NULL) {
8313 int new_sysid;
8314
8315 /* Encode the cluster nodeid in new sysid */
8316 new_sysid = sp->rs_owner->ro_client->rc_sysidt;
8317 lm_set_nlmid_flk(&new_sysid);
8318
8319 /*
8320 * This PxFS routine removes file locks for a
8321 * client over all nodes of a cluster.
8322 */
8323 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
8324 "lm_remove_file_locks(sysid=0x%x)\n",
8325 new_sysid));
8326 (*lm_remove_file_locks)(new_sysid);
8327 } else {
8328 struct flock64 flk;
8329
8330 /* Release all locks for this client */
8331 flk.l_type = F_UNLKSYS;
8332 flk.l_whence = 0;
8333 flk.l_start = 0;
8334 flk.l_len = 0;
8335 flk.l_sysid =
8336 sp->rs_owner->ro_client->rc_sysidt;
8337 flk.l_pid = 0;
8338 (void) VOP_FRLOCK(sp->rs_finfo->rf_vp, F_SETLK,
8339 &flk, F_REMOTELOCK | FREAD | FWRITE,
8340 (u_offset_t)0, NULL, CRED(), NULL);
8341 }
8342
8343 sp->rs_owner->ro_client->rc_unlksys_completed = TRUE;
8344 }
8345 }
8346
8347 /*
8348 * Release all locks on this file by this lock owner or at
8349 * least mark the locks as having been released
8350 */
8351 for (lsp = list_head(&sp->rs_lostatelist); lsp != NULL;
8352 lsp = list_next(&sp->rs_lostatelist, lsp)) {
8353 lsp->rls_locks_cleaned = TRUE;
8354
8355 /* Was this already taken care of above? */
8356 if (!close_of_client &&
8357 sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8358 (void) cleanlocks(sp->rs_finfo->rf_vp,
8359 lsp->rls_locker->rl_pid,
8360 lsp->rls_locker->rl_client->rc_sysidt);
8361 }
8362
8363 /*
8364 * Release any shrlocks associated with this open state ID.
8365 * This must be done before the rfs4_state gets marked closed.
8366 */
8367 if (sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8368 (void) rfs4_unshare(sp);
8369
8370 if (sp->rs_open_access) {
8371 rfs4_dbe_lock(fp->rf_dbe);
8372
8373 /*
8374 * Decrement the count for each access and deny bit that this
8375 * state has contributed to the file.
8376 * If the file counts go to zero
8377 * clear the appropriate bit in the appropriate mask.
8378 */
8379 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) {
8380 fp->rf_access_read--;
8381 fflags |= FREAD;
8382 if (fp->rf_access_read == 0)
8383 fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
8384 }
8385 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) {
8386 fp->rf_access_write--;
8387 fflags |= FWRITE;
8388 if (fp->rf_access_write == 0)
8389 fp->rf_share_access &=
8390 ~OPEN4_SHARE_ACCESS_WRITE;
8391 }
8392 if (sp->rs_open_deny & OPEN4_SHARE_DENY_READ) {
8393 fp->rf_deny_read--;
8394 if (fp->rf_deny_read == 0)
8395 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
8396 }
8397 if (sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) {
8398 fp->rf_deny_write--;
8399 if (fp->rf_deny_write == 0)
8400 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
8401 }
8402
8403 (void) VOP_CLOSE(fp->rf_vp, fflags, 1, (offset_t)0, cr, NULL);
8404
8405 rfs4_dbe_unlock(fp->rf_dbe);
8406
8407 sp->rs_open_access = 0;
8408 sp->rs_open_deny = 0;
8409 }
8410 }
8411
8412 /*
8413 * lock_denied: Fill in a LOCK4deneid structure given an flock64 structure.
8414 */
8415 static nfsstat4
8416 lock_denied(LOCK4denied *dp, struct flock64 *flk)
8417 {
8418 rfs4_lockowner_t *lo;
8419 rfs4_client_t *cp;
8420 uint32_t len;
8421
8422 lo = rfs4_findlockowner_by_pid(flk->l_pid);
8423 if (lo != NULL) {
8424 cp = lo->rl_client;
8425 if (rfs4_lease_expired(cp)) {
8426 rfs4_lockowner_rele(lo);
8427 rfs4_dbe_hold(cp->rc_dbe);
8428 rfs4_client_close(cp);
8429 return (NFS4ERR_EXPIRED);
8430 }
8431 dp->owner.clientid = lo->rl_owner.clientid;
8432 len = lo->rl_owner.owner_len;
8433 dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8434 bcopy(lo->rl_owner.owner_val, dp->owner.owner_val, len);
8435 dp->owner.owner_len = len;
8436 rfs4_lockowner_rele(lo);
8437 goto finish;
8438 }
8439
8440 /*
8441 * Its not a NFS4 lock. We take advantage that the upper 32 bits
8442 * of the client id contain the boot time for a NFS4 lock. So we
8443 * fabricate and identity by setting clientid to the sysid, and
8444 * the lock owner to the pid.
8445 */
8446 dp->owner.clientid = flk->l_sysid;
8447 len = sizeof (pid_t);
8448 dp->owner.owner_len = len;
8449 dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8450 bcopy(&flk->l_pid, dp->owner.owner_val, len);
8451 finish:
8452 dp->offset = flk->l_start;
8453 dp->length = flk->l_len;
8454
8455 if (flk->l_type == F_RDLCK)
8456 dp->locktype = READ_LT;
8457 else if (flk->l_type == F_WRLCK)
8458 dp->locktype = WRITE_LT;
8459 else
8460 return (NFS4ERR_INVAL); /* no mapping from POSIX ltype to v4 */
8461
8462 return (NFS4_OK);
8463 }
8464
8465 /*
8466 * The NFSv4.0 LOCK operation does not support the blocking lock (at the
8467 * NFSv4.0 protocol level) so the client needs to resend the LOCK request in a
8468 * case the lock is denied by the NFSv4.0 server. NFSv4.0 clients are prepared
8469 * for that (obviously); they are sending the LOCK requests with some delays
8470 * between the attempts. See nfs4frlock() and nfs4_block_and_wait() for the
8471 * locking and delay implementation at the client side.
8472 *
8473 * To make the life of the clients easier, the NFSv4.0 server tries to do some
8474 * fast retries on its own (the for loop below) in a hope the lock will be
8475 * available soon. And if not, the client won't need to resend the LOCK
8476 * requests so fast to check the lock availability. This basically saves some
8477 * network traffic and tries to make sure the client gets the lock ASAP.
8478 */
8479 static int
8480 setlock(vnode_t *vp, struct flock64 *flock, int flag, cred_t *cred)
8481 {
8482 int error;
8483 struct flock64 flk;
8484 int i;
8485 clock_t delaytime;
8486 int cmd;
8487 int spin_cnt = 0;
8488
8489 cmd = nbl_need_check(vp) ? F_SETLK_NBMAND : F_SETLK;
8490 retry:
8491 delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
8492
8493 for (i = 0; i < rfs4_maxlock_tries; i++) {
8494 LOCK_PRINT(rfs4_debug, "setlock", cmd, flock);
8495 error = VOP_FRLOCK(vp, cmd,
8496 flock, flag, (u_offset_t)0, NULL, cred, NULL);
8497
8498 if (error != EAGAIN && error != EACCES)
8499 break;
8500
8501 if (i < rfs4_maxlock_tries - 1) {
8502 delay(delaytime);
8503 delaytime *= 2;
8504 }
8505 }
8506
8507 if (error == EAGAIN || error == EACCES) {
8508 /* Get the owner of the lock */
8509 flk = *flock;
8510 LOCK_PRINT(rfs4_debug, "setlock", F_GETLK, &flk);
8511 if (VOP_FRLOCK(vp, F_GETLK, &flk, flag, 0, NULL, cred,
8512 NULL) == 0) {
8513 /*
8514 * There's a race inherent in the current VOP_FRLOCK
8515 * design where:
8516 * a: "other guy" takes a lock that conflicts with a
8517 * lock we want
8518 * b: we attempt to take our lock (non-blocking) and
8519 * the attempt fails.
8520 * c: "other guy" releases the conflicting lock
8521 * d: we ask what lock conflicts with the lock we want,
8522 * getting F_UNLCK (no lock blocks us)
8523 *
8524 * If we retry the non-blocking lock attempt in this
8525 * case (restart at step 'b') there's some possibility
8526 * that many such attempts might fail. However a test
8527 * designed to actually provoke this race shows that
8528 * the vast majority of cases require no retry, and
8529 * only a few took as many as three retries. Here's
8530 * the test outcome:
8531 *
8532 * number of retries how many times we needed
8533 * that many retries
8534 * 0 79461
8535 * 1 862
8536 * 2 49
8537 * 3 5
8538 *
8539 * Given those empirical results, we arbitrarily limit
8540 * the retry count to ten.
8541 *
8542 * If we actually make to ten retries and give up,
8543 * nothing catastrophic happens, but we're unable to
8544 * return the information about the conflicting lock to
8545 * the NFS client. That's an acceptable trade off vs.
8546 * letting this retry loop run forever.
8547 */
8548 if (flk.l_type == F_UNLCK) {
8549 if (spin_cnt++ < 10) {
8550 /* No longer locked, retry */
8551 goto retry;
8552 }
8553 } else {
8554 *flock = flk;
8555 LOCK_PRINT(rfs4_debug, "setlock(blocking lock)",
8556 F_GETLK, &flk);
8557 }
8558 }
8559 }
8560
8561 return (error);
8562 }
8563
/*
 * Apply a byte-range lock or unlock request for a lock-owner state by
 * calling setlock() on the vnode of the file the state refers to.
 *
 *	lsp		- lock-owner state; supplies the lock owner (whose
 *			  rl_pid becomes l_pid) and the open state
 *	locktype	- NFSv4 lock type; only consulted when the result
 *			  op is not OP_LOCKU
 *	offset, length	- byte range.  A length of 0 is rejected; a length
 *			  of all ones is converted to 0 (to end of file).
 *	cred		- credentials passed through to setlock()
 *	resop		- result op; resop->resop distinguishes unlock from
 *			  lock, and for a denied lock its LOCK4res receives
 *			  the description of the conflicting lock
 *
 * Returns:
 *	NFS4_OK			lock applied; the lock stateid was advanced
 *	NFS4ERR_EXPIRED		the lock owner's client lease has expired
 *	NFS4ERR_INVAL		zero length, a range that is negative when
 *				viewed as off64_t, or EOVERFLOW from below
 *	NFS4ERR_OLD_STATEID	the open state is (or became) closed
 *	NFS4ERR_OPENMODE	open access does not allow this lock type
 *	NFS4ERR_DENIED		conflicting lock (EAGAIN/EACCES)
 *	NFS4ERR_DELAY		ENOLCK
 *	NFS4ERR_NOTSUPP		EINVAL
 *	NFS4ERR_SERVERFAULT	any other error
 * or the status from rfs4_client_sysid() when that call fails.
 */
/*ARGSUSED*/
static nfsstat4
rfs4_do_lock(rfs4_lo_state_t *lsp, nfs_lock_type4 locktype,
    offset4 offset, length4 length, cred_t *cred, nfs_resop4 *resop)
{
	nfsstat4 status;
	rfs4_lockowner_t *lo = lsp->rls_locker;
	rfs4_state_t *sp = lsp->rls_state;
	struct flock64 flock;
	int16_t ltype;
	int flag;
	int error;
	sysid_t sysid;
	LOCK4res *lres;
	vnode_t *vp;

	if (rfs4_lease_expired(lo->rl_client)) {
		return (NFS4ERR_EXPIRED);
	}

	/* sysid ends up in flock.l_sysid and in the cleanlocks() call */
	if ((status = rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
		return (status);

	/* Check for zero length. To lock to end of file use all ones for V4 */
	if (length == 0)
		return (NFS4ERR_INVAL);
	else if (length == (length4)(~0))
		length = 0;		/* Posix to end of file */

retry:
	/* sp's dbe lock is held while rs_closed and rs_share_access are read */
	rfs4_dbe_lock(sp->rs_dbe);
	if (sp->rs_closed == TRUE) {
		rfs4_dbe_unlock(sp->rs_dbe);
		return (NFS4ERR_OLD_STATEID);
	}

	if (resop->resop != OP_LOCKU) {
		/*
		 * A read lock needs read access on the open and a write
		 * lock needs write access.
		 * NOTE(review): the switch has no default, so ltype stays
		 * unassigned for a locktype outside these four values --
		 * confirm that callers can never pass one.
		 */
		switch (locktype) {
		case READ_LT:
		case READW_LT:
			if ((sp->rs_share_access
			    & OPEN4_SHARE_ACCESS_READ) == 0) {
				rfs4_dbe_unlock(sp->rs_dbe);

				return (NFS4ERR_OPENMODE);
			}
			ltype = F_RDLCK;
			break;
		case WRITE_LT:
		case WRITEW_LT:
			if ((sp->rs_share_access
			    & OPEN4_SHARE_ACCESS_WRITE) == 0) {
				rfs4_dbe_unlock(sp->rs_dbe);

				return (NFS4ERR_OPENMODE);
			}
			ltype = F_WRLCK;
			break;
		}
	} else
		ltype = F_UNLCK;

	flock.l_type = ltype;
	flock.l_whence = 0;		/* SEEK_SET */
	flock.l_start = offset;
	flock.l_len = length;
	flock.l_sysid = sysid;
	flock.l_pid = lsp->rls_locker->rl_pid;

	/* Note that length4 is uint64_t but l_len and l_start are off64_t */
	if (flock.l_len < 0 || flock.l_start < 0) {
		rfs4_dbe_unlock(sp->rs_dbe);
		return (NFS4ERR_INVAL);
	}

	/*
	 * N.B. FREAD has the same value as OPEN4_SHARE_ACCESS_READ and
	 * FWRITE has the same value as OPEN4_SHARE_ACCESS_WRITE.
	 */
	flag = (int)sp->rs_share_access | F_REMOTELOCK;

	/* Hold the vnode so it can be used after sp is unlocked below */
	vp = sp->rs_finfo->rf_vp;
	VN_HOLD(vp);

	/*
	 * We need to unlock sp before we call the underlying filesystem to
	 * acquire the file lock.
	 */
	rfs4_dbe_unlock(sp->rs_dbe);

	error = setlock(vp, &flock, flag, cred);

	/*
	 * Make sure the file is still open.  In a case the file was closed in
	 * the meantime, clean the lock we acquired using the setlock() call
	 * above, and return the appropriate error.
	 */
	rfs4_dbe_lock(sp->rs_dbe);
	if (sp->rs_closed == TRUE) {
		cleanlocks(vp, lsp->rls_locker->rl_pid, sysid);
		rfs4_dbe_unlock(sp->rs_dbe);

		VN_RELE(vp);

		return (NFS4ERR_OLD_STATEID);
	}
	rfs4_dbe_unlock(sp->rs_dbe);

	VN_RELE(vp);

	/* Only a successful lock/unlock advances the lock stateid */
	if (error == 0) {
		rfs4_dbe_lock(lsp->rls_dbe);
		next_stateid(&lsp->rls_lockid);
		rfs4_dbe_unlock(lsp->rls_dbe);
	}

	/*
	 * N.B. We map error values to nfsv4 errors.  This is different
	 * than puterrno4 routine.
	 */
	switch (error) {
	case 0:
		status = NFS4_OK;
		break;
	case EAGAIN:
	case EACCES:		/* Old value */
		/* Can only get here if op is OP_LOCK */
		ASSERT(resop->resop == OP_LOCK);
		lres = &resop->nfs_resop4_u.oplock;
		status = NFS4ERR_DENIED;
		/*
		 * lock_denied() reports NFS4ERR_EXPIRED after closing a
		 * conflicting client whose lease had expired; in that case
		 * the whole lock attempt is made again from "retry".
		 */
		if (lock_denied(&lres->LOCK4res_u.denied, &flock)
		    == NFS4ERR_EXPIRED)
			goto retry;
		break;
	case ENOLCK:
		status = NFS4ERR_DELAY;
		break;
	case EOVERFLOW:
		status = NFS4ERR_INVAL;
		break;
	case EINVAL:
		status = NFS4ERR_NOTSUPP;
		break;
	default:
		status = NFS4ERR_SERVERFAULT;
		break;
	}

	return (status);
}
8714
8715 /*ARGSUSED*/
8716 void
8717 rfs4_op_lock(nfs_argop4 *argop, nfs_resop4 *resop,
8718 struct svc_req *req, struct compound_state *cs)
8719 {
8720 LOCK4args *args = &argop->nfs_argop4_u.oplock;
8721 LOCK4res *resp = &resop->nfs_resop4_u.oplock;
8722 nfsstat4 status;
8723 stateid4 *stateid;
8724 rfs4_lockowner_t *lo;
8725 rfs4_client_t *cp;
8726 rfs4_state_t *sp = NULL;
8727 rfs4_lo_state_t *lsp = NULL;
8728 bool_t ls_sw_held = FALSE;
8729 bool_t create = TRUE;
8730 bool_t lcreate = TRUE;
8731 bool_t dup_lock = FALSE;
8732 int rc;
8733
8734 DTRACE_NFSV4_2(op__lock__start, struct compound_state *, cs,
8735 LOCK4args *, args);
8736
8737 if (cs->vp == NULL) {
8738 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8739 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8740 cs, LOCK4res *, resp);
8741 return;
8742 }
8743
8744 if (args->locker.new_lock_owner) {
8745 /* Create a new lockowner for this instance */
8746 open_to_lock_owner4 *olo = &args->locker.locker4_u.open_owner;
8747
8748 NFS4_DEBUG(rfs4_debug, (CE_NOTE, "Creating new lock owner"));
8749
8750 stateid = &olo->open_stateid;
8751 status = rfs4_get_state(stateid, &sp, RFS4_DBS_VALID);
8752 if (status != NFS4_OK) {
8753 NFS4_DEBUG(rfs4_debug,
8754 (CE_NOTE, "Get state failed in lock %d", status));
8755 *cs->statusp = resp->status = status;
8756 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8757 cs, LOCK4res *, resp);
8758 return;
8759 }
8760
8761 /* Ensure specified filehandle matches */
8762 if (cs->vp != sp->rs_finfo->rf_vp) {
8763 rfs4_state_rele(sp);
8764 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8765 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8766 cs, LOCK4res *, resp);
8767 return;
8768 }
8769
8770 /* hold off other access to open_owner while we tinker */
8771 rfs4_sw_enter(&sp->rs_owner->ro_sw);
8772
8773 switch (rc = rfs4_check_stateid_seqid(sp, stateid)) {
8774 case NFS4_CHECK_STATEID_OLD:
8775 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8776 goto end;
8777 case NFS4_CHECK_STATEID_BAD:
8778 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8779 goto end;
8780 case NFS4_CHECK_STATEID_EXPIRED:
8781 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8782 goto end;
8783 case NFS4_CHECK_STATEID_UNCONFIRMED:
8784 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8785 goto end;
8786 case NFS4_CHECK_STATEID_CLOSED:
8787 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8788 goto end;
8789 case NFS4_CHECK_STATEID_OKAY:
8790 case NFS4_CHECK_STATEID_REPLAY:
8791 switch (rfs4_check_olo_seqid(olo->open_seqid,
8792 sp->rs_owner, resop)) {
8793 case NFS4_CHKSEQ_OKAY:
8794 if (rc == NFS4_CHECK_STATEID_OKAY)
8795 break;
8796 /*
8797 * This is replayed stateid; if seqid
8798 * matches next expected, then client
8799 * is using wrong seqid.
8800 */
8801 /* FALLTHROUGH */
8802 case NFS4_CHKSEQ_BAD:
8803 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8804 goto end;
8805 case NFS4_CHKSEQ_REPLAY:
8806 /* This is a duplicate LOCK request */
8807 dup_lock = TRUE;
8808
8809 /*
8810 * For a duplicate we do not want to
8811 * create a new lockowner as it should
8812 * already exist.
8813 * Turn off the lockowner create flag.
8814 */
8815 lcreate = FALSE;
8816 }
8817 break;
8818 }
8819
8820 lo = rfs4_findlockowner(&olo->lock_owner, &lcreate);
8821 if (lo == NULL) {
8822 NFS4_DEBUG(rfs4_debug,
8823 (CE_NOTE, "rfs4_op_lock: no lock owner"));
8824 *cs->statusp = resp->status = NFS4ERR_RESOURCE;
8825 goto end;
8826 }
8827
8828 lsp = rfs4_findlo_state_by_owner(lo, sp, &create);
8829 if (lsp == NULL) {
8830 rfs4_update_lease(sp->rs_owner->ro_client);
8831 /*
8832 * Only update theh open_seqid if this is not
8833 * a duplicate request
8834 */
8835 if (dup_lock == FALSE) {
8836 rfs4_update_open_sequence(sp->rs_owner);
8837 }
8838
8839 NFS4_DEBUG(rfs4_debug,
8840 (CE_NOTE, "rfs4_op_lock: no state"));
8841 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
8842 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8843 rfs4_lockowner_rele(lo);
8844 goto end;
8845 }
8846
8847 /*
8848 * This is the new_lock_owner branch and the client is
8849 * supposed to be associating a new lock_owner with
8850 * the open file at this point. If we find that a
8851 * lock_owner/state association already exists and a
8852 * successful LOCK request was returned to the client,
8853 * an error is returned to the client since this is
8854 * not appropriate. The client should be using the
8855 * existing lock_owner branch.
8856 */
8857 if (dup_lock == FALSE && create == FALSE) {
8858 if (lsp->rls_lock_completed == TRUE) {
8859 *cs->statusp =
8860 resp->status = NFS4ERR_BAD_SEQID;
8861 rfs4_lockowner_rele(lo);
8862 goto end;
8863 }
8864 }
8865
8866 rfs4_update_lease(sp->rs_owner->ro_client);
8867
8868 /*
8869 * Only update theh open_seqid if this is not
8870 * a duplicate request
8871 */
8872 if (dup_lock == FALSE) {
8873 rfs4_update_open_sequence(sp->rs_owner);
8874 }
8875
8876 /*
8877 * If this is a duplicate lock request, just copy the
8878 * previously saved reply and return.
8879 */
8880 if (dup_lock == TRUE) {
8881 /* verify that lock_seqid's match */
8882 if (lsp->rls_seqid != olo->lock_seqid) {
8883 NFS4_DEBUG(rfs4_debug,
8884 (CE_NOTE, "rfs4_op_lock: Dup-Lock seqid bad"
8885 "lsp->seqid=%d old->seqid=%d",
8886 lsp->rls_seqid, olo->lock_seqid));
8887 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8888 } else {
8889 rfs4_copy_reply(resop, &lsp->rls_reply);
8890 /*
8891 * Make sure to copy the just
8892 * retrieved reply status into the
8893 * overall compound status
8894 */
8895 *cs->statusp = resp->status;
8896 }
8897 rfs4_lockowner_rele(lo);
8898 goto end;
8899 }
8900
8901 rfs4_dbe_lock(lsp->rls_dbe);
8902
8903 /* Make sure to update the lock sequence id */
8904 lsp->rls_seqid = olo->lock_seqid;
8905
8906 NFS4_DEBUG(rfs4_debug,
8907 (CE_NOTE, "Lock seqid established as %d", lsp->rls_seqid));
8908
8909 /*
8910 * This is used to signify the newly created lockowner
8911 * stateid and its sequence number. The checks for
8912 * sequence number and increment don't occur on the
8913 * very first lock request for a lockowner.
8914 */
8915 lsp->rls_skip_seqid_check = TRUE;
8916
8917 /* hold off other access to lsp while we tinker */
8918 rfs4_sw_enter(&lsp->rls_sw);
8919 ls_sw_held = TRUE;
8920
8921 rfs4_dbe_unlock(lsp->rls_dbe);
8922
8923 rfs4_lockowner_rele(lo);
8924 } else {
8925 stateid = &args->locker.locker4_u.lock_owner.lock_stateid;
8926 /* get lsp and hold the lock on the underlying file struct */
8927 if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE))
8928 != NFS4_OK) {
8929 *cs->statusp = resp->status = status;
8930 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8931 cs, LOCK4res *, resp);
8932 return;
8933 }
8934 create = FALSE; /* We didn't create lsp */
8935
8936 /* Ensure specified filehandle matches */
8937 if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
8938 rfs4_lo_state_rele(lsp, TRUE);
8939 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8940 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8941 cs, LOCK4res *, resp);
8942 return;
8943 }
8944
8945 /* hold off other access to lsp while we tinker */
8946 rfs4_sw_enter(&lsp->rls_sw);
8947 ls_sw_held = TRUE;
8948
8949 switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
8950 /*
8951 * The stateid looks like it was okay (expected to be
8952 * the next one)
8953 */
8954 case NFS4_CHECK_STATEID_OKAY:
8955 /*
8956 * The sequence id is now checked. Determine
8957 * if this is a replay or if it is in the
8958 * expected (next) sequence. In the case of a
8959 * replay, there are two replay conditions
8960 * that may occur. The first is the normal
8961 * condition where a LOCK is done with a
8962 * NFS4_OK response and the stateid is
8963 * updated. That case is handled below when
8964 * the stateid is identified as a REPLAY. The
8965 * second is the case where an error is
8966 * returned, like NFS4ERR_DENIED, and the
8967 * sequence number is updated but the stateid
8968 * is not updated. This second case is dealt
8969 * with here. So it may seem odd that the
8970 * stateid is okay but the sequence id is a
8971 * replay but it is okay.
8972 */
8973 switch (rfs4_check_lock_seqid(
8974 args->locker.locker4_u.lock_owner.lock_seqid,
8975 lsp, resop)) {
8976 case NFS4_CHKSEQ_REPLAY:
8977 if (resp->status != NFS4_OK) {
8978 /*
8979 * Here is our replay and need
8980 * to verify that the last
8981 * response was an error.
8982 */
8983 *cs->statusp = resp->status;
8984 goto end;
8985 }
8986 /*
8987 * This is done since the sequence id
8988 * looked like a replay but it didn't
8989 * pass our check so a BAD_SEQID is
8990 * returned as a result.
8991 */
8992 /*FALLTHROUGH*/
8993 case NFS4_CHKSEQ_BAD:
8994 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8995 goto end;
8996 case NFS4_CHKSEQ_OKAY:
8997 /* Everything looks okay move ahead */
8998 break;
8999 }
9000 break;
9001 case NFS4_CHECK_STATEID_OLD:
9002 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9003 goto end;
9004 case NFS4_CHECK_STATEID_BAD:
9005 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9006 goto end;
9007 case NFS4_CHECK_STATEID_EXPIRED:
9008 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9009 goto end;
9010 case NFS4_CHECK_STATEID_CLOSED:
9011 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9012 goto end;
9013 case NFS4_CHECK_STATEID_REPLAY:
9014 switch (rfs4_check_lock_seqid(
9015 args->locker.locker4_u.lock_owner.lock_seqid,
9016 lsp, resop)) {
9017 case NFS4_CHKSEQ_OKAY:
9018 /*
9019 * This is a replayed stateid; if
9020 * seqid matches the next expected,
9021 * then client is using wrong seqid.
9022 */
9023 case NFS4_CHKSEQ_BAD:
9024 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9025 goto end;
9026 case NFS4_CHKSEQ_REPLAY:
9027 rfs4_update_lease(lsp->rls_locker->rl_client);
9028 *cs->statusp = status = resp->status;
9029 goto end;
9030 }
9031 break;
9032 default:
9033 ASSERT(FALSE);
9034 break;
9035 }
9036
9037 rfs4_update_lock_sequence(lsp);
9038 rfs4_update_lease(lsp->rls_locker->rl_client);
9039 }
9040
9041 /*
9042 * NFS4 only allows locking on regular files, so
9043 * verify type of object.
9044 */
9045 if (cs->vp->v_type != VREG) {
9046 if (cs->vp->v_type == VDIR)
9047 status = NFS4ERR_ISDIR;
9048 else
9049 status = NFS4ERR_INVAL;
9050 goto out;
9051 }
9052
9053 cp = lsp->rls_state->rs_owner->ro_client;
9054
9055 if (rfs4_clnt_in_grace(cp) && !args->reclaim) {
9056 status = NFS4ERR_GRACE;
9057 goto out;
9058 }
9059
9060 if (rfs4_clnt_in_grace(cp) && args->reclaim && !cp->rc_can_reclaim) {
9061 status = NFS4ERR_NO_GRACE;
9062 goto out;
9063 }
9064
9065 if (!rfs4_clnt_in_grace(cp) && args->reclaim) {
9066 status = NFS4ERR_NO_GRACE;
9067 goto out;
9068 }
9069
9070 if (lsp->rls_state->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE)
9071 cs->deleg = TRUE;
9072
9073 status = rfs4_do_lock(lsp, args->locktype,
9074 args->offset, args->length, cs->cr, resop);
9075
9076 out:
9077 lsp->rls_skip_seqid_check = FALSE;
9078
9079 *cs->statusp = resp->status = status;
9080
9081 if (status == NFS4_OK) {
9082 resp->LOCK4res_u.lock_stateid = lsp->rls_lockid.stateid;
9083 lsp->rls_lock_completed = TRUE;
9084 }
9085 /*
9086 * Only update the "OPEN" response here if this was a new
9087 * lock_owner
9088 */
9089 if (sp)
9090 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
9091
9092 rfs4_update_lock_resp(lsp, resop);
9093
9094 end:
9095 if (lsp) {
9096 if (ls_sw_held)
9097 rfs4_sw_exit(&lsp->rls_sw);
9098 /*
9099 * If an sp obtained, then the lsp does not represent
9100 * a lock on the file struct.
9101 */
9102 if (sp != NULL)
9103 rfs4_lo_state_rele(lsp, FALSE);
9104 else
9105 rfs4_lo_state_rele(lsp, TRUE);
9106 }
9107 if (sp) {
9108 rfs4_sw_exit(&sp->rs_owner->ro_sw);
9109 rfs4_state_rele(sp);
9110 }
9111
9112 DTRACE_NFSV4_2(op__lock__done, struct compound_state *, cs,
9113 LOCK4res *, resp);
9114 }
9115
9116 /* free function for LOCK/LOCKT */
9117 static void
9118 lock_denied_free(nfs_resop4 *resop)
9119 {
9120 LOCK4denied *dp = NULL;
9121
9122 switch (resop->resop) {
9123 case OP_LOCK:
9124 if (resop->nfs_resop4_u.oplock.status == NFS4ERR_DENIED)
9125 dp = &resop->nfs_resop4_u.oplock.LOCK4res_u.denied;
9126 break;
9127 case OP_LOCKT:
9128 if (resop->nfs_resop4_u.oplockt.status == NFS4ERR_DENIED)
9129 dp = &resop->nfs_resop4_u.oplockt.denied;
9130 break;
9131 default:
9132 break;
9133 }
9134
9135 if (dp)
9136 kmem_free(dp->owner.owner_val, dp->owner.owner_len);
9137 }
9138
9139 /*ARGSUSED*/
9140 void
9141 rfs4_op_locku(nfs_argop4 *argop, nfs_resop4 *resop,
9142 struct svc_req *req, struct compound_state *cs)
9143 {
9144 LOCKU4args *args = &argop->nfs_argop4_u.oplocku;
9145 LOCKU4res *resp = &resop->nfs_resop4_u.oplocku;
9146 nfsstat4 status;
9147 stateid4 *stateid = &args->lock_stateid;
9148 rfs4_lo_state_t *lsp;
9149
9150 DTRACE_NFSV4_2(op__locku__start, struct compound_state *, cs,
9151 LOCKU4args *, args);
9152
9153 if (cs->vp == NULL) {
9154 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9155 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9156 LOCKU4res *, resp);
9157 return;
9158 }
9159
9160 if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE)) != NFS4_OK) {
9161 *cs->statusp = resp->status = status;
9162 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9163 LOCKU4res *, resp);
9164 return;
9165 }
9166
9167 /* Ensure specified filehandle matches */
9168 if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
9169 rfs4_lo_state_rele(lsp, TRUE);
9170 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9171 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9172 LOCKU4res *, resp);
9173 return;
9174 }
9175
9176 /* hold off other access to lsp while we tinker */
9177 rfs4_sw_enter(&lsp->rls_sw);
9178
9179 switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
9180 case NFS4_CHECK_STATEID_OKAY:
9181 if (rfs4_check_lock_seqid(args->seqid, lsp, resop)
9182 != NFS4_CHKSEQ_OKAY) {
9183 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9184 goto end;
9185 }
9186 break;
9187 case NFS4_CHECK_STATEID_OLD:
9188 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9189 goto end;
9190 case NFS4_CHECK_STATEID_BAD:
9191 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9192 goto end;
9193 case NFS4_CHECK_STATEID_EXPIRED:
9194 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9195 goto end;
9196 case NFS4_CHECK_STATEID_CLOSED:
9197 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9198 goto end;
9199 case NFS4_CHECK_STATEID_REPLAY:
9200 switch (rfs4_check_lock_seqid(args->seqid, lsp, resop)) {
9201 case NFS4_CHKSEQ_OKAY:
9202 /*
9203 * This is a replayed stateid; if
9204 * seqid matches the next expected,
9205 * then client is using wrong seqid.
9206 */
9207 case NFS4_CHKSEQ_BAD:
9208 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9209 goto end;
9210 case NFS4_CHKSEQ_REPLAY:
9211 rfs4_update_lease(lsp->rls_locker->rl_client);
9212 *cs->statusp = status = resp->status;
9213 goto end;
9214 }
9215 break;
9216 default:
9217 ASSERT(FALSE);
9218 break;
9219 }
9220
9221 rfs4_update_lock_sequence(lsp);
9222 rfs4_update_lease(lsp->rls_locker->rl_client);
9223
9224 /*
9225 * NFS4 only allows locking on regular files, so
9226 * verify type of object.
9227 */
9228 if (cs->vp->v_type != VREG) {
9229 if (cs->vp->v_type == VDIR)
9230 status = NFS4ERR_ISDIR;
9231 else
9232 status = NFS4ERR_INVAL;
9233 goto out;
9234 }
9235
9236 if (rfs4_clnt_in_grace(lsp->rls_state->rs_owner->ro_client)) {
9237 status = NFS4ERR_GRACE;
9238 goto out;
9239 }
9240
9241 status = rfs4_do_lock(lsp, args->locktype,
9242 args->offset, args->length, cs->cr, resop);
9243
9244 out:
9245 *cs->statusp = resp->status = status;
9246
9247 if (status == NFS4_OK)
9248 resp->lock_stateid = lsp->rls_lockid.stateid;
9249
9250 rfs4_update_lock_resp(lsp, resop);
9251
9252 end:
9253 rfs4_sw_exit(&lsp->rls_sw);
9254 rfs4_lo_state_rele(lsp, TRUE);
9255
9256 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9257 LOCKU4res *, resp);
9258 }
9259
9260 /*
9261 * LOCKT is a best effort routine, the client can not be guaranteed that
9262 * the status return is still in effect by the time the reply is received.
9263 * They are numerous race conditions in this routine, but we are not required
9264 * and can not be accurate.
9265 */
9266 /*ARGSUSED*/
9267 void
9268 rfs4_op_lockt(nfs_argop4 *argop, nfs_resop4 *resop,
9269 struct svc_req *req, struct compound_state *cs)
9270 {
9271 LOCKT4args *args = &argop->nfs_argop4_u.oplockt;
9272 LOCKT4res *resp = &resop->nfs_resop4_u.oplockt;
9273 rfs4_lockowner_t *lo;
9274 rfs4_client_t *cp;
9275 bool_t create = FALSE;
9276 struct flock64 flk;
9277 int error;
9278 int flag = FREAD | FWRITE;
9279 int ltype;
9280 length4 posix_length;
9281 sysid_t sysid;
9282 pid_t pid;
9283
9284 DTRACE_NFSV4_2(op__lockt__start, struct compound_state *, cs,
9285 LOCKT4args *, args);
9286
9287 if (cs->vp == NULL) {
9288 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9289 goto out;
9290 }
9291
9292 /*
9293 * NFS4 only allows locking on regular files, so
9294 * verify type of object.
9295 */
9296 if (cs->vp->v_type != VREG) {
9297 if (cs->vp->v_type == VDIR)
9298 *cs->statusp = resp->status = NFS4ERR_ISDIR;
9299 else
9300 *cs->statusp = resp->status = NFS4ERR_INVAL;
9301 goto out;
9302 }
9303
9304 /*
9305 * Check out the clientid to ensure the server knows about it
9306 * so that we correctly inform the client of a server reboot.
9307 */
9308 if ((cp = rfs4_findclient_by_id(args->owner.clientid, FALSE))
9309 == NULL) {
9310 *cs->statusp = resp->status =
9311 rfs4_check_clientid(&args->owner.clientid, 0);
9312 goto out;
9313 }
9314 if (rfs4_lease_expired(cp)) {
9315 rfs4_client_close(cp);
9316 /*
9317 * Protocol doesn't allow returning NFS4ERR_STALE as
9318 * other operations do on this check so STALE_CLIENTID
9319 * is returned instead
9320 */
9321 *cs->statusp = resp->status = NFS4ERR_STALE_CLIENTID;
9322 goto out;
9323 }
9324
9325 if (rfs4_clnt_in_grace(cp) && !(cp->rc_can_reclaim)) {
9326 *cs->statusp = resp->status = NFS4ERR_GRACE;
9327 rfs4_client_rele(cp);
9328 goto out;
9329 }
9330 rfs4_client_rele(cp);
9331
9332 resp->status = NFS4_OK;
9333
9334 switch (args->locktype) {
9335 case READ_LT:
9336 case READW_LT:
9337 ltype = F_RDLCK;
9338 break;
9339 case WRITE_LT:
9340 case WRITEW_LT:
9341 ltype = F_WRLCK;
9342 break;
9343 }
9344
9345 posix_length = args->length;
9346 /* Check for zero length. To lock to end of file use all ones for V4 */
9347 if (posix_length == 0) {
9348 *cs->statusp = resp->status = NFS4ERR_INVAL;
9349 goto out;
9350 } else if (posix_length == (length4)(~0)) {
9351 posix_length = 0; /* Posix to end of file */
9352 }
9353
9354 /* Find or create a lockowner */
9355 lo = rfs4_findlockowner(&args->owner, &create);
9356
9357 if (lo) {
9358 pid = lo->rl_pid;
9359 if ((resp->status =
9360 rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
9361 goto err;
9362 } else {
9363 pid = 0;
9364 sysid = lockt_sysid;
9365 }
9366 retry:
9367 flk.l_type = ltype;
9368 flk.l_whence = 0; /* SEEK_SET */
9369 flk.l_start = args->offset;
9370 flk.l_len = posix_length;
9371 flk.l_sysid = sysid;
9372 flk.l_pid = pid;
9373 flag |= F_REMOTELOCK;
9374
9375 LOCK_PRINT(rfs4_debug, "rfs4_op_lockt", F_GETLK, &flk);
9376
9377 /* Note that length4 is uint64_t but l_len and l_start are off64_t */
9378 if (flk.l_len < 0 || flk.l_start < 0) {
9379 resp->status = NFS4ERR_INVAL;
9380 goto err;
9381 }
9382 error = VOP_FRLOCK(cs->vp, F_GETLK, &flk, flag, (u_offset_t)0,
9383 NULL, cs->cr, NULL);
9384
9385 /*
9386 * N.B. We map error values to nfsv4 errors. This is differrent
9387 * than puterrno4 routine.
9388 */
9389 switch (error) {
9390 case 0:
9391 if (flk.l_type == F_UNLCK)
9392 resp->status = NFS4_OK;
9393 else {
9394 if (lock_denied(&resp->denied, &flk) == NFS4ERR_EXPIRED)
9395 goto retry;
9396 resp->status = NFS4ERR_DENIED;
9397 }
9398 break;
9399 case EOVERFLOW:
9400 resp->status = NFS4ERR_INVAL;
9401 break;
9402 case EINVAL:
9403 resp->status = NFS4ERR_NOTSUPP;
9404 break;
9405 default:
9406 cmn_err(CE_WARN, "rfs4_op_lockt: unexpected errno (%d)",
9407 error);
9408 resp->status = NFS4ERR_SERVERFAULT;
9409 break;
9410 }
9411
9412 err:
9413 if (lo)
9414 rfs4_lockowner_rele(lo);
9415 *cs->statusp = resp->status;
9416 out:
9417 DTRACE_NFSV4_2(op__lockt__done, struct compound_state *, cs,
9418 LOCKT4res *, resp);
9419 }
9420
9421 int
9422 rfs4_share(rfs4_state_t *sp, uint32_t access, uint32_t deny)
9423 {
9424 int err;
9425 int cmd;
9426 vnode_t *vp;
9427 struct shrlock shr;
9428 struct shr_locowner shr_loco;
9429 int fflags = 0;
9430
9431 ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9432 ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9433
9434 if (sp->rs_closed)
9435 return (NFS4ERR_OLD_STATEID);
9436
9437 vp = sp->rs_finfo->rf_vp;
9438 ASSERT(vp);
9439
9440 shr.s_access = shr.s_deny = 0;
9441
9442 if (access & OPEN4_SHARE_ACCESS_READ) {
9443 fflags |= FREAD;
9444 shr.s_access |= F_RDACC;
9445 }
9446 if (access & OPEN4_SHARE_ACCESS_WRITE) {
9447 fflags |= FWRITE;
9448 shr.s_access |= F_WRACC;
9449 }
9450 ASSERT(shr.s_access);
9451
9452 if (deny & OPEN4_SHARE_DENY_READ)
9453 shr.s_deny |= F_RDDNY;
9454 if (deny & OPEN4_SHARE_DENY_WRITE)
9455 shr.s_deny |= F_WRDNY;
9456
9457 shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9458 shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9459 shr_loco.sl_pid = shr.s_pid;
9460 shr_loco.sl_id = shr.s_sysid;
9461 shr.s_owner = (caddr_t)&shr_loco;
9462 shr.s_own_len = sizeof (shr_loco);
9463
9464 cmd = nbl_need_check(vp) ? F_SHARE_NBMAND : F_SHARE;
9465
9466 err = VOP_SHRLOCK(vp, cmd, &shr, fflags, CRED(), NULL);
9467 if (err != 0) {
9468 if (err == EAGAIN)
9469 err = NFS4ERR_SHARE_DENIED;
9470 else
9471 err = puterrno4(err);
9472 return (err);
9473 }
9474
9475 sp->rs_share_access |= access;
9476 sp->rs_share_deny |= deny;
9477
9478 return (0);
9479 }
9480
9481 int
9482 rfs4_unshare(rfs4_state_t *sp)
9483 {
9484 int err;
9485 struct shrlock shr;
9486 struct shr_locowner shr_loco;
9487
9488 ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9489
9490 if (sp->rs_closed || sp->rs_share_access == 0)
9491 return (0);
9492
9493 ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9494 ASSERT(sp->rs_finfo->rf_vp);
9495
9496 shr.s_access = shr.s_deny = 0;
9497 shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9498 shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9499 shr_loco.sl_pid = shr.s_pid;
9500 shr_loco.sl_id = shr.s_sysid;
9501 shr.s_owner = (caddr_t)&shr_loco;
9502 shr.s_own_len = sizeof (shr_loco);
9503
9504 err = VOP_SHRLOCK(sp->rs_finfo->rf_vp, F_UNSHARE, &shr, 0, CRED(),
9505 NULL);
9506 if (err != 0) {
9507 err = puterrno4(err);
9508 return (err);
9509 }
9510
9511 sp->rs_share_access = 0;
9512 sp->rs_share_deny = 0;
9513
9514 return (0);
9515
9516 }
9517
9518 static int
9519 rdma_setup_read_data4(READ4args *args, READ4res *rok)
9520 {
9521 struct clist *wcl;
9522 count4 count = rok->data_len;
9523 int wlist_len;
9524
9525 wcl = args->wlist;
9526 if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
9527 return (FALSE);
9528 }
9529 wcl = args->wlist;
9530 rok->wlist_len = wlist_len;
9531 rok->wlist = wcl;
9532 return (TRUE);
9533 }
9534
/* Tunable: set to a nonzero value to disable server referrals. */
9536 int rfs4_no_referrals = 0;
9537
9538 /*
9539 * Find an NFS record in reparse point data.
9540 * Returns 0 for success and <0 or an errno value on failure.
9541 */
9542 int
9543 vn_find_nfs_record(vnode_t *vp, nvlist_t **nvlp, char **svcp, char **datap)
9544 {
9545 int err;
9546 char *stype, *val;
9547 nvlist_t *nvl;
9548 nvpair_t *curr;
9549
9550 if ((nvl = reparse_init()) == NULL)
9551 return (-1);
9552
9553 if ((err = reparse_vnode_parse(vp, nvl)) != 0) {
9554 reparse_free(nvl);
9555 return (err);
9556 }
9557
9558 curr = NULL;
9559 while ((curr = nvlist_next_nvpair(nvl, curr)) != NULL) {
9560 if ((stype = nvpair_name(curr)) == NULL) {
9561 reparse_free(nvl);
9562 return (-2);
9563 }
9564 if (strncasecmp(stype, "NFS", 3) == 0)
9565 break;
9566 }
9567
9568 if ((curr == NULL) ||
9569 (nvpair_value_string(curr, &val))) {
9570 reparse_free(nvl);
9571 return (-3);
9572 }
9573 *nvlp = nvl;
9574 *svcp = stype;
9575 *datap = val;
9576 return (0);
9577 }
9578
9579 int
9580 vn_is_nfs_reparse(vnode_t *vp, cred_t *cr)
9581 {
9582 nvlist_t *nvl;
9583 char *s, *d;
9584
9585 if (rfs4_no_referrals != 0)
9586 return (B_FALSE);
9587
9588 if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9589 return (B_FALSE);
9590
9591 if (vn_find_nfs_record(vp, &nvl, &s, &d) != 0)
9592 return (B_FALSE);
9593
9594 reparse_free(nvl);
9595
9596 return (B_TRUE);
9597 }
9598
9599 /*
9600 * There is a user-level copy of this routine in ref_subr.c.
9601 * Changes should be kept in sync.
9602 */
9603 static int
9604 nfs4_create_components(char *path, component4 *comp4)
9605 {
9606 int slen, plen, ncomp;
9607 char *ori_path, *nxtc, buf[MAXNAMELEN];
9608
9609 if (path == NULL)
9610 return (0);
9611
9612 plen = strlen(path) + 1; /* include the terminator */
9613 ori_path = path;
9614 ncomp = 0;
9615
9616 /* count number of components in the path */
9617 for (nxtc = path; nxtc < ori_path + plen; nxtc++) {
9618 if (*nxtc == '/' || *nxtc == '\0' || *nxtc == '\n') {
9619 if ((slen = nxtc - path) == 0) {
9620 path = nxtc + 1;
9621 continue;
9622 }
9623
9624 if (comp4 != NULL) {
9625 bcopy(path, buf, slen);
9626 buf[slen] = '\0';
9627 (void) str_to_utf8(buf, &comp4[ncomp]);
9628 }
9629
9630 ncomp++; /* 1 valid component */
9631 path = nxtc + 1;
9632 }
9633 if (*nxtc == '\0' || *nxtc == '\n')
9634 break;
9635 }
9636
9637 return (ncomp);
9638 }
9639
9640 /*
9641 * There is a user-level copy of this routine in ref_subr.c.
9642 * Changes should be kept in sync.
9643 */
9644 static int
9645 make_pathname4(char *path, pathname4 *pathname)
9646 {
9647 int ncomp;
9648 component4 *comp4;
9649
9650 if (pathname == NULL)
9651 return (0);
9652
9653 if (path == NULL) {
9654 pathname->pathname4_val = NULL;
9655 pathname->pathname4_len = 0;
9656 return (0);
9657 }
9658
9659 /* count number of components to alloc buffer */
9660 if ((ncomp = nfs4_create_components(path, NULL)) == 0) {
9661 pathname->pathname4_val = NULL;
9662 pathname->pathname4_len = 0;
9663 return (0);
9664 }
9665 comp4 = kmem_zalloc(ncomp * sizeof (component4), KM_SLEEP);
9666
9667 /* copy components into allocated buffer */
9668 ncomp = nfs4_create_components(path, comp4);
9669
9670 pathname->pathname4_val = comp4;
9671 pathname->pathname4_len = ncomp;
9672
9673 return (ncomp);
9674 }
9675
9676 #define xdr_fs_locations4 xdr_fattr4_fs_locations
9677
9678 fs_locations4 *
9679 fetch_referral(vnode_t *vp, cred_t *cr)
9680 {
9681 nvlist_t *nvl;
9682 char *stype, *sdata;
9683 fs_locations4 *result;
9684 char buf[1024];
9685 size_t bufsize;
9686 XDR xdr;
9687 int err;
9688
9689 /*
9690 * Check attrs to ensure it's a reparse point
9691 */
9692 if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9693 return (NULL);
9694
9695 /*
9696 * Look for an NFS record and get the type and data
9697 */
9698 if (vn_find_nfs_record(vp, &nvl, &stype, &sdata) != 0)
9699 return (NULL);
9700
9701 /*
9702 * With the type and data, upcall to get the referral
9703 */
9704 bufsize = sizeof (buf);
9705 bzero(buf, sizeof (buf));
9706 err = reparse_kderef((const char *)stype, (const char *)sdata,
9707 buf, &bufsize);
9708 reparse_free(nvl);
9709
9710 DTRACE_PROBE4(nfs4serv__func__referral__upcall,
9711 char *, stype, char *, sdata, char *, buf, int, err);
9712 if (err) {
9713 cmn_err(CE_NOTE,
9714 "reparsed daemon not running: unable to get referral (%d)",
9715 err);
9716 return (NULL);
9717 }
9718
9719 /*
9720 * We get an XDR'ed record back from the kderef call
9721 */
9722 xdrmem_create(&xdr, buf, bufsize, XDR_DECODE);
9723 result = kmem_alloc(sizeof (fs_locations4), KM_SLEEP);
9724 err = xdr_fs_locations4(&xdr, result);
9725 XDR_DESTROY(&xdr);
9726 if (err != TRUE) {
9727 DTRACE_PROBE1(nfs4serv__func__referral__upcall__xdrfail,
9728 int, err);
9729 return (NULL);
9730 }
9731
9732 /*
9733 * Look at path to recover fs_root, ignoring the leading '/'
9734 */
9735 (void) make_pathname4(vp->v_path, &result->fs_root);
9736
9737 return (result);
9738 }
9739
9740 char *
9741 build_symlink(vnode_t *vp, cred_t *cr, size_t *strsz)
9742 {
9743 fs_locations4 *fsl;
9744 fs_location4 *fs;
9745 char *server, *path, *symbuf;
9746 static char *prefix = "/net/";
9747 int i, size, npaths;
9748 uint_t len;
9749
9750 /* Get the referral */
9751 if ((fsl = fetch_referral(vp, cr)) == NULL)
9752 return (NULL);
9753
9754 /* Deal with only the first location and first server */
9755 fs = &fsl->locations_val[0];
9756 server = utf8_to_str(&fs->server_val[0], &len, NULL);
9757 if (server == NULL) {
9758 rfs4_free_fs_locations4(fsl);
9759 kmem_free(fsl, sizeof (fs_locations4));
9760 return (NULL);
9761 }
9762
9763 /* Figure out size for "/net/" + host + /path/path/path + NULL */
9764 size = strlen(prefix) + len;
9765 for (i = 0; i < fs->rootpath.pathname4_len; i++)
9766 size += fs->rootpath.pathname4_val[i].utf8string_len + 1;
9767
9768 /* Allocate the symlink buffer and fill it */
9769 symbuf = kmem_zalloc(size, KM_SLEEP);
9770 (void) strcat(symbuf, prefix);
9771 (void) strcat(symbuf, server);
9772 kmem_free(server, len);
9773
9774 npaths = 0;
9775 for (i = 0; i < fs->rootpath.pathname4_len; i++) {
9776 path = utf8_to_str(&fs->rootpath.pathname4_val[i], &len, NULL);
9777 if (path == NULL)
9778 continue;
9779 (void) strcat(symbuf, "/");
9780 (void) strcat(symbuf, path);
9781 npaths++;
9782 kmem_free(path, len);
9783 }
9784
9785 rfs4_free_fs_locations4(fsl);
9786 kmem_free(fsl, sizeof (fs_locations4));
9787
9788 if (strsz != NULL)
9789 *strsz = size;
9790 return (symbuf);
9791 }
9792
9793 /*
9794 * Check to see if we have a downrev Solaris client, so that we
9795 * can send it a symlink instead of a referral.
9796 */
9797 int
9798 client_is_downrev(struct svc_req *req)
9799 {
9800 struct sockaddr *ca;
9801 rfs4_clntip_t *ci;
9802 bool_t create = FALSE;
9803 int is_downrev;
9804
9805 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
9806 ASSERT(ca);
9807 ci = rfs4_find_clntip(ca, &create);
9808 if (ci == NULL)
9809 return (0);
9810 is_downrev = ci->ri_no_referrals;
9811 rfs4_dbe_rele(ci->ri_dbe);
9812 return (is_downrev);
9813 }