1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
24 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
25 * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
26 */
27
28 /*
29 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
30 * All Rights Reserved
31 */
32
33 #include <sys/param.h>
34 #include <sys/types.h>
35 #include <sys/systm.h>
36 #include <sys/cred.h>
37 #include <sys/buf.h>
38 #include <sys/vfs.h>
39 #include <sys/vfs_opreg.h>
40 #include <sys/vnode.h>
41 #include <sys/uio.h>
42 #include <sys/errno.h>
43 #include <sys/sysmacros.h>
44 #include <sys/statvfs.h>
45 #include <sys/kmem.h>
46 #include <sys/dirent.h>
47 #include <sys/cmn_err.h>
48 #include <sys/debug.h>
49 #include <sys/systeminfo.h>
50 #include <sys/flock.h>
51 #include <sys/pathname.h>
52 #include <sys/nbmlock.h>
53 #include <sys/share.h>
54 #include <sys/atomic.h>
55 #include <sys/policy.h>
56 #include <sys/fem.h>
57 #include <sys/sdt.h>
58 #include <sys/ddi.h>
59 #include <sys/zone.h>
60
61 #include <fs/fs_reparse.h>
62
63 #include <rpc/types.h>
64 #include <rpc/auth.h>
65 #include <rpc/rpcsec_gss.h>
66 #include <rpc/svc.h>
67
68 #include <nfs/nfs.h>
69 #include <nfs/export.h>
70 #include <nfs/nfs_cmd.h>
71 #include <nfs/lm.h>
72 #include <nfs/nfs4.h>
73
74 #include <sys/strsubr.h>
75 #include <sys/strsun.h>
76
77 #include <inet/common.h>
78 #include <inet/ip.h>
79 #include <inet/ip6.h>
80
81 #include <sys/tsol/label.h>
82 #include <sys/tsol/tndb.h>
83
/*
 * Tunables.
 *
 * rfs4_maxlock_tries and rfs4_lock_delay are file-local variables seeded
 * from the compile-time defaults RFS4_MAXLOCK_TRIES and RFS4_LOCK_DELAY.
 * rdma_svc_ops and nfs_loaned_buffers are defined outside this file.
 */
#define	RFS4_MAXLOCK_TRIES 4	/* Try to get the lock this many times */
static int rfs4_maxlock_tries = RFS4_MAXLOCK_TRIES;
#define	RFS4_LOCK_DELAY 10	/* Milliseconds */
static clock_t rfs4_lock_delay = RFS4_LOCK_DELAY;
extern struct svc_ops rdma_svc_ops;
extern int nfs_loaned_buffers;
/* End of Tunables */

/* Forward declaration; the definition appears later in this file. */
static int rdma_setup_read_data4(READ4args *, READ4res *);
93
/*
 * Used to bump the stateid4.seqid value and show changes in the stateid.
 * Pre-increments the bits.chgseq member of the object `sp' points to and
 * evaluates to the incremented value.
 */
#define	next_stateid(sp) (++(sp)->bits.chgseq)
98
/*
 * RFS4_MINLEN_ENTRY4: XDR-encoded size of the smallest possible dirent.
 * This is used to return NFS4ERR_TOOSMALL when clients specify a
 * maxcount that isn't large enough to hold the smallest possible
 * XDR-encoded dirent.
 *
 *	sizeof cookie (8 bytes) +
 *	sizeof name_len (4 bytes) +
 *	sizeof smallest (padded) name (4 bytes) +
 *	sizeof bitmap4_len (12 bytes) +   NOTE: we always encode len=2 bm4
 *	sizeof attrlist4_len (4 bytes) +
 *	sizeof next boolean (4 bytes)
 *	= 36 bytes
 *
 * RFS4_MINLEN_RDDIR4: XDR-encoded size of a READDIR op reply containing
 * the smallest possible entry4 (assumes no attrs requested).
 *
 *	sizeof nfsstat4 (4 bytes) +
 *	sizeof verifier4 (8 bytes) +
 *	sizeof entry4list bool (4 bytes) +
 *	sizeof entry4 (36 bytes) +
 *	sizeof eof bool (4 bytes)
 *
 * RFS4_MINLEN_RDDIR_BUF: minimum length of the buffer the server will
 * provide to VOP_READDIR.  Its value is the size of the maximum possible
 * dirent for Solaris.  The DIRENT64_RECLEN macro returns the size of the
 * dirent required for a given name length.  MAXNAMELEN is the maximum
 * filename length allowed in Solaris.  The first two DIRENT64_RECLEN()
 * terms allow for the "." and ".." entries -- just a minor tweak to try
 * to guarantee that the buffer we give to VOP_READDIR will be large enough
 * to hold ".", "..", and the largest possible Solaris dirent64.
 */
#define	RFS4_MINLEN_ENTRY4 36
#define	RFS4_MINLEN_RDDIR4 (4 + NFS4_VERIFIER_SIZE + 4 + RFS4_MINLEN_ENTRY4 + 4)
#define	RFS4_MINLEN_RDDIR_BUF \
	(DIRENT64_RECLEN(1) + DIRENT64_RECLEN(2) + DIRENT64_RECLEN(MAXNAMELEN))
133
/*
 * It would be better to pad to 4 bytes since that's what XDR would do,
 * but the dirents UFS gives us are already padded to 8, so just take
 * what we're given.  Dircount is only a hint anyway.  Currently the
 * Solaris kernel is ASCII only, so there's no point in calling the
 * UTF8 functions.
 *
 * dirent64: name padded to provide 8 byte struct alignment
 *	d_ino(8) + d_off(8) + d_reclen(2) + d_name(namelen + null(1) + pad)
 *
 * cookie: uint64_t + utf8namelen: uint_t + utf8name padded to 8 bytes
 *
 * The macro therefore charges three XDR units (two for the cookie, one
 * for the name length) plus the padded name length derived from d_reclen.
 */
#define	DIRENT64_TO_DIRCOUNT(dp) \
	(3 * BYTES_PER_XDR_UNIT + DIRENT64_NAMELEN((dp)->d_reclen))
149
/*
 * NOTE(review): rfs4_srvrinit() in this file does not assign
 * rfs4_start_time; it is presumably set by state initialization
 * elsewhere -- confirm.
 */
time_t rfs4_start_time;			/* Initialized in rfs4_srvrinit */

/* Allocated with lm_alloc_sysidt() in rfs4_srvrinit(). */
static sysid_t lockt_sysid;		/* dummy sysid for all LOCKT calls */

/* Obtained from fs_new_caller_id() in rfs4_srvrinit(). */
u_longlong_t nfs4_srv_caller_id;
/* Key filled in by vsd_create() in rfs4_srvrinit(). */
uint_t nfs4_srv_vkey = 0;

/* Write verifier; built from hostid/time in rfs4_srvrinit(). */
verifier4 Write4verf;
/* Not assigned in this part of the file. */
verifier4 Readdir4verf;

void rfs4_init_compound_state(struct compound_state *);
161
/*
 * Forward declarations for the per-operation handlers and their result
 * free routines referenced by the dispatch table (rfsv4disptab) below.
 * Handlers share the signature (argop, resop, req, compound state); the
 * *_free routines take the nfs_resop4 whose contents they release.
 */
static void	nullfree(caddr_t);
static void	rfs4_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
		    struct compound_state *);
static void	rfs4_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
		    struct compound_state *);
static void	rfs4_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
		    struct compound_state *);
static void	rfs4_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
		    struct compound_state *);
static void	rfs4_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
		    struct compound_state *);
static void	rfs4_op_create_free(nfs_resop4 *resop);
static void	rfs4_op_delegreturn(nfs_argop4 *, nfs_resop4 *,
		    struct svc_req *, struct compound_state *);
static void	rfs4_op_delegpurge(nfs_argop4 *, nfs_resop4 *,
		    struct svc_req *, struct compound_state *);
static void	rfs4_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
		    struct compound_state *);
static void	rfs4_op_getattr_free(nfs_resop4 *);
static void	rfs4_op_getfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
		    struct compound_state *);
static void	rfs4_op_getfh_free(nfs_resop4 *);
static void	rfs4_op_illegal(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
		    struct compound_state *);
static void	rfs4_op_link(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
		    struct compound_state *);
static void	rfs4_op_lock(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
		    struct compound_state *);
static void	lock_denied_free(nfs_resop4 *);
static void	rfs4_op_locku(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
		    struct compound_state *);
static void	rfs4_op_lockt(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
		    struct compound_state *);
static void	rfs4_op_lookup(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
		    struct compound_state *);
static void	rfs4_op_lookupp(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
		    struct compound_state *);
static void	rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop,
		    struct svc_req *req, struct compound_state *cs);
static void	rfs4_op_nverify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
		    struct compound_state *);
static void	rfs4_op_open(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
		    struct compound_state *);
static void	rfs4_op_open_confirm(nfs_argop4 *, nfs_resop4 *,
		    struct svc_req *, struct compound_state *);
static void	rfs4_op_open_downgrade(nfs_argop4 *, nfs_resop4 *,
		    struct svc_req *, struct compound_state *);
static void	rfs4_op_putfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
		    struct compound_state *);
static void	rfs4_op_putpubfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
		    struct compound_state *);
static void	rfs4_op_putrootfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
		    struct compound_state *);
static void	rfs4_op_read(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
		    struct compound_state *);
static void	rfs4_op_read_free(nfs_resop4 *);
static void	rfs4_op_readdir_free(nfs_resop4 *resop);
static void	rfs4_op_readlink(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
		    struct compound_state *);
static void	rfs4_op_readlink_free(nfs_resop4 *);
static void	rfs4_op_release_lockowner(nfs_argop4 *, nfs_resop4 *,
		    struct svc_req *, struct compound_state *);
static void	rfs4_op_remove(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
		    struct compound_state *);
static void	rfs4_op_rename(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
		    struct compound_state *);
static void	rfs4_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
		    struct compound_state *);
static void	rfs4_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
		    struct compound_state *);
static void	rfs4_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
		    struct compound_state *);
static void	rfs4_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
		    struct compound_state *);
static void	rfs4_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
		    struct compound_state *);
static void	rfs4_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
		    struct compound_state *);
static void	rfs4_op_setclientid(nfs_argop4 *, nfs_resop4 *,
		    struct svc_req *, struct compound_state *);
static void	rfs4_op_setclientid_confirm(nfs_argop4 *, nfs_resop4 *,
		    struct svc_req *req, struct compound_state *);
static void	rfs4_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
		    struct compound_state *);
static void	rfs4_op_secinfo_free(nfs_resop4 *);

/* Helpers used by the open and client-state code. */
static nfsstat4 check_open_access(uint32_t,
		    struct compound_state *, struct svc_req *);
nfsstat4	rfs4_client_sysid(rfs4_client_t *, sysid_t *);
void		rfs4_ss_clid(rfs4_client_t *);
252
/*
 * translation table for attrs
 *
 * Set up by nfs4_ntov_table_init() and torn down by nfs4_ntov_table_free()
 * (both declared below).  Field notes are assumptions drawn from the names
 * only -- confirm against the users of this structure.
 */
struct nfs4_ntov_table {
	union nfs4_attr_u *na;		/* presumably per-attribute values */
	uint8_t amap[NFS4_MAXNUM_ATTRS]; /* presumably attr index map */
	int attrcnt;			/* presumably entries in use */
	bool_t vfsstat;			/* presumably "statvfs needed" flag */
};
262
/* Lifecycle helpers for struct nfs4_ntov_table. */
static void	nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp);
static void	nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
		    struct nfs4_svgetit_arg *sargp);

static nfsstat4	do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp,
		    struct compound_state *cs, struct nfs4_svgetit_arg *sargp,
		    struct nfs4_ntov_table *ntovp, nfs4_attr_cmd_t cmd);

/*
 * FEM monitor handles for read and write delegations; created by
 * fem_create() in rfs4_srvrinit() and released in rfs4_srvrfini().
 */
fem_t		*deleg_rdops;
fem_t		*deleg_wrops;

/*
 * Server instances are chained through their prev/next members, with
 * rfs4_cur_servinst pointing at the most recently created one.
 */
rfs4_servinst_t *rfs4_cur_servinst = NULL;	/* current server instance */
kmutex_t	rfs4_servinst_lock;	/* protects linked list */
int		rfs4_seen_first_compound;	/* set first time we see one */
277
/*
 * NFS4 op dispatch table
 *
 * One entry per operation number.  Both function pointers are declared
 * without a parameter list, so the table can hold handlers and free
 * routines whose argument types differ (e.g. nullfree takes a caddr_t
 * while the other free routines take an nfs_resop4 *).
 */

struct rfsv4disp {
	void	(*dis_proc)();		/* proc to call */
	void	(*dis_resfree)();	/* frees space allocated by proc */
	int	dis_flags;		/* RPC_IDEMPOTENT, etc... */
};
287
/*
 * Dispatch table indexed by NFSv4 operation number.  Slots 0-2 have no
 * corresponding operation and are routed to rfs4_op_illegal.  Operations
 * whose results own no separately allocated memory use nullfree.
 */
static struct rfsv4disp rfsv4disptab[] = {
	/*
	 * NFS VERSION 4
	 */

	/* RFS_NULL = 0 */
	{rfs4_op_illegal, nullfree, 0},

	/* UNUSED = 1 */
	{rfs4_op_illegal, nullfree, 0},

	/* UNUSED = 2 */
	{rfs4_op_illegal, nullfree, 0},

	/* OP_ACCESS = 3 */
	{rfs4_op_access, nullfree, RPC_IDEMPOTENT},

	/* OP_CLOSE = 4 */
	{rfs4_op_close, nullfree, 0},

	/* OP_COMMIT = 5 */
	{rfs4_op_commit, nullfree, RPC_IDEMPOTENT},

	/* OP_CREATE = 6 */
	{rfs4_op_create, nullfree, 0},

	/* OP_DELEGPURGE = 7 */
	{rfs4_op_delegpurge, nullfree, 0},

	/* OP_DELEGRETURN = 8 */
	{rfs4_op_delegreturn, nullfree, 0},

	/* OP_GETATTR = 9 */
	{rfs4_op_getattr, rfs4_op_getattr_free, RPC_IDEMPOTENT},

	/* OP_GETFH = 10 */
	{rfs4_op_getfh, rfs4_op_getfh_free, RPC_ALL},

	/* OP_LINK = 11 */
	{rfs4_op_link, nullfree, 0},

	/* OP_LOCK = 12 */
	{rfs4_op_lock, lock_denied_free, 0},

	/* OP_LOCKT = 13 */
	{rfs4_op_lockt, lock_denied_free, 0},

	/* OP_LOCKU = 14 */
	{rfs4_op_locku, nullfree, 0},

	/* OP_LOOKUP = 15 */
	{rfs4_op_lookup, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},

	/* OP_LOOKUPP = 16 */
	{rfs4_op_lookupp, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},

	/* OP_NVERIFY = 17 */
	{rfs4_op_nverify, nullfree, RPC_IDEMPOTENT},

	/* OP_OPEN = 18 */
	{rfs4_op_open, rfs4_free_reply, 0},

	/* OP_OPENATTR = 19 */
	{rfs4_op_openattr, nullfree, 0},

	/* OP_OPEN_CONFIRM = 20 */
	{rfs4_op_open_confirm, nullfree, 0},

	/* OP_OPEN_DOWNGRADE = 21 */
	{rfs4_op_open_downgrade, nullfree, 0},

	/* OP_PUTFH = 22 */
	{rfs4_op_putfh, nullfree, RPC_ALL},

	/* OP_PUTPUBFH = 23 */
	{rfs4_op_putpubfh, nullfree, RPC_ALL},

	/* OP_PUTROOTFH = 24 */
	{rfs4_op_putrootfh, nullfree, RPC_ALL},

	/* OP_READ = 25 */
	{rfs4_op_read, rfs4_op_read_free, RPC_IDEMPOTENT},

	/* OP_READDIR = 26 */
	{rfs4_op_readdir, rfs4_op_readdir_free, RPC_IDEMPOTENT},

	/* OP_READLINK = 27 */
	{rfs4_op_readlink, rfs4_op_readlink_free, RPC_IDEMPOTENT},

	/* OP_REMOVE = 28 */
	{rfs4_op_remove, nullfree, 0},

	/* OP_RENAME = 29 */
	{rfs4_op_rename, nullfree, 0},

	/* OP_RENEW = 30 */
	{rfs4_op_renew, nullfree, 0},

	/* OP_RESTOREFH = 31 */
	{rfs4_op_restorefh, nullfree, RPC_ALL},

	/* OP_SAVEFH = 32 */
	{rfs4_op_savefh, nullfree, RPC_ALL},

	/* OP_SECINFO = 33 */
	{rfs4_op_secinfo, rfs4_op_secinfo_free, 0},

	/* OP_SETATTR = 34 */
	{rfs4_op_setattr, nullfree, 0},

	/* OP_SETCLIENTID = 35 */
	{rfs4_op_setclientid, nullfree, 0},

	/* OP_SETCLIENTID_CONFIRM = 36 */
	{rfs4_op_setclientid_confirm, nullfree, 0},

	/* OP_VERIFY = 37 */
	{rfs4_op_verify, nullfree, RPC_IDEMPOTENT},

	/* OP_WRITE = 38 */
	{rfs4_op_write, nullfree, 0},

	/* OP_RELEASE_LOCKOWNER = 39 */
	{rfs4_op_release_lockowner, nullfree, 0},
};
413
/* Number of entries in rfsv4disptab (valid indices are 0 .. cnt - 1). */
static uint_t rfsv4disp_cnt = sizeof (rfsv4disptab) / sizeof (rfsv4disptab[0]);

/*
 * Index one past the last dispatch entry.  It is not a valid index into
 * rfsv4disptab itself; under DEBUG it selects the trailing
 * "rfs4_op_illegal" entry of rfs4_op_string.
 */
#define	OP_ILLEGAL_IDX (rfsv4disp_cnt)
417
#ifdef DEBUG

/* Debug knobs; their consumers are not in this part of the file. */
int		rfs4_fillone_debug = 0;
int		rfs4_no_stub_access = 1;
int		rfs4_rddir_debug = 0;

/*
 * Printable operation names, in the same order as rfsv4disptab, plus one
 * extra trailing entry ("rfs4_op_illegal") at index OP_ILLEGAL_IDX.
 * Note that entry 36 is spelled "rfs4_op_setclient_confirm", which differs
 * from the handler name rfs4_op_setclientid_confirm.
 */
static char    *rfs4_op_string[] = {
	"rfs4_op_null",
	"rfs4_op_1 unused",
	"rfs4_op_2 unused",
	"rfs4_op_access",
	"rfs4_op_close",
	"rfs4_op_commit",
	"rfs4_op_create",
	"rfs4_op_delegpurge",
	"rfs4_op_delegreturn",
	"rfs4_op_getattr",
	"rfs4_op_getfh",
	"rfs4_op_link",
	"rfs4_op_lock",
	"rfs4_op_lockt",
	"rfs4_op_locku",
	"rfs4_op_lookup",
	"rfs4_op_lookupp",
	"rfs4_op_nverify",
	"rfs4_op_open",
	"rfs4_op_openattr",
	"rfs4_op_open_confirm",
	"rfs4_op_open_downgrade",
	"rfs4_op_putfh",
	"rfs4_op_putpubfh",
	"rfs4_op_putrootfh",
	"rfs4_op_read",
	"rfs4_op_readdir",
	"rfs4_op_readlink",
	"rfs4_op_remove",
	"rfs4_op_rename",
	"rfs4_op_renew",
	"rfs4_op_restorefh",
	"rfs4_op_savefh",
	"rfs4_op_secinfo",
	"rfs4_op_setattr",
	"rfs4_op_setclientid",
	"rfs4_op_setclient_confirm",
	"rfs4_op_verify",
	"rfs4_op_write",
	"rfs4_op_release_lockowner",
	"rfs4_op_illegal"
};
#endif
468
void	rfs4_ss_chkclid(rfs4_client_t *);

extern size_t   strlcpy(char *dst, const char *src, size_t dstsize);

extern void	rfs4_free_fs_locations4(fs_locations4 *);

/*
 * nextdp(dp): address of the dirent64 that follows `dp' in a buffer,
 * found by advancing d_reclen bytes.  Any earlier definition of the macro
 * is discarded first so this one is always the one in effect.
 */
#ifdef	nextdp
#undef nextdp
#endif
#define	nextdp(dp)	((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
479
/*
 * Vnode operations intercepted for read delegations.  Passed to
 * fem_create() in rfs4_srvrinit() to build deleg_rdops.  The list is
 * terminated by a NULL/NULL pair.
 */
static const fs_operation_def_t nfs4_rd_deleg_tmpl[] = {
	VOPNAME_OPEN,		{ .femop_open = deleg_rd_open },
	VOPNAME_WRITE,		{ .femop_write = deleg_rd_write },
	VOPNAME_SETATTR,	{ .femop_setattr = deleg_rd_setattr },
	VOPNAME_RWLOCK,		{ .femop_rwlock = deleg_rd_rwlock },
	VOPNAME_SPACE,		{ .femop_space = deleg_rd_space },
	VOPNAME_SETSECATTR,	{ .femop_setsecattr = deleg_rd_setsecattr },
	VOPNAME_VNEVENT,	{ .femop_vnevent = deleg_rd_vnevent },
	NULL,			NULL
};
/*
 * Vnode operations intercepted for write delegations.  Passed to
 * fem_create() in rfs4_srvrinit() to build deleg_wrops.  Compared with the
 * read-delegation template this one additionally intercepts VOPNAME_READ.
 * The list is terminated by a NULL/NULL pair.
 */
static const fs_operation_def_t nfs4_wr_deleg_tmpl[] = {
	VOPNAME_OPEN,		{ .femop_open = deleg_wr_open },
	VOPNAME_READ,		{ .femop_read = deleg_wr_read },
	VOPNAME_WRITE,		{ .femop_write = deleg_wr_write },
	VOPNAME_SETATTR,	{ .femop_setattr = deleg_wr_setattr },
	VOPNAME_RWLOCK,		{ .femop_rwlock = deleg_wr_rwlock },
	VOPNAME_SPACE,		{ .femop_space = deleg_wr_space },
	VOPNAME_SETSECATTR,	{ .femop_setsecattr = deleg_wr_setsecattr },
	VOPNAME_VNEVENT,	{ .femop_vnevent = deleg_wr_vnevent },
	NULL,			NULL
};
501
502 int
503 rfs4_srvrinit(void)
504 {
505 timespec32_t verf;
506 int error;
507 extern void rfs4_attr_init();
508 extern krwlock_t rfs4_deleg_policy_lock;
509
510 /*
511 * The following algorithm attempts to find a unique verifier
512 * to be used as the write verifier returned from the server
513 * to the client. It is important that this verifier change
514 * whenever the server reboots. Of secondary importance, it
515 * is important for the verifier to be unique between two
516 * different servers.
517 *
518 * Thus, an attempt is made to use the system hostid and the
519 * current time in seconds when the nfssrv kernel module is
520 * loaded. It is assumed that an NFS server will not be able
521 * to boot and then to reboot in less than a second. If the
522 * hostid has not been set, then the current high resolution
523 * time is used. This will ensure different verifiers each
524 * time the server reboots and minimize the chances that two
525 * different servers will have the same verifier.
526 * XXX - this is broken on LP64 kernels.
527 */
528 verf.tv_sec = (time_t)zone_get_hostid(NULL);
529 if (verf.tv_sec != 0) {
530 verf.tv_nsec = gethrestime_sec();
531 } else {
532 timespec_t tverf;
533
534 gethrestime(&tverf);
535 verf.tv_sec = (time_t)tverf.tv_sec;
536 verf.tv_nsec = tverf.tv_nsec;
537 }
538
539 Write4verf = *(uint64_t *)&verf;
540
541 rfs4_attr_init();
542 mutex_init(&rfs4_deleg_lock, NULL, MUTEX_DEFAULT, NULL);
543
544 /* Used to manage create/destroy of server state */
545 mutex_init(&rfs4_state_lock, NULL, MUTEX_DEFAULT, NULL);
546
547 /* Used to manage access to server instance linked list */
548 mutex_init(&rfs4_servinst_lock, NULL, MUTEX_DEFAULT, NULL);
549
550 /* Used to manage access to rfs4_deleg_policy */
551 rw_init(&rfs4_deleg_policy_lock, NULL, RW_DEFAULT, NULL);
552
553 error = fem_create("deleg_rdops", nfs4_rd_deleg_tmpl, &deleg_rdops);
554 if (error != 0) {
555 rfs4_disable_delegation();
556 } else {
557 error = fem_create("deleg_wrops", nfs4_wr_deleg_tmpl,
558 &deleg_wrops);
559 if (error != 0) {
560 rfs4_disable_delegation();
561 fem_free(deleg_rdops);
562 }
563 }
564
565 nfs4_srv_caller_id = fs_new_caller_id();
566
567 lockt_sysid = lm_alloc_sysidt();
568
569 vsd_create(&nfs4_srv_vkey, NULL);
570
571 return (0);
572 }
573
574 void
575 rfs4_srvrfini(void)
576 {
577 extern krwlock_t rfs4_deleg_policy_lock;
578
579 if (lockt_sysid != LM_NOSYSID) {
580 lm_free_sysidt(lockt_sysid);
581 lockt_sysid = LM_NOSYSID;
582 }
583
584 mutex_destroy(&rfs4_deleg_lock);
585 mutex_destroy(&rfs4_state_lock);
586 rw_destroy(&rfs4_deleg_policy_lock);
587
588 fem_free(deleg_rdops);
589 fem_free(deleg_wrops);
590 }
591
592 void
593 rfs4_init_compound_state(struct compound_state *cs)
594 {
595 bzero(cs, sizeof (*cs));
596 cs->cont = TRUE;
597 cs->access = CS_ACCESS_DENIED;
598 cs->deleg = FALSE;
599 cs->mandlock = FALSE;
600 cs->fh.nfs_fh4_val = cs->fhbuf;
601 }
602
603 void
604 rfs4_grace_start(rfs4_servinst_t *sip)
605 {
606 rw_enter(&sip->rwlock, RW_WRITER);
607 sip->start_time = (time_t)TICK_TO_SEC(ddi_get_lbolt());
608 sip->grace_period = rfs4_grace_period;
609 rw_exit(&sip->rwlock);
610 }
611
612 /*
613 * returns true if the instance's grace period has never been started
614 */
615 int
616 rfs4_servinst_grace_new(rfs4_servinst_t *sip)
617 {
618 time_t start_time;
619
620 rw_enter(&sip->rwlock, RW_READER);
621 start_time = sip->start_time;
622 rw_exit(&sip->rwlock);
623
624 return (start_time == 0);
625 }
626
627 /*
628 * Indicates if server instance is within the
629 * grace period.
630 */
631 int
632 rfs4_servinst_in_grace(rfs4_servinst_t *sip)
633 {
634 time_t grace_expiry;
635
636 rw_enter(&sip->rwlock, RW_READER);
637 grace_expiry = sip->start_time + sip->grace_period;
638 rw_exit(&sip->rwlock);
639
640 return (((time_t)TICK_TO_SEC(ddi_get_lbolt())) < grace_expiry);
641 }
642
643 int
644 rfs4_clnt_in_grace(rfs4_client_t *cp)
645 {
646 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
647
648 return (rfs4_servinst_in_grace(cp->rc_server_instance));
649 }
650
651 /*
652 * reset all currently active grace periods
653 */
654 void
655 rfs4_grace_reset_all(void)
656 {
657 rfs4_servinst_t *sip;
658
659 mutex_enter(&rfs4_servinst_lock);
660 for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev)
661 if (rfs4_servinst_in_grace(sip))
662 rfs4_grace_start(sip);
663 mutex_exit(&rfs4_servinst_lock);
664 }
665
666 /*
667 * start any new instances' grace periods
668 */
669 void
670 rfs4_grace_start_new(void)
671 {
672 rfs4_servinst_t *sip;
673
674 mutex_enter(&rfs4_servinst_lock);
675 for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev)
676 if (rfs4_servinst_grace_new(sip))
677 rfs4_grace_start(sip);
678 mutex_exit(&rfs4_servinst_lock);
679 }
680
681 static rfs4_dss_path_t *
682 rfs4_dss_newpath(rfs4_servinst_t *sip, char *path, unsigned index)
683 {
684 size_t len;
685 rfs4_dss_path_t *dss_path;
686
687 dss_path = kmem_alloc(sizeof (rfs4_dss_path_t), KM_SLEEP);
688
689 /*
690 * Take a copy of the string, since the original may be overwritten.
691 * Sadly, no strdup() in the kernel.
692 */
693 /* allow for NUL */
694 len = strlen(path) + 1;
695 dss_path->path = kmem_alloc(len, KM_SLEEP);
696 (void) strlcpy(dss_path->path, path, len);
697
698 /* associate with servinst */
699 dss_path->sip = sip;
700 dss_path->index = index;
701
702 /*
703 * Add to list of served paths.
704 * No locking required, as we're only ever called at startup.
705 */
706 if (rfs4_dss_pathlist == NULL) {
707 /* this is the first dss_path_t */
708
709 /* needed for insque/remque */
710 dss_path->next = dss_path->prev = dss_path;
711
712 rfs4_dss_pathlist = dss_path;
713 } else {
714 insque(dss_path, rfs4_dss_pathlist);
715 }
716
717 return (dss_path);
718 }
719
720 /*
721 * Create a new server instance, and make it the currently active instance.
722 * Note that starting the grace period too early will reduce the clients'
723 * recovery window.
724 */
725 void
726 rfs4_servinst_create(int start_grace, int dss_npaths, char **dss_paths)
727 {
728 unsigned i;
729 rfs4_servinst_t *sip;
730 rfs4_oldstate_t *oldstate;
731
732 sip = kmem_alloc(sizeof (rfs4_servinst_t), KM_SLEEP);
733 rw_init(&sip->rwlock, NULL, RW_DEFAULT, NULL);
734
735 sip->start_time = (time_t)0;
736 sip->grace_period = (time_t)0;
737 sip->next = NULL;
738 sip->prev = NULL;
739
740 rw_init(&sip->oldstate_lock, NULL, RW_DEFAULT, NULL);
741 /*
742 * This initial dummy entry is required to setup for insque/remque.
743 * It must be skipped over whenever the list is traversed.
744 */
745 oldstate = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP);
746 /* insque/remque require initial list entry to be self-terminated */
747 oldstate->next = oldstate;
748 oldstate->prev = oldstate;
749 sip->oldstate = oldstate;
750
751
752 sip->dss_npaths = dss_npaths;
753 sip->dss_paths = kmem_alloc(dss_npaths *
754 sizeof (rfs4_dss_path_t *), KM_SLEEP);
755
756 for (i = 0; i < dss_npaths; i++) {
757 sip->dss_paths[i] = rfs4_dss_newpath(sip, dss_paths[i], i);
758 }
759
760 mutex_enter(&rfs4_servinst_lock);
761 if (rfs4_cur_servinst != NULL) {
762 /* add to linked list */
763 sip->prev = rfs4_cur_servinst;
764 rfs4_cur_servinst->next = sip;
765 }
766 if (start_grace)
767 rfs4_grace_start(sip);
768 /* make the new instance "current" */
769 rfs4_cur_servinst = sip;
770
771 mutex_exit(&rfs4_servinst_lock);
772 }
773
774 /*
775 * In future, we might add a rfs4_servinst_destroy(sip) but, for now, destroy
776 * all instances directly.
777 */
778 void
779 rfs4_servinst_destroy_all(void)
780 {
781 rfs4_servinst_t *sip, *prev, *current;
782 #ifdef DEBUG
783 int n = 0;
784 #endif
785
786 mutex_enter(&rfs4_servinst_lock);
787 ASSERT(rfs4_cur_servinst != NULL);
788 current = rfs4_cur_servinst;
789 rfs4_cur_servinst = NULL;
790 for (sip = current; sip != NULL; sip = prev) {
791 prev = sip->prev;
792 rw_destroy(&sip->rwlock);
793 if (sip->oldstate)
794 kmem_free(sip->oldstate, sizeof (rfs4_oldstate_t));
795 if (sip->dss_paths)
796 kmem_free(sip->dss_paths,
797 sip->dss_npaths * sizeof (rfs4_dss_path_t *));
798 kmem_free(sip, sizeof (rfs4_servinst_t));
799 #ifdef DEBUG
800 n++;
801 #endif
802 }
803 mutex_exit(&rfs4_servinst_lock);
804 }
805
806 /*
807 * Assign the current server instance to a client_t.
808 * Should be called with cp->rc_dbe held.
809 */
810 void
811 rfs4_servinst_assign(rfs4_client_t *cp, rfs4_servinst_t *sip)
812 {
813 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
814
815 /*
816 * The lock ensures that if the current instance is in the process
817 * of changing, we will see the new one.
818 */
819 mutex_enter(&rfs4_servinst_lock);
820 cp->rc_server_instance = sip;
821 mutex_exit(&rfs4_servinst_lock);
822 }
823
824 rfs4_servinst_t *
825 rfs4_servinst(rfs4_client_t *cp)
826 {
827 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
828
829 return (cp->rc_server_instance);
830 }
831
832 /* ARGSUSED */
833 static void
834 nullfree(caddr_t resop)
835 {
836 }
837
838 /*
839 * This is a fall-through for invalid or not implemented (yet) ops
840 */
841 /* ARGSUSED */
842 static void
843 rfs4_op_inval(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
844 struct compound_state *cs)
845 {
846 *cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_INVAL;
847 }
848
849 /*
850 * Check if the security flavor, nfsnum, is in the flavor_list.
851 */
852 bool_t
853 in_flavor_list(int nfsnum, int *flavor_list, int count)
854 {
855 int i;
856
857 for (i = 0; i < count; i++) {
858 if (nfsnum == flavor_list[i])
859 return (TRUE);
860 }
861 return (FALSE);
862 }
863
/*
 * Used by rfs4_op_secinfo to get the security information from the
 * export structure associated with the component.
 *
 * cs	compound state; cs->vp is the directory in which `nm' is looked up
 *	and cs->exi its export.
 * nm	NUL-terminated component name; ".." receives special handling.
 * resp	on success, SECINFO4resok_len/SECINFO4resok_val are filled in with
 *	a kmem_alloc'd array (and, for RPCSEC_GSS entries, kmem_alloc'd OID
 *	copies).  Presumably released by rfs4_op_secinfo_free() -- confirm.
 *
 * Returns NFS4_OK on success.  On failure returns puterrno4() of the
 * underlying error (ENOENT at the system root or for an invisible object,
 * EACCES when no export covers the object, or the lookup/traverse/fid
 * error) and leaves `resp' untouched.
 *
 * The hold on the looked-up vnode is dropped on every exit path.
 * exported_lock must be held by the caller (asserted below).
 */
/* ARGSUSED */
static nfsstat4
do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
{
	int error, different_export = 0;
	vnode_t *dvp, *vp;
	struct exportinfo *exi = NULL;
	fid_t fid;
	uint_t count, i;
	secinfo4 *resok_val;
	struct secinfo *secp;
	seconfig_t *si;
	bool_t did_traverse = FALSE;
	int dotdot, walk;

	dvp = cs->vp;
	dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');

	/*
	 * If dotdotting, then need to check whether it's above the
	 * root of a filesystem, or above an export point.
	 */
	if (dotdot) {

		/*
		 * If dotdotting at the root of a filesystem, then
		 * need to traverse back to the mounted-on filesystem
		 * and do the dotdot lookup there.
		 */
		if (cs->vp->v_flag & VROOT) {

			/*
			 * If at the system root, then can
			 * go up no further.
			 */
			if (VN_CMP(dvp, rootdir))
				return (puterrno4(ENOENT));

			/*
			 * Traverse back to the mounted-on filesystem
			 *
			 * NOTE(review): untraverse() is not visible here.
			 * If it returns a newly held vnode and/or releases
			 * the vnode passed in, then dvp is never released
			 * below and cs->vp may have lost a hold -- confirm.
			 */
			dvp = untraverse(cs->vp);

			/*
			 * Set the different_export flag so we remember
			 * to pick up a new exportinfo entry for
			 * this new filesystem.
			 */
			different_export = 1;
		} else {

			/*
			 * If dotdotting above an export point then set
			 * the different_export to get new export info.
			 */
			different_export = nfs_exported(cs->exi, cs->vp);
		}
	}

	/*
	 * Get the vnode for the component "nm".
	 */
	error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cs->cr,
	    NULL, NULL, NULL);
	if (error)
		return (puterrno4(error));

	/*
	 * If the vnode is in a pseudo filesystem, or if the security flavor
	 * used in the request is valid but not an explicitly shared flavor,
	 * or the access bit indicates that this is a limited access,
	 * check whether this vnode is visible.
	 */
	if (!different_export &&
	    (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
	    cs->access & CS_ACCESS_LIMITED)) {
		if (! nfs_visible(cs->exi, vp, &different_export)) {
			VN_RELE(vp);
			return (puterrno4(ENOENT));
		}
	}

	/*
	 * If it's a mountpoint, then traverse it.
	 */
	if (vn_ismntpt(vp)) {
		if ((error = traverse(&vp)) != 0) {
			VN_RELE(vp);
			return (puterrno4(error));
		}
		/* remember that we had to traverse mountpoint */
		did_traverse = TRUE;
		different_export = 1;
	} else if (vp->v_vfsp != dvp->v_vfsp) {
		/*
		 * If vp isn't a mountpoint and the vfs ptrs aren't the same,
		 * then vp is probably an LOFS object.  We don't need the
		 * realvp, we just need to know that we might have crossed
		 * a server fs boundary and need to call checkexport4.
		 * (LOFS lookup hides server fs mountpoints, and actually calls
		 * traverse)
		 */
		different_export = 1;
	}

	/*
	 * Get the export information for it.
	 */
	if (different_export) {

		bzero(&fid, sizeof (fid));
		fid.fid_len = MAXFIDSZ;
		error = vop_fid_pseudo(vp, &fid);
		if (error) {
			VN_RELE(vp);
			return (puterrno4(error));
		}

		/* ".." uses a lookup via nfs_vptoexi(); others go by fsid/fid */
		if (dotdot)
			exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
		else
			exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);

		if (exi == NULL) {
			if (did_traverse == TRUE) {
				/*
				 * If this vnode is a mounted-on vnode,
				 * but the mounted-on file system is not
				 * exported, send back the secinfo for
				 * the exported node that the mounted-on
				 * vnode lives in.
				 */
				exi = cs->exi;
			} else {
				VN_RELE(vp);
				return (puterrno4(EACCES));
			}
		}
	} else {
		exi = cs->exi;
	}
	ASSERT(exi != NULL);


	/*
	 * Create the secinfo result based on the security information
	 * from the exportinfo structure (exi).
	 *
	 * Return all flavors for a pseudo node.
	 * For a real export node, return the flavor that the client
	 * has access with.
	 */
	ASSERT(RW_LOCK_HELD(&exported_lock));
	if (PSEUDO(exi)) {
		count = exi->exi_export.ex_seccnt; /* total sec count */
		resok_val = kmem_alloc(count * sizeof (secinfo4), KM_SLEEP);
		secp = exi->exi_export.ex_secinfo;

		for (i = 0; i < count; i++) {
			si = &secp[i].s_secinfo;
			resok_val[i].flavor = si->sc_rpcnum;
			/* only RPCSEC_GSS entries carry flavor_info */
			if (resok_val[i].flavor == RPCSEC_GSS) {
				rpcsec_gss_info *info;

				info = &resok_val[i].flavor_info;
				info->qop = si->sc_qop;
				info->service = (rpc_gss_svc_t)si->sc_service;

				/* get oid opaque data */
				info->oid.sec_oid4_len =
				    si->sc_gss_mech_type->length;
				info->oid.sec_oid4_val = kmem_alloc(
				    si->sc_gss_mech_type->length, KM_SLEEP);
				bcopy(
				    si->sc_gss_mech_type->elements,
				    info->oid.sec_oid4_val,
				    info->oid.sec_oid4_len);
			}
		}
		resp->SECINFO4resok_len = count;
		resp->SECINFO4resok_val = resok_val;
	} else {
		int ret_cnt = 0, k = 0;
		int *flavor_list;

		count = exi->exi_export.ex_seccnt; /* total sec count */
		secp = exi->exi_export.ex_secinfo;

		/* scratch list of permitted flavors; freed before returning */
		flavor_list = kmem_alloc(count * sizeof (int), KM_SLEEP);
		/* find out which flavors to return */
		for (i = 0; i < count; i ++) {
			int access, flavor, perm;

			flavor = secp[i].s_secinfo.sc_nfsnum;
			perm = secp[i].s_flags;

			access = nfsauth4_secinfo_access(exi, cs->req,
			    flavor, perm, cs->basecr);

			if (! (access & NFSAUTH_DENIED) &&
			    ! (access & NFSAUTH_WRONGSEC)) {
				flavor_list[ret_cnt] = flavor;
				ret_cnt++;
			}
		}

		/* Create the returning SECINFO value */
		resok_val = kmem_alloc(ret_cnt * sizeof (secinfo4), KM_SLEEP);

		for (i = 0; i < count; i++) {
			/*
			 * If the flavor is in the flavor list,
			 * fill in resok_val.
			 */
			si = &secp[i].s_secinfo;
			if (in_flavor_list(si->sc_nfsnum,
			    flavor_list, ret_cnt)) {
				resok_val[k].flavor = si->sc_rpcnum;
				if (resok_val[k].flavor == RPCSEC_GSS) {
					rpcsec_gss_info *info;

					info = &resok_val[k].flavor_info;
					info->qop = si->sc_qop;
					info->service = (rpc_gss_svc_t)
					    si->sc_service;

					/* get oid opaque data */
					info->oid.sec_oid4_len =
					    si->sc_gss_mech_type->length;
					info->oid.sec_oid4_val = kmem_alloc(
					    si->sc_gss_mech_type->length,
					    KM_SLEEP);
					bcopy(si->sc_gss_mech_type->elements,
					    info->oid.sec_oid4_val,
					    info->oid.sec_oid4_len);
				}
				k++;
			}
			/* stop once resok_val is full; never write past it */
			if (k >= ret_cnt)
				break;
		}
		resp->SECINFO4resok_len = ret_cnt;
		resp->SECINFO4resok_val = resok_val;
		kmem_free(flavor_list, count * sizeof (int));
	}

	VN_RELE(vp);
	return (NFS4_OK);
}
1117
1118 /*
1119 * SECINFO (Operation 33): Obtain required security information on
1120 * the component name in the format of (security-mechanism-oid, qop, service)
1121 * triplets.
1122 */
1123 /* ARGSUSED */
1124 static void
1125 rfs4_op_secinfo(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1126 struct compound_state *cs)
1127 {
1128 SECINFO4args *args = &argop->nfs_argop4_u.opsecinfo;
1129 SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1130 utf8string *utfnm = &args->name;
1131 uint_t len;
1132 char *nm;
1133 struct sockaddr *ca;
1134 char *name = NULL;
1135 nfsstat4 status = NFS4_OK;
1136
1137 DTRACE_NFSV4_2(op__secinfo__start, struct compound_state *, cs,
1138 SECINFO4args *, args);
1139
1140 /*
1141 * Current file handle (cfh) should have been set before getting
1142 * into this function. If not, return error.
1143 */
1144 if (cs->vp == NULL) {
1145 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1146 goto out;
1147 }
1148
1149 if (cs->vp->v_type != VDIR) {
1150 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1151 goto out;
1152 }
1153
1154 /*
1155 * Verify the component name. If failed, error out, but
1156 * do not error out if the component name is a "..".
1157 * SECINFO will return its parents secinfo data for SECINFO "..".
1158 */
1159 status = utf8_dir_verify(utfnm);
1160 if (status != NFS4_OK) {
1161 if (utfnm->utf8string_len != 2 ||
1162 utfnm->utf8string_val[0] != '.' ||
1163 utfnm->utf8string_val[1] != '.') {
1164 *cs->statusp = resp->status = status;
1165 goto out;
1166 }
1167 }
1168
1169 nm = utf8_to_str(utfnm, &len, NULL);
1170 if (nm == NULL) {
1171 *cs->statusp = resp->status = NFS4ERR_INVAL;
1172 goto out;
1173 }
1174
1175 if (len > MAXNAMELEN) {
1176 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1177 kmem_free(nm, len);
1178 goto out;
1179 }
1180
1181 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1182 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1183 MAXPATHLEN + 1);
1184
1185 if (name == NULL) {
1186 *cs->statusp = resp->status = NFS4ERR_INVAL;
1187 kmem_free(nm, len);
1188 goto out;
1189 }
1190
1191
1192 *cs->statusp = resp->status = do_rfs4_op_secinfo(cs, name, resp);
1193
1194 if (name != nm)
1195 kmem_free(name, MAXPATHLEN + 1);
1196 kmem_free(nm, len);
1197
1198 out:
1199 DTRACE_NFSV4_2(op__secinfo__done, struct compound_state *, cs,
1200 SECINFO4res *, resp);
1201 }
1202
1203 /*
1204 * Free SECINFO result.
1205 */
1206 /* ARGSUSED */
1207 static void
1208 rfs4_op_secinfo_free(nfs_resop4 *resop)
1209 {
1210 SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1211 int count, i;
1212 secinfo4 *resok_val;
1213
1214 /* If this is not an Ok result, nothing to free. */
1215 if (resp->status != NFS4_OK) {
1216 return;
1217 }
1218
1219 count = resp->SECINFO4resok_len;
1220 resok_val = resp->SECINFO4resok_val;
1221
1222 for (i = 0; i < count; i++) {
1223 if (resok_val[i].flavor == RPCSEC_GSS) {
1224 rpcsec_gss_info *info;
1225
1226 info = &resok_val[i].flavor_info;
1227 kmem_free(info->oid.sec_oid4_val,
1228 info->oid.sec_oid4_len);
1229 }
1230 }
1231 kmem_free(resok_val, count * sizeof (secinfo4));
1232 resp->SECINFO4resok_len = 0;
1233 resp->SECINFO4resok_val = NULL;
1234 }
1235
1236 /* ARGSUSED */
1237 static void
1238 rfs4_op_access(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1239 struct compound_state *cs)
1240 {
1241 ACCESS4args *args = &argop->nfs_argop4_u.opaccess;
1242 ACCESS4res *resp = &resop->nfs_resop4_u.opaccess;
1243 int error;
1244 vnode_t *vp;
1245 struct vattr va;
1246 int checkwriteperm;
1247 cred_t *cr = cs->cr;
1248 bslabel_t *clabel, *slabel;
1249 ts_label_t *tslabel;
1250 boolean_t admin_low_client;
1251
1252 DTRACE_NFSV4_2(op__access__start, struct compound_state *, cs,
1253 ACCESS4args *, args);
1254
1255 #if 0 /* XXX allow access even if !cs->access. Eventually only pseudo fs */
1256 if (cs->access == CS_ACCESS_DENIED) {
1257 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1258 goto out;
1259 }
1260 #endif
1261 if (cs->vp == NULL) {
1262 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1263 goto out;
1264 }
1265
1266 ASSERT(cr != NULL);
1267
1268 vp = cs->vp;
1269
1270 /*
1271 * If the file system is exported read only, it is not appropriate
1272 * to check write permissions for regular files and directories.
1273 * Special files are interpreted by the client, so the underlying
1274 * permissions are sent back to the client for interpretation.
1275 */
1276 if (rdonly4(req, cs) &&
1277 (vp->v_type == VREG || vp->v_type == VDIR))
1278 checkwriteperm = 0;
1279 else
1280 checkwriteperm = 1;
1281
1282 /*
1283 * XXX
1284 * We need the mode so that we can correctly determine access
1285 * permissions relative to a mandatory lock file. Access to
1286 * mandatory lock files is denied on the server, so it might
1287 * as well be reflected to the server during the open.
1288 */
1289 va.va_mask = AT_MODE;
1290 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1291 if (error) {
1292 *cs->statusp = resp->status = puterrno4(error);
1293 goto out;
1294 }
1295 resp->access = 0;
1296 resp->supported = 0;
1297
1298 if (is_system_labeled()) {
1299 ASSERT(req->rq_label != NULL);
1300 clabel = req->rq_label;
1301 DTRACE_PROBE2(tx__rfs4__log__info__opaccess__clabel, char *,
1302 "got client label from request(1)",
1303 struct svc_req *, req);
1304 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1305 if ((tslabel = nfs_getflabel(vp, cs->exi)) == NULL) {
1306 *cs->statusp = resp->status = puterrno4(EACCES);
1307 goto out;
1308 }
1309 slabel = label2bslabel(tslabel);
1310 DTRACE_PROBE3(tx__rfs4__log__info__opaccess__slabel,
1311 char *, "got server label(1) for vp(2)",
1312 bslabel_t *, slabel, vnode_t *, vp);
1313
1314 admin_low_client = B_FALSE;
1315 } else
1316 admin_low_client = B_TRUE;
1317 }
1318
1319 if (args->access & ACCESS4_READ) {
1320 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
1321 if (!error && !MANDLOCK(vp, va.va_mode) &&
1322 (!is_system_labeled() || admin_low_client ||
1323 bldominates(clabel, slabel)))
1324 resp->access |= ACCESS4_READ;
1325 resp->supported |= ACCESS4_READ;
1326 }
1327 if ((args->access & ACCESS4_LOOKUP) && vp->v_type == VDIR) {
1328 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1329 if (!error && (!is_system_labeled() || admin_low_client ||
1330 bldominates(clabel, slabel)))
1331 resp->access |= ACCESS4_LOOKUP;
1332 resp->supported |= ACCESS4_LOOKUP;
1333 }
1334 if (checkwriteperm &&
1335 (args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND))) {
1336 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1337 if (!error && !MANDLOCK(vp, va.va_mode) &&
1338 (!is_system_labeled() || admin_low_client ||
1339 blequal(clabel, slabel)))
1340 resp->access |=
1341 (args->access & (ACCESS4_MODIFY | ACCESS4_EXTEND));
1342 resp->supported |=
1343 resp->access & (ACCESS4_MODIFY | ACCESS4_EXTEND);
1344 }
1345
1346 if (checkwriteperm &&
1347 (args->access & ACCESS4_DELETE) && vp->v_type == VDIR) {
1348 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1349 if (!error && (!is_system_labeled() || admin_low_client ||
1350 blequal(clabel, slabel)))
1351 resp->access |= ACCESS4_DELETE;
1352 resp->supported |= ACCESS4_DELETE;
1353 }
1354 if (args->access & ACCESS4_EXECUTE && vp->v_type != VDIR) {
1355 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1356 if (!error && !MANDLOCK(vp, va.va_mode) &&
1357 (!is_system_labeled() || admin_low_client ||
1358 bldominates(clabel, slabel)))
1359 resp->access |= ACCESS4_EXECUTE;
1360 resp->supported |= ACCESS4_EXECUTE;
1361 }
1362
1363 if (is_system_labeled() && !admin_low_client)
1364 label_rele(tslabel);
1365
1366 *cs->statusp = resp->status = NFS4_OK;
1367 out:
1368 DTRACE_NFSV4_2(op__access__done, struct compound_state *, cs,
1369 ACCESS4res *, resp);
1370 }
1371
1372 /* ARGSUSED */
1373 static void
1374 rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1375 struct compound_state *cs)
1376 {
1377 COMMIT4args *args = &argop->nfs_argop4_u.opcommit;
1378 COMMIT4res *resp = &resop->nfs_resop4_u.opcommit;
1379 int error;
1380 vnode_t *vp = cs->vp;
1381 cred_t *cr = cs->cr;
1382 vattr_t va;
1383
1384 DTRACE_NFSV4_2(op__commit__start, struct compound_state *, cs,
1385 COMMIT4args *, args);
1386
1387 if (vp == NULL) {
1388 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1389 goto out;
1390 }
1391 if (cs->access == CS_ACCESS_DENIED) {
1392 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1393 goto out;
1394 }
1395
1396 if (args->offset + args->count < args->offset) {
1397 *cs->statusp = resp->status = NFS4ERR_INVAL;
1398 goto out;
1399 }
1400
1401 va.va_mask = AT_UID;
1402 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1403
1404 /*
1405 * If we can't get the attributes, then we can't do the
1406 * right access checking. So, we'll fail the request.
1407 */
1408 if (error) {
1409 *cs->statusp = resp->status = puterrno4(error);
1410 goto out;
1411 }
1412 if (rdonly4(req, cs)) {
1413 *cs->statusp = resp->status = NFS4ERR_ROFS;
1414 goto out;
1415 }
1416
1417 if (vp->v_type != VREG) {
1418 if (vp->v_type == VDIR)
1419 resp->status = NFS4ERR_ISDIR;
1420 else
1421 resp->status = NFS4ERR_INVAL;
1422 *cs->statusp = resp->status;
1423 goto out;
1424 }
1425
1426 if (crgetuid(cr) != va.va_uid &&
1427 (error = VOP_ACCESS(vp, VWRITE, 0, cs->cr, NULL))) {
1428 *cs->statusp = resp->status = puterrno4(error);
1429 goto out;
1430 }
1431
1432 error = VOP_FSYNC(vp, FSYNC, cr, NULL);
1433
1434 if (error) {
1435 *cs->statusp = resp->status = puterrno4(error);
1436 goto out;
1437 }
1438
1439 *cs->statusp = resp->status = NFS4_OK;
1440 resp->writeverf = Write4verf;
1441 out:
1442 DTRACE_NFSV4_2(op__commit__done, struct compound_state *, cs,
1443 COMMIT4res *, resp);
1444 }
1445
1446 /*
1447 * rfs4_op_mknod is called from rfs4_op_create after all initial verification
1448 * was completed. It does the nfsv4 create for special files.
1449 */
1450 /* ARGSUSED */
1451 static vnode_t *
1452 do_rfs4_op_mknod(CREATE4args *args, CREATE4res *resp, struct svc_req *req,
1453 struct compound_state *cs, vattr_t *vap, char *nm)
1454 {
1455 int error;
1456 cred_t *cr = cs->cr;
1457 vnode_t *dvp = cs->vp;
1458 vnode_t *vp = NULL;
1459 int mode;
1460 enum vcexcl excl;
1461
1462 switch (args->type) {
1463 case NF4CHR:
1464 case NF4BLK:
1465 if (secpolicy_sys_devices(cr) != 0) {
1466 *cs->statusp = resp->status = NFS4ERR_PERM;
1467 return (NULL);
1468 }
1469 if (args->type == NF4CHR)
1470 vap->va_type = VCHR;
1471 else
1472 vap->va_type = VBLK;
1473 vap->va_rdev = makedevice(args->ftype4_u.devdata.specdata1,
1474 args->ftype4_u.devdata.specdata2);
1475 vap->va_mask |= AT_RDEV;
1476 break;
1477 case NF4SOCK:
1478 vap->va_type = VSOCK;
1479 break;
1480 case NF4FIFO:
1481 vap->va_type = VFIFO;
1482 break;
1483 default:
1484 *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1485 return (NULL);
1486 }
1487
1488 /*
1489 * Must specify the mode.
1490 */
1491 if (!(vap->va_mask & AT_MODE)) {
1492 *cs->statusp = resp->status = NFS4ERR_INVAL;
1493 return (NULL);
1494 }
1495
1496 excl = EXCL;
1497
1498 mode = 0;
1499
1500 error = VOP_CREATE(dvp, nm, vap, excl, mode, &vp, cr, 0, NULL, NULL);
1501 if (error) {
1502 *cs->statusp = resp->status = puterrno4(error);
1503 return (NULL);
1504 }
1505 return (vp);
1506 }
1507
/*
 * nfsv4 create is used to create non-regular files: directories, symbolic
 * links and special files (block/character devices, sockets, FIFOs).
 * For regular files, use nfsv4 open.
 *
 * The object is created in the current directory (cs->vp) under
 * args->objname with the attributes given in args->createattrs.  On
 * success the new object becomes the current filehandle: cs->fh is rebuilt
 * with makefh4() and cs->vp is replaced by the new vnode.  resp->cinfo
 * holds the parent's change values before and after the create, and
 * resp->attrset the createattrs bits that were applied.
 *
 * Errors are stored in both resp->status and *cs->statusp.
 */
/* ARGSUSED */
static void
rfs4_op_create(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
    struct compound_state *cs)
{
	CREATE4args *args = &argop->nfs_argop4_u.opcreate;
	CREATE4res *resp = &resop->nfs_resop4_u.opcreate;
	int error;
	/*
	 * Attribute snapshots of the parent directory: bva before the
	 * create, iva right after it, iva2 after the symlink lookup, and
	 * ava after the final fsync.  vap points at the attributes decoded
	 * from createattrs.
	 */
	struct vattr bva, iva, iva2, ava, *vap;
	cred_t *cr = cs->cr;
	vnode_t *dvp = cs->vp;
	vnode_t *vp = NULL;
	vnode_t *realvp;
	/* nm/lnm: names as decoded; name/lname: after nfscmd_convname() */
	char *nm, *lnm;
	uint_t len, llen;
	int syncval = 0;
	struct nfs4_svgetit_arg sarg;
	struct nfs4_ntov_table ntov;
	struct statvfs64 sb;
	nfsstat4 status;
	struct sockaddr *ca;
	char *name = NULL;
	char *lname = NULL;

	DTRACE_NFSV4_2(op__create__start, struct compound_state *, cs,
	    CREATE4args *, args);

	resp->attrset = 0;

	if (dvp == NULL) {
		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
		goto out;
	}

	/*
	 * If there is an unshared filesystem mounted on this vnode,
	 * do not allow to create an object in this directory.
	 */
	if (vn_ismntpt(dvp)) {
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		goto out;
	}

	/* Verify that type is correct */
	switch (args->type) {
	case NF4LNK:
	case NF4BLK:
	case NF4CHR:
	case NF4SOCK:
	case NF4FIFO:
	case NF4DIR:
		break;
	default:
		*cs->statusp = resp->status = NFS4ERR_BADTYPE;
		goto out;
	};

	if (cs->access == CS_ACCESS_DENIED) {
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		goto out;
	}
	if (dvp->v_type != VDIR) {
		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
		goto out;
	}
	status = utf8_dir_verify(&args->objname);
	if (status != NFS4_OK) {
		*cs->statusp = resp->status = status;
		goto out;
	}

	if (rdonly4(req, cs)) {
		*cs->statusp = resp->status = NFS4ERR_ROFS;
		goto out;
	}

	/*
	 * Name of newly created object
	 */
	nm = utf8_to_fn(&args->objname, &len, NULL);
	if (nm == NULL) {
		*cs->statusp = resp->status = NFS4ERR_INVAL;
		goto out;
	}

	if (len > MAXNAMELEN) {
		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
		kmem_free(nm, len);
		goto out;
	}

	/*
	 * Convert the name for this client.  The result may be nm itself,
	 * which is why every cleanup below frees name only when it
	 * differs from nm.
	 */
	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
	name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
	    MAXPATHLEN + 1);

	if (name == NULL) {
		*cs->statusp = resp->status = NFS4ERR_INVAL;
		kmem_free(nm, len);
		goto out;
	}

	resp->attrset = 0;

	/*
	 * Decode createattrs.  From here on every exit must release the
	 * ntov table with nfs4_ntov_table_free().
	 */
	sarg.sbp = &sb;
	sarg.is_referral = B_FALSE;
	nfs4_ntov_table_init(&ntov);

	status = do_rfs4_set_attrs(&resp->attrset,
	    &args->createattrs, cs, &sarg, &ntov, NFS4ATTR_SETIT);

	/* A create that decodes to no vattr bits at all is invalid. */
	if (sarg.vap->va_mask == 0 && status == NFS4_OK)
		status = NFS4ERR_INVAL;

	if (status != NFS4_OK) {
		*cs->statusp = resp->status = status;
		if (name != nm)
			kmem_free(name, MAXPATHLEN + 1);
		kmem_free(nm, len);
		nfs4_ntov_table_free(&ntov, &sarg);
		resp->attrset = 0;
		goto out;
	}

	/* Get "before" change value */
	bva.va_mask = AT_CTIME|AT_SEQ|AT_MODE;
	error = VOP_GETATTR(dvp, &bva, 0, cr, NULL);
	if (error) {
		*cs->statusp = resp->status = puterrno4(error);
		if (name != nm)
			kmem_free(name, MAXPATHLEN + 1);
		kmem_free(nm, len);
		nfs4_ntov_table_free(&ntov, &sarg);
		resp->attrset = 0;
		goto out;
	}
	NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bva.va_ctime)

	vap = sarg.vap;

	/*
	 * Set the default initial values for attributes when the parent
	 * directory does not have the VSUID/VSGID bit set and they have
	 * not been specified in createattrs.
	 */
	if (!(bva.va_mode & VSUID) && (vap->va_mask & AT_UID) == 0) {
		vap->va_uid = crgetuid(cr);
		vap->va_mask |= AT_UID;
	}
	if (!(bva.va_mode & VSGID) && (vap->va_mask & AT_GID) == 0) {
		vap->va_gid = crgetgid(cr);
		vap->va_mask |= AT_GID;
	}

	/*
	 * Create the object.  Each case leaves its result in "error" (or,
	 * for special files, in resp->status via do_rfs4_op_mknod) and
	 * records the parent's sequence number in iva right after the
	 * create.
	 */
	vap->va_mask |= AT_TYPE;
	switch (args->type) {
	case NF4DIR:
		vap->va_type = VDIR;
		if ((vap->va_mask & AT_MODE) == 0) {
			vap->va_mode = 0700;	/* default: owner rwx only */
			vap->va_mask |= AT_MODE;
		}
		error = VOP_MKDIR(dvp, name, vap, &vp, cr, NULL, 0, NULL);
		if (error)
			break;

		/*
		 * Get the initial "after" sequence number, if it fails,
		 * set to zero
		 */
		iva.va_mask = AT_SEQ;
		if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
			iva.va_seq = 0;
		break;
	case NF4LNK:
		vap->va_type = VLNK;
		if ((vap->va_mask & AT_MODE) == 0) {
			vap->va_mode = 0700;	/* default: owner rwx only */
			vap->va_mask |= AT_MODE;
		}

		/*
		 * symlink names must be treated as data
		 */
		lnm = utf8_to_str((utf8string *)&args->ftype4_u.linkdata,
		    &llen, NULL);

		if (lnm == NULL) {
			*cs->statusp = resp->status = NFS4ERR_INVAL;
			if (name != nm)
				kmem_free(name, MAXPATHLEN + 1);
			kmem_free(nm, len);
			nfs4_ntov_table_free(&ntov, &sarg);
			resp->attrset = 0;
			goto out;
		}

		if (llen > MAXPATHLEN) {
			*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
			if (name != nm)
				kmem_free(name, MAXPATHLEN + 1);
			kmem_free(nm, len);
			kmem_free(lnm, llen);
			nfs4_ntov_table_free(&ntov, &sarg);
			resp->attrset = 0;
			goto out;
		}

		/* Same conversion as for the object name above. */
		lname = nfscmd_convname(ca, cs->exi, lnm,
		    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);

		if (lname == NULL) {
			*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
			if (name != nm)
				kmem_free(name, MAXPATHLEN + 1);
			kmem_free(nm, len);
			kmem_free(lnm, llen);
			nfs4_ntov_table_free(&ntov, &sarg);
			resp->attrset = 0;
			goto out;
		}

		/* The link text is no longer needed once the call returns. */
		error = VOP_SYMLINK(dvp, name, vap, lname, cr, NULL, 0);
		if (lname != lnm)
			kmem_free(lname, MAXPATHLEN + 1);
		kmem_free(lnm, llen);
		if (error)
			break;

		/*
		 * Get the initial "after" sequence number, if it fails,
		 * set to zero
		 */
		iva.va_mask = AT_SEQ;
		if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
			iva.va_seq = 0;

		/* VOP_SYMLINK returns no vnode here; look the new link up. */
		error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
		    NULL, NULL, NULL);
		if (error)
			break;

		/*
		 * va_seq is not safe over VOP calls, check it again
		 * if it has changed zero out iva to force atomic = FALSE.
		 */
		iva2.va_mask = AT_SEQ;
		if (VOP_GETATTR(dvp, &iva2, 0, cs->cr, NULL) ||
		    iva2.va_seq != iva.va_seq)
			iva.va_seq = 0;
		break;
	default:
		/*
		 * probably a special file.
		 */
		if ((vap->va_mask & AT_MODE) == 0) {
			vap->va_mode = 0600;	/* default: owner rw only */
			vap->va_mask |= AT_MODE;
		}
		/* flag used for the fsync of the new object further down */
		syncval = FNODSYNC;
		/*
		 * We know this will only generate one VOP call
		 */
		vp = do_rfs4_op_mknod(args, resp, req, cs, vap, name);

		/* do_rfs4_op_mknod has already stored the error status. */
		if (vp == NULL) {
			if (name != nm)
				kmem_free(name, MAXPATHLEN + 1);
			kmem_free(nm, len);
			nfs4_ntov_table_free(&ntov, &sarg);
			resp->attrset = 0;
			goto out;
		}

		/*
		 * Get the initial "after" sequence number, if it fails,
		 * set to zero
		 */
		iva.va_mask = AT_SEQ;
		if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
			iva.va_seq = 0;

		break;
	}
	/* The names are not used past this point. */
	if (name != nm)
		kmem_free(name, MAXPATHLEN + 1);
	kmem_free(nm, len);

	if (error) {
		*cs->statusp = resp->status = puterrno4(error);
	}

	/*
	 * Force modified data and metadata out to stable storage.
	 * This is done whether or not the create succeeded.
	 */
	(void) VOP_FSYNC(dvp, 0, cr, NULL);

	/*
	 * NOTE(review): on the success path nothing in this function has
	 * written resp->status yet, so this test presumably relies on the
	 * result having been zero-filled (NFS4_OK) by the caller - confirm.
	 */
	if (resp->status != NFS4_OK) {
		if (vp != NULL)
			VN_RELE(vp);
		nfs4_ntov_table_free(&ntov, &sarg);
		resp->attrset = 0;
		goto out;
	}

	/*
	 * Finish setup of cinfo response, "before" value already set.
	 * Get "after" change value, if it fails, simply return the
	 * before value.
	 */
	ava.va_mask = AT_CTIME|AT_SEQ;
	if (VOP_GETATTR(dvp, &ava, 0, cr, NULL)) {
		ava.va_ctime = bva.va_ctime;
		ava.va_seq = 0;
	}
	NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, ava.va_ctime);

	/*
	 * True verification that object was created with correct
	 * attrs is impossible.  The attrs could have been changed
	 * immediately after object creation.  If attributes did
	 * not verify, the only recourse for the server is to
	 * destroy the object.  Maybe if some attrs (like gid)
	 * are set incorrectly, the object should be destroyed;
	 * however, seems bad as a default policy.  Do we really
	 * want to destroy an object over one of the times not
	 * verifying correctly?  For these reasons, the server
	 * currently sets bits in attrset for createattrs
	 * that were set; however, no verification is done.
	 *
	 * vmask_to_nmask accounts for vattr bits set on create
	 *	[do_rfs4_set_attrs() only sets resp bits for
	 *	 non-vattr/vfs bits.]
	 * Mask off any bits set by default so as not to return
	 * more attrset bits than were requested in createattrs
	 */
	nfs4_vmask_to_nmask(sarg.vap->va_mask, &resp->attrset);
	resp->attrset &= args->createattrs.attrmask;
	nfs4_ntov_table_free(&ntov, &sarg);

	/* Build the filehandle of the new object in cs->fh. */
	error = makefh4(&cs->fh, vp, cs->exi);
	if (error) {
		*cs->statusp = resp->status = puterrno4(error);
	}

	/*
	 * The cinfo.atomic = TRUE only if we got no errors, we have
	 * non-zero va_seq's, and it has incremented by exactly one
	 * during the creation and it didn't change during the VOP_LOOKUP
	 * or VOP_FSYNC.
	 */
	if (!error && bva.va_seq && iva.va_seq && ava.va_seq &&
	    iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
		resp->cinfo.atomic = TRUE;
	else
		resp->cinfo.atomic = FALSE;

	/*
	 * Force modified metadata out to stable storage.
	 *
	 * if a underlying vp exists, pass it to VOP_FSYNC
	 */
	if (VOP_REALVP(vp, &realvp, NULL) == 0)
		(void) VOP_FSYNC(realvp, syncval, cr, NULL);
	else
		(void) VOP_FSYNC(vp, syncval, cr, NULL);

	/* Only a makefh4() failure can reach this with an error status. */
	if (resp->status != NFS4_OK) {
		VN_RELE(vp);
		goto out;
	}
	/* Swap the current vnode: drop the parent, keep the new object. */
	if (cs->vp)
		VN_RELE(cs->vp);

	cs->vp = vp;
	*cs->statusp = resp->status = NFS4_OK;
out:
	DTRACE_NFSV4_2(op__create__done, struct compound_state *, cs,
	    CREATE4res *, resp);
}
1891
1892 /*ARGSUSED*/
1893 static void
1894 rfs4_op_delegpurge(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1895 struct compound_state *cs)
1896 {
1897 DTRACE_NFSV4_2(op__delegpurge__start, struct compound_state *, cs,
1898 DELEGPURGE4args *, &argop->nfs_argop4_u.opdelegpurge);
1899
1900 rfs4_op_inval(argop, resop, req, cs);
1901
1902 DTRACE_NFSV4_2(op__delegpurge__done, struct compound_state *, cs,
1903 DELEGPURGE4res *, &resop->nfs_resop4_u.opdelegpurge);
1904 }
1905
1906 /*ARGSUSED*/
1907 static void
1908 rfs4_op_delegreturn(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1909 struct compound_state *cs)
1910 {
1911 DELEGRETURN4args *args = &argop->nfs_argop4_u.opdelegreturn;
1912 DELEGRETURN4res *resp = &resop->nfs_resop4_u.opdelegreturn;
1913 rfs4_deleg_state_t *dsp;
1914 nfsstat4 status;
1915
1916 DTRACE_NFSV4_2(op__delegreturn__start, struct compound_state *, cs,
1917 DELEGRETURN4args *, args);
1918
1919 status = rfs4_get_deleg_state(&args->deleg_stateid, &dsp);
1920 resp->status = *cs->statusp = status;
1921 if (status != NFS4_OK)
1922 goto out;
1923
1924 /* Ensure specified filehandle matches */
1925 if (cs->vp != dsp->rds_finfo->rf_vp) {
1926 resp->status = *cs->statusp = NFS4ERR_BAD_STATEID;
1927 } else
1928 rfs4_return_deleg(dsp, FALSE);
1929
1930 rfs4_update_lease(dsp->rds_client);
1931
1932 rfs4_deleg_state_rele(dsp);
1933 out:
1934 DTRACE_NFSV4_2(op__delegreturn__done, struct compound_state *, cs,
1935 DELEGRETURN4res *, resp);
1936 }
1937
1938 /*
1939 * Check to see if a given "flavor" is an explicitly shared flavor.
1940 * The assumption of this routine is the "flavor" is already a valid
1941 * flavor in the secinfo list of "exi".
1942 *
1943 * e.g.
1944 * # share -o sec=flavor1 /export
1945 * # share -o sec=flavor2 /export/home
1946 *
1947 * flavor2 is not an explicitly shared flavor for /export,
1948 * however it is in the secinfo list for /export thru the
1949 * server namespace setup.
1950 */
1951 int
1952 is_exported_sec(int flavor, struct exportinfo *exi)
1953 {
1954 int i;
1955 struct secinfo *sp;
1956
1957 sp = exi->exi_export.ex_secinfo;
1958 for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
1959 if (flavor == sp[i].s_secinfo.sc_nfsnum ||
1960 sp[i].s_secinfo.sc_nfsnum == AUTH_NONE) {
1961 return (SEC_REF_EXPORTED(&sp[i]));
1962 }
1963 }
1964
1965 /* Should not reach this point based on the assumption */
1966 return (0);
1967 }
1968
1969 /*
1970 * Check if the security flavor used in the request matches what is
1971 * required at the export point or at the root pseudo node (exi_root).
1972 *
1973 * returns 1 if there's a match or if exported with AUTH_NONE; 0 otherwise.
1974 *
1975 */
1976 static int
1977 secinfo_match_or_authnone(struct compound_state *cs)
1978 {
1979 int i;
1980 struct secinfo *sp;
1981
1982 /*
1983 * Check cs->nfsflavor (from the request) against
1984 * the current export data in cs->exi.
1985 */
1986 sp = cs->exi->exi_export.ex_secinfo;
1987 for (i = 0; i < cs->exi->exi_export.ex_seccnt; i++) {
1988 if (cs->nfsflavor == sp[i].s_secinfo.sc_nfsnum ||
1989 sp[i].s_secinfo.sc_nfsnum == AUTH_NONE)
1990 return (1);
1991 }
1992
1993 return (0);
1994 }
1995
1996 /*
1997 * Check the access authority for the client and return the correct error.
1998 */
1999 nfsstat4
2000 call_checkauth4(struct compound_state *cs, struct svc_req *req)
2001 {
2002 int authres;
2003
2004 /*
2005 * First, check if the security flavor used in the request
2006 * are among the flavors set in the server namespace.
2007 */
2008 if (!secinfo_match_or_authnone(cs)) {
2009 *cs->statusp = NFS4ERR_WRONGSEC;
2010 return (*cs->statusp);
2011 }
2012
2013 authres = checkauth4(cs, req);
2014
2015 if (authres > 0) {
2016 *cs->statusp = NFS4_OK;
2017 if (! (cs->access & CS_ACCESS_LIMITED))
2018 cs->access = CS_ACCESS_OK;
2019 } else if (authres == 0) {
2020 *cs->statusp = NFS4ERR_ACCESS;
2021 } else if (authres == -2) {
2022 *cs->statusp = NFS4ERR_WRONGSEC;
2023 } else {
2024 *cs->statusp = NFS4ERR_DELAY;
2025 }
2026 return (*cs->statusp);
2027 }
2028
2029 /*
2030 * bitmap4_to_attrmask is called by getattr and readdir.
2031 * It sets up the vattr mask and determines whether vfsstat call is needed
2032 * based on the input bitmap.
2033 * Returns nfsv4 status.
2034 */
2035 static nfsstat4
2036 bitmap4_to_attrmask(bitmap4 breq, struct nfs4_svgetit_arg *sargp)
2037 {
2038 int i;
2039 uint_t va_mask;
2040 struct statvfs64 *sbp = sargp->sbp;
2041
2042 sargp->sbp = NULL;
2043 sargp->flag = 0;
2044 sargp->rdattr_error = NFS4_OK;
2045 sargp->mntdfid_set = FALSE;
2046 if (sargp->cs->vp)
2047 sargp->xattr = get_fh4_flag(&sargp->cs->fh,
2048 FH4_ATTRDIR | FH4_NAMEDATTR);
2049 else
2050 sargp->xattr = 0;
2051
2052 /*
2053 * Set rdattr_error_req to true if return error per
2054 * failed entry rather than fail the readdir.
2055 */
2056 if (breq & FATTR4_RDATTR_ERROR_MASK)
2057 sargp->rdattr_error_req = 1;
2058 else
2059 sargp->rdattr_error_req = 0;
2060
2061 /*
2062 * generate the va_mask
2063 * Handle the easy cases first
2064 */
2065 switch (breq) {
2066 case NFS4_NTOV_ATTR_MASK:
2067 sargp->vap->va_mask = NFS4_NTOV_ATTR_AT_MASK;
2068 return (NFS4_OK);
2069
2070 case NFS4_FS_ATTR_MASK:
2071 sargp->vap->va_mask = NFS4_FS_ATTR_AT_MASK;
2072 sargp->sbp = sbp;
2073 return (NFS4_OK);
2074
2075 case NFS4_NTOV_ATTR_CACHE_MASK:
2076 sargp->vap->va_mask = NFS4_NTOV_ATTR_CACHE_AT_MASK;
2077 return (NFS4_OK);
2078
2079 case FATTR4_LEASE_TIME_MASK:
2080 sargp->vap->va_mask = 0;
2081 return (NFS4_OK);
2082
2083 default:
2084 va_mask = 0;
2085 for (i = 0; i < nfs4_ntov_map_size; i++) {
2086 if ((breq & nfs4_ntov_map[i].fbit) &&
2087 nfs4_ntov_map[i].vbit)
2088 va_mask |= nfs4_ntov_map[i].vbit;
2089 }
2090
2091 /*
2092 * Check is vfsstat is needed
2093 */
2094 if (breq & NFS4_FS_ATTR_MASK)
2095 sargp->sbp = sbp;
2096
2097 sargp->vap->va_mask = va_mask;
2098 return (NFS4_OK);
2099 }
2100 /* NOTREACHED */
2101 }
2102
2103 /*
2104 * bitmap4_get_sysattrs is called by getattr and readdir.
2105 * It calls both VOP_GETATTR and VFS_STATVFS calls to get the attrs.
2106 * Returns nfsv4 status.
2107 */
2108 static nfsstat4
2109 bitmap4_get_sysattrs(struct nfs4_svgetit_arg *sargp)
2110 {
2111 int error;
2112 struct compound_state *cs = sargp->cs;
2113 vnode_t *vp = cs->vp;
2114
2115 if (sargp->sbp != NULL) {
2116 if (error = VFS_STATVFS(vp->v_vfsp, sargp->sbp)) {
2117 sargp->sbp = NULL; /* to identify error */
2118 return (puterrno4(error));
2119 }
2120 }
2121
2122 return (rfs4_vop_getattr(vp, sargp->vap, 0, cs->cr));
2123 }
2124
2125 static void
2126 nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp)
2127 {
2128 ntovp->na = kmem_zalloc(sizeof (union nfs4_attr_u) * nfs4_ntov_map_size,
2129 KM_SLEEP);
2130 ntovp->attrcnt = 0;
2131 ntovp->vfsstat = FALSE;
2132 }
2133
2134 static void
2135 nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
2136 struct nfs4_svgetit_arg *sargp)
2137 {
2138 int i;
2139 union nfs4_attr_u *na;
2140 uint8_t *amap;
2141
2142 /*
2143 * XXX Should do the same checks for whether the bit is set
2144 */
2145 for (i = 0, na = ntovp->na, amap = ntovp->amap;
2146 i < ntovp->attrcnt; i++, na++, amap++) {
2147 (void) (*nfs4_ntov_map[*amap].sv_getit)(
2148 NFS4ATTR_FREEIT, sargp, na);
2149 }
2150 if ((sargp->op == NFS4ATTR_SETIT) || (sargp->op == NFS4ATTR_VERIT)) {
2151 /*
2152 * xdr_free for getattr will be done later
2153 */
2154 for (i = 0, na = ntovp->na, amap = ntovp->amap;
2155 i < ntovp->attrcnt; i++, na++, amap++) {
2156 xdr_free(nfs4_ntov_map[*amap].xfunc, (caddr_t)na);
2157 }
2158 }
2159 kmem_free(ntovp->na, sizeof (union nfs4_attr_u) * nfs4_ntov_map_size);
2160 }
2161
2162 /*
2163 * do_rfs4_op_getattr gets the system attrs and converts into fattr4.
2164 */
2165 static nfsstat4
2166 do_rfs4_op_getattr(bitmap4 breq, fattr4 *fattrp,
2167 struct nfs4_svgetit_arg *sargp)
2168 {
2169 int error = 0;
2170 int i, k;
2171 struct nfs4_ntov_table ntov;
2172 XDR xdr;
2173 ulong_t xdr_size;
2174 char *xdr_attrs;
2175 nfsstat4 status = NFS4_OK;
2176 nfsstat4 prev_rdattr_error = sargp->rdattr_error;
2177 union nfs4_attr_u *na;
2178 uint8_t *amap;
2179
2180 sargp->op = NFS4ATTR_GETIT;
2181 sargp->flag = 0;
2182
2183 fattrp->attrmask = 0;
2184 /* if no bits requested, then return empty fattr4 */
2185 if (breq == 0) {
2186 fattrp->attrlist4_len = 0;
2187 fattrp->attrlist4 = NULL;
2188 return (NFS4_OK);
2189 }
2190
2191 /*
2192 * return NFS4ERR_INVAL when client requests write-only attrs
2193 */
2194 if (breq & (FATTR4_TIME_ACCESS_SET_MASK | FATTR4_TIME_MODIFY_SET_MASK))
2195 return (NFS4ERR_INVAL);
2196
2197 nfs4_ntov_table_init(&ntov);
2198 na = ntov.na;
2199 amap = ntov.amap;
2200
2201 /*
2202 * Now loop to get or verify the attrs
2203 */
2204 for (i = 0; i < nfs4_ntov_map_size; i++) {
2205 if (breq & nfs4_ntov_map[i].fbit) {
2206 if ((*nfs4_ntov_map[i].sv_getit)(
2207 NFS4ATTR_SUPPORTED, sargp, NULL) == 0) {
2208
2209 error = (*nfs4_ntov_map[i].sv_getit)(
2210 NFS4ATTR_GETIT, sargp, na);
2211
2212 /*
2213 * Possible error values:
2214 * >0 if sv_getit failed to
2215 * get the attr; 0 if succeeded;
2216 * <0 if rdattr_error and the
2217 * attribute cannot be returned.
2218 */
2219 if (error && !(sargp->rdattr_error_req))
2220 goto done;
2221 /*
2222 * If error then just for entry
2223 */
2224 if (error == 0) {
2225 fattrp->attrmask |=
2226 nfs4_ntov_map[i].fbit;
2227 *amap++ =
2228 (uint8_t)nfs4_ntov_map[i].nval;
2229 na++;
2230 (ntov.attrcnt)++;
2231 } else if ((error > 0) &&
2232 (sargp->rdattr_error == NFS4_OK)) {
2233 sargp->rdattr_error = puterrno4(error);
2234 }
2235 error = 0;
2236 }
2237 }
2238 }
2239
2240 /*
2241 * If rdattr_error was set after the return value for it was assigned,
2242 * update it.
2243 */
2244 if (prev_rdattr_error != sargp->rdattr_error) {
2245 na = ntov.na;
2246 amap = ntov.amap;
2247 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2248 k = *amap;
2249 if (k < FATTR4_RDATTR_ERROR) {
2250 continue;
2251 }
2252 if ((k == FATTR4_RDATTR_ERROR) &&
2253 ((*nfs4_ntov_map[k].sv_getit)(
2254 NFS4ATTR_SUPPORTED, sargp, NULL) == 0)) {
2255
2256 (void) (*nfs4_ntov_map[k].sv_getit)(
2257 NFS4ATTR_GETIT, sargp, na);
2258 }
2259 break;
2260 }
2261 }
2262
2263 xdr_size = 0;
2264 na = ntov.na;
2265 amap = ntov.amap;
2266 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2267 xdr_size += xdr_sizeof(nfs4_ntov_map[*amap].xfunc, na);
2268 }
2269
2270 fattrp->attrlist4_len = xdr_size;
2271 if (xdr_size) {
2272 /* freed by rfs4_op_getattr_free() */
2273 fattrp->attrlist4 = xdr_attrs = kmem_zalloc(xdr_size, KM_SLEEP);
2274
2275 xdrmem_create(&xdr, xdr_attrs, xdr_size, XDR_ENCODE);
2276
2277 na = ntov.na;
2278 amap = ntov.amap;
2279 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2280 if (!(*nfs4_ntov_map[*amap].xfunc)(&xdr, na)) {
2281 DTRACE_PROBE1(nfss__e__getattr4_encfail,
2282 int, *amap);
2283 status = NFS4ERR_SERVERFAULT;
2284 break;
2285 }
2286 }
2287 /* xdrmem_destroy(&xdrs); */ /* NO-OP */
2288 } else {
2289 fattrp->attrlist4 = NULL;
2290 }
2291 done:
2292
2293 nfs4_ntov_table_free(&ntov, sargp);
2294
2295 if (error != 0)
2296 status = puterrno4(error);
2297
2298 return (status);
2299 }
2300
2301 /* ARGSUSED */
2302 static void
2303 rfs4_op_getattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2304 struct compound_state *cs)
2305 {
2306 GETATTR4args *args = &argop->nfs_argop4_u.opgetattr;
2307 GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2308 struct nfs4_svgetit_arg sarg;
2309 struct statvfs64 sb;
2310 nfsstat4 status;
2311
2312 DTRACE_NFSV4_2(op__getattr__start, struct compound_state *, cs,
2313 GETATTR4args *, args);
2314
2315 if (cs->vp == NULL) {
2316 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2317 goto out;
2318 }
2319
2320 if (cs->access == CS_ACCESS_DENIED) {
2321 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2322 goto out;
2323 }
2324
2325 sarg.sbp = &sb;
2326 sarg.cs = cs;
2327 sarg.is_referral = B_FALSE;
2328
2329 status = bitmap4_to_attrmask(args->attr_request, &sarg);
2330 if (status == NFS4_OK) {
2331
2332 status = bitmap4_get_sysattrs(&sarg);
2333 if (status == NFS4_OK) {
2334
2335 /* Is this a referral? */
2336 if (vn_is_nfs_reparse(cs->vp, cs->cr)) {
2337 /* Older V4 Solaris client sees a link */
2338 if (client_is_downrev(req))
2339 sarg.vap->va_type = VLNK;
2340 else
2341 sarg.is_referral = B_TRUE;
2342 }
2343
2344 status = do_rfs4_op_getattr(args->attr_request,
2345 &resp->obj_attributes, &sarg);
2346 }
2347 }
2348 *cs->statusp = resp->status = status;
2349 out:
2350 DTRACE_NFSV4_2(op__getattr__done, struct compound_state *, cs,
2351 GETATTR4res *, resp);
2352 }
2353
2354 static void
2355 rfs4_op_getattr_free(nfs_resop4 *resop)
2356 {
2357 GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2358
2359 nfs4_fattr4_free(&resp->obj_attributes);
2360 }
2361
2362 /* ARGSUSED */
2363 static void
2364 rfs4_op_getfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2365 struct compound_state *cs)
2366 {
2367 GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2368
2369 DTRACE_NFSV4_1(op__getfh__start, struct compound_state *, cs);
2370
2371 if (cs->vp == NULL) {
2372 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2373 goto out;
2374 }
2375 if (cs->access == CS_ACCESS_DENIED) {
2376 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2377 goto out;
2378 }
2379
2380 /* check for reparse point at the share point */
2381 if (cs->exi->exi_moved || vn_is_nfs_reparse(cs->exi->exi_vp, cs->cr)) {
2382 /* it's all bad */
2383 cs->exi->exi_moved = 1;
2384 *cs->statusp = resp->status = NFS4ERR_MOVED;
2385 DTRACE_PROBE2(nfs4serv__func__referral__shared__moved,
2386 vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2387 return;
2388 }
2389
2390 /* check for reparse point at vp */
2391 if (vn_is_nfs_reparse(cs->vp, cs->cr) && !client_is_downrev(req)) {
2392 /* it's not all bad */
2393 *cs->statusp = resp->status = NFS4ERR_MOVED;
2394 DTRACE_PROBE2(nfs4serv__func__referral__moved,
2395 vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2396 return;
2397 }
2398
2399 resp->object.nfs_fh4_val =
2400 kmem_alloc(cs->fh.nfs_fh4_len, KM_SLEEP);
2401 nfs_fh4_copy(&cs->fh, &resp->object);
2402 *cs->statusp = resp->status = NFS4_OK;
2403 out:
2404 DTRACE_NFSV4_2(op__getfh__done, struct compound_state *, cs,
2405 GETFH4res *, resp);
2406 }
2407
2408 static void
2409 rfs4_op_getfh_free(nfs_resop4 *resop)
2410 {
2411 GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2412
2413 if (resp->status == NFS4_OK &&
2414 resp->object.nfs_fh4_val != NULL) {
2415 kmem_free(resp->object.nfs_fh4_val, resp->object.nfs_fh4_len);
2416 resp->object.nfs_fh4_val = NULL;
2417 resp->object.nfs_fh4_len = 0;
2418 }
2419 }
2420
2421 /*
2422 * illegal: args: void
2423 * res : status (NFS4ERR_OP_ILLEGAL)
2424 */
2425 /* ARGSUSED */
2426 static void
2427 rfs4_op_illegal(nfs_argop4 *argop, nfs_resop4 *resop,
2428 struct svc_req *req, struct compound_state *cs)
2429 {
2430 ILLEGAL4res *resp = &resop->nfs_resop4_u.opillegal;
2431
2432 resop->resop = OP_ILLEGAL;
2433 *cs->statusp = resp->status = NFS4ERR_OP_ILLEGAL;
2434 }
2435
2436 /*
2437 * link: args: SAVED_FH: file, CURRENT_FH: target directory
2438 * res: status. If success - CURRENT_FH unchanged, return change_info
2439 */
2440 /* ARGSUSED */
2441 static void
2442 rfs4_op_link(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2443 struct compound_state *cs)
2444 {
2445 LINK4args *args = &argop->nfs_argop4_u.oplink;
2446 LINK4res *resp = &resop->nfs_resop4_u.oplink;
2447 int error;
2448 vnode_t *vp;
2449 vnode_t *dvp;
2450 struct vattr bdva, idva, adva;
2451 char *nm;
2452 uint_t len;
2453 struct sockaddr *ca;
2454 char *name = NULL;
2455 nfsstat4 status;
2456
2457 DTRACE_NFSV4_2(op__link__start, struct compound_state *, cs,
2458 LINK4args *, args);
2459
2460 /* SAVED_FH: source object */
2461 vp = cs->saved_vp;
2462 if (vp == NULL) {
2463 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2464 goto out;
2465 }
2466
2467 /* CURRENT_FH: target directory */
2468 dvp = cs->vp;
2469 if (dvp == NULL) {
2470 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2471 goto out;
2472 }
2473
2474 /*
2475 * If there is a non-shared filesystem mounted on this vnode,
2476 * do not allow to link any file in this directory.
2477 */
2478 if (vn_ismntpt(dvp)) {
2479 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2480 goto out;
2481 }
2482
2483 if (cs->access == CS_ACCESS_DENIED) {
2484 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2485 goto out;
2486 }
2487
2488 /* Check source object's type validity */
2489 if (vp->v_type == VDIR) {
2490 *cs->statusp = resp->status = NFS4ERR_ISDIR;
2491 goto out;
2492 }
2493
2494 /* Check target directory's type */
2495 if (dvp->v_type != VDIR) {
2496 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2497 goto out;
2498 }
2499
2500 if (cs->saved_exi != cs->exi) {
2501 *cs->statusp = resp->status = NFS4ERR_XDEV;
2502 goto out;
2503 }
2504
2505 status = utf8_dir_verify(&args->newname);
2506 if (status != NFS4_OK) {
2507 *cs->statusp = resp->status = status;
2508 goto out;
2509 }
2510
2511 nm = utf8_to_fn(&args->newname, &len, NULL);
2512 if (nm == NULL) {
2513 *cs->statusp = resp->status = NFS4ERR_INVAL;
2514 goto out;
2515 }
2516
2517 if (len > MAXNAMELEN) {
2518 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2519 kmem_free(nm, len);
2520 goto out;
2521 }
2522
2523 if (rdonly4(req, cs)) {
2524 *cs->statusp = resp->status = NFS4ERR_ROFS;
2525 kmem_free(nm, len);
2526 goto out;
2527 }
2528
2529 /* Get "before" change value */
2530 bdva.va_mask = AT_CTIME|AT_SEQ;
2531 error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
2532 if (error) {
2533 *cs->statusp = resp->status = puterrno4(error);
2534 kmem_free(nm, len);
2535 goto out;
2536 }
2537
2538 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2539 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
2540 MAXPATHLEN + 1);
2541
2542 if (name == NULL) {
2543 *cs->statusp = resp->status = NFS4ERR_INVAL;
2544 kmem_free(nm, len);
2545 goto out;
2546 }
2547
2548 NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
2549
2550 error = VOP_LINK(dvp, vp, name, cs->cr, NULL, 0);
2551
2552 if (nm != name)
2553 kmem_free(name, MAXPATHLEN + 1);
2554 kmem_free(nm, len);
2555
2556 /*
2557 * Get the initial "after" sequence number, if it fails, set to zero
2558 */
2559 idva.va_mask = AT_SEQ;
2560 if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
2561 idva.va_seq = 0;
2562
2563 /*
2564 * Force modified data and metadata out to stable storage.
2565 */
2566 (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
2567 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
2568
2569 if (error) {
2570 *cs->statusp = resp->status = puterrno4(error);
2571 goto out;
2572 }
2573
2574 /*
2575 * Get "after" change value, if it fails, simply return the
2576 * before value.
2577 */
2578 adva.va_mask = AT_CTIME|AT_SEQ;
2579 if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
2580 adva.va_ctime = bdva.va_ctime;
2581 adva.va_seq = 0;
2582 }
2583
2584 NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
2585
2586 /*
2587 * The cinfo.atomic = TRUE only if we have
2588 * non-zero va_seq's, and it has incremented by exactly one
2589 * during the VOP_LINK and it didn't change during the VOP_FSYNC.
2590 */
2591 if (bdva.va_seq && idva.va_seq && adva.va_seq &&
2592 idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
2593 resp->cinfo.atomic = TRUE;
2594 else
2595 resp->cinfo.atomic = FALSE;
2596
2597 *cs->statusp = resp->status = NFS4_OK;
2598 out:
2599 DTRACE_NFSV4_2(op__link__done, struct compound_state *, cs,
2600 LINK4res *, resp);
2601 }
2602
2603 /*
2604 * Used by rfs4_op_lookup and rfs4_op_lookupp to do the actual work.
2605 */
2606
2607 /* ARGSUSED */
2608 static nfsstat4
2609 do_rfs4_op_lookup(char *nm, struct svc_req *req, struct compound_state *cs)
2610 {
2611 int error;
2612 int different_export = 0;
2613 vnode_t *vp, *pre_tvp = NULL, *oldvp = NULL;
2614 struct exportinfo *exi = NULL, *pre_exi = NULL;
2615 nfsstat4 stat;
2616 fid_t fid;
2617 int attrdir, dotdot, walk;
2618 bool_t is_newvp = FALSE;
2619
2620 if (cs->vp->v_flag & V_XATTRDIR) {
2621 attrdir = 1;
2622 ASSERT(get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2623 } else {
2624 attrdir = 0;
2625 ASSERT(! get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2626 }
2627
2628 dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
2629
2630 /*
2631 * If dotdotting, then need to check whether it's
2632 * above the root of a filesystem, or above an
2633 * export point.
2634 */
2635 if (dotdot) {
2636
2637 /*
2638 * If dotdotting at the root of a filesystem, then
2639 * need to traverse back to the mounted-on filesystem
2640 * and do the dotdot lookup there.
2641 */
2642 if (cs->vp->v_flag & VROOT) {
2643
2644 /*
2645 * If at the system root, then can
2646 * go up no further.
2647 */
2648 if (VN_CMP(cs->vp, rootdir))
2649 return (puterrno4(ENOENT));
2650
2651 /*
2652 * Traverse back to the mounted-on filesystem
2653 */
2654 cs->vp = untraverse(cs->vp);
2655
2656 /*
2657 * Set the different_export flag so we remember
2658 * to pick up a new exportinfo entry for
2659 * this new filesystem.
2660 */
2661 different_export = 1;
2662 } else {
2663
2664 /*
2665 * If dotdotting above an export point then set
2666 * the different_export to get new export info.
2667 */
2668 different_export = nfs_exported(cs->exi, cs->vp);
2669 }
2670 }
2671
2672 error = VOP_LOOKUP(cs->vp, nm, &vp, NULL, 0, NULL, cs->cr,
2673 NULL, NULL, NULL);
2674 if (error)
2675 return (puterrno4(error));
2676
2677 /*
2678 * If the vnode is in a pseudo filesystem, check whether it is visible.
2679 *
2680 * XXX if the vnode is a symlink and it is not visible in
2681 * a pseudo filesystem, return ENOENT (not following symlink).
2682 * V4 client can not mount such symlink. This is a regression
2683 * from V2/V3.
2684 *
2685 * In the same exported filesystem, if the security flavor used
2686 * is not an explicitly shared flavor, limit the view to the visible
2687 * list entries only. This is not a WRONGSEC case because it's already
2688 * checked via PUTROOTFH/PUTPUBFH or PUTFH.
2689 */
2690 if (!different_export &&
2691 (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
2692 cs->access & CS_ACCESS_LIMITED)) {
2693 if (! nfs_visible(cs->exi, vp, &different_export)) {
2694 VN_RELE(vp);
2695 return (puterrno4(ENOENT));
2696 }
2697 }
2698
2699 /*
2700 * If it's a mountpoint, then traverse it.
2701 */
2702 if (vn_ismntpt(vp)) {
2703 pre_exi = cs->exi; /* save pre-traversed exportinfo */
2704 pre_tvp = vp; /* save pre-traversed vnode */
2705
2706 /*
2707 * hold pre_tvp to counteract rele by traverse. We will
2708 * need pre_tvp below if checkexport4 fails
2709 */
2710 VN_HOLD(pre_tvp);
2711 if ((error = traverse(&vp)) != 0) {
2712 VN_RELE(vp);
2713 VN_RELE(pre_tvp);
2714 return (puterrno4(error));
2715 }
2716 different_export = 1;
2717 } else if (vp->v_vfsp != cs->vp->v_vfsp) {
2718 /*
2719 * The vfsp comparison is to handle the case where
2720 * a LOFS mount is shared. lo_lookup traverses mount points,
2721 * and NFS is unaware of local fs transistions because
2722 * v_vfsmountedhere isn't set. For this special LOFS case,
2723 * the dir and the obj returned by lookup will have different
2724 * vfs ptrs.
2725 */
2726 different_export = 1;
2727 }
2728
2729 if (different_export) {
2730
2731 bzero(&fid, sizeof (fid));
2732 fid.fid_len = MAXFIDSZ;
2733 error = vop_fid_pseudo(vp, &fid);
2734 if (error) {
2735 VN_RELE(vp);
2736 if (pre_tvp)
2737 VN_RELE(pre_tvp);
2738 return (puterrno4(error));
2739 }
2740
2741 if (dotdot)
2742 exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
2743 else
2744 exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
2745
2746 if (exi == NULL) {
2747 if (pre_tvp) {
2748 /*
2749 * If this vnode is a mounted-on vnode,
2750 * but the mounted-on file system is not
2751 * exported, send back the filehandle for
2752 * the mounted-on vnode, not the root of
2753 * the mounted-on file system.
2754 */
2755 VN_RELE(vp);
2756 vp = pre_tvp;
2757 exi = pre_exi;
2758 } else {
2759 VN_RELE(vp);
2760 return (puterrno4(EACCES));
2761 }
2762 } else if (pre_tvp) {
2763 /* we're done with pre_tvp now. release extra hold */
2764 VN_RELE(pre_tvp);
2765 }
2766
2767 cs->exi = exi;
2768
2769 /*
2770 * Now we do a checkauth4. The reason is that
2771 * this client/user may not have access to the new
2772 * exported file system, and if they do,
2773 * the client/user may be mapped to a different uid.
2774 *
2775 * We start with a new cr, because the checkauth4 done
2776 * in the PUT*FH operation over wrote the cred's uid,
2777 * gid, etc, and we want the real thing before calling
2778 * checkauth4()
2779 */
2780 crfree(cs->cr);
2781 cs->cr = crdup(cs->basecr);
2782
2783 oldvp = cs->vp;
2784 cs->vp = vp;
2785 is_newvp = TRUE;
2786
2787 stat = call_checkauth4(cs, req);
2788 if (stat != NFS4_OK) {
2789 VN_RELE(cs->vp);
2790 cs->vp = oldvp;
2791 return (stat);
2792 }
2793 }
2794
2795 /*
2796 * After various NFS checks, do a label check on the path
2797 * component. The label on this path should either be the
2798 * global zone's label or a zone's label. We are only
2799 * interested in the zone's label because exported files
2800 * in global zone is accessible (though read-only) to
2801 * clients. The exportability/visibility check is already
2802 * done before reaching this code.
2803 */
2804 if (is_system_labeled()) {
2805 bslabel_t *clabel;
2806
2807 ASSERT(req->rq_label != NULL);
2808 clabel = req->rq_label;
2809 DTRACE_PROBE2(tx__rfs4__log__info__oplookup__clabel, char *,
2810 "got client label from request(1)", struct svc_req *, req);
2811
2812 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2813 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
2814 cs->exi)) {
2815 error = EACCES;
2816 goto err_out;
2817 }
2818 } else {
2819 /*
2820 * We grant access to admin_low label clients
2821 * only if the client is trusted, i.e. also
2822 * running Solaris Trusted Extension.
2823 */
2824 struct sockaddr *ca;
2825 int addr_type;
2826 void *ipaddr;
2827 tsol_tpc_t *tp;
2828
2829 ca = (struct sockaddr *)svc_getrpccaller(
2830 req->rq_xprt)->buf;
2831 if (ca->sa_family == AF_INET) {
2832 addr_type = IPV4_VERSION;
2833 ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
2834 } else if (ca->sa_family == AF_INET6) {
2835 addr_type = IPV6_VERSION;
2836 ipaddr = &((struct sockaddr_in6 *)
2837 ca)->sin6_addr;
2838 }
2839 tp = find_tpc(ipaddr, addr_type, B_FALSE);
2840 if (tp == NULL || tp->tpc_tp.tp_doi !=
2841 l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
2842 SUN_CIPSO) {
2843 if (tp != NULL)
2844 TPC_RELE(tp);
2845 error = EACCES;
2846 goto err_out;
2847 }
2848 TPC_RELE(tp);
2849 }
2850 }
2851
2852 error = makefh4(&cs->fh, vp, cs->exi);
2853
2854 err_out:
2855 if (error) {
2856 if (is_newvp) {
2857 VN_RELE(cs->vp);
2858 cs->vp = oldvp;
2859 } else
2860 VN_RELE(vp);
2861 return (puterrno4(error));
2862 }
2863
2864 if (!is_newvp) {
2865 if (cs->vp)
2866 VN_RELE(cs->vp);
2867 cs->vp = vp;
2868 } else if (oldvp)
2869 VN_RELE(oldvp);
2870
2871 /*
2872 * if did lookup on attrdir and didn't lookup .., set named
2873 * attr fh flag
2874 */
2875 if (attrdir && ! dotdot)
2876 set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
2877
2878 /* Assume false for now, open proc will set this */
2879 cs->mandlock = FALSE;
2880
2881 return (NFS4_OK);
2882 }
2883
2884 /* ARGSUSED */
2885 static void
2886 rfs4_op_lookup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2887 struct compound_state *cs)
2888 {
2889 LOOKUP4args *args = &argop->nfs_argop4_u.oplookup;
2890 LOOKUP4res *resp = &resop->nfs_resop4_u.oplookup;
2891 char *nm;
2892 uint_t len;
2893 struct sockaddr *ca;
2894 char *name = NULL;
2895 nfsstat4 status;
2896
2897 DTRACE_NFSV4_2(op__lookup__start, struct compound_state *, cs,
2898 LOOKUP4args *, args);
2899
2900 if (cs->vp == NULL) {
2901 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2902 goto out;
2903 }
2904
2905 if (cs->vp->v_type == VLNK) {
2906 *cs->statusp = resp->status = NFS4ERR_SYMLINK;
2907 goto out;
2908 }
2909
2910 if (cs->vp->v_type != VDIR) {
2911 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2912 goto out;
2913 }
2914
2915 status = utf8_dir_verify(&args->objname);
2916 if (status != NFS4_OK) {
2917 *cs->statusp = resp->status = status;
2918 goto out;
2919 }
2920
2921 nm = utf8_to_str(&args->objname, &len, NULL);
2922 if (nm == NULL) {
2923 *cs->statusp = resp->status = NFS4ERR_INVAL;
2924 goto out;
2925 }
2926
2927 if (len > MAXNAMELEN) {
2928 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2929 kmem_free(nm, len);
2930 goto out;
2931 }
2932
2933 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2934 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
2935 MAXPATHLEN + 1);
2936
2937 if (name == NULL) {
2938 *cs->statusp = resp->status = NFS4ERR_INVAL;
2939 kmem_free(nm, len);
2940 goto out;
2941 }
2942
2943 *cs->statusp = resp->status = do_rfs4_op_lookup(name, req, cs);
2944
2945 if (name != nm)
2946 kmem_free(name, MAXPATHLEN + 1);
2947 kmem_free(nm, len);
2948
2949 out:
2950 DTRACE_NFSV4_2(op__lookup__done, struct compound_state *, cs,
2951 LOOKUP4res *, resp);
2952 }
2953
2954 /* ARGSUSED */
2955 static void
2956 rfs4_op_lookupp(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
2957 struct compound_state *cs)
2958 {
2959 LOOKUPP4res *resp = &resop->nfs_resop4_u.oplookupp;
2960
2961 DTRACE_NFSV4_1(op__lookupp__start, struct compound_state *, cs);
2962
2963 if (cs->vp == NULL) {
2964 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2965 goto out;
2966 }
2967
2968 if (cs->vp->v_type != VDIR) {
2969 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2970 goto out;
2971 }
2972
2973 *cs->statusp = resp->status = do_rfs4_op_lookup("..", req, cs);
2974
2975 /*
2976 * From NFSV4 Specification, LOOKUPP should not check for
2977 * NFS4ERR_WRONGSEC. Retrun NFS4_OK instead.
2978 */
2979 if (resp->status == NFS4ERR_WRONGSEC) {
2980 *cs->statusp = resp->status = NFS4_OK;
2981 }
2982
2983 out:
2984 DTRACE_NFSV4_2(op__lookupp__done, struct compound_state *, cs,
2985 LOOKUPP4res *, resp);
2986 }
2987
2988
2989 /*ARGSUSED2*/
2990 static void
2991 rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2992 struct compound_state *cs)
2993 {
2994 OPENATTR4args *args = &argop->nfs_argop4_u.opopenattr;
2995 OPENATTR4res *resp = &resop->nfs_resop4_u.opopenattr;
2996 vnode_t *avp = NULL;
2997 int lookup_flags = LOOKUP_XATTR, error;
2998 int exp_ro = 0;
2999
3000 DTRACE_NFSV4_2(op__openattr__start, struct compound_state *, cs,
3001 OPENATTR4args *, args);
3002
3003 if (cs->vp == NULL) {
3004 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3005 goto out;
3006 }
3007
3008 if ((cs->vp->v_vfsp->vfs_flag & VFS_XATTR) == 0 &&
3009 !vfs_has_feature(cs->vp->v_vfsp, VFSFT_SYSATTR_VIEWS)) {
3010 *cs->statusp = resp->status = puterrno4(ENOTSUP);
3011 goto out;
3012 }
3013
3014 /*
3015 * If file system supports passing ACE mask to VOP_ACCESS then
3016 * check for ACE_READ_NAMED_ATTRS, otherwise do legacy checks
3017 */
3018
3019 if (vfs_has_feature(cs->vp->v_vfsp, VFSFT_ACEMASKONACCESS))
3020 error = VOP_ACCESS(cs->vp, ACE_READ_NAMED_ATTRS,
3021 V_ACE_MASK, cs->cr, NULL);
3022 else
3023 error = ((VOP_ACCESS(cs->vp, VREAD, 0, cs->cr, NULL) != 0) &&
3024 (VOP_ACCESS(cs->vp, VWRITE, 0, cs->cr, NULL) != 0) &&
3025 (VOP_ACCESS(cs->vp, VEXEC, 0, cs->cr, NULL) != 0));
3026
3027 if (error) {
3028 *cs->statusp = resp->status = puterrno4(EACCES);
3029 goto out;
3030 }
3031
3032 /*
3033 * The CREATE_XATTR_DIR VOP flag cannot be specified if
3034 * the file system is exported read-only -- regardless of
3035 * createdir flag. Otherwise the attrdir would be created
3036 * (assuming server fs isn't mounted readonly locally). If
3037 * VOP_LOOKUP returns ENOENT in this case, the error will
3038 * be translated into EROFS. ENOSYS is mapped to ENOTSUP
3039 * because specfs has no VOP_LOOKUP op, so the macro would
3040 * return ENOSYS. EINVAL is returned by all (current)
3041 * Solaris file system implementations when any of their
3042 * restrictions are violated (xattr(dir) can't have xattrdir).
3043 * Returning NOTSUPP is more appropriate in this case
3044 * because the object will never be able to have an attrdir.
3045 */
3046 if (args->createdir && ! (exp_ro = rdonly4(req, cs)))
3047 lookup_flags |= CREATE_XATTR_DIR;
3048
3049 error = VOP_LOOKUP(cs->vp, "", &avp, NULL, lookup_flags, NULL, cs->cr,
3050 NULL, NULL, NULL);
3051
3052 if (error) {
3053 if (error == ENOENT && args->createdir && exp_ro)
3054 *cs->statusp = resp->status = puterrno4(EROFS);
3055 else if (error == EINVAL || error == ENOSYS)
3056 *cs->statusp = resp->status = puterrno4(ENOTSUP);
3057 else
3058 *cs->statusp = resp->status = puterrno4(error);
3059 goto out;
3060 }
3061
3062 ASSERT(avp->v_flag & V_XATTRDIR);
3063
3064 error = makefh4(&cs->fh, avp, cs->exi);
3065
3066 if (error) {
3067 VN_RELE(avp);
3068 *cs->statusp = resp->status = puterrno4(error);
3069 goto out;
3070 }
3071
3072 VN_RELE(cs->vp);
3073 cs->vp = avp;
3074
3075 /*
3076 * There is no requirement for an attrdir fh flag
3077 * because the attrdir has a vnode flag to distinguish
3078 * it from regular (non-xattr) directories. The
3079 * FH4_ATTRDIR flag is set for future sanity checks.
3080 */
3081 set_fh4_flag(&cs->fh, FH4_ATTRDIR);
3082 *cs->statusp = resp->status = NFS4_OK;
3083
3084 out:
3085 DTRACE_NFSV4_2(op__openattr__done, struct compound_state *, cs,
3086 OPENATTR4res *, resp);
3087 }
3088
3089 static int
3090 do_io(int direction, vnode_t *vp, struct uio *uio, int ioflag, cred_t *cred,
3091 caller_context_t *ct)
3092 {
3093 int error;
3094 int i;
3095 clock_t delaytime;
3096
3097 delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
3098
3099 /*
3100 * Don't block on mandatory locks. If this routine returns
3101 * EAGAIN, the caller should return NFS4ERR_LOCKED.
3102 */
3103 uio->uio_fmode = FNONBLOCK;
3104
3105 for (i = 0; i < rfs4_maxlock_tries; i++) {
3106
3107
3108 if (direction == FREAD) {
3109 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, ct);
3110 error = VOP_READ(vp, uio, ioflag, cred, ct);
3111 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, ct);
3112 } else {
3113 (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, ct);
3114 error = VOP_WRITE(vp, uio, ioflag, cred, ct);
3115 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, ct);
3116 }
3117
3118 if (error != EAGAIN)
3119 break;
3120
3121 if (i < rfs4_maxlock_tries - 1) {
3122 delay(delaytime);
3123 delaytime *= 2;
3124 }
3125 }
3126
3127 return (error);
3128 }
3129
3130 /* ARGSUSED */
3131 static void
3132 rfs4_op_read(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3133 struct compound_state *cs)
3134 {
3135 READ4args *args = &argop->nfs_argop4_u.opread;
3136 READ4res *resp = &resop->nfs_resop4_u.opread;
3137 int error;
3138 int verror;
3139 vnode_t *vp;
3140 struct vattr va;
3141 struct iovec iov, *iovp = NULL;
3142 int iovcnt;
3143 struct uio uio;
3144 u_offset_t offset;
3145 bool_t *deleg = &cs->deleg;
3146 nfsstat4 stat;
3147 int in_crit = 0;
3148 mblk_t *mp = NULL;
3149 int alloc_err = 0;
3150 int rdma_used = 0;
3151 int loaned_buffers;
3152 caller_context_t ct;
3153 struct uio *uiop;
3154
3155 DTRACE_NFSV4_2(op__read__start, struct compound_state *, cs,
3156 READ4args, args);
3157
3158 vp = cs->vp;
3159 if (vp == NULL) {
3160 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3161 goto out;
3162 }
3163 if (cs->access == CS_ACCESS_DENIED) {
3164 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3165 goto out;
3166 }
3167
3168 if ((stat = rfs4_check_stateid(FREAD, vp, &args->stateid, FALSE,
3169 deleg, TRUE, &ct)) != NFS4_OK) {
3170 *cs->statusp = resp->status = stat;
3171 goto out;
3172 }
3173
3174 /*
3175 * Enter the critical region before calling VOP_RWLOCK
3176 * to avoid a deadlock with write requests.
3177 */
3178 if (nbl_need_check(vp)) {
3179 nbl_start_crit(vp, RW_READER);
3180 in_crit = 1;
3181 if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
3182 &ct)) {
3183 *cs->statusp = resp->status = NFS4ERR_LOCKED;
3184 goto out;
3185 }
3186 }
3187
3188 if (args->wlist) {
3189 if (args->count > clist_len(args->wlist)) {
3190 *cs->statusp = resp->status = NFS4ERR_INVAL;
3191 goto out;
3192 }
3193 rdma_used = 1;
3194 }
3195
3196 /* use loaned buffers for TCP */
3197 loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;
3198
3199 va.va_mask = AT_MODE|AT_SIZE|AT_UID;
3200 verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3201
3202 /*
3203 * If we can't get the attributes, then we can't do the
3204 * right access checking. So, we'll fail the request.
3205 */
3206 if (verror) {
3207 *cs->statusp = resp->status = puterrno4(verror);
3208 goto out;
3209 }
3210
3211 if (vp->v_type != VREG) {
3212 *cs->statusp = resp->status =
3213 ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
3214 goto out;
3215 }
3216
3217 if (crgetuid(cs->cr) != va.va_uid &&
3218 (error = VOP_ACCESS(vp, VREAD, 0, cs->cr, &ct)) &&
3219 (error = VOP_ACCESS(vp, VEXEC, 0, cs->cr, &ct))) {
3220 *cs->statusp = resp->status = puterrno4(error);
3221 goto out;
3222 }
3223
3224 if (MANDLOCK(vp, va.va_mode)) { /* XXX - V4 supports mand locking */
3225 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3226 goto out;
3227 }
3228
3229 offset = args->offset;
3230 if (offset >= va.va_size) {
3231 *cs->statusp = resp->status = NFS4_OK;
3232 resp->eof = TRUE;
3233 resp->data_len = 0;
3234 resp->data_val = NULL;
3235 resp->mblk = NULL;
3236 /* RDMA */
3237 resp->wlist = args->wlist;
3238 resp->wlist_len = resp->data_len;
3239 *cs->statusp = resp->status = NFS4_OK;
3240 if (resp->wlist)
3241 clist_zero_len(resp->wlist);
3242 goto out;
3243 }
3244
3245 if (args->count == 0) {
3246 *cs->statusp = resp->status = NFS4_OK;
3247 resp->eof = FALSE;
3248 resp->data_len = 0;
3249 resp->data_val = NULL;
3250 resp->mblk = NULL;
3251 /* RDMA */
3252 resp->wlist = args->wlist;
3253 resp->wlist_len = resp->data_len;
3254 if (resp->wlist)
3255 clist_zero_len(resp->wlist);
3256 goto out;
3257 }
3258
3259 /*
3260 * Do not allocate memory more than maximum allowed
3261 * transfer size
3262 */
3263 if (args->count > rfs4_tsize(req))
3264 args->count = rfs4_tsize(req);
3265
3266 if (loaned_buffers) {
3267 uiop = (uio_t *)rfs_setup_xuio(vp);
3268 ASSERT(uiop != NULL);
3269 uiop->uio_segflg = UIO_SYSSPACE;
3270 uiop->uio_loffset = args->offset;
3271 uiop->uio_resid = args->count;
3272
3273 /* Jump to do the read if successful */
3274 if (!VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cs->cr, &ct)) {
3275 /*
3276 * Need to hold the vnode until after VOP_RETZCBUF()
3277 * is called.
3278 */
3279 VN_HOLD(vp);
3280 goto doio_read;
3281 }
3282
3283 DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
3284 uiop->uio_loffset, int, uiop->uio_resid);
3285
3286 uiop->uio_extflg = 0;
3287
3288 /* failure to setup for zero copy */
3289 rfs_free_xuio((void *)uiop);
3290 loaned_buffers = 0;
3291 }
3292
3293 /*
3294 * If returning data via RDMA Write, then grab the chunk list. If we
3295 * aren't returning READ data w/RDMA_WRITE, then grab a mblk.
3296 */
3297 if (rdma_used) {
3298 mp = NULL;
3299 (void) rdma_get_wchunk(req, &iov, args->wlist);
3300 uio.uio_iov = &iov;
3301 uio.uio_iovcnt = 1;
3302 } else {
3303 /*
3304 * mp will contain the data to be sent out in the read reply.
3305 * It will be freed after the reply has been sent.
3306 */
3307 mp = rfs_read_alloc(args->count, &iovp, &iovcnt);
3308 ASSERT(mp != NULL);
3309 ASSERT(alloc_err == 0);
3310 uio.uio_iov = iovp;
3311 uio.uio_iovcnt = iovcnt;
3312 }
3313
3314 uio.uio_segflg = UIO_SYSSPACE;
3315 uio.uio_extflg = UIO_COPY_CACHED;
3316 uio.uio_loffset = args->offset;
3317 uio.uio_resid = args->count;
3318 uiop = &uio;
3319
3320 doio_read:
3321 error = do_io(FREAD, vp, uiop, 0, cs->cr, &ct);
3322
3323 va.va_mask = AT_SIZE;
3324 verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3325
3326 if (error) {
3327 if (mp)
3328 freemsg(mp);
3329 *cs->statusp = resp->status = puterrno4(error);
3330 goto out;
3331 }
3332
3333 /* make mblk using zc buffers */
3334 if (loaned_buffers) {
3335 mp = uio_to_mblk(uiop);
3336 ASSERT(mp != NULL);
3337 }
3338
3339 *cs->statusp = resp->status = NFS4_OK;
3340
3341 ASSERT(uiop->uio_resid >= 0);
3342 resp->data_len = args->count - uiop->uio_resid;
3343 if (mp) {
3344 resp->data_val = (char *)mp->b_datap->db_base;
3345 rfs_rndup_mblks(mp, resp->data_len, loaned_buffers);
3346 } else {
3347 resp->data_val = (caddr_t)iov.iov_base;
3348 }
3349
3350 resp->mblk = mp;
3351
3352 if (!verror && offset + resp->data_len == va.va_size)
3353 resp->eof = TRUE;
3354 else
3355 resp->eof = FALSE;
3356
3357 if (rdma_used) {
3358 if (!rdma_setup_read_data4(args, resp)) {
3359 *cs->statusp = resp->status = NFS4ERR_INVAL;
3360 }
3361 } else {
3362 resp->wlist = NULL;
3363 }
3364
3365 out:
3366 if (in_crit)
3367 nbl_end_crit(vp);
3368
3369 if (iovp != NULL)
3370 kmem_free(iovp, iovcnt * sizeof (struct iovec));
3371
3372 DTRACE_NFSV4_2(op__read__done, struct compound_state *, cs,
3373 READ4res *, resp);
3374 }
3375
3376 static void
3377 rfs4_op_read_free(nfs_resop4 *resop)
3378 {
3379 READ4res *resp = &resop->nfs_resop4_u.opread;
3380
3381 if (resp->status == NFS4_OK && resp->mblk != NULL) {
3382 freemsg(resp->mblk);
3383 resp->mblk = NULL;
3384 resp->data_val = NULL;
3385 resp->data_len = 0;
3386 }
3387 }
3388
3389 static void
3390 rfs4_op_readdir_free(nfs_resop4 * resop)
3391 {
3392 READDIR4res *resp = &resop->nfs_resop4_u.opreaddir;
3393
3394 if (resp->status == NFS4_OK && resp->mblk != NULL) {
3395 freeb(resp->mblk);
3396 resp->mblk = NULL;
3397 resp->data_len = 0;
3398 }
3399 }
3400
3401
3402 /* ARGSUSED */
3403 static void
3404 rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3405 struct compound_state *cs)
3406 {
3407 PUTPUBFH4res *resp = &resop->nfs_resop4_u.opputpubfh;
3408 int error;
3409 vnode_t *vp;
3410 struct exportinfo *exi, *sav_exi;
3411 nfs_fh4_fmt_t *fh_fmtp;
3412
3413 DTRACE_NFSV4_1(op__putpubfh__start, struct compound_state *, cs);
3414
3415 if (cs->vp) {
3416 VN_RELE(cs->vp);
3417 cs->vp = NULL;
3418 }
3419
3420 if (cs->cr)
3421 crfree(cs->cr);
3422
3423 cs->cr = crdup(cs->basecr);
3424
3425 vp = exi_public->exi_vp;
3426 if (vp == NULL) {
3427 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3428 goto out;
3429 }
3430
3431 error = makefh4(&cs->fh, vp, exi_public);
3432 if (error != 0) {
3433 *cs->statusp = resp->status = puterrno4(error);
3434 goto out;
3435 }
3436 sav_exi = cs->exi;
3437 if (exi_public == exi_root) {
3438 /*
3439 * No filesystem is actually shared public, so we default
3440 * to exi_root. In this case, we must check whether root
3441 * is exported.
3442 */
3443 fh_fmtp = (nfs_fh4_fmt_t *)cs->fh.nfs_fh4_val;
3444
3445 /*
3446 * if root filesystem is exported, the exportinfo struct that we
3447 * should use is what checkexport4 returns, because root_exi is
3448 * actually a mostly empty struct.
3449 */
3450 exi = checkexport4(&fh_fmtp->fh4_fsid,
3451 (fid_t *)&fh_fmtp->fh4_xlen, NULL);
3452 cs->exi = ((exi != NULL) ? exi : exi_public);
3453 } else {
3454 /*
3455 * it's a properly shared filesystem
3456 */
3457 cs->exi = exi_public;
3458 }
3459
3460 if (is_system_labeled()) {
3461 bslabel_t *clabel;
3462
3463 ASSERT(req->rq_label != NULL);
3464 clabel = req->rq_label;
3465 DTRACE_PROBE2(tx__rfs4__log__info__opputpubfh__clabel, char *,
3466 "got client label from request(1)",
3467 struct svc_req *, req);
3468 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3469 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3470 cs->exi)) {
3471 *cs->statusp = resp->status =
3472 NFS4ERR_SERVERFAULT;
3473 goto out;
3474 }
3475 }
3476 }
3477
3478 VN_HOLD(vp);
3479 cs->vp = vp;
3480
3481 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3482 VN_RELE(cs->vp);
3483 cs->vp = NULL;
3484 cs->exi = sav_exi;
3485 goto out;
3486 }
3487
3488 *cs->statusp = resp->status = NFS4_OK;
3489 out:
3490 DTRACE_NFSV4_2(op__putpubfh__done, struct compound_state *, cs,
3491 PUTPUBFH4res *, resp);
3492 }
3493
3494 /*
3495 * XXX - issue with put*fh operations. Suppose /export/home is exported.
3496 * Suppose an NFS client goes to mount /export/home/joe. If /export, home,
3497 * or joe have restrictive search permissions, then we shouldn't let
3498 * the client get a file handle. This is easy to enforce. However, we
3499 * don't know what security flavor should be used until we resolve the
3500 * path name. Another complication is uid mapping. If root is
3501 * the user, then it will be mapped to the anonymous user by default,
3502 * but we won't know that till we've resolved the path name. And we won't
3503 * know what the anonymous user is.
3504 * Luckily, SECINFO is specified to take a full filename.
3505 * So what we will have to in rfs4_op_lookup is check that flavor of
3506 * the target object matches that of the request, and if root was the
3507 * caller, check for the root= and anon= options, and if necessary,
3508 * repeat the lookup using the right cred_t. But that's not done yet.
3509 */
3510 /* ARGSUSED */
3511 static void
3512 rfs4_op_putfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3513 struct compound_state *cs)
3514 {
3515 PUTFH4args *args = &argop->nfs_argop4_u.opputfh;
3516 PUTFH4res *resp = &resop->nfs_resop4_u.opputfh;
3517 nfs_fh4_fmt_t *fh_fmtp;
3518
3519 DTRACE_NFSV4_2(op__putfh__start, struct compound_state *, cs,
3520 PUTFH4args *, args);
3521
3522 if (cs->vp) {
3523 VN_RELE(cs->vp);
3524 cs->vp = NULL;
3525 }
3526
3527 if (cs->cr) {
3528 crfree(cs->cr);
3529 cs->cr = NULL;
3530 }
3531
3532
3533 if (args->object.nfs_fh4_len < NFS_FH4_LEN) {
3534 *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
3535 goto out;
3536 }
3537
3538 fh_fmtp = (nfs_fh4_fmt_t *)args->object.nfs_fh4_val;
3539 cs->exi = checkexport4(&fh_fmtp->fh4_fsid, (fid_t *)&fh_fmtp->fh4_xlen,
3540 NULL);
3541
3542 if (cs->exi == NULL) {
3543 *cs->statusp = resp->status = NFS4ERR_STALE;
3544 goto out;
3545 }
3546
3547 cs->cr = crdup(cs->basecr);
3548
3549 ASSERT(cs->cr != NULL);
3550
3551 if (! (cs->vp = nfs4_fhtovp(&args->object, cs->exi, &resp->status))) {
3552 *cs->statusp = resp->status;
3553 goto out;
3554 }
3555
3556 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3557 VN_RELE(cs->vp);
3558 cs->vp = NULL;
3559 goto out;
3560 }
3561
3562 nfs_fh4_copy(&args->object, &cs->fh);
3563 *cs->statusp = resp->status = NFS4_OK;
3564 cs->deleg = FALSE;
3565
3566 out:
3567 DTRACE_NFSV4_2(op__putfh__done, struct compound_state *, cs,
3568 PUTFH4res *, resp);
3569 }
3570
3571 /* ARGSUSED */
3572 static void
3573 rfs4_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3574 struct compound_state *cs)
3575 {
3576 PUTROOTFH4res *resp = &resop->nfs_resop4_u.opputrootfh;
3577 int error;
3578 fid_t fid;
3579 struct exportinfo *exi, *sav_exi;
3580
3581 DTRACE_NFSV4_1(op__putrootfh__start, struct compound_state *, cs);
3582
3583 if (cs->vp) {
3584 VN_RELE(cs->vp);
3585 cs->vp = NULL;
3586 }
3587
3588 if (cs->cr)
3589 crfree(cs->cr);
3590
3591 cs->cr = crdup(cs->basecr);
3592
3593 /*
3594 * Using rootdir, the system root vnode,
3595 * get its fid.
3596 */
3597 bzero(&fid, sizeof (fid));
3598 fid.fid_len = MAXFIDSZ;
3599 error = vop_fid_pseudo(rootdir, &fid);
3600 if (error != 0) {
3601 *cs->statusp = resp->status = puterrno4(error);
3602 goto out;
3603 }
3604
3605 /*
3606 * Then use the root fsid & fid it to find out if it's exported
3607 *
3608 * If the server root isn't exported directly, then
3609 * it should at least be a pseudo export based on
3610 * one or more exports further down in the server's
3611 * file tree.
3612 */
3613 exi = checkexport4(&rootdir->v_vfsp->vfs_fsid, &fid, NULL);
3614 if (exi == NULL || exi->exi_export.ex_flags & EX_PUBLIC) {
3615 NFS4_DEBUG(rfs4_debug,
3616 (CE_WARN, "rfs4_op_putrootfh: export check failure"));
3617 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3618 goto out;
3619 }
3620
3621 /*
3622 * Now make a filehandle based on the root
3623 * export and root vnode.
3624 */
3625 error = makefh4(&cs->fh, rootdir, exi);
3626 if (error != 0) {
3627 *cs->statusp = resp->status = puterrno4(error);
3628 goto out;
3629 }
3630
3631 sav_exi = cs->exi;
3632 cs->exi = exi;
3633
3634 VN_HOLD(rootdir);
3635 cs->vp = rootdir;
3636
3637 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3638 VN_RELE(rootdir);
3639 cs->vp = NULL;
3640 cs->exi = sav_exi;
3641 goto out;
3642 }
3643
3644 *cs->statusp = resp->status = NFS4_OK;
3645 cs->deleg = FALSE;
3646 out:
3647 DTRACE_NFSV4_2(op__putrootfh__done, struct compound_state *, cs,
3648 PUTROOTFH4res *, resp);
3649 }
3650
3651 /*
3652 * set_rdattr_params sets up the variables used to manage what information
3653 * to get for each directory entry.
3654 */
3655 static nfsstat4
3656 set_rdattr_params(struct nfs4_svgetit_arg *sargp,
3657 bitmap4 attrs, bool_t *need_to_lookup)
3658 {
3659 uint_t va_mask;
3660 nfsstat4 status;
3661 bitmap4 objbits;
3662
3663 status = bitmap4_to_attrmask(attrs, sargp);
3664 if (status != NFS4_OK) {
3665 /*
3666 * could not even figure attr mask
3667 */
3668 return (status);
3669 }
3670 va_mask = sargp->vap->va_mask;
3671
3672 /*
3673 * dirent's d_ino is always correct value for mounted_on_fileid.
3674 * mntdfid_set is set once here, but mounted_on_fileid is
3675 * set in main dirent processing loop for each dirent.
3676 * The mntdfid_set is a simple optimization that lets the
3677 * server attr code avoid work when caller is readdir.
3678 */
3679 sargp->mntdfid_set = TRUE;
3680
3681 /*
3682 * Lookup entry only if client asked for any of the following:
3683 * a) vattr attrs
3684 * b) vfs attrs
3685 * c) attrs w/per-object scope requested (change, filehandle, etc)
3686 * other than mounted_on_fileid (which we can take from dirent)
3687 */
3688 objbits = attrs ? attrs & NFS4_VP_ATTR_MASK : 0;
3689
3690 if (va_mask || sargp->sbp || (objbits & ~FATTR4_MOUNTED_ON_FILEID_MASK))
3691 *need_to_lookup = TRUE;
3692 else
3693 *need_to_lookup = FALSE;
3694
3695 if (sargp->sbp == NULL)
3696 return (NFS4_OK);
3697
3698 /*
3699 * If filesystem attrs are requested, get them now from the
3700 * directory vp, as most entries will have same filesystem. The only
3701 * exception are mounted over entries but we handle
3702 * those as we go (XXX mounted over detection not yet implemented).
3703 */
3704 sargp->vap->va_mask = 0; /* to avoid VOP_GETATTR */
3705 status = bitmap4_get_sysattrs(sargp);
3706 sargp->vap->va_mask = va_mask;
3707
3708 if ((status != NFS4_OK) && sargp->rdattr_error_req) {
3709 /*
3710 * Failed to get filesystem attributes.
3711 * Return a rdattr_error for each entry, but don't fail.
3712 * However, don't get any obj-dependent attrs.
3713 */
3714 sargp->rdattr_error = status; /* for rdattr_error */
3715 *need_to_lookup = FALSE;
3716 /*
3717 * At least get fileid for regular readdir output
3718 */
3719 sargp->vap->va_mask &= AT_NODEID;
3720 status = NFS4_OK;
3721 }
3722
3723 return (status);
3724 }
3725
3726 /*
3727 * readlink: args: CURRENT_FH.
3728 * res: status. If success - CURRENT_FH unchanged, return linktext.
3729 */
3730
3731 /* ARGSUSED */
3732 static void
3733 rfs4_op_readlink(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3734 struct compound_state *cs)
3735 {
3736 READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3737 int error;
3738 vnode_t *vp;
3739 struct iovec iov;
3740 struct vattr va;
3741 struct uio uio;
3742 char *data;
3743 struct sockaddr *ca;
3744 char *name = NULL;
3745 int is_referral;
3746
3747 DTRACE_NFSV4_1(op__readlink__start, struct compound_state *, cs);
3748
3749 /* CURRENT_FH: directory */
3750 vp = cs->vp;
3751 if (vp == NULL) {
3752 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3753 goto out;
3754 }
3755
3756 if (cs->access == CS_ACCESS_DENIED) {
3757 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3758 goto out;
3759 }
3760
3761 /* Is it a referral? */
3762 if (vn_is_nfs_reparse(vp, cs->cr) && client_is_downrev(req)) {
3763
3764 is_referral = 1;
3765
3766 } else {
3767
3768 is_referral = 0;
3769
3770 if (vp->v_type == VDIR) {
3771 *cs->statusp = resp->status = NFS4ERR_ISDIR;
3772 goto out;
3773 }
3774
3775 if (vp->v_type != VLNK) {
3776 *cs->statusp = resp->status = NFS4ERR_INVAL;
3777 goto out;
3778 }
3779
3780 }
3781
3782 va.va_mask = AT_MODE;
3783 error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
3784 if (error) {
3785 *cs->statusp = resp->status = puterrno4(error);
3786 goto out;
3787 }
3788
3789 if (MANDLOCK(vp, va.va_mode)) {
3790 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3791 goto out;
3792 }
3793
3794 data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
3795
3796 if (is_referral) {
3797 char *s;
3798 size_t strsz;
3799
3800 /* Get an artificial symlink based on a referral */
3801 s = build_symlink(vp, cs->cr, &strsz);
3802 global_svstat_ptr[4][NFS_REFERLINKS].value.ui64++;
3803 DTRACE_PROBE2(nfs4serv__func__referral__reflink,
3804 vnode_t *, vp, char *, s);
3805 if (s == NULL)
3806 error = EINVAL;
3807 else {
3808 error = 0;
3809 (void) strlcpy(data, s, MAXPATHLEN + 1);
3810 kmem_free(s, strsz);
3811 }
3812
3813 } else {
3814
3815 iov.iov_base = data;
3816 iov.iov_len = MAXPATHLEN;
3817 uio.uio_iov = &iov;
3818 uio.uio_iovcnt = 1;
3819 uio.uio_segflg = UIO_SYSSPACE;
3820 uio.uio_extflg = UIO_COPY_CACHED;
3821 uio.uio_loffset = 0;
3822 uio.uio_resid = MAXPATHLEN;
3823
3824 error = VOP_READLINK(vp, &uio, cs->cr, NULL);
3825
3826 if (!error)
3827 *(data + MAXPATHLEN - uio.uio_resid) = '\0';
3828 }
3829
3830 if (error) {
3831 kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3832 *cs->statusp = resp->status = puterrno4(error);
3833 goto out;
3834 }
3835
3836 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3837 name = nfscmd_convname(ca, cs->exi, data, NFSCMD_CONV_OUTBOUND,
3838 MAXPATHLEN + 1);
3839
3840 if (name == NULL) {
3841 /*
3842 * Even though the conversion failed, we return
3843 * something. We just don't translate it.
3844 */
3845 name = data;
3846 }
3847
3848 /*
3849 * treat link name as data
3850 */
3851 (void) str_to_utf8(name, (utf8string *)&resp->link);
3852
3853 if (name != data)
3854 kmem_free(name, MAXPATHLEN + 1);
3855 kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3856 *cs->statusp = resp->status = NFS4_OK;
3857
3858 out:
3859 DTRACE_NFSV4_2(op__readlink__done, struct compound_state *, cs,
3860 READLINK4res *, resp);
3861 }
3862
3863 static void
3864 rfs4_op_readlink_free(nfs_resop4 *resop)
3865 {
3866 READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3867 utf8string *symlink = (utf8string *)&resp->link;
3868
3869 if (symlink->utf8string_val) {
3870 UTF8STRING_FREE(*symlink)
3871 }
3872 }
3873
3874 /*
3875 * release_lockowner:
3876 * Release any state associated with the supplied
3877 * lockowner. Note if any lo_state is holding locks we will not
3878 * rele that lo_state and thus the lockowner will not be destroyed.
3879 * A client using lock after the lock owner stateid has been released
3880 * will suffer the consequence of NFS4ERR_BAD_STATEID and would have
3881 * to reissue the lock with new_lock_owner set to TRUE.
3882 * args: lock_owner
3883 * res: status
3884 */
3885 /* ARGSUSED */
3886 static void
3887 rfs4_op_release_lockowner(nfs_argop4 *argop, nfs_resop4 *resop,
3888 struct svc_req *req, struct compound_state *cs)
3889 {
3890 RELEASE_LOCKOWNER4args *ap = &argop->nfs_argop4_u.oprelease_lockowner;
3891 RELEASE_LOCKOWNER4res *resp = &resop->nfs_resop4_u.oprelease_lockowner;
3892 rfs4_lockowner_t *lo;
3893 rfs4_openowner_t *oo;
3894 rfs4_state_t *sp;
3895 rfs4_lo_state_t *lsp;
3896 rfs4_client_t *cp;
3897 bool_t create = FALSE;
3898 locklist_t *llist;
3899 sysid_t sysid;
3900
3901 DTRACE_NFSV4_2(op__release__lockowner__start, struct compound_state *,
3902 cs, RELEASE_LOCKOWNER4args *, ap);
3903
3904 /* Make sure there is a clientid around for this request */
3905 cp = rfs4_findclient_by_id(ap->lock_owner.clientid, FALSE);
3906
3907 if (cp == NULL) {
3908 *cs->statusp = resp->status =
3909 rfs4_check_clientid(&ap->lock_owner.clientid, 0);
3910 goto out;
3911 }
3912 rfs4_client_rele(cp);
3913
3914 lo = rfs4_findlockowner(&ap->lock_owner, &create);
3915 if (lo == NULL) {
3916 *cs->statusp = resp->status = NFS4_OK;
3917 goto out;
3918 }
3919 ASSERT(lo->rl_client != NULL);
3920
3921 /*
3922 * Check for EXPIRED client. If so will reap state with in a lease
3923 * period or on next set_clientid_confirm step
3924 */
3925 if (rfs4_lease_expired(lo->rl_client)) {
3926 rfs4_lockowner_rele(lo);
3927 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
3928 goto out;
3929 }
3930
3931 /*
3932 * If no sysid has been assigned, then no locks exist; just return.
3933 */
3934 rfs4_dbe_lock(lo->rl_client->rc_dbe);
3935 if (lo->rl_client->rc_sysidt == LM_NOSYSID) {
3936 rfs4_lockowner_rele(lo);
3937 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3938 goto out;
3939 }
3940
3941 sysid = lo->rl_client->rc_sysidt;
3942 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3943
3944 /*
3945 * Mark the lockowner invalid.
3946 */
3947 rfs4_dbe_hide(lo->rl_dbe);
3948
3949 /*
3950 * sysid-pid pair should now not be used since the lockowner is
3951 * invalid. If the client were to instantiate the lockowner again
3952 * it would be assigned a new pid. Thus we can get the list of
3953 * current locks.
3954 */
3955
3956 llist = flk_get_active_locks(sysid, lo->rl_pid);
3957 /* If we are still holding locks fail */
3958 if (llist != NULL) {
3959
3960 *cs->statusp = resp->status = NFS4ERR_LOCKS_HELD;
3961
3962 flk_free_locklist(llist);
3963 /*
3964 * We need to unhide the lockowner so the client can
3965 * try it again. The bad thing here is if the client
3966 * has a logic error that took it here in the first place
3967 * they probably have lost accounting of the locks that it
3968 * is holding. So we may have dangling state until the
3969 * open owner state is reaped via close. One scenario
3970 * that could possibly occur is that the client has
3971 * sent the unlock request(s) in separate threads
3972 * and has not waited for the replies before sending the
3973 * RELEASE_LOCKOWNER request. Presumably, it would expect
3974 * and deal appropriately with NFS4ERR_LOCKS_HELD, by
3975 * reissuing the request.
3976 */
3977 rfs4_dbe_unhide(lo->rl_dbe);
3978 rfs4_lockowner_rele(lo);
3979 goto out;
3980 }
3981
3982 /*
3983 * For the corresponding client we need to check each open
3984 * owner for any opens that have lockowner state associated
3985 * with this lockowner.
3986 */
3987
3988 rfs4_dbe_lock(lo->rl_client->rc_dbe);
3989 for (oo = list_head(&lo->rl_client->rc_openownerlist); oo != NULL;
3990 oo = list_next(&lo->rl_client->rc_openownerlist, oo)) {
3991
3992 rfs4_dbe_lock(oo->ro_dbe);
3993 for (sp = list_head(&oo->ro_statelist); sp != NULL;
3994 sp = list_next(&oo->ro_statelist, sp)) {
3995
3996 rfs4_dbe_lock(sp->rs_dbe);
3997 for (lsp = list_head(&sp->rs_lostatelist);
3998 lsp != NULL;
3999 lsp = list_next(&sp->rs_lostatelist, lsp)) {
4000 if (lsp->rls_locker == lo) {
4001 rfs4_dbe_lock(lsp->rls_dbe);
4002 rfs4_dbe_invalidate(lsp->rls_dbe);
4003 rfs4_dbe_unlock(lsp->rls_dbe);
4004 }
4005 }
4006 rfs4_dbe_unlock(sp->rs_dbe);
4007 }
4008 rfs4_dbe_unlock(oo->ro_dbe);
4009 }
4010 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
4011
4012 rfs4_lockowner_rele(lo);
4013
4014 *cs->statusp = resp->status = NFS4_OK;
4015
4016 out:
4017 DTRACE_NFSV4_2(op__release__lockowner__done, struct compound_state *,
4018 cs, RELEASE_LOCKOWNER4res *, resp);
4019 }
4020
4021 /*
4022 * short utility function to lookup a file and recall the delegation
4023 */
4024 static rfs4_file_t *
4025 rfs4_lookup_and_findfile(vnode_t *dvp, char *nm, vnode_t **vpp,
4026 int *lkup_error, cred_t *cr)
4027 {
4028 vnode_t *vp;
4029 rfs4_file_t *fp = NULL;
4030 bool_t fcreate = FALSE;
4031 int error;
4032
4033 if (vpp)
4034 *vpp = NULL;
4035
4036 if ((error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cr, NULL, NULL,
4037 NULL)) == 0) {
4038 if (vp->v_type == VREG)
4039 fp = rfs4_findfile(vp, NULL, &fcreate);
4040 if (vpp)
4041 *vpp = vp;
4042 else
4043 VN_RELE(vp);
4044 }
4045
4046 if (lkup_error)
4047 *lkup_error = error;
4048
4049 return (fp);
4050 }
4051
4052 /*
4053 * remove: args: CURRENT_FH: directory; name.
4054 * res: status. If success - CURRENT_FH unchanged, return change_info
4055 * for directory.
4056 */
4057 /* ARGSUSED */
4058 static void
4059 rfs4_op_remove(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4060 struct compound_state *cs)
4061 {
4062 REMOVE4args *args = &argop->nfs_argop4_u.opremove;
4063 REMOVE4res *resp = &resop->nfs_resop4_u.opremove;
4064 int error;
4065 vnode_t *dvp, *vp;
4066 struct vattr bdva, idva, adva;
4067 char *nm;
4068 uint_t len;
4069 rfs4_file_t *fp;
4070 int in_crit = 0;
4071 bslabel_t *clabel;
4072 struct sockaddr *ca;
4073 char *name = NULL;
4074 nfsstat4 status;
4075
4076 DTRACE_NFSV4_2(op__remove__start, struct compound_state *, cs,
4077 REMOVE4args *, args);
4078
4079 /* CURRENT_FH: directory */
4080 dvp = cs->vp;
4081 if (dvp == NULL) {
4082 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4083 goto out;
4084 }
4085
4086 if (cs->access == CS_ACCESS_DENIED) {
4087 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4088 goto out;
4089 }
4090
4091 /*
4092 * If there is an unshared filesystem mounted on this vnode,
4093 * Do not allow to remove anything in this directory.
4094 */
4095 if (vn_ismntpt(dvp)) {
4096 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4097 goto out;
4098 }
4099
4100 if (dvp->v_type != VDIR) {
4101 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4102 goto out;
4103 }
4104
4105 status = utf8_dir_verify(&args->target);
4106 if (status != NFS4_OK) {
4107 *cs->statusp = resp->status = status;
4108 goto out;
4109 }
4110
4111 /*
4112 * Lookup the file so that we can check if it's a directory
4113 */
4114 nm = utf8_to_fn(&args->target, &len, NULL);
4115 if (nm == NULL) {
4116 *cs->statusp = resp->status = NFS4ERR_INVAL;
4117 goto out;
4118 }
4119
4120 if (len > MAXNAMELEN) {
4121 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4122 kmem_free(nm, len);
4123 goto out;
4124 }
4125
4126 if (rdonly4(req, cs)) {
4127 *cs->statusp = resp->status = NFS4ERR_ROFS;
4128 kmem_free(nm, len);
4129 goto out;
4130 }
4131
4132 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4133 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
4134 MAXPATHLEN + 1);
4135
4136 if (name == NULL) {
4137 *cs->statusp = resp->status = NFS4ERR_INVAL;
4138 kmem_free(nm, len);
4139 goto out;
4140 }
4141
4142 /*
4143 * Lookup the file to determine type and while we are see if
4144 * there is a file struct around and check for delegation.
4145 * We don't need to acquire va_seq before this lookup, if
4146 * it causes an update, cinfo.before will not match, which will
4147 * trigger a cache flush even if atomic is TRUE.
4148 */
4149 if (fp = rfs4_lookup_and_findfile(dvp, name, &vp, &error, cs->cr)) {
4150 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4151 NULL)) {
4152 VN_RELE(vp);
4153 rfs4_file_rele(fp);
4154 *cs->statusp = resp->status = NFS4ERR_DELAY;
4155 if (nm != name)
4156 kmem_free(name, MAXPATHLEN + 1);
4157 kmem_free(nm, len);
4158 goto out;
4159 }
4160 }
4161
4162 /* Didn't find anything to remove */
4163 if (vp == NULL) {
4164 *cs->statusp = resp->status = error;
4165 if (nm != name)
4166 kmem_free(name, MAXPATHLEN + 1);
4167 kmem_free(nm, len);
4168 goto out;
4169 }
4170
4171 if (nbl_need_check(vp)) {
4172 nbl_start_crit(vp, RW_READER);
4173 in_crit = 1;
4174 if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0, NULL)) {
4175 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4176 if (nm != name)
4177 kmem_free(name, MAXPATHLEN + 1);
4178 kmem_free(nm, len);
4179 nbl_end_crit(vp);
4180 VN_RELE(vp);
4181 if (fp) {
4182 rfs4_clear_dont_grant(fp);
4183 rfs4_file_rele(fp);
4184 }
4185 goto out;
4186 }
4187 }
4188
4189 /* check label before allowing removal */
4190 if (is_system_labeled()) {
4191 ASSERT(req->rq_label != NULL);
4192 clabel = req->rq_label;
4193 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
4194 "got client label from request(1)",
4195 struct svc_req *, req);
4196 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4197 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4198 cs->exi)) {
4199 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4200 if (name != nm)
4201 kmem_free(name, MAXPATHLEN + 1);
4202 kmem_free(nm, len);
4203 if (in_crit)
4204 nbl_end_crit(vp);
4205 VN_RELE(vp);
4206 if (fp) {
4207 rfs4_clear_dont_grant(fp);
4208 rfs4_file_rele(fp);
4209 }
4210 goto out;
4211 }
4212 }
4213 }
4214
4215 /* Get dir "before" change value */
4216 bdva.va_mask = AT_CTIME|AT_SEQ;
4217 error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
4218 if (error) {
4219 *cs->statusp = resp->status = puterrno4(error);
4220 if (nm != name)
4221 kmem_free(name, MAXPATHLEN + 1);
4222 kmem_free(nm, len);
4223 if (in_crit)
4224 nbl_end_crit(vp);
4225 VN_RELE(vp);
4226 if (fp) {
4227 rfs4_clear_dont_grant(fp);
4228 rfs4_file_rele(fp);
4229 }
4230 goto out;
4231 }
4232 NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
4233
4234 /* Actually do the REMOVE operation */
4235 if (vp->v_type == VDIR) {
4236 /*
4237 * Can't remove a directory that has a mounted-on filesystem.
4238 */
4239 if (vn_ismntpt(vp)) {
4240 error = EACCES;
4241 } else {
4242 /*
4243 * System V defines rmdir to return EEXIST,
4244 * not ENOTEMPTY, if the directory is not
4245 * empty. A System V NFS server needs to map
4246 * NFS4ERR_EXIST to NFS4ERR_NOTEMPTY to
4247 * transmit over the wire.
4248 */
4249 if ((error = VOP_RMDIR(dvp, name, rootdir, cs->cr,
4250 NULL, 0)) == EEXIST)
4251 error = ENOTEMPTY;
4252 }
4253 } else {
4254 if ((error = VOP_REMOVE(dvp, name, cs->cr, NULL, 0)) == 0 &&
4255 fp != NULL) {
4256 struct vattr va;
4257 vnode_t *tvp;
4258
4259 rfs4_dbe_lock(fp->rf_dbe);
4260 tvp = fp->rf_vp;
4261 if (tvp)
4262 VN_HOLD(tvp);
4263 rfs4_dbe_unlock(fp->rf_dbe);
4264
4265 if (tvp) {
4266 /*
4267 * This is va_seq safe because we are not
4268 * manipulating dvp.
4269 */
4270 va.va_mask = AT_NLINK;
4271 if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4272 va.va_nlink == 0) {
4273 /* Remove state on file remove */
4274 if (in_crit) {
4275 nbl_end_crit(vp);
4276 in_crit = 0;
4277 }
4278 rfs4_close_all_state(fp);
4279 }
4280 VN_RELE(tvp);
4281 }
4282 }
4283 }
4284
4285 if (in_crit)
4286 nbl_end_crit(vp);
4287 VN_RELE(vp);
4288
4289 if (fp) {
4290 rfs4_clear_dont_grant(fp);
4291 rfs4_file_rele(fp);
4292 }
4293 if (nm != name)
4294 kmem_free(name, MAXPATHLEN + 1);
4295 kmem_free(nm, len);
4296
4297 if (error) {
4298 *cs->statusp = resp->status = puterrno4(error);
4299 goto out;
4300 }
4301
4302 /*
4303 * Get the initial "after" sequence number, if it fails, set to zero
4304 */
4305 idva.va_mask = AT_SEQ;
4306 if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
4307 idva.va_seq = 0;
4308
4309 /*
4310 * Force modified data and metadata out to stable storage.
4311 */
4312 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
4313
4314 /*
4315 * Get "after" change value, if it fails, simply return the
4316 * before value.
4317 */
4318 adva.va_mask = AT_CTIME|AT_SEQ;
4319 if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
4320 adva.va_ctime = bdva.va_ctime;
4321 adva.va_seq = 0;
4322 }
4323
4324 NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
4325
4326 /*
4327 * The cinfo.atomic = TRUE only if we have
4328 * non-zero va_seq's, and it has incremented by exactly one
4329 * during the VOP_REMOVE/RMDIR and it didn't change during
4330 * the VOP_FSYNC.
4331 */
4332 if (bdva.va_seq && idva.va_seq && adva.va_seq &&
4333 idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
4334 resp->cinfo.atomic = TRUE;
4335 else
4336 resp->cinfo.atomic = FALSE;
4337
4338 *cs->statusp = resp->status = NFS4_OK;
4339
4340 out:
4341 DTRACE_NFSV4_2(op__remove__done, struct compound_state *, cs,
4342 REMOVE4res *, resp);
4343 }
4344
4345 /*
4346 * rename: args: SAVED_FH: from directory, CURRENT_FH: target directory,
4347 * oldname and newname.
4348 * res: status. If success - CURRENT_FH unchanged, return change_info
4349 * for both from and target directories.
4350 */
4351 /* ARGSUSED */
4352 static void
4353 rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4354 struct compound_state *cs)
4355 {
4356 RENAME4args *args = &argop->nfs_argop4_u.oprename;
4357 RENAME4res *resp = &resop->nfs_resop4_u.oprename;
4358 int error;
4359 vnode_t *odvp;
4360 vnode_t *ndvp;
4361 vnode_t *srcvp, *targvp;
4362 struct vattr obdva, oidva, oadva;
4363 struct vattr nbdva, nidva, nadva;
4364 char *onm, *nnm;
4365 uint_t olen, nlen;
4366 rfs4_file_t *fp, *sfp;
4367 int in_crit_src, in_crit_targ;
4368 int fp_rele_grant_hold, sfp_rele_grant_hold;
4369 bslabel_t *clabel;
4370 struct sockaddr *ca;
4371 char *converted_onm = NULL;
4372 char *converted_nnm = NULL;
4373 nfsstat4 status;
4374
4375 DTRACE_NFSV4_2(op__rename__start, struct compound_state *, cs,
4376 RENAME4args *, args);
4377
4378 fp = sfp = NULL;
4379 srcvp = targvp = NULL;
4380 in_crit_src = in_crit_targ = 0;
4381 fp_rele_grant_hold = sfp_rele_grant_hold = 0;
4382
4383 /* CURRENT_FH: target directory */
4384 ndvp = cs->vp;
4385 if (ndvp == NULL) {
4386 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4387 goto out;
4388 }
4389
4390 /* SAVED_FH: from directory */
4391 odvp = cs->saved_vp;
4392 if (odvp == NULL) {
4393 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4394 goto out;
4395 }
4396
4397 if (cs->access == CS_ACCESS_DENIED) {
4398 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4399 goto out;
4400 }
4401
4402 /*
4403 * If there is an unshared filesystem mounted on this vnode,
4404 * do not allow to rename objects in this directory.
4405 */
4406 if (vn_ismntpt(odvp)) {
4407 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4408 goto out;
4409 }
4410
4411 /*
4412 * If there is an unshared filesystem mounted on this vnode,
4413 * do not allow to rename to this directory.
4414 */
4415 if (vn_ismntpt(ndvp)) {
4416 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4417 goto out;
4418 }
4419
4420 if (odvp->v_type != VDIR || ndvp->v_type != VDIR) {
4421 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4422 goto out;
4423 }
4424
4425 if (cs->saved_exi != cs->exi) {
4426 *cs->statusp = resp->status = NFS4ERR_XDEV;
4427 goto out;
4428 }
4429
4430 status = utf8_dir_verify(&args->oldname);
4431 if (status != NFS4_OK) {
4432 *cs->statusp = resp->status = status;
4433 goto out;
4434 }
4435
4436 status = utf8_dir_verify(&args->newname);
4437 if (status != NFS4_OK) {
4438 *cs->statusp = resp->status = status;
4439 goto out;
4440 }
4441
4442 onm = utf8_to_fn(&args->oldname, &olen, NULL);
4443 if (onm == NULL) {
4444 *cs->statusp = resp->status = NFS4ERR_INVAL;
4445 goto out;
4446 }
4447 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4448 nlen = MAXPATHLEN + 1;
4449 converted_onm = nfscmd_convname(ca, cs->exi, onm, NFSCMD_CONV_INBOUND,
4450 nlen);
4451
4452 if (converted_onm == NULL) {
4453 *cs->statusp = resp->status = NFS4ERR_INVAL;
4454 kmem_free(onm, olen);
4455 goto out;
4456 }
4457
4458 nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4459 if (nnm == NULL) {
4460 *cs->statusp = resp->status = NFS4ERR_INVAL;
4461 if (onm != converted_onm)
4462 kmem_free(converted_onm, MAXPATHLEN + 1);
4463 kmem_free(onm, olen);
4464 goto out;
4465 }
4466 converted_nnm = nfscmd_convname(ca, cs->exi, nnm, NFSCMD_CONV_INBOUND,
4467 MAXPATHLEN + 1);
4468
4469 if (converted_nnm == NULL) {
4470 *cs->statusp = resp->status = NFS4ERR_INVAL;
4471 kmem_free(nnm, nlen);
4472 nnm = NULL;
4473 if (onm != converted_onm)
4474 kmem_free(converted_onm, MAXPATHLEN + 1);
4475 kmem_free(onm, olen);
4476 goto out;
4477 }
4478
4479
4480 if (olen > MAXNAMELEN || nlen > MAXNAMELEN) {
4481 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4482 kmem_free(onm, olen);
4483 kmem_free(nnm, nlen);
4484 goto out;
4485 }
4486
4487
4488 if (rdonly4(req, cs)) {
4489 *cs->statusp = resp->status = NFS4ERR_ROFS;
4490 if (onm != converted_onm)
4491 kmem_free(converted_onm, MAXPATHLEN + 1);
4492 kmem_free(onm, olen);
4493 if (nnm != converted_nnm)
4494 kmem_free(converted_nnm, MAXPATHLEN + 1);
4495 kmem_free(nnm, nlen);
4496 goto out;
4497 }
4498
4499 /* check label of the target dir */
4500 if (is_system_labeled()) {
4501 ASSERT(req->rq_label != NULL);
4502 clabel = req->rq_label;
4503 DTRACE_PROBE2(tx__rfs4__log__info__oprename__clabel, char *,
4504 "got client label from request(1)",
4505 struct svc_req *, req);
4506 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4507 if (!do_rfs_label_check(clabel, ndvp,
4508 EQUALITY_CHECK, cs->exi)) {
4509 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4510 goto err_out;
4511 }
4512 }
4513 }
4514
4515 /*
4516 * Is the source a file and have a delegation?
4517 * We don't need to acquire va_seq before these lookups, if
4518 * it causes an update, cinfo.before will not match, which will
4519 * trigger a cache flush even if atomic is TRUE.
4520 */
4521 if (sfp = rfs4_lookup_and_findfile(odvp, converted_onm, &srcvp,
4522 &error, cs->cr)) {
4523 if (rfs4_check_delegated_byfp(FWRITE, sfp, TRUE, TRUE, TRUE,
4524 NULL)) {
4525 *cs->statusp = resp->status = NFS4ERR_DELAY;
4526 goto err_out;
4527 }
4528 }
4529
4530 if (srcvp == NULL) {
4531 *cs->statusp = resp->status = puterrno4(error);
4532 if (onm != converted_onm)
4533 kmem_free(converted_onm, MAXPATHLEN + 1);
4534 kmem_free(onm, olen);
4535 if (nnm != converted_nnm)
4536 kmem_free(converted_nnm, MAXPATHLEN + 1);
4537 kmem_free(nnm, nlen);
4538 goto out;
4539 }
4540
4541 sfp_rele_grant_hold = 1;
4542
4543 /* Does the destination exist and a file and have a delegation? */
4544 if (fp = rfs4_lookup_and_findfile(ndvp, converted_nnm, &targvp,
4545 NULL, cs->cr)) {
4546 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4547 NULL)) {
4548 *cs->statusp = resp->status = NFS4ERR_DELAY;
4549 goto err_out;
4550 }
4551 }
4552 fp_rele_grant_hold = 1;
4553
4554
4555 /* Check for NBMAND lock on both source and target */
4556 if (nbl_need_check(srcvp)) {
4557 nbl_start_crit(srcvp, RW_READER);
4558 in_crit_src = 1;
4559 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
4560 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4561 goto err_out;
4562 }
4563 }
4564
4565 if (targvp && nbl_need_check(targvp)) {
4566 nbl_start_crit(targvp, RW_READER);
4567 in_crit_targ = 1;
4568 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
4569 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4570 goto err_out;
4571 }
4572 }
4573
4574 /* Get source "before" change value */
4575 obdva.va_mask = AT_CTIME|AT_SEQ;
4576 error = VOP_GETATTR(odvp, &obdva, 0, cs->cr, NULL);
4577 if (!error) {
4578 nbdva.va_mask = AT_CTIME|AT_SEQ;
4579 error = VOP_GETATTR(ndvp, &nbdva, 0, cs->cr, NULL);
4580 }
4581 if (error) {
4582 *cs->statusp = resp->status = puterrno4(error);
4583 goto err_out;
4584 }
4585
4586 NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.before, obdva.va_ctime)
4587 NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.before, nbdva.va_ctime)
4588
4589 if ((error = VOP_RENAME(odvp, converted_onm, ndvp, converted_nnm,
4590 cs->cr, NULL, 0)) == 0 && fp != NULL) {
4591 struct vattr va;
4592 vnode_t *tvp;
4593
4594 rfs4_dbe_lock(fp->rf_dbe);
4595 tvp = fp->rf_vp;
4596 if (tvp)
4597 VN_HOLD(tvp);
4598 rfs4_dbe_unlock(fp->rf_dbe);
4599
4600 if (tvp) {
4601 va.va_mask = AT_NLINK;
4602 if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4603 va.va_nlink == 0) {
4604 /* The file is gone and so should the state */
4605 if (in_crit_targ) {
4606 nbl_end_crit(targvp);
4607 in_crit_targ = 0;
4608 }
4609 rfs4_close_all_state(fp);
4610 }
4611 VN_RELE(tvp);
4612 }
4613 }
4614 if (error == 0)
4615 vn_renamepath(ndvp, srcvp, nnm, nlen - 1);
4616
4617 if (in_crit_src)
4618 nbl_end_crit(srcvp);
4619 if (srcvp)
4620 VN_RELE(srcvp);
4621 if (in_crit_targ)
4622 nbl_end_crit(targvp);
4623 if (targvp)
4624 VN_RELE(targvp);
4625
4626 if (sfp) {
4627 rfs4_clear_dont_grant(sfp);
4628 rfs4_file_rele(sfp);
4629 }
4630 if (fp) {
4631 rfs4_clear_dont_grant(fp);
4632 rfs4_file_rele(fp);
4633 }
4634
4635 if (converted_onm != onm)
4636 kmem_free(converted_onm, MAXPATHLEN + 1);
4637 kmem_free(onm, olen);
4638 if (converted_nnm != nnm)
4639 kmem_free(converted_nnm, MAXPATHLEN + 1);
4640 kmem_free(nnm, nlen);
4641
4642 /*
4643 * Get the initial "after" sequence number, if it fails, set to zero
4644 */
4645 oidva.va_mask = AT_SEQ;
4646 if (VOP_GETATTR(odvp, &oidva, 0, cs->cr, NULL))
4647 oidva.va_seq = 0;
4648
4649 nidva.va_mask = AT_SEQ;
4650 if (VOP_GETATTR(ndvp, &nidva, 0, cs->cr, NULL))
4651 nidva.va_seq = 0;
4652
4653 /*
4654 * Force modified data and metadata out to stable storage.
4655 */
4656 (void) VOP_FSYNC(odvp, 0, cs->cr, NULL);
4657 (void) VOP_FSYNC(ndvp, 0, cs->cr, NULL);
4658
4659 if (error) {
4660 *cs->statusp = resp->status = puterrno4(error);
4661 goto out;
4662 }
4663
4664 /*
4665 * Get "after" change values, if it fails, simply return the
4666 * before value.
4667 */
4668 oadva.va_mask = AT_CTIME|AT_SEQ;
4669 if (VOP_GETATTR(odvp, &oadva, 0, cs->cr, NULL)) {
4670 oadva.va_ctime = obdva.va_ctime;
4671 oadva.va_seq = 0;
4672 }
4673
4674 nadva.va_mask = AT_CTIME|AT_SEQ;
4675 if (VOP_GETATTR(odvp, &nadva, 0, cs->cr, NULL)) {
4676 nadva.va_ctime = nbdva.va_ctime;
4677 nadva.va_seq = 0;
4678 }
4679
4680 NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.after, oadva.va_ctime)
4681 NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.after, nadva.va_ctime)
4682
4683 /*
4684 * The cinfo.atomic = TRUE only if we have
4685 * non-zero va_seq's, and it has incremented by exactly one
4686 * during the VOP_RENAME and it didn't change during the VOP_FSYNC.
4687 */
4688 if (obdva.va_seq && oidva.va_seq && oadva.va_seq &&
4689 oidva.va_seq == (obdva.va_seq + 1) && oidva.va_seq == oadva.va_seq)
4690 resp->source_cinfo.atomic = TRUE;
4691 else
4692 resp->source_cinfo.atomic = FALSE;
4693
4694 if (nbdva.va_seq && nidva.va_seq && nadva.va_seq &&
4695 nidva.va_seq == (nbdva.va_seq + 1) && nidva.va_seq == nadva.va_seq)
4696 resp->target_cinfo.atomic = TRUE;
4697 else
4698 resp->target_cinfo.atomic = FALSE;
4699
4700 #ifdef VOLATILE_FH_TEST
4701 {
4702 extern void add_volrnm_fh(struct exportinfo *, vnode_t *);
4703
4704 /*
4705 * Add the renamed file handle to the volatile rename list
4706 */
4707 if (cs->exi->exi_export.ex_flags & EX_VOLRNM) {
4708 /* file handles may expire on rename */
4709 vnode_t *vp;
4710
4711 nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4712 /*
4713 * Already know that nnm will be a valid string
4714 */
4715 error = VOP_LOOKUP(ndvp, nnm, &vp, NULL, 0, NULL, cs->cr,
4716 NULL, NULL, NULL);
4717 kmem_free(nnm, nlen);
4718 if (!error) {
4719 add_volrnm_fh(cs->exi, vp);
4720 VN_RELE(vp);
4721 }
4722 }
4723 }
4724 #endif /* VOLATILE_FH_TEST */
4725
4726 *cs->statusp = resp->status = NFS4_OK;
4727 out:
4728 DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4729 RENAME4res *, resp);
4730 return;
4731
4732 err_out:
4733 if (onm != converted_onm)
4734 kmem_free(converted_onm, MAXPATHLEN + 1);
4735 if (onm != NULL)
4736 kmem_free(onm, olen);
4737 if (nnm != converted_nnm)
4738 kmem_free(converted_nnm, MAXPATHLEN + 1);
4739 if (nnm != NULL)
4740 kmem_free(nnm, nlen);
4741
4742 if (in_crit_src) nbl_end_crit(srcvp);
4743 if (in_crit_targ) nbl_end_crit(targvp);
4744 if (targvp) VN_RELE(targvp);
4745 if (srcvp) VN_RELE(srcvp);
4746 if (sfp) {
4747 if (sfp_rele_grant_hold) rfs4_clear_dont_grant(sfp);
4748 rfs4_file_rele(sfp);
4749 }
4750 if (fp) {
4751 if (fp_rele_grant_hold) rfs4_clear_dont_grant(fp);
4752 rfs4_file_rele(fp);
4753 }
4754
4755 DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4756 RENAME4res *, resp);
4757 }
4758
4759 /* ARGSUSED */
4760 static void
4761 rfs4_op_renew(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4762 struct compound_state *cs)
4763 {
4764 RENEW4args *args = &argop->nfs_argop4_u.oprenew;
4765 RENEW4res *resp = &resop->nfs_resop4_u.oprenew;
4766 rfs4_client_t *cp;
4767
4768 DTRACE_NFSV4_2(op__renew__start, struct compound_state *, cs,
4769 RENEW4args *, args);
4770
4771 if ((cp = rfs4_findclient_by_id(args->clientid, FALSE)) == NULL) {
4772 *cs->statusp = resp->status =
4773 rfs4_check_clientid(&args->clientid, 0);
4774 goto out;
4775 }
4776
4777 if (rfs4_lease_expired(cp)) {
4778 rfs4_client_rele(cp);
4779 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
4780 goto out;
4781 }
4782
4783 rfs4_update_lease(cp);
4784
4785 mutex_enter(cp->rc_cbinfo.cb_lock);
4786 if (cp->rc_cbinfo.cb_notified_of_cb_path_down == FALSE) {
4787 cp->rc_cbinfo.cb_notified_of_cb_path_down = TRUE;
4788 *cs->statusp = resp->status = NFS4ERR_CB_PATH_DOWN;
4789 } else {
4790 *cs->statusp = resp->status = NFS4_OK;
4791 }
4792 mutex_exit(cp->rc_cbinfo.cb_lock);
4793
4794 rfs4_client_rele(cp);
4795
4796 out:
4797 DTRACE_NFSV4_2(op__renew__done, struct compound_state *, cs,
4798 RENEW4res *, resp);
4799 }
4800
4801 /* ARGSUSED */
4802 static void
4803 rfs4_op_restorefh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
4804 struct compound_state *cs)
4805 {
4806 RESTOREFH4res *resp = &resop->nfs_resop4_u.oprestorefh;
4807
4808 DTRACE_NFSV4_1(op__restorefh__start, struct compound_state *, cs);
4809
4810 /* No need to check cs->access - we are not accessing any object */
4811 if ((cs->saved_vp == NULL) || (cs->saved_fh.nfs_fh4_val == NULL)) {
4812 *cs->statusp = resp->status = NFS4ERR_RESTOREFH;
4813 goto out;
4814 }
4815 if (cs->vp != NULL) {
4816 VN_RELE(cs->vp);
4817 }
4818 cs->vp = cs->saved_vp;
4819 cs->saved_vp = NULL;
4820 cs->exi = cs->saved_exi;
4821 nfs_fh4_copy(&cs->saved_fh, &cs->fh);
4822 *cs->statusp = resp->status = NFS4_OK;
4823 cs->deleg = FALSE;
4824
4825 out:
4826 DTRACE_NFSV4_2(op__restorefh__done, struct compound_state *, cs,
4827 RESTOREFH4res *, resp);
4828 }
4829
4830 /* ARGSUSED */
4831 static void
4832 rfs4_op_savefh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4833 struct compound_state *cs)
4834 {
4835 SAVEFH4res *resp = &resop->nfs_resop4_u.opsavefh;
4836
4837 DTRACE_NFSV4_1(op__savefh__start, struct compound_state *, cs);
4838
4839 /* No need to check cs->access - we are not accessing any object */
4840 if (cs->vp == NULL) {
4841 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4842 goto out;
4843 }
4844 if (cs->saved_vp != NULL) {
4845 VN_RELE(cs->saved_vp);
4846 }
4847 cs->saved_vp = cs->vp;
4848 VN_HOLD(cs->saved_vp);
4849 cs->saved_exi = cs->exi;
4850 /*
4851 * since SAVEFH is fairly rare, don't alloc space for its fh
4852 * unless necessary.
4853 */
4854 if (cs->saved_fh.nfs_fh4_val == NULL) {
4855 cs->saved_fh.nfs_fh4_val = kmem_alloc(NFS4_FHSIZE, KM_SLEEP);
4856 }
4857 nfs_fh4_copy(&cs->fh, &cs->saved_fh);
4858 *cs->statusp = resp->status = NFS4_OK;
4859
4860 out:
4861 DTRACE_NFSV4_2(op__savefh__done, struct compound_state *, cs,
4862 SAVEFH4res *, resp);
4863 }
4864
4865 /*
4866 * rfs4_verify_attr is called when nfsv4 Setattr failed, but we wish to
4867 * return the bitmap of attrs that were set successfully. It is also
4868 * called by Verify/Nverify to test the vattr/vfsstat attrs. It should
4869 * always be called only after rfs4_do_set_attrs().
4870 *
4871 * Verify that the attributes are same as the expected ones. sargp->vap
4872 * and sargp->sbp contain the input attributes as translated from fattr4.
4873 *
4874 * This function verifies only the attrs that correspond to a vattr or
4875 * vfsstat struct. That is because of the extra step needed to get the
4876 * corresponding system structs. Other attributes have already been set or
4877 * verified by do_rfs4_set_attrs.
4878 *
4879 * Return 0 if all attrs match, -1 if some don't, error if error processing.
4880 */
4881 static int
4882 rfs4_verify_attr(struct nfs4_svgetit_arg *sargp,
4883 bitmap4 *resp, struct nfs4_ntov_table *ntovp)
4884 {
4885 int error, ret_error = 0;
4886 int i, k;
4887 uint_t sva_mask = sargp->vap->va_mask;
4888 uint_t vbit;
4889 union nfs4_attr_u *na;
4890 uint8_t *amap;
4891 bool_t getsb = ntovp->vfsstat;
4892
4893 if (sva_mask != 0) {
4894 /*
4895 * Okay to overwrite sargp->vap because we verify based
4896 * on the incoming values.
4897 */
4898 ret_error = VOP_GETATTR(sargp->cs->vp, sargp->vap, 0,
4899 sargp->cs->cr, NULL);
4900 if (ret_error) {
4901 if (resp == NULL)
4902 return (ret_error);
4903 /*
4904 * Must return bitmap of successful attrs
4905 */
4906 sva_mask = 0; /* to prevent checking vap later */
4907 } else {
4908 /*
4909 * Some file systems clobber va_mask. it is probably
4910 * wrong of them to do so, nonethless we practice
4911 * defensive coding.
4912 * See bug id 4276830.
4913 */
4914 sargp->vap->va_mask = sva_mask;
4915 }
4916 }
4917
4918 if (getsb) {
4919 /*
4920 * Now get the superblock and loop on the bitmap, as there is
4921 * no simple way of translating from superblock to bitmap4.
4922 */
4923 ret_error = VFS_STATVFS(sargp->cs->vp->v_vfsp, sargp->sbp);
4924 if (ret_error) {
4925 if (resp == NULL)
4926 goto errout;
4927 getsb = FALSE;
4928 }
4929 }
4930
4931 /*
4932 * Now loop and verify each attribute which getattr returned
4933 * whether it's the same as the input.
4934 */
4935 if (resp == NULL && !getsb && (sva_mask == 0))
4936 goto errout;
4937
4938 na = ntovp->na;
4939 amap = ntovp->amap;
4940 k = 0;
4941 for (i = 0; i < ntovp->attrcnt; i++, na++, amap++) {
4942 k = *amap;
4943 ASSERT(nfs4_ntov_map[k].nval == k);
4944 vbit = nfs4_ntov_map[k].vbit;
4945
4946 /*
4947 * If vattr attribute but VOP_GETATTR failed, or it's
4948 * superblock attribute but VFS_STATVFS failed, skip
4949 */
4950 if (vbit) {
4951 if ((vbit & sva_mask) == 0)
4952 continue;
4953 } else if (!(getsb && nfs4_ntov_map[k].vfsstat)) {
4954 continue;
4955 }
4956 error = (*nfs4_ntov_map[k].sv_getit)(NFS4ATTR_VERIT, sargp, na);
4957 if (resp != NULL) {
4958 if (error)
4959 ret_error = -1; /* not all match */
4960 else /* update response bitmap */
4961 *resp |= nfs4_ntov_map[k].fbit;
4962 continue;
4963 }
4964 if (error) {
4965 ret_error = -1; /* not all match */
4966 break;
4967 }
4968 }
4969 errout:
4970 return (ret_error);
4971 }
4972
4973 /*
4974 * Decode the attribute to be set/verified. If the attr requires a sys op
4975 * (VOP_GETATTR, VFS_VFSSTAT), and the request is to verify, then don't
4976 * call the sv_getit function for it, because the sys op hasn't yet been done.
4977 * Return 0 for success, error code if failed.
4978 *
4979 * Note: the decoded arg is not freed here but in nfs4_ntov_table_free.
4980 */
4981 static int
4982 decode_fattr4_attr(nfs4_attr_cmd_t cmd, struct nfs4_svgetit_arg *sargp,
4983 int k, XDR *xdrp, bitmap4 *resp_bval, union nfs4_attr_u *nap)
4984 {
4985 int error = 0;
4986 bool_t set_later;
4987
4988 sargp->vap->va_mask |= nfs4_ntov_map[k].vbit;
4989
4990 if ((*nfs4_ntov_map[k].xfunc)(xdrp, nap)) {
4991 set_later = nfs4_ntov_map[k].vbit || nfs4_ntov_map[k].vfsstat;
4992 /*
4993 * don't verify yet if a vattr or sb dependent attr,
4994 * because we don't have their sys values yet.
4995 * Will be done later.
4996 */
4997 if (! (set_later && (cmd == NFS4ATTR_VERIT))) {
4998 /*
4999 * ACLs are a special case, since setting the MODE
5000 * conflicts with setting the ACL. We delay setting
5001 * the ACL until all other attributes have been set.
5002 * The ACL gets set in do_rfs4_op_setattr().
5003 */
5004 if (nfs4_ntov_map[k].fbit != FATTR4_ACL_MASK) {
5005 error = (*nfs4_ntov_map[k].sv_getit)(cmd,
5006 sargp, nap);
5007 if (error) {
5008 xdr_free(nfs4_ntov_map[k].xfunc,
5009 (caddr_t)nap);
5010 }
5011 }
5012 }
5013 } else {
5014 #ifdef DEBUG
5015 cmn_err(CE_NOTE, "decode_fattr4_attr: error "
5016 "decoding attribute %d\n", k);
5017 #endif
5018 error = EINVAL;
5019 }
5020 if (!error && resp_bval && !set_later) {
5021 *resp_bval |= nfs4_ntov_map[k].fbit;
5022 }
5023
5024 return (error);
5025 }
5026
5027 /*
5028 * Set vattr based on incoming fattr4 attrs - used by setattr.
5029 * Set response mask. Ignore any values that are not writable vattr attrs.
5030 */
5031 static nfsstat4
5032 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5033 struct nfs4_svgetit_arg *sargp, struct nfs4_ntov_table *ntovp,
5034 nfs4_attr_cmd_t cmd)
5035 {
5036 int error = 0;
5037 int i;
5038 char *attrs = fattrp->attrlist4;
5039 uint32_t attrslen = fattrp->attrlist4_len;
5040 XDR xdr;
5041 nfsstat4 status = NFS4_OK;
5042 vnode_t *vp = cs->vp;
5043 union nfs4_attr_u *na;
5044 uint8_t *amap;
5045
5046 #ifndef lint
5047 /*
5048 * Make sure that maximum attribute number can be expressed as an
5049 * 8 bit quantity.
5050 */
5051 ASSERT(NFS4_MAXNUM_ATTRS <= (UINT8_MAX + 1));
5052 #endif
5053
5054 if (vp == NULL) {
5055 if (resp)
5056 *resp = 0;
5057 return (NFS4ERR_NOFILEHANDLE);
5058 }
5059 if (cs->access == CS_ACCESS_DENIED) {
5060 if (resp)
5061 *resp = 0;
5062 return (NFS4ERR_ACCESS);
5063 }
5064
5065 sargp->op = cmd;
5066 sargp->cs = cs;
5067 sargp->flag = 0; /* may be set later */
5068 sargp->vap->va_mask = 0;
5069 sargp->rdattr_error = NFS4_OK;
5070 sargp->rdattr_error_req = FALSE;
5071 /* sargp->sbp is set by the caller */
5072
5073 xdrmem_create(&xdr, attrs, attrslen, XDR_DECODE);
5074
5075 na = ntovp->na;
5076 amap = ntovp->amap;
5077
5078 /*
5079 * The following loop iterates on the nfs4_ntov_map checking
5080 * if the fbit is set in the requested bitmap.
5081 * If set then we process the arguments using the
5082 * rfs4_fattr4 conversion functions to populate the setattr
5083 * vattr and va_mask. Any settable attrs that are not using vattr
5084 * will be set in this loop.
5085 */
5086 for (i = 0; i < nfs4_ntov_map_size; i++) {
5087 if (!(fattrp->attrmask & nfs4_ntov_map[i].fbit)) {
5088 continue;
5089 }
5090 /*
5091 * If setattr, must be a writable attr.
5092 * If verify/nverify, must be a readable attr.
5093 */
5094 if ((error = (*nfs4_ntov_map[i].sv_getit)(
5095 NFS4ATTR_SUPPORTED, sargp, NULL)) != 0) {
5096 /*
5097 * Client tries to set/verify an
5098 * unsupported attribute, tries to set
5099 * a read only attr or verify a write
5100 * only one - error!
5101 */
5102 break;
5103 }
5104 /*
5105 * Decode the attribute to set/verify
5106 */
5107 error = decode_fattr4_attr(cmd, sargp, nfs4_ntov_map[i].nval,
5108 &xdr, resp ? resp : NULL, na);
5109 if (error)
5110 break;
5111 *amap++ = (uint8_t)nfs4_ntov_map[i].nval;
5112 na++;
5113 (ntovp->attrcnt)++;
5114 if (nfs4_ntov_map[i].vfsstat)
5115 ntovp->vfsstat = TRUE;
5116 }
5117
5118 if (error != 0)
5119 status = (error == ENOTSUP ? NFS4ERR_ATTRNOTSUPP :
5120 puterrno4(error));
5121 /* xdrmem_destroy(&xdrs); */ /* NO-OP */
5122 return (status);
5123 }
5124
5125 static nfsstat4
5126 do_rfs4_op_setattr(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5127 stateid4 *stateid)
5128 {
5129 int error = 0;
5130 struct nfs4_svgetit_arg sarg;
5131 bool_t trunc;
5132
5133 nfsstat4 status = NFS4_OK;
5134 cred_t *cr = cs->cr;
5135 vnode_t *vp = cs->vp;
5136 struct nfs4_ntov_table ntov;
5137 struct statvfs64 sb;
5138 struct vattr bva;
5139 struct flock64 bf;
5140 int in_crit = 0;
5141 uint_t saved_mask = 0;
5142 caller_context_t ct;
5143
5144 *resp = 0;
5145 sarg.sbp = &sb;
5146 sarg.is_referral = B_FALSE;
5147 nfs4_ntov_table_init(&ntov);
5148 status = do_rfs4_set_attrs(resp, fattrp, cs, &sarg, &ntov,
5149 NFS4ATTR_SETIT);
5150 if (status != NFS4_OK) {
5151 /*
5152 * failed set attrs
5153 */
5154 goto done;
5155 }
5156 if ((sarg.vap->va_mask == 0) &&
5157 (! (fattrp->attrmask & FATTR4_ACL_MASK))) {
5158 /*
5159 * no further work to be done
5160 */
5161 goto done;
5162 }
5163
5164 /*
5165 * If we got a request to set the ACL and the MODE, only
5166 * allow changing VSUID, VSGID, and VSVTX. Attempting
5167 * to change any other bits, along with setting an ACL,
5168 * gives NFS4ERR_INVAL.
5169 */
5170 if ((fattrp->attrmask & FATTR4_ACL_MASK) &&
5171 (fattrp->attrmask & FATTR4_MODE_MASK)) {
5172 vattr_t va;
5173
5174 va.va_mask = AT_MODE;
5175 error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
5176 if (error) {
5177 status = puterrno4(error);
5178 goto done;
5179 }
5180 if ((sarg.vap->va_mode ^ va.va_mode) &
5181 ~(VSUID | VSGID | VSVTX)) {
5182 status = NFS4ERR_INVAL;
5183 goto done;
5184 }
5185 }
5186
5187 /* Check stateid only if size has been set */
5188 if (sarg.vap->va_mask & AT_SIZE) {
5189 trunc = (sarg.vap->va_size == 0);
5190 status = rfs4_check_stateid(FWRITE, cs->vp, stateid,
5191 trunc, &cs->deleg, sarg.vap->va_mask & AT_SIZE, &ct);
5192 if (status != NFS4_OK)
5193 goto done;
5194 } else {
5195 ct.cc_sysid = 0;
5196 ct.cc_pid = 0;
5197 ct.cc_caller_id = nfs4_srv_caller_id;
5198 ct.cc_flags = CC_DONTBLOCK;
5199 }
5200
5201 /* XXX start of possible race with delegations */
5202
5203 /*
5204 * We need to specially handle size changes because it is
5205 * possible for the client to create a file with read-only
5206 * modes, but with the file opened for writing. If the client
5207 * then tries to set the file size, e.g. ftruncate(3C),
5208 * fcntl(F_FREESP), the normal access checking done in
5209 * VOP_SETATTR would prevent the client from doing it even though
5210 * it should be allowed to do so. To get around this, we do the
5211 * access checking for ourselves and use VOP_SPACE which doesn't
5212 * do the access checking.
5213 * Also the client should not be allowed to change the file
5214 * size if there is a conflicting non-blocking mandatory lock in
5215 * the region of the change.
5216 */
5217 if (vp->v_type == VREG && (sarg.vap->va_mask & AT_SIZE)) {
5218 u_offset_t offset;
5219 ssize_t length;
5220
5221 /*
5222 * ufs_setattr clears AT_SIZE from vap->va_mask, but
5223 * before returning, sarg.vap->va_mask is used to
5224 * generate the setattr reply bitmap. We also clear
5225 * AT_SIZE below before calling VOP_SPACE. For both
5226 * of these cases, the va_mask needs to be saved here
5227 * and restored after calling VOP_SETATTR.
5228 */
5229 saved_mask = sarg.vap->va_mask;
5230
5231 /*
5232 * Check any possible conflict due to NBMAND locks.
5233 * Get into critical region before VOP_GETATTR, so the
5234 * size attribute is valid when checking conflicts.
5235 */
5236 if (nbl_need_check(vp)) {
5237 nbl_start_crit(vp, RW_READER);
5238 in_crit = 1;
5239 }
5240
5241 bva.va_mask = AT_UID|AT_SIZE;
5242 if (error = VOP_GETATTR(vp, &bva, 0, cr, &ct)) {
5243 status = puterrno4(error);
5244 goto done;
5245 }
5246
5247 if (in_crit) {
5248 if (sarg.vap->va_size < bva.va_size) {
5249 offset = sarg.vap->va_size;
5250 length = bva.va_size - sarg.vap->va_size;
5251 } else {
5252 offset = bva.va_size;
5253 length = sarg.vap->va_size - bva.va_size;
5254 }
5255 if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
5256 &ct)) {
5257 status = NFS4ERR_LOCKED;
5258 goto done;
5259 }
5260 }
5261
5262 if (crgetuid(cr) == bva.va_uid) {
5263 sarg.vap->va_mask &= ~AT_SIZE;
5264 bf.l_type = F_WRLCK;
5265 bf.l_whence = 0;
5266 bf.l_start = (off64_t)sarg.vap->va_size;
5267 bf.l_len = 0;
5268 bf.l_sysid = 0;
5269 bf.l_pid = 0;
5270 error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
5271 (offset_t)sarg.vap->va_size, cr, &ct);
5272 }
5273 }
5274
5275 if (!error && sarg.vap->va_mask != 0)
5276 error = VOP_SETATTR(vp, sarg.vap, sarg.flag, cr, &ct);
5277
5278 /* restore va_mask -- ufs_setattr clears AT_SIZE */
5279 if (saved_mask & AT_SIZE)
5280 sarg.vap->va_mask |= AT_SIZE;
5281
5282 /*
5283 * If an ACL was being set, it has been delayed until now,
5284 * in order to set the mode (via the VOP_SETATTR() above) first.
5285 */
5286 if ((! error) && (fattrp->attrmask & FATTR4_ACL_MASK)) {
5287 int i;
5288
5289 for (i = 0; i < NFS4_MAXNUM_ATTRS; i++)
5290 if (ntov.amap[i] == FATTR4_ACL)
5291 break;
5292 if (i < NFS4_MAXNUM_ATTRS) {
5293 error = (*nfs4_ntov_map[FATTR4_ACL].sv_getit)(
5294 NFS4ATTR_SETIT, &sarg, &ntov.na[i]);
5295 if (error == 0) {
5296 *resp |= FATTR4_ACL_MASK;
5297 } else if (error == ENOTSUP) {
5298 (void) rfs4_verify_attr(&sarg, resp, &ntov);
5299 status = NFS4ERR_ATTRNOTSUPP;
5300 goto done;
5301 }
5302 } else {
5303 NFS4_DEBUG(rfs4_debug,
5304 (CE_NOTE, "do_rfs4_op_setattr: "
5305 "unable to find ACL in fattr4"));
5306 error = EINVAL;
5307 }
5308 }
5309
5310 if (error) {
5311 /* check if a monitor detected a delegation conflict */
5312 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
5313 status = NFS4ERR_DELAY;
5314 else
5315 status = puterrno4(error);
5316
5317 /*
5318 * Set the response bitmap when setattr failed.
5319 * If VOP_SETATTR partially succeeded, test by doing a
5320 * VOP_GETATTR on the object and comparing the data
5321 * to the setattr arguments.
5322 */
5323 (void) rfs4_verify_attr(&sarg, resp, &ntov);
5324 } else {
5325 /*
5326 * Force modified metadata out to stable storage.
5327 */
5328 (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
5329 /*
5330 * Set response bitmap
5331 */
5332 nfs4_vmask_to_nmask_set(sarg.vap->va_mask, resp);
5333 }
5334
5335 /* Return early and already have a NFSv4 error */
5336 done:
5337 /*
5338 * Except for nfs4_vmask_to_nmask_set(), vattr --> fattr
5339 * conversion sets both readable and writeable NFS4 attrs
5340 * for AT_MTIME and AT_ATIME. The line below masks out
5341 * unrequested attrs from the setattr result bitmap. This
5342 * is placed after the done: label to catch the ATTRNOTSUP
5343 * case.
5344 */
5345 *resp &= fattrp->attrmask;
5346
5347 if (in_crit)
5348 nbl_end_crit(vp);
5349
5350 nfs4_ntov_table_free(&ntov, &sarg);
5351
5352 return (status);
5353 }
5354
5355 /* ARGSUSED */
5356 static void
5357 rfs4_op_setattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5358 struct compound_state *cs)
5359 {
5360 SETATTR4args *args = &argop->nfs_argop4_u.opsetattr;
5361 SETATTR4res *resp = &resop->nfs_resop4_u.opsetattr;
5362 bslabel_t *clabel;
5363
5364 DTRACE_NFSV4_2(op__setattr__start, struct compound_state *, cs,
5365 SETATTR4args *, args);
5366
5367 if (cs->vp == NULL) {
5368 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5369 goto out;
5370 }
5371
5372 /*
5373 * If there is an unshared filesystem mounted on this vnode,
5374 * do not allow to setattr on this vnode.
5375 */
5376 if (vn_ismntpt(cs->vp)) {
5377 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5378 goto out;
5379 }
5380
5381 resp->attrsset = 0;
5382
5383 if (rdonly4(req, cs)) {
5384 *cs->statusp = resp->status = NFS4ERR_ROFS;
5385 goto out;
5386 }
5387
5388 /* check label before setting attributes */
5389 if (is_system_labeled()) {
5390 ASSERT(req->rq_label != NULL);
5391 clabel = req->rq_label;
5392 DTRACE_PROBE2(tx__rfs4__log__info__opsetattr__clabel, char *,
5393 "got client label from request(1)",
5394 struct svc_req *, req);
5395 if (!blequal(&l_admin_low->tsl_label, clabel)) {
5396 if (!do_rfs_label_check(clabel, cs->vp,
5397 EQUALITY_CHECK, cs->exi)) {
5398 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5399 goto out;
5400 }
5401 }
5402 }
5403
5404 *cs->statusp = resp->status =
5405 do_rfs4_op_setattr(&resp->attrsset, &args->obj_attributes, cs,
5406 &args->stateid);
5407
5408 out:
5409 DTRACE_NFSV4_2(op__setattr__done, struct compound_state *, cs,
5410 SETATTR4res *, resp);
5411 }
5412
5413 /* ARGSUSED */
5414 static void
5415 rfs4_op_verify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5416 struct compound_state *cs)
5417 {
5418 /*
5419 * verify and nverify are exactly the same, except that nverify
5420 * succeeds when some argument changed, and verify succeeds when
5421 * when none changed.
5422 */
5423
5424 VERIFY4args *args = &argop->nfs_argop4_u.opverify;
5425 VERIFY4res *resp = &resop->nfs_resop4_u.opverify;
5426
5427 int error;
5428 struct nfs4_svgetit_arg sarg;
5429 struct statvfs64 sb;
5430 struct nfs4_ntov_table ntov;
5431
5432 DTRACE_NFSV4_2(op__verify__start, struct compound_state *, cs,
5433 VERIFY4args *, args);
5434
5435 if (cs->vp == NULL) {
5436 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5437 goto out;
5438 }
5439
5440 sarg.sbp = &sb;
5441 sarg.is_referral = B_FALSE;
5442 nfs4_ntov_table_init(&ntov);
5443 resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5444 &sarg, &ntov, NFS4ATTR_VERIT);
5445 if (resp->status != NFS4_OK) {
5446 /*
5447 * do_rfs4_set_attrs will try to verify systemwide attrs,
5448 * so could return -1 for "no match".
5449 */
5450 if (resp->status == -1)
5451 resp->status = NFS4ERR_NOT_SAME;
5452 goto done;
5453 }
5454 error = rfs4_verify_attr(&sarg, NULL, &ntov);
5455 switch (error) {
5456 case 0:
5457 resp->status = NFS4_OK;
5458 break;
5459 case -1:
5460 resp->status = NFS4ERR_NOT_SAME;
5461 break;
5462 default:
5463 resp->status = puterrno4(error);
5464 break;
5465 }
5466 done:
5467 *cs->statusp = resp->status;
5468 nfs4_ntov_table_free(&ntov, &sarg);
5469 out:
5470 DTRACE_NFSV4_2(op__verify__done, struct compound_state *, cs,
5471 VERIFY4res *, resp);
5472 }
5473
5474 /* ARGSUSED */
5475 static void
5476 rfs4_op_nverify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5477 struct compound_state *cs)
5478 {
5479 /*
5480 * verify and nverify are exactly the same, except that nverify
5481 * succeeds when some argument changed, and verify succeeds when
5482 * when none changed.
5483 */
5484
5485 NVERIFY4args *args = &argop->nfs_argop4_u.opnverify;
5486 NVERIFY4res *resp = &resop->nfs_resop4_u.opnverify;
5487
5488 int error;
5489 struct nfs4_svgetit_arg sarg;
5490 struct statvfs64 sb;
5491 struct nfs4_ntov_table ntov;
5492
5493 DTRACE_NFSV4_2(op__nverify__start, struct compound_state *, cs,
5494 NVERIFY4args *, args);
5495
5496 if (cs->vp == NULL) {
5497 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5498 DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5499 NVERIFY4res *, resp);
5500 return;
5501 }
5502 sarg.sbp = &sb;
5503 sarg.is_referral = B_FALSE;
5504 nfs4_ntov_table_init(&ntov);
5505 resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5506 &sarg, &ntov, NFS4ATTR_VERIT);
5507 if (resp->status != NFS4_OK) {
5508 /*
5509 * do_rfs4_set_attrs will try to verify systemwide attrs,
5510 * so could return -1 for "no match".
5511 */
5512 if (resp->status == -1)
5513 resp->status = NFS4_OK;
5514 goto done;
5515 }
5516 error = rfs4_verify_attr(&sarg, NULL, &ntov);
5517 switch (error) {
5518 case 0:
5519 resp->status = NFS4ERR_SAME;
5520 break;
5521 case -1:
5522 resp->status = NFS4_OK;
5523 break;
5524 default:
5525 resp->status = puterrno4(error);
5526 break;
5527 }
5528 done:
5529 *cs->statusp = resp->status;
5530 nfs4_ntov_table_free(&ntov, &sarg);
5531
5532 DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5533 NVERIFY4res *, resp);
5534 }
5535
/*
 * Number of iovec entries in the on-stack array used by rfs4_op_write().
 * A write whose mblk chain needs more entries than this gets its iovec
 * array from kmem_alloc() instead.
 *
 * XXX - This should live in an NFS header file.
 */
#define MAX_IOVECS 12
5540
5541 /* ARGSUSED */
5542 static void
5543 rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5544 struct compound_state *cs)
5545 {
5546 WRITE4args *args = &argop->nfs_argop4_u.opwrite;
5547 WRITE4res *resp = &resop->nfs_resop4_u.opwrite;
5548 int error;
5549 vnode_t *vp;
5550 struct vattr bva;
5551 u_offset_t rlimit;
5552 struct uio uio;
5553 struct iovec iov[MAX_IOVECS];
5554 struct iovec *iovp;
5555 int iovcnt;
5556 int ioflag;
5557 cred_t *savecred, *cr;
5558 bool_t *deleg = &cs->deleg;
5559 nfsstat4 stat;
5560 int in_crit = 0;
5561 caller_context_t ct;
5562
5563 DTRACE_NFSV4_2(op__write__start, struct compound_state *, cs,
5564 WRITE4args *, args);
5565
5566 vp = cs->vp;
5567 if (vp == NULL) {
5568 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5569 goto out;
5570 }
5571 if (cs->access == CS_ACCESS_DENIED) {
5572 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5573 goto out;
5574 }
5575
5576 cr = cs->cr;
5577
5578 if ((stat = rfs4_check_stateid(FWRITE, vp, &args->stateid, FALSE,
5579 deleg, TRUE, &ct)) != NFS4_OK) {
5580 *cs->statusp = resp->status = stat;
5581 goto out;
5582 }
5583
5584 /*
5585 * We have to enter the critical region before calling VOP_RWLOCK
5586 * to avoid a deadlock with ufs.
5587 */
5588 if (nbl_need_check(vp)) {
5589 nbl_start_crit(vp, RW_READER);
5590 in_crit = 1;
5591 if (nbl_conflict(vp, NBL_WRITE,
5592 args->offset, args->data_len, 0, &ct)) {
5593 *cs->statusp = resp->status = NFS4ERR_LOCKED;
5594 goto out;
5595 }
5596 }
5597
5598 bva.va_mask = AT_MODE | AT_UID;
5599 error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
5600
5601 /*
5602 * If we can't get the attributes, then we can't do the
5603 * right access checking. So, we'll fail the request.
5604 */
5605 if (error) {
5606 *cs->statusp = resp->status = puterrno4(error);
5607 goto out;
5608 }
5609
5610 if (rdonly4(req, cs)) {
5611 *cs->statusp = resp->status = NFS4ERR_ROFS;
5612 goto out;
5613 }
5614
5615 if (vp->v_type != VREG) {
5616 *cs->statusp = resp->status =
5617 ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
5618 goto out;
5619 }
5620
5621 if (crgetuid(cr) != bva.va_uid &&
5622 (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct))) {
5623 *cs->statusp = resp->status = puterrno4(error);
5624 goto out;
5625 }
5626
5627 if (MANDLOCK(vp, bva.va_mode)) {
5628 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5629 goto out;
5630 }
5631
5632 if (args->data_len == 0) {
5633 *cs->statusp = resp->status = NFS4_OK;
5634 resp->count = 0;
5635 resp->committed = args->stable;
5636 resp->writeverf = Write4verf;
5637 goto out;
5638 }
5639
5640 if (args->mblk != NULL) {
5641 mblk_t *m;
5642 uint_t bytes, round_len;
5643
5644 iovcnt = 0;
5645 bytes = 0;
5646 round_len = roundup(args->data_len, BYTES_PER_XDR_UNIT);
5647 for (m = args->mblk;
5648 m != NULL && bytes < round_len;
5649 m = m->b_cont) {
5650 iovcnt++;
5651 bytes += MBLKL(m);
5652 }
5653 #ifdef DEBUG
5654 /* should have ended on an mblk boundary */
5655 if (bytes != round_len) {
5656 printf("bytes=0x%x, round_len=0x%x, req len=0x%x\n",
5657 bytes, round_len, args->data_len);
5658 printf("args=%p, args->mblk=%p, m=%p", (void *)args,
5659 (void *)args->mblk, (void *)m);
5660 ASSERT(bytes == round_len);
5661 }
5662 #endif
5663 if (iovcnt <= MAX_IOVECS) {
5664 iovp = iov;
5665 } else {
5666 iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
5667 }
5668 mblk_to_iov(args->mblk, iovcnt, iovp);
5669 } else if (args->rlist != NULL) {
5670 iovcnt = 1;
5671 iovp = iov;
5672 iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
5673 iovp->iov_len = args->data_len;
5674 } else {
5675 iovcnt = 1;
5676 iovp = iov;
5677 iovp->iov_base = args->data_val;
5678 iovp->iov_len = args->data_len;
5679 }
5680
5681 uio.uio_iov = iovp;
5682 uio.uio_iovcnt = iovcnt;
5683
5684 uio.uio_segflg = UIO_SYSSPACE;
5685 uio.uio_extflg = UIO_COPY_DEFAULT;
5686 uio.uio_loffset = args->offset;
5687 uio.uio_resid = args->data_len;
5688 uio.uio_llimit = curproc->p_fsz_ctl;
5689 rlimit = uio.uio_llimit - args->offset;
5690 if (rlimit < (u_offset_t)uio.uio_resid)
5691 uio.uio_resid = (int)rlimit;
5692
5693 if (args->stable == UNSTABLE4)
5694 ioflag = 0;
5695 else if (args->stable == FILE_SYNC4)
5696 ioflag = FSYNC;
5697 else if (args->stable == DATA_SYNC4)
5698 ioflag = FDSYNC;
5699 else {
5700 if (iovp != iov)
5701 kmem_free(iovp, sizeof (*iovp) * iovcnt);
5702 *cs->statusp = resp->status = NFS4ERR_INVAL;
5703 goto out;
5704 }
5705
5706 /*
5707 * We're changing creds because VM may fault and we need
5708 * the cred of the current thread to be used if quota
5709 * checking is enabled.
5710 */
5711 savecred = curthread->t_cred;
5712 curthread->t_cred = cr;
5713 error = do_io(FWRITE, vp, &uio, ioflag, cr, &ct);
5714 curthread->t_cred = savecred;
5715
5716 if (iovp != iov)
5717 kmem_free(iovp, sizeof (*iovp) * iovcnt);
5718
5719 if (error) {
5720 *cs->statusp = resp->status = puterrno4(error);
5721 goto out;
5722 }
5723
5724 *cs->statusp = resp->status = NFS4_OK;
5725 resp->count = args->data_len - uio.uio_resid;
5726
5727 if (ioflag == 0)
5728 resp->committed = UNSTABLE4;
5729 else
5730 resp->committed = FILE_SYNC4;
5731
5732 resp->writeverf = Write4verf;
5733
5734 out:
5735 if (in_crit)
5736 nbl_end_crit(vp);
5737
5738 DTRACE_NFSV4_2(op__write__done, struct compound_state *, cs,
5739 WRITE4res *, resp);
5740 }
5741
5742
5743 /* XXX put in a header file */
5744 extern int sec_svc_getcred(struct svc_req *, cred_t *, caddr_t *, int *);
5745
5746 void
5747 rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi,
5748 struct svc_req *req, cred_t *cr, int *rv)
5749 {
5750 uint_t i;
5751 struct compound_state cs;
5752
5753 if (rv != NULL)
5754 *rv = 0;
5755 rfs4_init_compound_state(&cs);
5756 /*
5757 * Form a reply tag by copying over the reqeuest tag.
5758 */
5759 resp->tag.utf8string_val =
5760 kmem_alloc(args->tag.utf8string_len, KM_SLEEP);
5761 resp->tag.utf8string_len = args->tag.utf8string_len;
5762 bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
5763 resp->tag.utf8string_len);
5764
5765 cs.statusp = &resp->status;
5766 cs.req = req;
5767 resp->array = NULL;
5768 resp->array_len = 0;
5769
5770 /*
5771 * XXX for now, minorversion should be zero
5772 */
5773 if (args->minorversion != NFS4_MINORVERSION) {
5774 DTRACE_NFSV4_2(compound__start, struct compound_state *,
5775 &cs, COMPOUND4args *, args);
5776 resp->status = NFS4ERR_MINOR_VERS_MISMATCH;
5777 DTRACE_NFSV4_2(compound__done, struct compound_state *,
5778 &cs, COMPOUND4res *, resp);
5779 return;
5780 }
5781
5782 if (args->array_len == 0) {
5783 resp->status = NFS4_OK;
5784 return;
5785 }
5786
5787 ASSERT(exi == NULL);
5788 ASSERT(cr == NULL);
5789
5790 cr = crget();
5791 ASSERT(cr != NULL);
5792
5793 if (sec_svc_getcred(req, cr, &cs.principal, &cs.nfsflavor) == 0) {
5794 DTRACE_NFSV4_2(compound__start, struct compound_state *,
5795 &cs, COMPOUND4args *, args);
5796 crfree(cr);
5797 DTRACE_NFSV4_2(compound__done, struct compound_state *,
5798 &cs, COMPOUND4res *, resp);
5799 svcerr_badcred(req->rq_xprt);
5800 if (rv != NULL)
5801 *rv = 1;
5802 return;
5803 }
5804 resp->array_len = args->array_len;
5805 resp->array = kmem_zalloc(args->array_len * sizeof (nfs_resop4),
5806 KM_SLEEP);
5807
5808 cs.basecr = cr;
5809
5810 DTRACE_NFSV4_2(compound__start, struct compound_state *, &cs,
5811 COMPOUND4args *, args);
5812
5813 /*
5814 * For now, NFS4 compound processing must be protected by
5815 * exported_lock because it can access more than one exportinfo
5816 * per compound and share/unshare can now change multiple
5817 * exinfo structs. The NFS2/3 code only refs 1 exportinfo
5818 * per proc (excluding public exinfo), and exi_count design
5819 * is sufficient to protect concurrent execution of NFS2/3
5820 * ops along with unexport. This lock will be removed as
5821 * part of the NFSv4 phase 2 namespace redesign work.
5822 */
5823 rw_enter(&exported_lock, RW_READER);
5824
5825 /*
5826 * If this is the first compound we've seen, we need to start all
5827 * new instances' grace periods.
5828 */
5829 if (rfs4_seen_first_compound == 0) {
5830 rfs4_grace_start_new();
5831 /*
5832 * This must be set after rfs4_grace_start_new(), otherwise
5833 * another thread could proceed past here before the former
5834 * is finished.
5835 */
5836 rfs4_seen_first_compound = 1;
5837 }
5838
5839 for (i = 0; i < args->array_len && cs.cont; i++) {
5840 nfs_argop4 *argop;
5841 nfs_resop4 *resop;
5842 uint_t op;
5843
5844 argop = &args->array[i];
5845 resop = &resp->array[i];
5846 resop->resop = argop->argop;
5847 op = (uint_t)resop->resop;
5848
5849 if (op < rfsv4disp_cnt) {
5850 /*
5851 * Count the individual ops here; NULL and COMPOUND
5852 * are counted in common_dispatch()
5853 */
5854 rfsproccnt_v4_ptr[op].value.ui64++;
5855
5856 NFS4_DEBUG(rfs4_debug > 1,
5857 (CE_NOTE, "Executing %s", rfs4_op_string[op]));
5858 (*rfsv4disptab[op].dis_proc)(argop, resop, req, &cs);
5859 NFS4_DEBUG(rfs4_debug > 1, (CE_NOTE, "%s returned %d",
5860 rfs4_op_string[op], *cs.statusp));
5861 if (*cs.statusp != NFS4_OK)
5862 cs.cont = FALSE;
5863 } else {
5864 /*
5865 * This is effectively dead code since XDR code
5866 * will have already returned BADXDR if op doesn't
5867 * decode to legal value. This only done for a
5868 * day when XDR code doesn't verify v4 opcodes.
5869 */
5870 op = OP_ILLEGAL;
5871 rfsproccnt_v4_ptr[OP_ILLEGAL_IDX].value.ui64++;
5872
5873 rfs4_op_illegal(argop, resop, req, &cs);
5874 cs.cont = FALSE;
5875 }
5876
5877 /*
5878 * If not at last op, and if we are to stop, then
5879 * compact the results array.
5880 */
5881 if ((i + 1) < args->array_len && !cs.cont) {
5882 nfs_resop4 *new_res = kmem_alloc(
5883 (i+1) * sizeof (nfs_resop4), KM_SLEEP);
5884 bcopy(resp->array,
5885 new_res, (i+1) * sizeof (nfs_resop4));
5886 kmem_free(resp->array,
5887 args->array_len * sizeof (nfs_resop4));
5888
5889 resp->array_len = i + 1;
5890 resp->array = new_res;
5891 }
5892 }
5893
5894 rw_exit(&exported_lock);
5895
5896 DTRACE_NFSV4_2(compound__done, struct compound_state *, &cs,
5897 COMPOUND4res *, resp);
5898
5899 if (cs.vp)
5900 VN_RELE(cs.vp);
5901 if (cs.saved_vp)
5902 VN_RELE(cs.saved_vp);
5903 if (cs.saved_fh.nfs_fh4_val)
5904 kmem_free(cs.saved_fh.nfs_fh4_val, NFS4_FHSIZE);
5905
5906 if (cs.basecr)
5907 crfree(cs.basecr);
5908 if (cs.cr)
5909 crfree(cs.cr);
5910 /*
5911 * done with this compound request, free the label
5912 */
5913
5914 if (req->rq_label != NULL) {
5915 kmem_free(req->rq_label, sizeof (bslabel_t));
5916 req->rq_label = NULL;
5917 }
5918 }
5919
5920 /*
5921 * XXX because of what appears to be duplicate calls to rfs4_compound_free
5922 * XXX zero out the tag and array values. Need to investigate why the
5923 * XXX calls occur, but at least prevent the panic for now.
5924 */
5925 void
5926 rfs4_compound_free(COMPOUND4res *resp)
5927 {
5928 uint_t i;
5929
5930 if (resp->tag.utf8string_val) {
5931 UTF8STRING_FREE(resp->tag)
5932 }
5933
5934 for (i = 0; i < resp->array_len; i++) {
5935 nfs_resop4 *resop;
5936 uint_t op;
5937
5938 resop = &resp->array[i];
5939 op = (uint_t)resop->resop;
5940 if (op < rfsv4disp_cnt) {
5941 (*rfsv4disptab[op].dis_resfree)(resop);
5942 }
5943 }
5944 if (resp->array != NULL) {
5945 kmem_free(resp->array, resp->array_len * sizeof (nfs_resop4));
5946 }
5947 }
5948
5949 /*
5950 * Process the value of the compound request rpc flags, as a bit-AND
5951 * of the individual per-op flags (idempotent, allowork, publicfh_ok)
5952 */
5953 void
5954 rfs4_compound_flagproc(COMPOUND4args *args, int *flagp)
5955 {
5956 int i;
5957 int flag = RPC_ALL;
5958
5959 for (i = 0; flag && i < args->array_len; i++) {
5960 uint_t op;
5961
5962 op = (uint_t)args->array[i].argop;
5963
5964 if (op < rfsv4disp_cnt)
5965 flag &= rfsv4disptab[op].dis_flags;
5966 else
5967 flag = 0;
5968 }
5969 *flagp = flag;
5970 }
5971
5972 nfsstat4
5973 rfs4_client_sysid(rfs4_client_t *cp, sysid_t *sp)
5974 {
5975 nfsstat4 e;
5976
5977 rfs4_dbe_lock(cp->rc_dbe);
5978
5979 if (cp->rc_sysidt != LM_NOSYSID) {
5980 *sp = cp->rc_sysidt;
5981 e = NFS4_OK;
5982
5983 } else if ((cp->rc_sysidt = lm_alloc_sysidt()) != LM_NOSYSID) {
5984 *sp = cp->rc_sysidt;
5985 e = NFS4_OK;
5986
5987 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
5988 "rfs4_client_sysid: allocated 0x%x\n", *sp));
5989 } else
5990 e = NFS4ERR_DELAY;
5991
5992 rfs4_dbe_unlock(cp->rc_dbe);
5993 return (e);
5994 }
5995
#if defined(DEBUG) && ! defined(lint)
/*
 * Debug-only helper: emit one CE_NOTE line describing a lock request.
 * str is a caller-supplied prefix, operation is the fcntl-style command and
 * flk the lock description.  A zero l_len is printed as all ones.
 */
static void
lock_print(char *str, int operation, struct flock64 *flk)
{
	char *opname;
	char *lktype;
	longlong_t lklen;

	switch (operation) {
	case F_GETLK:
		opname = "F_GETLK";
		break;
	case F_SETLK:
		opname = "F_SETLK";
		break;
	case F_SETLK_NBMAND:
		opname = "F_SETLK_NBMAND";
		break;
	default:
		opname = "F_UNKNOWN";
		break;
	}

	switch (flk->l_type) {
	case F_UNLCK:
		lktype = "F_UNLCK";
		break;
	case F_RDLCK:
		lktype = "F_RDLCK";
		break;
	case F_WRLCK:
		lktype = "F_WRLCK";
		break;
	default:
		lktype = "F_UNKNOWN";
		break;
	}

	ASSERT(flk->l_whence == 0);

	lklen = flk->l_len ? (longlong_t)flk->l_len : ~0LL;
	cmn_err(CE_NOTE, "%s: %s, type = %s, off = %llx len = %llx pid = %d",
	    str, opname, lktype, (longlong_t)flk->l_start, lklen,
	    flk->l_pid);
}

#define	LOCK_PRINT(d, s, t, f) if (d) lock_print(s, t, f)
#else
#define	LOCK_PRINT(d, s, t, f)
#endif
6032
/*
 * Credential-set check.  No validation is performed: none of the arguments
 * are examined and every request is accepted (always returns TRUE).
 */
/*ARGSUSED*/
static bool_t
creds_ok(cred_set_t cr_set, struct svc_req *req, struct compound_state *cs)
{
	return (TRUE);
}
6039
6040 /*
6041 * Look up the pathname using the vp in cs as the directory vnode.
6042 * cs->vp will be the vnode for the file on success
6043 */
6044
6045 static nfsstat4
6046 rfs4_lookup(component4 *component, struct svc_req *req,
6047 struct compound_state *cs)
6048 {
6049 char *nm;
6050 uint32_t len;
6051 nfsstat4 status;
6052 struct sockaddr *ca;
6053 char *name;
6054
6055 if (cs->vp == NULL) {
6056 return (NFS4ERR_NOFILEHANDLE);
6057 }
6058 if (cs->vp->v_type != VDIR) {
6059 return (NFS4ERR_NOTDIR);
6060 }
6061
6062 status = utf8_dir_verify(component);
6063 if (status != NFS4_OK)
6064 return (status);
6065
6066 nm = utf8_to_fn(component, &len, NULL);
6067 if (nm == NULL) {
6068 return (NFS4ERR_INVAL);
6069 }
6070
6071 if (len > MAXNAMELEN) {
6072 kmem_free(nm, len);
6073 return (NFS4ERR_NAMETOOLONG);
6074 }
6075
6076 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6077 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6078 MAXPATHLEN + 1);
6079
6080 if (name == NULL) {
6081 kmem_free(nm, len);
6082 return (NFS4ERR_INVAL);
6083 }
6084
6085 status = do_rfs4_op_lookup(name, req, cs);
6086
6087 if (name != nm)
6088 kmem_free(name, MAXPATHLEN + 1);
6089
6090 kmem_free(nm, len);
6091
6092 return (status);
6093 }
6094
6095 static nfsstat4
6096 rfs4_lookupfile(component4 *component, struct svc_req *req,
6097 struct compound_state *cs, uint32_t access, change_info4 *cinfo)
6098 {
6099 nfsstat4 status;
6100 vnode_t *dvp = cs->vp;
6101 vattr_t bva, ava, fva;
6102 int error;
6103
6104 /* Get "before" change value */
6105 bva.va_mask = AT_CTIME|AT_SEQ;
6106 error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6107 if (error)
6108 return (puterrno4(error));
6109
6110 /* rfs4_lookup may VN_RELE directory */
6111 VN_HOLD(dvp);
6112
6113 status = rfs4_lookup(component, req, cs);
6114 if (status != NFS4_OK) {
6115 VN_RELE(dvp);
6116 return (status);
6117 }
6118
6119 /*
6120 * Get "after" change value, if it fails, simply return the
6121 * before value.
6122 */
6123 ava.va_mask = AT_CTIME|AT_SEQ;
6124 if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6125 ava.va_ctime = bva.va_ctime;
6126 ava.va_seq = 0;
6127 }
6128 VN_RELE(dvp);
6129
6130 /*
6131 * Validate the file is a file
6132 */
6133 fva.va_mask = AT_TYPE|AT_MODE;
6134 error = VOP_GETATTR(cs->vp, &fva, 0, cs->cr, NULL);
6135 if (error)
6136 return (puterrno4(error));
6137
6138 if (fva.va_type != VREG) {
6139 if (fva.va_type == VDIR)
6140 return (NFS4ERR_ISDIR);
6141 if (fva.va_type == VLNK)
6142 return (NFS4ERR_SYMLINK);
6143 return (NFS4ERR_INVAL);
6144 }
6145
6146 NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime);
6147 NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6148
6149 /*
6150 * It is undefined if VOP_LOOKUP will change va_seq, so
6151 * cinfo.atomic = TRUE only if we have
6152 * non-zero va_seq's, and they have not changed.
6153 */
6154 if (bva.va_seq && ava.va_seq && ava.va_seq == bva.va_seq)
6155 cinfo->atomic = TRUE;
6156 else
6157 cinfo->atomic = FALSE;
6158
6159 /* Check for mandatory locking */
6160 cs->mandlock = MANDLOCK(cs->vp, fva.va_mode);
6161 return (check_open_access(access, cs, req));
6162 }
6163
6164 static nfsstat4
6165 create_vnode(vnode_t *dvp, char *nm, vattr_t *vap, createmode4 mode,
6166 cred_t *cr, vnode_t **vpp, bool_t *created)
6167 {
6168 int error;
6169 nfsstat4 status = NFS4_OK;
6170 vattr_t va;
6171
6172 tryagain:
6173
6174 /*
6175 * The file open mode used is VWRITE. If the client needs
6176 * some other semantic, then it should do the access checking
6177 * itself. It would have been nice to have the file open mode
6178 * passed as part of the arguments.
6179 */
6180
6181 *created = TRUE;
6182 error = VOP_CREATE(dvp, nm, vap, EXCL, VWRITE, vpp, cr, 0, NULL, NULL);
6183
6184 if (error) {
6185 *created = FALSE;
6186
6187 /*
6188 * If we got something other than file already exists
6189 * then just return this error. Otherwise, we got
6190 * EEXIST. If we were doing a GUARDED create, then
6191 * just return this error. Otherwise, we need to
6192 * make sure that this wasn't a duplicate of an
6193 * exclusive create request.
6194 *
6195 * The assumption is made that a non-exclusive create
6196 * request will never return EEXIST.
6197 */
6198
6199 if (error != EEXIST || mode == GUARDED4) {
6200 status = puterrno4(error);
6201 return (status);
6202 }
6203 error = VOP_LOOKUP(dvp, nm, vpp, NULL, 0, NULL, cr,
6204 NULL, NULL, NULL);
6205
6206 if (error) {
6207 /*
6208 * We couldn't find the file that we thought that
6209 * we just created. So, we'll just try creating
6210 * it again.
6211 */
6212 if (error == ENOENT)
6213 goto tryagain;
6214
6215 status = puterrno4(error);
6216 return (status);
6217 }
6218
6219 if (mode == UNCHECKED4) {
6220 /* existing object must be regular file */
6221 if ((*vpp)->v_type != VREG) {
6222 if ((*vpp)->v_type == VDIR)
6223 status = NFS4ERR_ISDIR;
6224 else if ((*vpp)->v_type == VLNK)
6225 status = NFS4ERR_SYMLINK;
6226 else
6227 status = NFS4ERR_INVAL;
6228 VN_RELE(*vpp);
6229 return (status);
6230 }
6231
6232 return (NFS4_OK);
6233 }
6234
6235 /* Check for duplicate request */
6236 va.va_mask = AT_MTIME;
6237 error = VOP_GETATTR(*vpp, &va, 0, cr, NULL);
6238 if (!error) {
6239 /* We found the file */
6240 const timestruc_t *mtime = &vap->va_mtime;
6241
6242 if (va.va_mtime.tv_sec != mtime->tv_sec ||
6243 va.va_mtime.tv_nsec != mtime->tv_nsec) {
6244 /* but its not our creation */
6245 VN_RELE(*vpp);
6246 return (NFS4ERR_EXIST);
6247 }
6248 *created = TRUE; /* retrans of create == created */
6249 return (NFS4_OK);
6250 }
6251 VN_RELE(*vpp);
6252 return (NFS4ERR_EXIST);
6253 }
6254
6255 return (NFS4_OK);
6256 }
6257
6258 static nfsstat4
6259 check_open_access(uint32_t access, struct compound_state *cs,
6260 struct svc_req *req)
6261 {
6262 int error;
6263 vnode_t *vp;
6264 bool_t readonly;
6265 cred_t *cr = cs->cr;
6266
6267 /* For now we don't allow mandatory locking as per V2/V3 */
6268 if (cs->access == CS_ACCESS_DENIED || cs->mandlock) {
6269 return (NFS4ERR_ACCESS);
6270 }
6271
6272 vp = cs->vp;
6273 ASSERT(cr != NULL && vp->v_type == VREG);
6274
6275 /*
6276 * If the file system is exported read only and we are trying
6277 * to open for write, then return NFS4ERR_ROFS
6278 */
6279
6280 readonly = rdonly4(req, cs);
6281
6282 if ((access & OPEN4_SHARE_ACCESS_WRITE) && readonly)
6283 return (NFS4ERR_ROFS);
6284
6285 if (access & OPEN4_SHARE_ACCESS_READ) {
6286 if ((VOP_ACCESS(vp, VREAD, 0, cr, NULL) != 0) &&
6287 (VOP_ACCESS(vp, VEXEC, 0, cr, NULL) != 0)) {
6288 return (NFS4ERR_ACCESS);
6289 }
6290 }
6291
6292 if (access & OPEN4_SHARE_ACCESS_WRITE) {
6293 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
6294 if (error)
6295 return (NFS4ERR_ACCESS);
6296 }
6297
6298 return (NFS4_OK);
6299 }
6300
6301 static nfsstat4
6302 rfs4_createfile(OPEN4args *args, struct svc_req *req, struct compound_state *cs,
6303 change_info4 *cinfo, bitmap4 *attrset, clientid4 clientid)
6304 {
6305 struct nfs4_svgetit_arg sarg;
6306 struct nfs4_ntov_table ntov;
6307
6308 bool_t ntov_table_init = FALSE;
6309 struct statvfs64 sb;
6310 nfsstat4 status;
6311 vnode_t *vp;
6312 vattr_t bva, ava, iva, cva, *vap;
6313 vnode_t *dvp;
6314 timespec32_t *mtime;
6315 char *nm = NULL;
6316 uint_t buflen;
6317 bool_t created;
6318 bool_t setsize = FALSE;
6319 len_t reqsize;
6320 int error;
6321 bool_t trunc;
6322 caller_context_t ct;
6323 component4 *component;
6324 bslabel_t *clabel;
6325 struct sockaddr *ca;
6326 char *name = NULL;
6327
6328 sarg.sbp = &sb;
6329 sarg.is_referral = B_FALSE;
6330
6331 dvp = cs->vp;
6332
6333 /* Check if the file system is read only */
6334 if (rdonly4(req, cs))
6335 return (NFS4ERR_ROFS);
6336
6337 /* check the label of including directory */
6338 if (is_system_labeled()) {
6339 ASSERT(req->rq_label != NULL);
6340 clabel = req->rq_label;
6341 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
6342 "got client label from request(1)",
6343 struct svc_req *, req);
6344 if (!blequal(&l_admin_low->tsl_label, clabel)) {
6345 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
6346 cs->exi)) {
6347 return (NFS4ERR_ACCESS);
6348 }
6349 }
6350 }
6351
6352 /*
6353 * Get the last component of path name in nm. cs will reference
6354 * the including directory on success.
6355 */
6356 component = &args->open_claim4_u.file;
6357 status = utf8_dir_verify(component);
6358 if (status != NFS4_OK)
6359 return (status);
6360
6361 nm = utf8_to_fn(component, &buflen, NULL);
6362
6363 if (nm == NULL)
6364 return (NFS4ERR_RESOURCE);
6365
6366 if (buflen > MAXNAMELEN) {
6367 kmem_free(nm, buflen);
6368 return (NFS4ERR_NAMETOOLONG);
6369 }
6370
6371 bva.va_mask = AT_TYPE|AT_CTIME|AT_SEQ;
6372 error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6373 if (error) {
6374 kmem_free(nm, buflen);
6375 return (puterrno4(error));
6376 }
6377
6378 if (bva.va_type != VDIR) {
6379 kmem_free(nm, buflen);
6380 return (NFS4ERR_NOTDIR);
6381 }
6382
6383 NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime)
6384
6385 switch (args->mode) {
6386 case GUARDED4:
6387 /*FALLTHROUGH*/
6388 case UNCHECKED4:
6389 nfs4_ntov_table_init(&ntov);
6390 ntov_table_init = TRUE;
6391
6392 *attrset = 0;
6393 status = do_rfs4_set_attrs(attrset,
6394 &args->createhow4_u.createattrs,
6395 cs, &sarg, &ntov, NFS4ATTR_SETIT);
6396
6397 if (status == NFS4_OK && (sarg.vap->va_mask & AT_TYPE) &&
6398 sarg.vap->va_type != VREG) {
6399 if (sarg.vap->va_type == VDIR)
6400 status = NFS4ERR_ISDIR;
6401 else if (sarg.vap->va_type == VLNK)
6402 status = NFS4ERR_SYMLINK;
6403 else
6404 status = NFS4ERR_INVAL;
6405 }
6406
6407 if (status != NFS4_OK) {
6408 kmem_free(nm, buflen);
6409 nfs4_ntov_table_free(&ntov, &sarg);
6410 *attrset = 0;
6411 return (status);
6412 }
6413
6414 vap = sarg.vap;
6415 vap->va_type = VREG;
6416 vap->va_mask |= AT_TYPE;
6417
6418 if ((vap->va_mask & AT_MODE) == 0) {
6419 vap->va_mask |= AT_MODE;
6420 vap->va_mode = (mode_t)0600;
6421 }
6422
6423 if (vap->va_mask & AT_SIZE) {
6424
6425 /* Disallow create with a non-zero size */
6426
6427 if ((reqsize = sarg.vap->va_size) != 0) {
6428 kmem_free(nm, buflen);
6429 nfs4_ntov_table_free(&ntov, &sarg);
6430 *attrset = 0;
6431 return (NFS4ERR_INVAL);
6432 }
6433 setsize = TRUE;
6434 }
6435 break;
6436
6437 case EXCLUSIVE4:
6438 /* prohibit EXCL create of named attributes */
6439 if (dvp->v_flag & V_XATTRDIR) {
6440 kmem_free(nm, buflen);
6441 *attrset = 0;
6442 return (NFS4ERR_INVAL);
6443 }
6444
6445 cva.va_mask = AT_TYPE | AT_MTIME | AT_MODE;
6446 cva.va_type = VREG;
6447 /*
6448 * Ensure no time overflows. Assumes underlying
6449 * filesystem supports at least 32 bits.
6450 * Truncate nsec to usec resolution to allow valid
6451 * compares even if the underlying filesystem truncates.
6452 */
6453 mtime = (timespec32_t *)&args->createhow4_u.createverf;
6454 cva.va_mtime.tv_sec = mtime->tv_sec % TIME32_MAX;
6455 cva.va_mtime.tv_nsec = (mtime->tv_nsec / 1000) * 1000;
6456 cva.va_mode = (mode_t)0;
6457 vap = &cva;
6458
6459 /*
6460 * For EXCL create, attrset is set to the server attr
6461 * used to cache the client's verifier.
6462 */
6463 *attrset = FATTR4_TIME_MODIFY_MASK;
6464 break;
6465 }
6466
6467 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6468 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6469 MAXPATHLEN + 1);
6470
6471 if (name == NULL) {
6472 kmem_free(nm, buflen);
6473 return (NFS4ERR_SERVERFAULT);
6474 }
6475
6476 status = create_vnode(dvp, name, vap, args->mode,
6477 cs->cr, &vp, &created);
6478 if (nm != name)
6479 kmem_free(name, MAXPATHLEN + 1);
6480 kmem_free(nm, buflen);
6481
6482 if (status != NFS4_OK) {
6483 if (ntov_table_init)
6484 nfs4_ntov_table_free(&ntov, &sarg);
6485 *attrset = 0;
6486 return (status);
6487 }
6488
6489 trunc = (setsize && !created);
6490
6491 if (args->mode != EXCLUSIVE4) {
6492 bitmap4 createmask = args->createhow4_u.createattrs.attrmask;
6493
6494 /*
6495 * True verification that object was created with correct
6496 * attrs is impossible. The attrs could have been changed
6497 * immediately after object creation. If attributes did
6498 * not verify, the only recourse for the server is to
6499 * destroy the object. Maybe if some attrs (like gid)
6500 * are set incorrectly, the object should be destroyed;
6501 * however, seems bad as a default policy. Do we really
6502 * want to destroy an object over one of the times not
6503 * verifying correctly? For these reasons, the server
6504 * currently sets bits in attrset for createattrs
6505 * that were set; however, no verification is done.
6506 *
6507 * vmask_to_nmask accounts for vattr bits set on create
6508 * [do_rfs4_set_attrs() only sets resp bits for
6509 * non-vattr/vfs bits.]
6510 * Mask off any bits we set by default so as not to return
6511 * more attrset bits than were requested in createattrs
6512 */
6513 if (created) {
6514 nfs4_vmask_to_nmask(sarg.vap->va_mask, attrset);
6515 *attrset &= createmask;
6516 } else {
6517 /*
6518 * We did not create the vnode (we tried but it
6519 * already existed). In this case, the only createattr
6520 * that the spec allows the server to set is size,
6521 * and even then, it can only be set if it is 0.
6522 */
6523 *attrset = 0;
6524 if (trunc)
6525 *attrset = FATTR4_SIZE_MASK;
6526 }
6527 }
6528 if (ntov_table_init)
6529 nfs4_ntov_table_free(&ntov, &sarg);
6530
6531 /*
6532 * Get the initial "after" sequence number, if it fails,
6533 * set to zero, time to before.
6534 */
6535 iva.va_mask = AT_CTIME|AT_SEQ;
6536 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL)) {
6537 iva.va_seq = 0;
6538 iva.va_ctime = bva.va_ctime;
6539 }
6540
6541 /*
6542 * create_vnode attempts to create the file exclusive,
6543 * if it already exists the VOP_CREATE will fail and
6544 * may not increase va_seq. It is atomic if
6545 * we haven't changed the directory, but if it has changed
6546 * we don't know what changed it.
6547 */
6548 if (!created) {
6549 if (bva.va_seq && iva.va_seq &&
6550 bva.va_seq == iva.va_seq)
6551 cinfo->atomic = TRUE;
6552 else
6553 cinfo->atomic = FALSE;
6554 NFS4_SET_FATTR4_CHANGE(cinfo->after, iva.va_ctime);
6555 } else {
6556 /*
6557 * The entry was created, we need to sync the
6558 * directory metadata.
6559 */
6560 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
6561
6562 /*
6563 * Get "after" change value, if it fails, simply return the
6564 * before value.
6565 */
6566 ava.va_mask = AT_CTIME|AT_SEQ;
6567 if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6568 ava.va_ctime = bva.va_ctime;
6569 ava.va_seq = 0;
6570 }
6571
6572 NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6573
6574 /*
6575 * The cinfo->atomic = TRUE only if we have
6576 * non-zero va_seq's, and it has incremented by exactly one
6577 * during the create_vnode and it didn't
6578 * change during the VOP_FSYNC.
6579 */
6580 if (bva.va_seq && iva.va_seq && ava.va_seq &&
6581 iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
6582 cinfo->atomic = TRUE;
6583 else
6584 cinfo->atomic = FALSE;
6585 }
6586
6587 /* Check for mandatory locking and that the size gets set. */
6588 cva.va_mask = AT_MODE;
6589 if (setsize)
6590 cva.va_mask |= AT_SIZE;
6591
6592 /* Assume the worst */
6593 cs->mandlock = TRUE;
6594
6595 if (VOP_GETATTR(vp, &cva, 0, cs->cr, NULL) == 0) {
6596 cs->mandlock = MANDLOCK(cs->vp, cva.va_mode);
6597
6598 /*
6599 * Truncate the file if necessary; this would be
6600 * the case for create over an existing file.
6601 */
6602
6603 if (trunc) {
6604 int in_crit = 0;
6605 rfs4_file_t *fp;
6606 bool_t create = FALSE;
6607
6608 /*
6609 * We are writing over an existing file.
6610 * Check to see if we need to recall a delegation.
6611 */
6612 rfs4_hold_deleg_policy();
6613 if ((fp = rfs4_findfile(vp, NULL, &create)) != NULL) {
6614 if (rfs4_check_delegated_byfp(FWRITE, fp,
6615 (reqsize == 0), FALSE, FALSE, &clientid)) {
6616 rfs4_file_rele(fp);
6617 rfs4_rele_deleg_policy();
6618 VN_RELE(vp);
6619 *attrset = 0;
6620 return (NFS4ERR_DELAY);
6621 }
6622 rfs4_file_rele(fp);
6623 }
6624 rfs4_rele_deleg_policy();
6625
6626 if (nbl_need_check(vp)) {
6627 in_crit = 1;
6628
6629 ASSERT(reqsize == 0);
6630
6631 nbl_start_crit(vp, RW_READER);
6632 if (nbl_conflict(vp, NBL_WRITE, 0,
6633 cva.va_size, 0, NULL)) {
6634 in_crit = 0;
6635 nbl_end_crit(vp);
6636 VN_RELE(vp);
6637 *attrset = 0;
6638 return (NFS4ERR_ACCESS);
6639 }
6640 }
6641 ct.cc_sysid = 0;
6642 ct.cc_pid = 0;
6643 ct.cc_caller_id = nfs4_srv_caller_id;
6644 ct.cc_flags = CC_DONTBLOCK;
6645
6646 cva.va_mask = AT_SIZE;
6647 cva.va_size = reqsize;
6648 (void) VOP_SETATTR(vp, &cva, 0, cs->cr, &ct);
6649 if (in_crit)
6650 nbl_end_crit(vp);
6651 }
6652 }
6653
6654 error = makefh4(&cs->fh, vp, cs->exi);
6655
6656 /*
6657 * Force modified data and metadata out to stable storage.
6658 */
6659 (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
6660
6661 if (error) {
6662 VN_RELE(vp);
6663 *attrset = 0;
6664 return (puterrno4(error));
6665 }
6666
6667 /* if parent dir is attrdir, set namedattr fh flag */
6668 if (dvp->v_flag & V_XATTRDIR)
6669 set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
6670
6671 if (cs->vp)
6672 VN_RELE(cs->vp);
6673
6674 cs->vp = vp;
6675
6676 /*
6677 * if we did not create the file, we will need to check
6678 * the access bits on the file
6679 */
6680
6681 if (!created) {
6682 if (setsize)
6683 args->share_access |= OPEN4_SHARE_ACCESS_WRITE;
6684 status = check_open_access(args->share_access, cs, req);
6685 if (status != NFS4_OK)
6686 *attrset = 0;
6687 }
6688 return (status);
6689 }
6690
6691 /*ARGSUSED*/
6692 static void
6693 rfs4_do_open(struct compound_state *cs, struct svc_req *req,
6694 rfs4_openowner_t *oo, delegreq_t deleg,
6695 uint32_t access, uint32_t deny,
6696 OPEN4res *resp, int deleg_cur)
6697 {
6698 /* XXX Currently not using req */
6699 rfs4_state_t *sp;
6700 rfs4_file_t *fp;
6701 bool_t screate = TRUE;
6702 bool_t fcreate = TRUE;
6703 uint32_t open_a, share_a;
6704 uint32_t open_d, share_d;
6705 rfs4_deleg_state_t *dsp;
6706 sysid_t sysid;
6707 nfsstat4 status;
6708 caller_context_t ct;
6709 int fflags = 0;
6710 int recall = 0;
6711 int err;
6712 int first_open;
6713
6714 /* get the file struct and hold a lock on it during initial open */
6715 fp = rfs4_findfile_withlock(cs->vp, &cs->fh, &fcreate);
6716 if (fp == NULL) {
6717 resp->status = NFS4ERR_RESOURCE;
6718 DTRACE_PROBE1(nfss__e__do__open1, nfsstat4, resp->status);
6719 return;
6720 }
6721
6722 sp = rfs4_findstate_by_owner_file(oo, fp, &screate);
6723 if (sp == NULL) {
6724 resp->status = NFS4ERR_RESOURCE;
6725 DTRACE_PROBE1(nfss__e__do__open2, nfsstat4, resp->status);
6726 /* No need to keep any reference */
6727 rw_exit(&fp->rf_file_rwlock);
6728 rfs4_file_rele(fp);
6729 return;
6730 }
6731
6732 /* try to get the sysid before continuing */
6733 if ((status = rfs4_client_sysid(oo->ro_client, &sysid)) != NFS4_OK) {
6734 resp->status = status;
6735 rfs4_file_rele(fp);
6736 /* Not a fully formed open; "close" it */
6737 if (screate == TRUE)
6738 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6739 rfs4_state_rele(sp);
6740 return;
6741 }
6742
6743 /* Calculate the fflags for this OPEN. */
6744 if (access & OPEN4_SHARE_ACCESS_READ)
6745 fflags |= FREAD;
6746 if (access & OPEN4_SHARE_ACCESS_WRITE)
6747 fflags |= FWRITE;
6748
6749 rfs4_dbe_lock(sp->rs_dbe);
6750
6751 /*
6752 * Calculate the new deny and access mode that this open is adding to
6753 * the file for this open owner;
6754 */
6755 open_d = (deny & ~sp->rs_open_deny);
6756 open_a = (access & ~sp->rs_open_access);
6757
6758 /*
6759 * Calculate the new share access and share deny modes that this open
6760 * is adding to the file for this open owner;
6761 */
6762 share_a = (access & ~sp->rs_share_access);
6763 share_d = (deny & ~sp->rs_share_deny);
6764
6765 first_open = (sp->rs_open_access & OPEN4_SHARE_ACCESS_BOTH) == 0;
6766
6767 /*
6768 * Check to see the client has already sent an open for this
6769 * open owner on this file with the same share/deny modes.
6770 * If so, we don't need to check for a conflict and we don't
6771 * need to add another shrlock. If not, then we need to
6772 * check for conflicts in deny and access before checking for
6773 * conflicts in delegation. We don't want to recall a
6774 * delegation based on an open that will eventually fail based
6775 * on shares modes.
6776 */
6777
6778 if (share_a || share_d) {
6779 if ((err = rfs4_share(sp, access, deny)) != 0) {
6780 rfs4_dbe_unlock(sp->rs_dbe);
6781 resp->status = err;
6782
6783 rfs4_file_rele(fp);
6784 /* Not a fully formed open; "close" it */
6785 if (screate == TRUE)
6786 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6787 rfs4_state_rele(sp);
6788 return;
6789 }
6790 }
6791
6792 rfs4_dbe_lock(fp->rf_dbe);
6793
6794 /*
6795 * Check to see if this file is delegated and if so, if a
6796 * recall needs to be done.
6797 */
6798 if (rfs4_check_recall(sp, access)) {
6799 rfs4_dbe_unlock(fp->rf_dbe);
6800 rfs4_dbe_unlock(sp->rs_dbe);
6801 rfs4_recall_deleg(fp, FALSE, sp->rs_owner->ro_client);
6802 delay(NFS4_DELEGATION_CONFLICT_DELAY);
6803 rfs4_dbe_lock(sp->rs_dbe);
6804
6805 /* if state closed while lock was dropped */
6806 if (sp->rs_closed) {
6807 if (share_a || share_d)
6808 (void) rfs4_unshare(sp);
6809 rfs4_dbe_unlock(sp->rs_dbe);
6810 rfs4_file_rele(fp);
6811 /* Not a fully formed open; "close" it */
6812 if (screate == TRUE)
6813 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6814 rfs4_state_rele(sp);
6815 resp->status = NFS4ERR_OLD_STATEID;
6816 return;
6817 }
6818
6819 rfs4_dbe_lock(fp->rf_dbe);
6820 /* Let's see if the delegation was returned */
6821 if (rfs4_check_recall(sp, access)) {
6822 rfs4_dbe_unlock(fp->rf_dbe);
6823 if (share_a || share_d)
6824 (void) rfs4_unshare(sp);
6825 rfs4_dbe_unlock(sp->rs_dbe);
6826 rfs4_file_rele(fp);
6827 rfs4_update_lease(sp->rs_owner->ro_client);
6828
6829 /* Not a fully formed open; "close" it */
6830 if (screate == TRUE)
6831 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6832 rfs4_state_rele(sp);
6833 resp->status = NFS4ERR_DELAY;
6834 return;
6835 }
6836 }
6837 /*
6838 * the share check passed and any delegation conflict has been
6839 * taken care of, now call vop_open.
6840 * if this is the first open then call vop_open with fflags.
6841 * if not, call vn_open_upgrade with just the upgrade flags.
6842 *
6843 * if the file has been opened already, it will have the current
6844 * access mode in the state struct. if it has no share access, then
6845 * this is a new open.
6846 *
6847 * However, if this is open with CLAIM_DLEGATE_CUR, then don't
6848 * call VOP_OPEN(), just do the open upgrade.
6849 */
6850 if (first_open && !deleg_cur) {
6851 ct.cc_sysid = sysid;
6852 ct.cc_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
6853 ct.cc_caller_id = nfs4_srv_caller_id;
6854 ct.cc_flags = CC_DONTBLOCK;
6855 err = VOP_OPEN(&cs->vp, fflags, cs->cr, &ct);
6856 if (err) {
6857 rfs4_dbe_unlock(fp->rf_dbe);
6858 if (share_a || share_d)
6859 (void) rfs4_unshare(sp);
6860 rfs4_dbe_unlock(sp->rs_dbe);
6861 rfs4_file_rele(fp);
6862
6863 /* Not a fully formed open; "close" it */
6864 if (screate == TRUE)
6865 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6866 rfs4_state_rele(sp);
6867 /* check if a monitor detected a delegation conflict */
6868 if (err == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
6869 resp->status = NFS4ERR_DELAY;
6870 else
6871 resp->status = NFS4ERR_SERVERFAULT;
6872 return;
6873 }
6874 } else { /* open upgrade */
6875 /*
6876 * calculate the fflags for the new mode that is being added
6877 * by this upgrade.
6878 */
6879 fflags = 0;
6880 if (open_a & OPEN4_SHARE_ACCESS_READ)
6881 fflags |= FREAD;
6882 if (open_a & OPEN4_SHARE_ACCESS_WRITE)
6883 fflags |= FWRITE;
6884 vn_open_upgrade(cs->vp, fflags);
6885 }
6886 sp->rs_open_access |= access;
6887 sp->rs_open_deny |= deny;
6888
6889 if (open_d & OPEN4_SHARE_DENY_READ)
6890 fp->rf_deny_read++;
6891 if (open_d & OPEN4_SHARE_DENY_WRITE)
6892 fp->rf_deny_write++;
6893 fp->rf_share_deny |= deny;
6894
6895 if (open_a & OPEN4_SHARE_ACCESS_READ)
6896 fp->rf_access_read++;
6897 if (open_a & OPEN4_SHARE_ACCESS_WRITE)
6898 fp->rf_access_write++;
6899 fp->rf_share_access |= access;
6900
6901 /*
6902 * Check for delegation here. if the deleg argument is not
6903 * DELEG_ANY, then this is a reclaim from a client and
6904 * we must honor the delegation requested. If necessary we can
6905 * set the recall flag.
6906 */
6907
6908 dsp = rfs4_grant_delegation(deleg, sp, &recall);
6909
6910 cs->deleg = (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE);
6911
6912 next_stateid(&sp->rs_stateid);
6913
6914 resp->stateid = sp->rs_stateid.stateid;
6915
6916 rfs4_dbe_unlock(fp->rf_dbe);
6917 rfs4_dbe_unlock(sp->rs_dbe);
6918
6919 if (dsp) {
6920 rfs4_set_deleg_response(dsp, &resp->delegation, NULL, recall);
6921 rfs4_deleg_state_rele(dsp);
6922 }
6923
6924 rfs4_file_rele(fp);
6925 rfs4_state_rele(sp);
6926
6927 resp->status = NFS4_OK;
6928 }
6929
6930 /*ARGSUSED*/
6931 static void
6932 rfs4_do_opennull(struct compound_state *cs, struct svc_req *req,
6933 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6934 {
6935 change_info4 *cinfo = &resp->cinfo;
6936 bitmap4 *attrset = &resp->attrset;
6937
6938 if (args->opentype == OPEN4_NOCREATE)
6939 resp->status = rfs4_lookupfile(&args->open_claim4_u.file,
6940 req, cs, args->share_access, cinfo);
6941 else {
6942 /* inhibit delegation grants during exclusive create */
6943
6944 if (args->mode == EXCLUSIVE4)
6945 rfs4_disable_delegation();
6946
6947 resp->status = rfs4_createfile(args, req, cs, cinfo, attrset,
6948 oo->ro_client->rc_clientid);
6949 }
6950
6951 if (resp->status == NFS4_OK) {
6952
6953 /* cs->vp cs->fh now reference the desired file */
6954
6955 rfs4_do_open(cs, req, oo,
6956 oo->ro_need_confirm ? DELEG_NONE : DELEG_ANY,
6957 args->share_access, args->share_deny, resp, 0);
6958
6959 /*
6960 * If rfs4_createfile set attrset, we must
6961 * clear this attrset before the response is copied.
6962 */
6963 if (resp->status != NFS4_OK && resp->attrset) {
6964 resp->attrset = 0;
6965 }
6966 }
6967 else
6968 *cs->statusp = resp->status;
6969
6970 if (args->mode == EXCLUSIVE4)
6971 rfs4_enable_delegation();
6972 }
6973
6974 /*ARGSUSED*/
6975 static void
6976 rfs4_do_openprev(struct compound_state *cs, struct svc_req *req,
6977 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6978 {
6979 change_info4 *cinfo = &resp->cinfo;
6980 vattr_t va;
6981 vtype_t v_type = cs->vp->v_type;
6982 int error = 0;
6983
6984 /* Verify that we have a regular file */
6985 if (v_type != VREG) {
6986 if (v_type == VDIR)
6987 resp->status = NFS4ERR_ISDIR;
6988 else if (v_type == VLNK)
6989 resp->status = NFS4ERR_SYMLINK;
6990 else
6991 resp->status = NFS4ERR_INVAL;
6992 return;
6993 }
6994
6995 va.va_mask = AT_MODE|AT_UID;
6996 error = VOP_GETATTR(cs->vp, &va, 0, cs->cr, NULL);
6997 if (error) {
6998 resp->status = puterrno4(error);
6999 return;
7000 }
7001
7002 cs->mandlock = MANDLOCK(cs->vp, va.va_mode);
7003
7004 /*
7005 * Check if we have access to the file, Note the the file
7006 * could have originally been open UNCHECKED or GUARDED
7007 * with mode bits that will now fail, but there is nothing
7008 * we can really do about that except in the case that the
7009 * owner of the file is the one requesting the open.
7010 */
7011 if (crgetuid(cs->cr) != va.va_uid) {
7012 resp->status = check_open_access(args->share_access, cs, req);
7013 if (resp->status != NFS4_OK) {
7014 return;
7015 }
7016 }
7017
7018 /*
7019 * cinfo on a CLAIM_PREVIOUS is undefined, initialize to zero
7020 */
7021 cinfo->before = 0;
7022 cinfo->after = 0;
7023 cinfo->atomic = FALSE;
7024
7025 rfs4_do_open(cs, req, oo,
7026 NFS4_DELEG4TYPE2REQTYPE(args->open_claim4_u.delegate_type),
7027 args->share_access, args->share_deny, resp, 0);
7028 }
7029
7030 static void
7031 rfs4_do_opendelcur(struct compound_state *cs, struct svc_req *req,
7032 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7033 {
7034 int error;
7035 nfsstat4 status;
7036 stateid4 stateid =
7037 args->open_claim4_u.delegate_cur_info.delegate_stateid;
7038 rfs4_deleg_state_t *dsp;
7039
7040 /*
7041 * Find the state info from the stateid and confirm that the
7042 * file is delegated. If the state openowner is the same as
7043 * the supplied openowner we're done. If not, get the file
7044 * info from the found state info. Use that file info to
7045 * create the state for this lock owner. Note solaris doen't
7046 * really need the pathname to find the file. We may want to
7047 * lookup the pathname and make sure that the vp exist and
7048 * matches the vp in the file structure. However it is
7049 * possible that the pathname nolonger exists (local process
7050 * unlinks the file), so this may not be that useful.
7051 */
7052
7053 status = rfs4_get_deleg_state(&stateid, &dsp);
7054 if (status != NFS4_OK) {
7055 resp->status = status;
7056 return;
7057 }
7058
7059 ASSERT(dsp->rds_finfo->rf_dinfo.rd_dtype != OPEN_DELEGATE_NONE);
7060
7061 /*
7062 * New lock owner, create state. Since this was probably called
7063 * in response to a CB_RECALL we set deleg to DELEG_NONE
7064 */
7065
7066 ASSERT(cs->vp != NULL);
7067 VN_RELE(cs->vp);
7068 VN_HOLD(dsp->rds_finfo->rf_vp);
7069 cs->vp = dsp->rds_finfo->rf_vp;
7070
7071 if (error = makefh4(&cs->fh, cs->vp, cs->exi)) {
7072 rfs4_deleg_state_rele(dsp);
7073 *cs->statusp = resp->status = puterrno4(error);
7074 return;
7075 }
7076
7077 /* Mark progress for delegation returns */
7078 dsp->rds_finfo->rf_dinfo.rd_time_lastwrite = gethrestime_sec();
7079 rfs4_deleg_state_rele(dsp);
7080 rfs4_do_open(cs, req, oo, DELEG_NONE,
7081 args->share_access, args->share_deny, resp, 1);
7082 }
7083
7084 /*ARGSUSED*/
7085 static void
7086 rfs4_do_opendelprev(struct compound_state *cs, struct svc_req *req,
7087 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7088 {
7089 /*
7090 * Lookup the pathname, it must already exist since this file
7091 * was delegated.
7092 *
7093 * Find the file and state info for this vp and open owner pair.
7094 * check that they are in fact delegated.
7095 * check that the state access and deny modes are the same.
7096 *
7097 * Return the delgation possibly seting the recall flag.
7098 */
7099 rfs4_file_t *fp;
7100 rfs4_state_t *sp;
7101 bool_t create = FALSE;
7102 bool_t dcreate = FALSE;
7103 rfs4_deleg_state_t *dsp;
7104 nfsace4 *ace;
7105
7106 /* Note we ignore oflags */
7107 resp->status = rfs4_lookupfile(&args->open_claim4_u.file_delegate_prev,
7108 req, cs, args->share_access, &resp->cinfo);
7109
7110 if (resp->status != NFS4_OK) {
7111 return;
7112 }
7113
7114 /* get the file struct and hold a lock on it during initial open */
7115 fp = rfs4_findfile_withlock(cs->vp, NULL, &create);
7116 if (fp == NULL) {
7117 resp->status = NFS4ERR_RESOURCE;
7118 DTRACE_PROBE1(nfss__e__do_opendelprev1, nfsstat4, resp->status);
7119 return;
7120 }
7121
7122 sp = rfs4_findstate_by_owner_file(oo, fp, &create);
7123 if (sp == NULL) {
7124 resp->status = NFS4ERR_SERVERFAULT;
7125 DTRACE_PROBE1(nfss__e__do_opendelprev2, nfsstat4, resp->status);
7126 rw_exit(&fp->rf_file_rwlock);
7127 rfs4_file_rele(fp);
7128 return;
7129 }
7130
7131 rfs4_dbe_lock(sp->rs_dbe);
7132 rfs4_dbe_lock(fp->rf_dbe);
7133 if (args->share_access != sp->rs_share_access ||
7134 args->share_deny != sp->rs_share_deny ||
7135 sp->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
7136 NFS4_DEBUG(rfs4_debug,
7137 (CE_NOTE, "rfs4_do_opendelprev: state mixup"));
7138 rfs4_dbe_unlock(fp->rf_dbe);
7139 rfs4_dbe_unlock(sp->rs_dbe);
7140 rfs4_file_rele(fp);
7141 rfs4_state_rele(sp);
7142 resp->status = NFS4ERR_SERVERFAULT;
7143 return;
7144 }
7145 rfs4_dbe_unlock(fp->rf_dbe);
7146 rfs4_dbe_unlock(sp->rs_dbe);
7147
7148 dsp = rfs4_finddeleg(sp, &dcreate);
7149 if (dsp == NULL) {
7150 rfs4_state_rele(sp);
7151 rfs4_file_rele(fp);
7152 resp->status = NFS4ERR_SERVERFAULT;
7153 return;
7154 }
7155
7156 next_stateid(&sp->rs_stateid);
7157
7158 resp->stateid = sp->rs_stateid.stateid;
7159
7160 resp->delegation.delegation_type = dsp->rds_dtype;
7161
7162 if (dsp->rds_dtype == OPEN_DELEGATE_READ) {
7163 open_read_delegation4 *rv =
7164 &resp->delegation.open_delegation4_u.read;
7165
7166 rv->stateid = dsp->rds_delegid.stateid;
7167 rv->recall = FALSE; /* no policy in place to set to TRUE */
7168 ace = &rv->permissions;
7169 } else {
7170 open_write_delegation4 *rv =
7171 &resp->delegation.open_delegation4_u.write;
7172
7173 rv->stateid = dsp->rds_delegid.stateid;
7174 rv->recall = FALSE; /* no policy in place to set to TRUE */
7175 ace = &rv->permissions;
7176 rv->space_limit.limitby = NFS_LIMIT_SIZE;
7177 rv->space_limit.nfs_space_limit4_u.filesize = UINT64_MAX;
7178 }
7179
7180 /* XXX For now */
7181 ace->type = ACE4_ACCESS_ALLOWED_ACE_TYPE;
7182 ace->flag = 0;
7183 ace->access_mask = 0;
7184 ace->who.utf8string_len = 0;
7185 ace->who.utf8string_val = 0;
7186
7187 rfs4_deleg_state_rele(dsp);
7188 rfs4_state_rele(sp);
7189 rfs4_file_rele(fp);
7190 }
7191
/*
 * Result of a sequence id check (see rfs4_check_seqid()).  The explicit
 * values matter: at least one caller compares the result against 0.
 */
typedef enum {
	NFS4_CHKSEQ_OKAY = 0,	/* request carries the next expected seqid */
	NFS4_CHKSEQ_REPLAY = 1,	/* same seqid and same op as the last reply */
	NFS4_CHKSEQ_BAD = 2	/* any other seqid, or same seqid, other op */
} rfs4_chkseq_t;
7197
7198 /*
7199 * Generic function for sequence number checks.
7200 */
7201 static rfs4_chkseq_t
7202 rfs4_check_seqid(seqid4 seqid, nfs_resop4 *lastop,
7203 seqid4 rqst_seq, nfs_resop4 *resop, bool_t copyres)
7204 {
7205 /* Same sequence ids and matching operations? */
7206 if (seqid == rqst_seq && resop->resop == lastop->resop) {
7207 if (copyres == TRUE) {
7208 rfs4_free_reply(resop);
7209 rfs4_copy_reply(resop, lastop);
7210 }
7211 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
7212 "Replayed SEQID %d\n", seqid));
7213 return (NFS4_CHKSEQ_REPLAY);
7214 }
7215
7216 /* If the incoming sequence is not the next expected then it is bad */
7217 if (rqst_seq != seqid + 1) {
7218 if (rqst_seq == seqid) {
7219 NFS4_DEBUG(rfs4_debug,
7220 (CE_NOTE, "BAD SEQID: Replayed sequence id "
7221 "but last op was %d current op is %d\n",
7222 lastop->resop, resop->resop));
7223 return (NFS4_CHKSEQ_BAD);
7224 }
7225 NFS4_DEBUG(rfs4_debug,
7226 (CE_NOTE, "BAD SEQID: got %u expecting %u\n",
7227 rqst_seq, seqid));
7228 return (NFS4_CHKSEQ_BAD);
7229 }
7230
7231 /* Everything okay -- next expected */
7232 return (NFS4_CHKSEQ_OKAY);
7233 }
7234
7235
7236 static rfs4_chkseq_t
7237 rfs4_check_open_seqid(seqid4 seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7238 {
7239 rfs4_chkseq_t rc;
7240
7241 rfs4_dbe_lock(op->ro_dbe);
7242 rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply, seqid, resop,
7243 TRUE);
7244 rfs4_dbe_unlock(op->ro_dbe);
7245
7246 if (rc == NFS4_CHKSEQ_OKAY)
7247 rfs4_update_lease(op->ro_client);
7248
7249 return (rc);
7250 }
7251
7252 static rfs4_chkseq_t
7253 rfs4_check_olo_seqid(seqid4 olo_seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7254 {
7255 rfs4_chkseq_t rc;
7256
7257 rfs4_dbe_lock(op->ro_dbe);
7258 rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply,
7259 olo_seqid, resop, FALSE);
7260 rfs4_dbe_unlock(op->ro_dbe);
7261
7262 return (rc);
7263 }
7264
7265 static rfs4_chkseq_t
7266 rfs4_check_lock_seqid(seqid4 seqid, rfs4_lo_state_t *lsp, nfs_resop4 *resop)
7267 {
7268 rfs4_chkseq_t rc = NFS4_CHKSEQ_OKAY;
7269
7270 rfs4_dbe_lock(lsp->rls_dbe);
7271 if (!lsp->rls_skip_seqid_check)
7272 rc = rfs4_check_seqid(lsp->rls_seqid, &lsp->rls_reply, seqid,
7273 resop, TRUE);
7274 rfs4_dbe_unlock(lsp->rls_dbe);
7275
7276 return (rc);
7277 }
7278
/*
 * OPEN operation handler.
 *
 * In order, this function:
 *  - requires a current filehandle and a known client whose lease has not
 *    expired;
 *  - finds (or creates) the open owner named in the request and holds off
 *    other users of that owner's sequence space (oo->ro_sw);
 *  - for a pre-existing owner, checks the request's seqid, handling
 *    replays and owners that were never confirmed;
 *  - applies the grace period rules for the claim type;
 *  - validates the share access/deny bits;
 *  - dispatches to the claim-specific helper (rfs4_do_open*).
 *
 * After that, unless the resulting status is one of the errors for which
 * the sequence id must not advance, it renews the lease, saves the reply
 * for replay detection and updates the owner's sequence id.
 *
 * The final status is stored in both resp->status and *cs->statusp.
 */
static void
rfs4_op_open(nfs_argop4 *argop, nfs_resop4 *resop,
    struct svc_req *req, struct compound_state *cs)
{
	OPEN4args *args = &argop->nfs_argop4_u.opopen;
	OPEN4res *resp = &resop->nfs_resop4_u.opopen;
	open_owner4 *owner = &args->owner;
	open_claim_type4 claim = args->claim;
	rfs4_client_t *cp;
	rfs4_openowner_t *oo;
	bool_t create;			/* in: allow create; out: was created */
	bool_t replay = FALSE;		/* request is a replay of the last one */
	int can_reclaim;

	DTRACE_NFSV4_2(op__open__start, struct compound_state *, cs,
	    OPEN4args *, args);

	if (cs->vp == NULL) {
		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
		goto end;
	}

	/*
	 * Need to check clientid and lease expiration first based on
	 * error ordering and incrementing sequence id.
	 */
	cp = rfs4_findclient_by_id(owner->clientid, FALSE);
	if (cp == NULL) {
		*cs->statusp = resp->status =
		    rfs4_check_clientid(&owner->clientid, 0);
		goto end;
	}

	if (rfs4_lease_expired(cp)) {
		rfs4_client_close(cp);
		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
		goto end;
	}
	/* Sampled once here; used for the CLAIM_PREVIOUS grace check below */
	can_reclaim = cp->rc_can_reclaim;

	/*
	 * Find the open_owner for use from this point forward.  Take
	 * care in updating the sequence id based on the type of error
	 * being returned.
	 */
retry:
	create = TRUE;
	oo = rfs4_findopenowner(owner, &create, args->seqid);
	if (oo == NULL) {
		*cs->statusp = resp->status = NFS4ERR_RESOURCE;
		rfs4_client_rele(cp);
		goto end;
	}

	/* Hold off access to the sequence space while the open is done */
	rfs4_sw_enter(&oo->ro_sw);

	/*
	 * If the open_owner existed before at the server, then check
	 * the sequence id.
	 */
	if (!create && !oo->ro_postpone_confirm) {
		switch (rfs4_check_open_seqid(args->seqid, oo, resop)) {
		case NFS4_CHKSEQ_BAD:
			/*
			 * A larger seqid on an owner that was never
			 * confirmed: free its opens, drop it and look the
			 * owner up again.
			 */
			if ((args->seqid > oo->ro_open_seqid) &&
			    oo->ro_need_confirm) {
				rfs4_free_opens(oo, TRUE, FALSE);
				rfs4_sw_exit(&oo->ro_sw);
				rfs4_openowner_rele(oo);
				goto retry;
			}
			resp->status = NFS4ERR_BAD_SEQID;
			goto out;
		case NFS4_CHKSEQ_REPLAY: /* replay of previous request */
			/* *resop now holds a copy of the saved reply */
			replay = TRUE;
			goto out;
		default:
			break;
		}

		/*
		 * Sequence was ok and open owner exists
		 * check to see if we have yet to see an
		 * open_confirm.
		 */
		if (oo->ro_need_confirm) {
			rfs4_free_opens(oo, TRUE, FALSE);
			rfs4_sw_exit(&oo->ro_sw);
			rfs4_openowner_rele(oo);
			goto retry;
		}
	}
	/* Grace only applies to regular-type OPENs */
	if (rfs4_clnt_in_grace(cp) &&
	    (claim == CLAIM_NULL || claim == CLAIM_DELEGATE_CUR)) {
		*cs->statusp = resp->status = NFS4ERR_GRACE;
		goto out;
	}

	/*
	 * If previous state at the server existed then can_reclaim
	 * will be set. If not reply NFS4ERR_NO_GRACE to the
	 * client.
	 */
	if (rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS && !can_reclaim) {
		*cs->statusp = resp->status = NFS4ERR_NO_GRACE;
		goto out;
	}


	/*
	 * Reject the open if the client has missed the grace period
	 */
	if (!rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS) {
		*cs->statusp = resp->status = NFS4ERR_NO_GRACE;
		goto out;
	}

	/* Couple of up-front bookkeeping items */
	if (oo->ro_need_confirm) {
		/*
		 * If this is a reclaim OPEN then we should not ask
		 * for a confirmation of the open_owner per the
		 * protocol specification.
		 */
		if (claim == CLAIM_PREVIOUS)
			oo->ro_need_confirm = FALSE;
		else
			resp->rflags |= OPEN4_RESULT_CONFIRM;
	}
	resp->rflags |= OPEN4_RESULT_LOCKTYPE_POSIX;

	/*
	 * If there is an unshared filesystem mounted on this vnode,
	 * do not allow to open/create in this directory.
	 */
	if (vn_ismntpt(cs->vp)) {
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		goto out;
	}

	/*
	 * access must be READ, WRITE, or BOTH.  No access is invalid.
	 * deny can be READ, WRITE, BOTH, or NONE.
	 * bits not defined for access/deny are invalid.
	 */
	if (! (args->share_access & OPEN4_SHARE_ACCESS_BOTH) ||
	    (args->share_access & ~OPEN4_SHARE_ACCESS_BOTH) ||
	    (args->share_deny & ~OPEN4_SHARE_DENY_BOTH)) {
		*cs->statusp = resp->status = NFS4ERR_INVAL;
		goto out;
	}


	/*
	 * make sure attrset is zero before response is built.
	 */
	resp->attrset = 0;

	/* Hand off to the helper for this claim type */
	switch (claim) {
	case CLAIM_NULL:
		rfs4_do_opennull(cs, req, args, oo, resp);
		break;
	case CLAIM_PREVIOUS:
		rfs4_do_openprev(cs, req, args, oo, resp);
		break;
	case CLAIM_DELEGATE_CUR:
		rfs4_do_opendelcur(cs, req, args, oo, resp);
		break;
	case CLAIM_DELEGATE_PREV:
		rfs4_do_opendelprev(cs, req, args, oo, resp);
		break;
	default:
		resp->status = NFS4ERR_INVAL;
		break;
	}

out:
	/* The client reference is not needed past this point */
	rfs4_client_rele(cp);

	/* Catch sequence id handling here to make it a little easier */
	switch (resp->status) {
	case NFS4ERR_BADXDR:
	case NFS4ERR_BAD_SEQID:
	case NFS4ERR_BAD_STATEID:
	case NFS4ERR_NOFILEHANDLE:
	case NFS4ERR_RESOURCE:
	case NFS4ERR_STALE_CLIENTID:
	case NFS4ERR_STALE_STATEID:
		/*
		 * The protocol states that if any of these errors are
		 * being returned, the sequence id should not be
		 * incremented.  Any other return requires an
		 * increment.
		 */
		break;
	default:
		/* Always update the lease in this case */
		rfs4_update_lease(oo->ro_client);

		/* Regular response - copy the result */
		if (!replay)
			rfs4_update_open_resp(oo, resop, &cs->fh);

		/*
		 * REPLAY case: Only if the previous response was OK
		 * do we copy the filehandle.  If not OK, no
		 * filehandle to copy.
		 */
		if (replay == TRUE &&
		    resp->status == NFS4_OK &&
		    oo->ro_reply_fh.nfs_fh4_val) {
			/*
			 * If this is a replay, we must restore the
			 * current filehandle/vp to that of what was
			 * returned originally.  Try our best to do
			 * it.
			 */
			nfs_fh4_fmt_t *fh_fmtp =
			    (nfs_fh4_fmt_t *)oo->ro_reply_fh.nfs_fh4_val;

			cs->exi = checkexport4(&fh_fmtp->fh4_fsid,
			    (fid_t *)&fh_fmtp->fh4_xlen, NULL);

			if (cs->exi == NULL) {
				resp->status = NFS4ERR_STALE;
				goto finish;
			}

			VN_RELE(cs->vp);

			/* nfs4_fhtovp() stores a status in resp->status */
			cs->vp = nfs4_fhtovp(&oo->ro_reply_fh, cs->exi,
			    &resp->status);

			if (cs->vp == NULL)
				goto finish;

			nfs_fh4_copy(&oo->ro_reply_fh, &cs->fh);
		}

		/*
		 * If this was a replay, no need to update the
		 * sequence id.  If the open_owner was not created on
		 * this pass, then update.  The first use of an
		 * open_owner will not bump the sequence id.
		 */
		if (replay == FALSE && !create)
			rfs4_update_open_sequence(oo);
		/*
		 * If the client is receiving an error and the
		 * open_owner needs to be confirmed, there is no way
		 * to notify the client of this fact ignoring the fact
		 * that the server has no method of returning a
		 * stateid to confirm.  Therefore, the server needs to
		 * mark this open_owner in a way as to avoid the
		 * sequence id checking the next time the client uses
		 * this open_owner.
		 */
		if (resp->status != NFS4_OK && oo->ro_need_confirm)
			oo->ro_postpone_confirm = TRUE;
		/*
		 * If OK response then clear the postpone flag and
		 * reset the sequence id to keep in sync with the
		 * client.
		 */
		if (resp->status == NFS4_OK && oo->ro_postpone_confirm) {
			oo->ro_postpone_confirm = FALSE;
			oo->ro_open_seqid = args->seqid;
		}
		break;
	}

finish:
	*cs->statusp = resp->status;

	/* Let other requests use this owner's sequence space again */
	rfs4_sw_exit(&oo->ro_sw);
	rfs4_openowner_rele(oo);

end:
	DTRACE_NFSV4_2(op__open__done, struct compound_state *, cs,
	    OPEN4res *, resp);
}
7561
7562 /*ARGSUSED*/
7563 void
7564 rfs4_op_open_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
7565 struct svc_req *req, struct compound_state *cs)
7566 {
7567 OPEN_CONFIRM4args *args = &argop->nfs_argop4_u.opopen_confirm;
7568 OPEN_CONFIRM4res *resp = &resop->nfs_resop4_u.opopen_confirm;
7569 rfs4_state_t *sp;
7570 nfsstat4 status;
7571
7572 DTRACE_NFSV4_2(op__open__confirm__start, struct compound_state *, cs,
7573 OPEN_CONFIRM4args *, args);
7574
7575 if (cs->vp == NULL) {
7576 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7577 goto out;
7578 }
7579
7580 if (cs->vp->v_type != VREG) {
7581 *cs->statusp = resp->status =
7582 cs->vp->v_type == VDIR ? NFS4ERR_ISDIR : NFS4ERR_INVAL;
7583 return;
7584 }
7585
7586 status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7587 if (status != NFS4_OK) {
7588 *cs->statusp = resp->status = status;
7589 goto out;
7590 }
7591
7592 /* Ensure specified filehandle matches */
7593 if (cs->vp != sp->rs_finfo->rf_vp) {
7594 rfs4_state_rele(sp);
7595 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7596 goto out;
7597 }
7598
7599 /* hold off other access to open_owner while we tinker */
7600 rfs4_sw_enter(&sp->rs_owner->ro_sw);
7601
7602 switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7603 case NFS4_CHECK_STATEID_OKAY:
7604 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7605 resop) != 0) {
7606 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7607 break;
7608 }
7609 /*
7610 * If it is the appropriate stateid and determined to
7611 * be "OKAY" then this means that the stateid does not
7612 * need to be confirmed and the client is in error for
7613 * sending an OPEN_CONFIRM.
7614 */
7615 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7616 break;
7617 case NFS4_CHECK_STATEID_OLD:
7618 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7619 break;
7620 case NFS4_CHECK_STATEID_BAD:
7621 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7622 break;
7623 case NFS4_CHECK_STATEID_EXPIRED:
7624 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7625 break;
7626 case NFS4_CHECK_STATEID_CLOSED:
7627 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7628 break;
7629 case NFS4_CHECK_STATEID_REPLAY:
7630 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7631 resop)) {
7632 case NFS4_CHKSEQ_OKAY:
7633 /*
7634 * This is replayed stateid; if seqid matches
7635 * next expected, then client is using wrong seqid.
7636 */
7637 /* fall through */
7638 case NFS4_CHKSEQ_BAD:
7639 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7640 break;
7641 case NFS4_CHKSEQ_REPLAY:
7642 /*
7643 * Note this case is the duplicate case so
7644 * resp->status is already set.
7645 */
7646 *cs->statusp = resp->status;
7647 rfs4_update_lease(sp->rs_owner->ro_client);
7648 break;
7649 }
7650 break;
7651 case NFS4_CHECK_STATEID_UNCONFIRMED:
7652 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7653 resop) != NFS4_CHKSEQ_OKAY) {
7654 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7655 break;
7656 }
7657 *cs->statusp = resp->status = NFS4_OK;
7658
7659 next_stateid(&sp->rs_stateid);
7660 resp->open_stateid = sp->rs_stateid.stateid;
7661 sp->rs_owner->ro_need_confirm = FALSE;
7662 rfs4_update_lease(sp->rs_owner->ro_client);
7663 rfs4_update_open_sequence(sp->rs_owner);
7664 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7665 break;
7666 default:
7667 ASSERT(FALSE);
7668 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7669 break;
7670 }
7671 rfs4_sw_exit(&sp->rs_owner->ro_sw);
7672 rfs4_state_rele(sp);
7673
7674 out:
7675 DTRACE_NFSV4_2(op__open__confirm__done, struct compound_state *, cs,
7676 OPEN_CONFIRM4res *, resp);
7677 }
7678
7679 /*ARGSUSED*/
7680 void
7681 rfs4_op_open_downgrade(nfs_argop4 *argop, nfs_resop4 *resop,
7682 struct svc_req *req, struct compound_state *cs)
7683 {
7684 OPEN_DOWNGRADE4args *args = &argop->nfs_argop4_u.opopen_downgrade;
7685 OPEN_DOWNGRADE4res *resp = &resop->nfs_resop4_u.opopen_downgrade;
7686 uint32_t access = args->share_access;
7687 uint32_t deny = args->share_deny;
7688 nfsstat4 status;
7689 rfs4_state_t *sp;
7690 rfs4_file_t *fp;
7691 int fflags = 0;
7692
7693 DTRACE_NFSV4_2(op__open__downgrade__start, struct compound_state *, cs,
7694 OPEN_DOWNGRADE4args *, args);
7695
7696 if (cs->vp == NULL) {
7697 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7698 goto out;
7699 }
7700
7701 if (cs->vp->v_type != VREG) {
7702 *cs->statusp = resp->status = NFS4ERR_INVAL;
7703 return;
7704 }
7705
7706 status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7707 if (status != NFS4_OK) {
7708 *cs->statusp = resp->status = status;
7709 goto out;
7710 }
7711
7712 /* Ensure specified filehandle matches */
7713 if (cs->vp != sp->rs_finfo->rf_vp) {
7714 rfs4_state_rele(sp);
7715 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7716 goto out;
7717 }
7718
7719 /* hold off other access to open_owner while we tinker */
7720 rfs4_sw_enter(&sp->rs_owner->ro_sw);
7721
7722 switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7723 case NFS4_CHECK_STATEID_OKAY:
7724 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7725 resop) != NFS4_CHKSEQ_OKAY) {
7726 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7727 goto end;
7728 }
7729 break;
7730 case NFS4_CHECK_STATEID_OLD:
7731 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7732 goto end;
7733 case NFS4_CHECK_STATEID_BAD:
7734 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7735 goto end;
7736 case NFS4_CHECK_STATEID_EXPIRED:
7737 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7738 goto end;
7739 case NFS4_CHECK_STATEID_CLOSED:
7740 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7741 goto end;
7742 case NFS4_CHECK_STATEID_UNCONFIRMED:
7743 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7744 goto end;
7745 case NFS4_CHECK_STATEID_REPLAY:
7746 /* Check the sequence id for the open owner */
7747 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7748 resop)) {
7749 case NFS4_CHKSEQ_OKAY:
7750 /*
7751 * This is replayed stateid; if seqid matches
7752 * next expected, then client is using wrong seqid.
7753 */
7754 /* fall through */
7755 case NFS4_CHKSEQ_BAD:
7756 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7757 goto end;
7758 case NFS4_CHKSEQ_REPLAY:
7759 /*
7760 * Note this case is the duplicate case so
7761 * resp->status is already set.
7762 */
7763 *cs->statusp = resp->status;
7764 rfs4_update_lease(sp->rs_owner->ro_client);
7765 goto end;
7766 }
7767 break;
7768 default:
7769 ASSERT(FALSE);
7770 break;
7771 }
7772
7773 rfs4_dbe_lock(sp->rs_dbe);
7774 /*
7775 * Check that the new access modes and deny modes are valid.
7776 * Check that no invalid bits are set.
7777 */
7778 if ((access & ~(OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) ||
7779 (deny & ~(OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE))) {
7780 *cs->statusp = resp->status = NFS4ERR_INVAL;
7781 rfs4_update_open_sequence(sp->rs_owner);
7782 rfs4_dbe_unlock(sp->rs_dbe);
7783 goto end;
7784 }
7785
7786 /*
7787 * The new modes must be a subset of the current modes and
7788 * the access must specify at least one mode. To test that
7789 * the new mode is a subset of the current modes we bitwise
7790 * AND them together and check that the result equals the new
7791 * mode. For example:
7792 * New mode, access == R and current mode, sp->rs_open_access == RW
7793 * access & sp->rs_open_access == R == access, so the new access mode
7794 * is valid. Consider access == RW, sp->rs_open_access = R
7795 * access & sp->rs_open_access == R != access, so the new access mode
7796 * is invalid.
7797 */
7798 if ((access & sp->rs_open_access) != access ||
7799 (deny & sp->rs_open_deny) != deny ||
7800 (access &
7801 (OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) == 0) {
7802 *cs->statusp = resp->status = NFS4ERR_INVAL;
7803 rfs4_update_open_sequence(sp->rs_owner);
7804 rfs4_dbe_unlock(sp->rs_dbe);
7805 goto end;
7806 }
7807
7808 /*
7809 * Release any share locks associated with this stateID.
7810 * Strictly speaking, this violates the spec because the
7811 * spec effectively requires that open downgrade be atomic.
7812 * At present, fs_shrlock does not have this capability.
7813 */
7814 (void) rfs4_unshare(sp);
7815
7816 status = rfs4_share(sp, access, deny);
7817 if (status != NFS4_OK) {
7818 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7819 rfs4_update_open_sequence(sp->rs_owner);
7820 rfs4_dbe_unlock(sp->rs_dbe);
7821 goto end;
7822 }
7823
7824 fp = sp->rs_finfo;
7825 rfs4_dbe_lock(fp->rf_dbe);
7826
7827 /*
7828 * If the current mode has deny read and the new mode
7829 * does not, decrement the number of deny read mode bits
7830 * and if it goes to zero turn off the deny read bit
7831 * on the file.
7832 */
7833 if ((sp->rs_open_deny & OPEN4_SHARE_DENY_READ) &&
7834 (deny & OPEN4_SHARE_DENY_READ) == 0) {
7835 fp->rf_deny_read--;
7836 if (fp->rf_deny_read == 0)
7837 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
7838 }
7839
7840 /*
7841 * If the current mode has deny write and the new mode
7842 * does not, decrement the number of deny write mode bits
7843 * and if it goes to zero turn off the deny write bit
7844 * on the file.
7845 */
7846 if ((sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) &&
7847 (deny & OPEN4_SHARE_DENY_WRITE) == 0) {
7848 fp->rf_deny_write--;
7849 if (fp->rf_deny_write == 0)
7850 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
7851 }
7852
7853 /*
7854 * If the current mode has access read and the new mode
7855 * does not, decrement the number of access read mode bits
7856 * and if it goes to zero turn off the access read bit
7857 * on the file. set fflags to FREAD for the call to
7858 * vn_open_downgrade().
7859 */
7860 if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) &&
7861 (access & OPEN4_SHARE_ACCESS_READ) == 0) {
7862 fp->rf_access_read--;
7863 if (fp->rf_access_read == 0)
7864 fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
7865 fflags |= FREAD;
7866 }
7867
7868 /*
7869 * If the current mode has access write and the new mode
7870 * does not, decrement the number of access write mode bits
7871 * and if it goes to zero turn off the access write bit
7872 * on the file. set fflags to FWRITE for the call to
7873 * vn_open_downgrade().
7874 */
7875 if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) &&
7876 (access & OPEN4_SHARE_ACCESS_WRITE) == 0) {
7877 fp->rf_access_write--;
7878 if (fp->rf_access_write == 0)
7879 fp->rf_share_deny &= ~OPEN4_SHARE_ACCESS_WRITE;
7880 fflags |= FWRITE;
7881 }
7882
7883 /* Check that the file is still accessible */
7884 ASSERT(fp->rf_share_access);
7885
7886 rfs4_dbe_unlock(fp->rf_dbe);
7887
7888 /* now set the new open access and deny modes */
7889 sp->rs_open_access = access;
7890 sp->rs_open_deny = deny;
7891
7892 /*
7893 * we successfully downgraded the share lock, now we need to downgrade
7894 * the open. it is possible that the downgrade was only for a deny
7895 * mode and we have nothing else to do.
7896 */
7897 if ((fflags & (FREAD|FWRITE)) != 0)
7898 vn_open_downgrade(cs->vp, fflags);
7899
7900 /* Update the stateid */
7901 next_stateid(&sp->rs_stateid);
7902 resp->open_stateid = sp->rs_stateid.stateid;
7903
7904 rfs4_dbe_unlock(sp->rs_dbe);
7905
7906 *cs->statusp = resp->status = NFS4_OK;
7907 /* Update the lease */
7908 rfs4_update_lease(sp->rs_owner->ro_client);
7909 /* And the sequence */
7910 rfs4_update_open_sequence(sp->rs_owner);
7911 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7912
7913 end:
7914 rfs4_sw_exit(&sp->rs_owner->ro_sw);
7915 rfs4_state_rele(sp);
7916 out:
7917 DTRACE_NFSV4_2(op__open__downgrade__done, struct compound_state *, cs,
7918 OPEN_DOWNGRADE4res *, resp);
7919 }
7920
/*
 * memstr: locate the NUL-terminated string s2 inside the first n bytes of
 * the (not necessarily NUL-terminated) buffer s1.
 *
 * Returns a pointer to the first match within s1, or NULL when s2 does not
 * occur in those n bytes.  An empty s2 matches at the start of s1.
 */
static void *
memstr(const void *s1, const char *s2, size_t n)
{
	size_t patlen = strlen(s2);
	char *cur = (char *)s1;
	size_t remaining;

	/* Stop as soon as fewer bytes remain than the pattern needs. */
	for (remaining = n; remaining >= patlen; remaining--, cur++) {
		if (bcmp(cur, s2, patlen) == 0)
			return (cur);
	}

	return (NULL);
}
7936
7937 /*
7938 * The logic behind this function is detailed in the NFSv4 RFC in the
7939 * SETCLIENTID operation description under IMPLEMENTATION. Refer to
7940 * that section for explicit guidance to server behavior for
7941 * SETCLIENTID.
7942 */
7943 void
7944 rfs4_op_setclientid(nfs_argop4 *argop, nfs_resop4 *resop,
7945 struct svc_req *req, struct compound_state *cs)
7946 {
7947 SETCLIENTID4args *args = &argop->nfs_argop4_u.opsetclientid;
7948 SETCLIENTID4res *res = &resop->nfs_resop4_u.opsetclientid;
7949 rfs4_client_t *cp, *newcp, *cp_confirmed, *cp_unconfirmed;
7950 rfs4_clntip_t *ci;
7951 bool_t create;
7952 char *addr, *netid;
7953 int len;
7954
7955 DTRACE_NFSV4_2(op__setclientid__start, struct compound_state *, cs,
7956 SETCLIENTID4args *, args);
7957 retry:
7958 newcp = cp_confirmed = cp_unconfirmed = NULL;
7959
7960 /*
7961 * Save the caller's IP address
7962 */
7963 args->client.cl_addr =
7964 (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
7965
7966 /*
7967 * Record if it is a Solaris client that cannot handle referrals.
7968 */
7969 if (memstr(args->client.id_val, "Solaris", args->client.id_len) &&
7970 !memstr(args->client.id_val, "+referrals", args->client.id_len)) {
7971 /* Add a "yes, it's downrev" record */
7972 create = TRUE;
7973 ci = rfs4_find_clntip(args->client.cl_addr, &create);
7974 ASSERT(ci != NULL);
7975 rfs4_dbe_rele(ci->ri_dbe);
7976 } else {
7977 /* Remove any previous record */
7978 rfs4_invalidate_clntip(args->client.cl_addr);
7979 }
7980
7981 /*
7982 * In search of an EXISTING client matching the incoming
7983 * request to establish a new client identifier at the server
7984 */
7985 create = TRUE;
7986 cp = rfs4_findclient(&args->client, &create, NULL);
7987
7988 /* Should never happen */
7989 ASSERT(cp != NULL);
7990
7991 if (cp == NULL) {
7992 *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
7993 goto out;
7994 }
7995
7996 /*
7997 * Easiest case. Client identifier is newly created and is
7998 * unconfirmed. Also note that for this case, no other
7999 * entries exist for the client identifier. Nothing else to
8000 * check. Just setup the response and respond.
8001 */
8002 if (create) {
8003 *cs->statusp = res->status = NFS4_OK;
8004 res->SETCLIENTID4res_u.resok4.clientid = cp->rc_clientid;
8005 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8006 cp->rc_confirm_verf;
8007 /* Setup callback information; CB_NULL confirmation later */
8008 rfs4_client_setcb(cp, &args->callback, args->callback_ident);
8009
8010 rfs4_client_rele(cp);
8011 goto out;
8012 }
8013
8014 /*
8015 * An existing, confirmed client may exist but it may not have
8016 * been active for at least one lease period. If so, then
8017 * "close" the client and create a new client identifier
8018 */
8019 if (rfs4_lease_expired(cp)) {
8020 rfs4_client_close(cp);
8021 goto retry;
8022 }
8023
8024 if (cp->rc_need_confirm == TRUE)
8025 cp_unconfirmed = cp;
8026 else
8027 cp_confirmed = cp;
8028
8029 cp = NULL;
8030
8031 /*
8032 * We have a confirmed client, now check for an
8033 * unconfimred entry
8034 */
8035 if (cp_confirmed) {
8036 /* If creds don't match then client identifier is inuse */
8037 if (!creds_ok(cp_confirmed->rc_cr_set, req, cs)) {
8038 rfs4_cbinfo_t *cbp;
8039 /*
8040 * Some one else has established this client
8041 * id. Try and say * who they are. We will use
8042 * the call back address supplied by * the
8043 * first client.
8044 */
8045 *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8046
8047 addr = netid = NULL;
8048
8049 cbp = &cp_confirmed->rc_cbinfo;
8050 if (cbp->cb_callback.cb_location.r_addr &&
8051 cbp->cb_callback.cb_location.r_netid) {
8052 cb_client4 *cbcp = &cbp->cb_callback;
8053
8054 len = strlen(cbcp->cb_location.r_addr)+1;
8055 addr = kmem_alloc(len, KM_SLEEP);
8056 bcopy(cbcp->cb_location.r_addr, addr, len);
8057 len = strlen(cbcp->cb_location.r_netid)+1;
8058 netid = kmem_alloc(len, KM_SLEEP);
8059 bcopy(cbcp->cb_location.r_netid, netid, len);
8060 }
8061
8062 res->SETCLIENTID4res_u.client_using.r_addr = addr;
8063 res->SETCLIENTID4res_u.client_using.r_netid = netid;
8064
8065 rfs4_client_rele(cp_confirmed);
8066 }
8067
8068 /*
8069 * Confirmed, creds match, and verifier matches; must
8070 * be an update of the callback info
8071 */
8072 if (cp_confirmed->rc_nfs_client.verifier ==
8073 args->client.verifier) {
8074 /* Setup callback information */
8075 rfs4_client_setcb(cp_confirmed, &args->callback,
8076 args->callback_ident);
8077
8078 /* everything okay -- move ahead */
8079 *cs->statusp = res->status = NFS4_OK;
8080 res->SETCLIENTID4res_u.resok4.clientid =
8081 cp_confirmed->rc_clientid;
8082
8083 /* update the confirm_verifier and return it */
8084 rfs4_client_scv_next(cp_confirmed);
8085 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8086 cp_confirmed->rc_confirm_verf;
8087
8088 rfs4_client_rele(cp_confirmed);
8089 goto out;
8090 }
8091
8092 /*
8093 * Creds match but the verifier doesn't. Must search
8094 * for an unconfirmed client that would be replaced by
8095 * this request.
8096 */
8097 create = FALSE;
8098 cp_unconfirmed = rfs4_findclient(&args->client, &create,
8099 cp_confirmed);
8100 }
8101
8102 /*
8103 * At this point, we have taken care of the brand new client
8104 * struct, INUSE case, update of an existing, and confirmed
8105 * client struct.
8106 */
8107
8108 /*
8109 * check to see if things have changed while we originally
8110 * picked up the client struct. If they have, then return and
8111 * retry the processing of this SETCLIENTID request.
8112 */
8113 if (cp_unconfirmed) {
8114 rfs4_dbe_lock(cp_unconfirmed->rc_dbe);
8115 if (!cp_unconfirmed->rc_need_confirm) {
8116 rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8117 rfs4_client_rele(cp_unconfirmed);
8118 if (cp_confirmed)
8119 rfs4_client_rele(cp_confirmed);
8120 goto retry;
8121 }
8122 /* do away with the old unconfirmed one */
8123 rfs4_dbe_invalidate(cp_unconfirmed->rc_dbe);
8124 rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8125 rfs4_client_rele(cp_unconfirmed);
8126 cp_unconfirmed = NULL;
8127 }
8128
8129 /*
8130 * This search will temporarily hide the confirmed client
8131 * struct while a new client struct is created as the
8132 * unconfirmed one.
8133 */
8134 create = TRUE;
8135 newcp = rfs4_findclient(&args->client, &create, cp_confirmed);
8136
8137 ASSERT(newcp != NULL);
8138
8139 if (newcp == NULL) {
8140 *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8141 rfs4_client_rele(cp_confirmed);
8142 goto out;
8143 }
8144
8145 /*
8146 * If one was not created, then a similar request must be in
8147 * process so release and start over with this one
8148 */
8149 if (create != TRUE) {
8150 rfs4_client_rele(newcp);
8151 if (cp_confirmed)
8152 rfs4_client_rele(cp_confirmed);
8153 goto retry;
8154 }
8155
8156 *cs->statusp = res->status = NFS4_OK;
8157 res->SETCLIENTID4res_u.resok4.clientid = newcp->rc_clientid;
8158 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8159 newcp->rc_confirm_verf;
8160 /* Setup callback information; CB_NULL confirmation later */
8161 rfs4_client_setcb(newcp, &args->callback, args->callback_ident);
8162
8163 newcp->rc_cp_confirmed = cp_confirmed;
8164
8165 rfs4_client_rele(newcp);
8166
8167 out:
8168 DTRACE_NFSV4_2(op__setclientid__done, struct compound_state *, cs,
8169 SETCLIENTID4res *, res);
8170 }
8171
8172 /*ARGSUSED*/
8173 void
8174 rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
8175 struct svc_req *req, struct compound_state *cs)
8176 {
8177 SETCLIENTID_CONFIRM4args *args =
8178 &argop->nfs_argop4_u.opsetclientid_confirm;
8179 SETCLIENTID_CONFIRM4res *res =
8180 &resop->nfs_resop4_u.opsetclientid_confirm;
8181 rfs4_client_t *cp, *cptoclose = NULL;
8182
8183 DTRACE_NFSV4_2(op__setclientid__confirm__start,
8184 struct compound_state *, cs,
8185 SETCLIENTID_CONFIRM4args *, args);
8186
8187 *cs->statusp = res->status = NFS4_OK;
8188
8189 cp = rfs4_findclient_by_id(args->clientid, TRUE);
8190
8191 if (cp == NULL) {
8192 *cs->statusp = res->status =
8193 rfs4_check_clientid(&args->clientid, 1);
8194 goto out;
8195 }
8196
8197 if (!creds_ok(cp, req, cs)) {
8198 *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8199 rfs4_client_rele(cp);
8200 goto out;
8201 }
8202
8203 /* If the verifier doesn't match, the record doesn't match */
8204 if (cp->rc_confirm_verf != args->setclientid_confirm) {
8205 *cs->statusp = res->status = NFS4ERR_STALE_CLIENTID;
8206 rfs4_client_rele(cp);
8207 goto out;
8208 }
8209
8210 rfs4_dbe_lock(cp->rc_dbe);
8211 cp->rc_need_confirm = FALSE;
8212 if (cp->rc_cp_confirmed) {
8213 cptoclose = cp->rc_cp_confirmed;
8214 cptoclose->rc_ss_remove = 1;
8215 cp->rc_cp_confirmed = NULL;
8216 }
8217
8218 /*
8219 * Update the client's associated server instance, if it's changed
8220 * since the client was created.
8221 */
8222 if (rfs4_servinst(cp) != rfs4_cur_servinst)
8223 rfs4_servinst_assign(cp, rfs4_cur_servinst);
8224
8225 /*
8226 * Record clientid in stable storage.
8227 * Must be done after server instance has been assigned.
8228 */
8229 rfs4_ss_clid(cp);
8230
8231 rfs4_dbe_unlock(cp->rc_dbe);
8232
8233 if (cptoclose)
8234 /* don't need to rele, client_close does it */
8235 rfs4_client_close(cptoclose);
8236
8237 /* If needed, initiate CB_NULL call for callback path */
8238 rfs4_deleg_cb_check(cp);
8239 rfs4_update_lease(cp);
8240
8241 /*
8242 * Check to see if client can perform reclaims
8243 */
8244 rfs4_ss_chkclid(cp);
8245
8246 rfs4_client_rele(cp);
8247
8248 out:
8249 DTRACE_NFSV4_2(op__setclientid__confirm__done,
8250 struct compound_state *, cs,
8251 SETCLIENTID_CONFIRM4 *, res);
8252 }
8253
8254
8255 /*ARGSUSED*/
8256 void
8257 rfs4_op_close(nfs_argop4 *argop, nfs_resop4 *resop,
8258 struct svc_req *req, struct compound_state *cs)
8259 {
8260 CLOSE4args *args = &argop->nfs_argop4_u.opclose;
8261 CLOSE4res *resp = &resop->nfs_resop4_u.opclose;
8262 rfs4_state_t *sp;
8263 nfsstat4 status;
8264
8265 DTRACE_NFSV4_2(op__close__start, struct compound_state *, cs,
8266 CLOSE4args *, args);
8267
8268 if (cs->vp == NULL) {
8269 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8270 goto out;
8271 }
8272
8273 status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_INVALID);
8274 if (status != NFS4_OK) {
8275 *cs->statusp = resp->status = status;
8276 goto out;
8277 }
8278
8279 /* Ensure specified filehandle matches */
8280 if (cs->vp != sp->rs_finfo->rf_vp) {
8281 rfs4_state_rele(sp);
8282 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8283 goto out;
8284 }
8285
8286 /* hold off other access to open_owner while we tinker */
8287 rfs4_sw_enter(&sp->rs_owner->ro_sw);
8288
8289 switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
8290 case NFS4_CHECK_STATEID_OKAY:
8291 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8292 resop) != NFS4_CHKSEQ_OKAY) {
8293 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8294 goto end;
8295 }
8296 break;
8297 case NFS4_CHECK_STATEID_OLD:
8298 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8299 goto end;
8300 case NFS4_CHECK_STATEID_BAD:
8301 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8302 goto end;
8303 case NFS4_CHECK_STATEID_EXPIRED:
8304 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8305 goto end;
8306 case NFS4_CHECK_STATEID_CLOSED:
8307 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8308 goto end;
8309 case NFS4_CHECK_STATEID_UNCONFIRMED:
8310 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8311 goto end;
8312 case NFS4_CHECK_STATEID_REPLAY:
8313 /* Check the sequence id for the open owner */
8314 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8315 resop)) {
8316 case NFS4_CHKSEQ_OKAY:
8317 /*
8318 * This is replayed stateid; if seqid matches
8319 * next expected, then client is using wrong seqid.
8320 */
8321 /* FALL THROUGH */
8322 case NFS4_CHKSEQ_BAD:
8323 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8324 goto end;
8325 case NFS4_CHKSEQ_REPLAY:
8326 /*
8327 * Note this case is the duplicate case so
8328 * resp->status is already set.
8329 */
8330 *cs->statusp = resp->status;
8331 rfs4_update_lease(sp->rs_owner->ro_client);
8332 goto end;
8333 }
8334 break;
8335 default:
8336 ASSERT(FALSE);
8337 break;
8338 }
8339
8340 rfs4_dbe_lock(sp->rs_dbe);
8341
8342 /* Update the stateid. */
8343 next_stateid(&sp->rs_stateid);
8344 resp->open_stateid = sp->rs_stateid.stateid;
8345
8346 rfs4_dbe_unlock(sp->rs_dbe);
8347
8348 rfs4_update_lease(sp->rs_owner->ro_client);
8349 rfs4_update_open_sequence(sp->rs_owner);
8350 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8351
8352 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
8353
8354 *cs->statusp = resp->status = status;
8355
8356 end:
8357 rfs4_sw_exit(&sp->rs_owner->ro_sw);
8358 rfs4_state_rele(sp);
8359 out:
8360 DTRACE_NFSV4_2(op__close__done, struct compound_state *, cs,
8361 CLOSE4res *, resp);
8362 }
8363
8364 /*
8365 * Manage the counts on the file struct and close all file locks
8366 */
8367 /*ARGSUSED*/
8368 void
8369 rfs4_release_share_lock_state(rfs4_state_t *sp, cred_t *cr,
8370 bool_t close_of_client)
8371 {
8372 rfs4_file_t *fp = sp->rs_finfo;
8373 rfs4_lo_state_t *lsp;
8374 int fflags = 0;
8375
8376 /*
8377 * If this call is part of the larger closing down of client
8378 * state then it is just easier to release all locks
8379 * associated with this client instead of going through each
8380 * individual file and cleaning locks there.
8381 */
8382 if (close_of_client) {
8383 if (sp->rs_owner->ro_client->rc_unlksys_completed == FALSE &&
8384 !list_is_empty(&sp->rs_lostatelist) &&
8385 sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID) {
8386 /* Is the PxFS kernel module loaded? */
8387 if (lm_remove_file_locks != NULL) {
8388 int new_sysid;
8389
8390 /* Encode the cluster nodeid in new sysid */
8391 new_sysid = sp->rs_owner->ro_client->rc_sysidt;
8392 lm_set_nlmid_flk(&new_sysid);
8393
8394 /*
8395 * This PxFS routine removes file locks for a
8396 * client over all nodes of a cluster.
8397 */
8398 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
8399 "lm_remove_file_locks(sysid=0x%x)\n",
8400 new_sysid));
8401 (*lm_remove_file_locks)(new_sysid);
8402 } else {
8403 struct flock64 flk;
8404
8405 /* Release all locks for this client */
8406 flk.l_type = F_UNLKSYS;
8407 flk.l_whence = 0;
8408 flk.l_start = 0;
8409 flk.l_len = 0;
8410 flk.l_sysid =
8411 sp->rs_owner->ro_client->rc_sysidt;
8412 flk.l_pid = 0;
8413 (void) VOP_FRLOCK(sp->rs_finfo->rf_vp, F_SETLK,
8414 &flk, F_REMOTELOCK | FREAD | FWRITE,
8415 (u_offset_t)0, NULL, CRED(), NULL);
8416 }
8417
8418 sp->rs_owner->ro_client->rc_unlksys_completed = TRUE;
8419 }
8420 }
8421
8422 /*
8423 * Release all locks on this file by this lock owner or at
8424 * least mark the locks as having been released
8425 */
8426 for (lsp = list_head(&sp->rs_lostatelist); lsp != NULL;
8427 lsp = list_next(&sp->rs_lostatelist, lsp)) {
8428 lsp->rls_locks_cleaned = TRUE;
8429
8430 /* Was this already taken care of above? */
8431 if (!close_of_client &&
8432 sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8433 (void) cleanlocks(sp->rs_finfo->rf_vp,
8434 lsp->rls_locker->rl_pid,
8435 lsp->rls_locker->rl_client->rc_sysidt);
8436 }
8437
8438 /*
8439 * Release any shrlocks associated with this open state ID.
8440 * This must be done before the rfs4_state gets marked closed.
8441 */
8442 if (sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8443 (void) rfs4_unshare(sp);
8444
8445 if (sp->rs_open_access) {
8446 rfs4_dbe_lock(fp->rf_dbe);
8447
8448 /*
8449 * Decrement the count for each access and deny bit that this
8450 * state has contributed to the file.
8451 * If the file counts go to zero
8452 * clear the appropriate bit in the appropriate mask.
8453 */
8454 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) {
8455 fp->rf_access_read--;
8456 fflags |= FREAD;
8457 if (fp->rf_access_read == 0)
8458 fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
8459 }
8460 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) {
8461 fp->rf_access_write--;
8462 fflags |= FWRITE;
8463 if (fp->rf_access_write == 0)
8464 fp->rf_share_access &=
8465 ~OPEN4_SHARE_ACCESS_WRITE;
8466 }
8467 if (sp->rs_open_deny & OPEN4_SHARE_DENY_READ) {
8468 fp->rf_deny_read--;
8469 if (fp->rf_deny_read == 0)
8470 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
8471 }
8472 if (sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) {
8473 fp->rf_deny_write--;
8474 if (fp->rf_deny_write == 0)
8475 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
8476 }
8477
8478 (void) VOP_CLOSE(fp->rf_vp, fflags, 1, (offset_t)0, cr, NULL);
8479
8480 rfs4_dbe_unlock(fp->rf_dbe);
8481
8482 sp->rs_open_access = 0;
8483 sp->rs_open_deny = 0;
8484 }
8485 }
8486
8487 /*
8488 * lock_denied: Fill in a LOCK4deneid structure given an flock64 structure.
8489 */
8490 static nfsstat4
8491 lock_denied(LOCK4denied *dp, struct flock64 *flk)
8492 {
8493 rfs4_lockowner_t *lo;
8494 rfs4_client_t *cp;
8495 uint32_t len;
8496
8497 lo = rfs4_findlockowner_by_pid(flk->l_pid);
8498 if (lo != NULL) {
8499 cp = lo->rl_client;
8500 if (rfs4_lease_expired(cp)) {
8501 rfs4_lockowner_rele(lo);
8502 rfs4_dbe_hold(cp->rc_dbe);
8503 rfs4_client_close(cp);
8504 return (NFS4ERR_EXPIRED);
8505 }
8506 dp->owner.clientid = lo->rl_owner.clientid;
8507 len = lo->rl_owner.owner_len;
8508 dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8509 bcopy(lo->rl_owner.owner_val, dp->owner.owner_val, len);
8510 dp->owner.owner_len = len;
8511 rfs4_lockowner_rele(lo);
8512 goto finish;
8513 }
8514
8515 /*
8516 * Its not a NFS4 lock. We take advantage that the upper 32 bits
8517 * of the client id contain the boot time for a NFS4 lock. So we
8518 * fabricate and identity by setting clientid to the sysid, and
8519 * the lock owner to the pid.
8520 */
8521 dp->owner.clientid = flk->l_sysid;
8522 len = sizeof (pid_t);
8523 dp->owner.owner_len = len;
8524 dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8525 bcopy(&flk->l_pid, dp->owner.owner_val, len);
8526 finish:
8527 dp->offset = flk->l_start;
8528 dp->length = flk->l_len;
8529
8530 if (flk->l_type == F_RDLCK)
8531 dp->locktype = READ_LT;
8532 else if (flk->l_type == F_WRLCK)
8533 dp->locktype = WRITE_LT;
8534 else
8535 return (NFS4ERR_INVAL); /* no mapping from POSIX ltype to v4 */
8536
8537 return (NFS4_OK);
8538 }
8539
8540 /*
8541 * The NFSv4.0 LOCK operation does not support the blocking lock (at the
8542 * NFSv4.0 protocol level) so the client needs to resend the LOCK request in a
8543 * case the lock is denied by the NFSv4.0 server. NFSv4.0 clients are prepared
8544 * for that (obviously); they are sending the LOCK requests with some delays
8545 * between the attempts. See nfs4frlock() and nfs4_block_and_wait() for the
8546 * locking and delay implementation at the client side.
8547 *
8548 * To make the life of the clients easier, the NFSv4.0 server tries to do some
8549 * fast retries on its own (the for loop below) in a hope the lock will be
8550 * available soon. And if not, the client won't need to resend the LOCK
8551 * requests so fast to check the lock availability. This basically saves some
8552 * network traffic and tries to make sure the client gets the lock ASAP.
8553 */
8554 static int
8555 setlock(vnode_t *vp, struct flock64 *flock, int flag, cred_t *cred)
8556 {
8557 int error;
8558 struct flock64 flk;
8559 int i;
8560 clock_t delaytime;
8561 int cmd;
8562 int spin_cnt = 0;
8563
8564 cmd = nbl_need_check(vp) ? F_SETLK_NBMAND : F_SETLK;
8565 retry:
8566 delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
8567
8568 for (i = 0; i < rfs4_maxlock_tries; i++) {
8569 LOCK_PRINT(rfs4_debug, "setlock", cmd, flock);
8570 error = VOP_FRLOCK(vp, cmd,
8571 flock, flag, (u_offset_t)0, NULL, cred, NULL);
8572
8573 if (error != EAGAIN && error != EACCES)
8574 break;
8575
8576 if (i < rfs4_maxlock_tries - 1) {
8577 delay(delaytime);
8578 delaytime *= 2;
8579 }
8580 }
8581
8582 if (error == EAGAIN || error == EACCES) {
8583 /* Get the owner of the lock */
8584 flk = *flock;
8585 LOCK_PRINT(rfs4_debug, "setlock", F_GETLK, &flk);
8586 if (VOP_FRLOCK(vp, F_GETLK, &flk, flag, 0, NULL, cred,
8587 NULL) == 0) {
8588 /*
8589 * There's a race inherent in the current VOP_FRLOCK
8590 * design where:
8591 * a: "other guy" takes a lock that conflicts with a
8592 * lock we want
8593 * b: we attempt to take our lock (non-blocking) and
8594 * the attempt fails.
8595 * c: "other guy" releases the conflicting lock
8596 * d: we ask what lock conflicts with the lock we want,
8597 * getting F_UNLCK (no lock blocks us)
8598 *
8599 * If we retry the non-blocking lock attempt in this
8600 * case (restart at step 'b') there's some possibility
8601 * that many such attempts might fail. However a test
8602 * designed to actually provoke this race shows that
8603 * the vast majority of cases require no retry, and
8604 * only a few took as many as three retries. Here's
8605 * the test outcome:
8606 *
8607 * number of retries how many times we needed
8608 * that many retries
8609 * 0 79461
8610 * 1 862
8611 * 2 49
8612 * 3 5
8613 *
8614 * Given those empirical results, we arbitrarily limit
8615 * the retry count to ten.
8616 *
8617 * If we actually make to ten retries and give up,
8618 * nothing catastrophic happens, but we're unable to
8619 * return the information about the conflicting lock to
8620 * the NFS client. That's an acceptable trade off vs.
8621 * letting this retry loop run forever.
8622 */
8623 if (flk.l_type == F_UNLCK) {
8624 if (spin_cnt++ < 10) {
8625 /* No longer locked, retry */
8626 goto retry;
8627 }
8628 } else {
8629 *flock = flk;
8630 LOCK_PRINT(rfs4_debug, "setlock(blocking lock)",
8631 F_GETLK, &flk);
8632 }
8633 }
8634 }
8635
8636 return (error);
8637 }
8638
8639 /*ARGSUSED*/
8640 static nfsstat4
8641 rfs4_do_lock(rfs4_lo_state_t *lsp, nfs_lock_type4 locktype,
8642 offset4 offset, length4 length, cred_t *cred, nfs_resop4 *resop)
8643 {
8644 nfsstat4 status;
8645 rfs4_lockowner_t *lo = lsp->rls_locker;
8646 rfs4_state_t *sp = lsp->rls_state;
8647 struct flock64 flock;
8648 int16_t ltype;
8649 int flag;
8650 int error;
8651 sysid_t sysid;
8652 LOCK4res *lres;
8653 vnode_t *vp;
8654
8655 if (rfs4_lease_expired(lo->rl_client)) {
8656 return (NFS4ERR_EXPIRED);
8657 }
8658
8659 if ((status = rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
8660 return (status);
8661
8662 /* Check for zero length. To lock to end of file use all ones for V4 */
8663 if (length == 0)
8664 return (NFS4ERR_INVAL);
8665 else if (length == (length4)(~0))
8666 length = 0; /* Posix to end of file */
8667
8668 retry:
8669 rfs4_dbe_lock(sp->rs_dbe);
8670 if (sp->rs_closed == TRUE) {
8671 rfs4_dbe_unlock(sp->rs_dbe);
8672 return (NFS4ERR_OLD_STATEID);
8673 }
8674
8675 if (resop->resop != OP_LOCKU) {
8676 switch (locktype) {
8677 case READ_LT:
8678 case READW_LT:
8679 if ((sp->rs_share_access
8680 & OPEN4_SHARE_ACCESS_READ) == 0) {
8681 rfs4_dbe_unlock(sp->rs_dbe);
8682
8683 return (NFS4ERR_OPENMODE);
8684 }
8685 ltype = F_RDLCK;
8686 break;
8687 case WRITE_LT:
8688 case WRITEW_LT:
8689 if ((sp->rs_share_access
8690 & OPEN4_SHARE_ACCESS_WRITE) == 0) {
8691 rfs4_dbe_unlock(sp->rs_dbe);
8692
8693 return (NFS4ERR_OPENMODE);
8694 }
8695 ltype = F_WRLCK;
8696 break;
8697 }
8698 } else
8699 ltype = F_UNLCK;
8700
8701 flock.l_type = ltype;
8702 flock.l_whence = 0; /* SEEK_SET */
8703 flock.l_start = offset;
8704 flock.l_len = length;
8705 flock.l_sysid = sysid;
8706 flock.l_pid = lsp->rls_locker->rl_pid;
8707
8708 /* Note that length4 is uint64_t but l_len and l_start are off64_t */
8709 if (flock.l_len < 0 || flock.l_start < 0) {
8710 rfs4_dbe_unlock(sp->rs_dbe);
8711 return (NFS4ERR_INVAL);
8712 }
8713
8714 /*
8715 * N.B. FREAD has the same value as OPEN4_SHARE_ACCESS_READ and
8716 * FWRITE has the same value as OPEN4_SHARE_ACCESS_WRITE.
8717 */
8718 flag = (int)sp->rs_share_access | F_REMOTELOCK;
8719
8720 vp = sp->rs_finfo->rf_vp;
8721 VN_HOLD(vp);
8722
8723 /*
8724 * We need to unlock sp before we call the underlying filesystem to
8725 * acquire the file lock.
8726 */
8727 rfs4_dbe_unlock(sp->rs_dbe);
8728
8729 error = setlock(vp, &flock, flag, cred);
8730
8731 /*
8732 * Make sure the file is still open. In a case the file was closed in
8733 * the meantime, clean the lock we acquired using the setlock() call
8734 * above, and return the appropriate error.
8735 */
8736 rfs4_dbe_lock(sp->rs_dbe);
8737 if (sp->rs_closed == TRUE) {
8738 cleanlocks(vp, lsp->rls_locker->rl_pid, sysid);
8739 rfs4_dbe_unlock(sp->rs_dbe);
8740
8741 VN_RELE(vp);
8742
8743 return (NFS4ERR_OLD_STATEID);
8744 }
8745 rfs4_dbe_unlock(sp->rs_dbe);
8746
8747 VN_RELE(vp);
8748
8749 if (error == 0) {
8750 rfs4_dbe_lock(lsp->rls_dbe);
8751 next_stateid(&lsp->rls_lockid);
8752 rfs4_dbe_unlock(lsp->rls_dbe);
8753 }
8754
8755 /*
8756 * N.B. We map error values to nfsv4 errors. This is differrent
8757 * than puterrno4 routine.
8758 */
8759 switch (error) {
8760 case 0:
8761 status = NFS4_OK;
8762 break;
8763 case EAGAIN:
8764 case EACCES: /* Old value */
8765 /* Can only get here if op is OP_LOCK */
8766 ASSERT(resop->resop == OP_LOCK);
8767 lres = &resop->nfs_resop4_u.oplock;
8768 status = NFS4ERR_DENIED;
8769 if (lock_denied(&lres->LOCK4res_u.denied, &flock)
8770 == NFS4ERR_EXPIRED)
8771 goto retry;
8772 break;
8773 case ENOLCK:
8774 status = NFS4ERR_DELAY;
8775 break;
8776 case EOVERFLOW:
8777 status = NFS4ERR_INVAL;
8778 break;
8779 case EINVAL:
8780 status = NFS4ERR_NOTSUPP;
8781 break;
8782 default:
8783 status = NFS4ERR_SERVERFAULT;
8784 break;
8785 }
8786
8787 return (status);
8788 }
8789
8790 /*ARGSUSED*/
8791 void
8792 rfs4_op_lock(nfs_argop4 *argop, nfs_resop4 *resop,
8793 struct svc_req *req, struct compound_state *cs)
8794 {
8795 LOCK4args *args = &argop->nfs_argop4_u.oplock;
8796 LOCK4res *resp = &resop->nfs_resop4_u.oplock;
8797 nfsstat4 status;
8798 stateid4 *stateid;
8799 rfs4_lockowner_t *lo;
8800 rfs4_client_t *cp;
8801 rfs4_state_t *sp = NULL;
8802 rfs4_lo_state_t *lsp = NULL;
8803 bool_t ls_sw_held = FALSE;
8804 bool_t create = TRUE;
8805 bool_t lcreate = TRUE;
8806 bool_t dup_lock = FALSE;
8807 int rc;
8808
8809 DTRACE_NFSV4_2(op__lock__start, struct compound_state *, cs,
8810 LOCK4args *, args);
8811
8812 if (cs->vp == NULL) {
8813 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8814 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8815 cs, LOCK4res *, resp);
8816 return;
8817 }
8818
8819 if (args->locker.new_lock_owner) {
8820 /* Create a new lockowner for this instance */
8821 open_to_lock_owner4 *olo = &args->locker.locker4_u.open_owner;
8822
8823 NFS4_DEBUG(rfs4_debug, (CE_NOTE, "Creating new lock owner"));
8824
8825 stateid = &olo->open_stateid;
8826 status = rfs4_get_state(stateid, &sp, RFS4_DBS_VALID);
8827 if (status != NFS4_OK) {
8828 NFS4_DEBUG(rfs4_debug,
8829 (CE_NOTE, "Get state failed in lock %d", status));
8830 *cs->statusp = resp->status = status;
8831 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8832 cs, LOCK4res *, resp);
8833 return;
8834 }
8835
8836 /* Ensure specified filehandle matches */
8837 if (cs->vp != sp->rs_finfo->rf_vp) {
8838 rfs4_state_rele(sp);
8839 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8840 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8841 cs, LOCK4res *, resp);
8842 return;
8843 }
8844
8845 /* hold off other access to open_owner while we tinker */
8846 rfs4_sw_enter(&sp->rs_owner->ro_sw);
8847
8848 switch (rc = rfs4_check_stateid_seqid(sp, stateid)) {
8849 case NFS4_CHECK_STATEID_OLD:
8850 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8851 goto end;
8852 case NFS4_CHECK_STATEID_BAD:
8853 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8854 goto end;
8855 case NFS4_CHECK_STATEID_EXPIRED:
8856 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8857 goto end;
8858 case NFS4_CHECK_STATEID_UNCONFIRMED:
8859 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8860 goto end;
8861 case NFS4_CHECK_STATEID_CLOSED:
8862 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8863 goto end;
8864 case NFS4_CHECK_STATEID_OKAY:
8865 case NFS4_CHECK_STATEID_REPLAY:
8866 switch (rfs4_check_olo_seqid(olo->open_seqid,
8867 sp->rs_owner, resop)) {
8868 case NFS4_CHKSEQ_OKAY:
8869 if (rc == NFS4_CHECK_STATEID_OKAY)
8870 break;
8871 /*
8872 * This is replayed stateid; if seqid
8873 * matches next expected, then client
8874 * is using wrong seqid.
8875 */
8876 /* FALLTHROUGH */
8877 case NFS4_CHKSEQ_BAD:
8878 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8879 goto end;
8880 case NFS4_CHKSEQ_REPLAY:
8881 /* This is a duplicate LOCK request */
8882 dup_lock = TRUE;
8883
8884 /*
8885 * For a duplicate we do not want to
8886 * create a new lockowner as it should
8887 * already exist.
8888 * Turn off the lockowner create flag.
8889 */
8890 lcreate = FALSE;
8891 }
8892 break;
8893 }
8894
8895 lo = rfs4_findlockowner(&olo->lock_owner, &lcreate);
8896 if (lo == NULL) {
8897 NFS4_DEBUG(rfs4_debug,
8898 (CE_NOTE, "rfs4_op_lock: no lock owner"));
8899 *cs->statusp = resp->status = NFS4ERR_RESOURCE;
8900 goto end;
8901 }
8902
8903 lsp = rfs4_findlo_state_by_owner(lo, sp, &create);
8904 if (lsp == NULL) {
8905 rfs4_update_lease(sp->rs_owner->ro_client);
8906 /*
8907 * Only update theh open_seqid if this is not
8908 * a duplicate request
8909 */
8910 if (dup_lock == FALSE) {
8911 rfs4_update_open_sequence(sp->rs_owner);
8912 }
8913
8914 NFS4_DEBUG(rfs4_debug,
8915 (CE_NOTE, "rfs4_op_lock: no state"));
8916 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
8917 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8918 rfs4_lockowner_rele(lo);
8919 goto end;
8920 }
8921
8922 /*
8923 * This is the new_lock_owner branch and the client is
8924 * supposed to be associating a new lock_owner with
8925 * the open file at this point. If we find that a
8926 * lock_owner/state association already exists and a
8927 * successful LOCK request was returned to the client,
8928 * an error is returned to the client since this is
8929 * not appropriate. The client should be using the
8930 * existing lock_owner branch.
8931 */
8932 if (dup_lock == FALSE && create == FALSE) {
8933 if (lsp->rls_lock_completed == TRUE) {
8934 *cs->statusp =
8935 resp->status = NFS4ERR_BAD_SEQID;
8936 rfs4_lockowner_rele(lo);
8937 goto end;
8938 }
8939 }
8940
8941 rfs4_update_lease(sp->rs_owner->ro_client);
8942
8943 /*
8944 * Only update theh open_seqid if this is not
8945 * a duplicate request
8946 */
8947 if (dup_lock == FALSE) {
8948 rfs4_update_open_sequence(sp->rs_owner);
8949 }
8950
8951 /*
8952 * If this is a duplicate lock request, just copy the
8953 * previously saved reply and return.
8954 */
8955 if (dup_lock == TRUE) {
8956 /* verify that lock_seqid's match */
8957 if (lsp->rls_seqid != olo->lock_seqid) {
8958 NFS4_DEBUG(rfs4_debug,
8959 (CE_NOTE, "rfs4_op_lock: Dup-Lock seqid bad"
8960 "lsp->seqid=%d old->seqid=%d",
8961 lsp->rls_seqid, olo->lock_seqid));
8962 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8963 } else {
8964 rfs4_copy_reply(resop, &lsp->rls_reply);
8965 /*
8966 * Make sure to copy the just
8967 * retrieved reply status into the
8968 * overall compound status
8969 */
8970 *cs->statusp = resp->status;
8971 }
8972 rfs4_lockowner_rele(lo);
8973 goto end;
8974 }
8975
8976 rfs4_dbe_lock(lsp->rls_dbe);
8977
8978 /* Make sure to update the lock sequence id */
8979 lsp->rls_seqid = olo->lock_seqid;
8980
8981 NFS4_DEBUG(rfs4_debug,
8982 (CE_NOTE, "Lock seqid established as %d", lsp->rls_seqid));
8983
8984 /*
8985 * This is used to signify the newly created lockowner
8986 * stateid and its sequence number. The checks for
8987 * sequence number and increment don't occur on the
8988 * very first lock request for a lockowner.
8989 */
8990 lsp->rls_skip_seqid_check = TRUE;
8991
8992 /* hold off other access to lsp while we tinker */
8993 rfs4_sw_enter(&lsp->rls_sw);
8994 ls_sw_held = TRUE;
8995
8996 rfs4_dbe_unlock(lsp->rls_dbe);
8997
8998 rfs4_lockowner_rele(lo);
8999 } else {
9000 stateid = &args->locker.locker4_u.lock_owner.lock_stateid;
9001 /* get lsp and hold the lock on the underlying file struct */
9002 if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE))
9003 != NFS4_OK) {
9004 *cs->statusp = resp->status = status;
9005 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9006 cs, LOCK4res *, resp);
9007 return;
9008 }
9009 create = FALSE; /* We didn't create lsp */
9010
9011 /* Ensure specified filehandle matches */
9012 if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
9013 rfs4_lo_state_rele(lsp, TRUE);
9014 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9015 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9016 cs, LOCK4res *, resp);
9017 return;
9018 }
9019
9020 /* hold off other access to lsp while we tinker */
9021 rfs4_sw_enter(&lsp->rls_sw);
9022 ls_sw_held = TRUE;
9023
9024 switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
9025 /*
9026 * The stateid looks like it was okay (expected to be
9027 * the next one)
9028 */
9029 case NFS4_CHECK_STATEID_OKAY:
9030 /*
9031 * The sequence id is now checked. Determine
9032 * if this is a replay or if it is in the
9033 * expected (next) sequence. In the case of a
9034 * replay, there are two replay conditions
9035 * that may occur. The first is the normal
9036 * condition where a LOCK is done with a
9037 * NFS4_OK response and the stateid is
9038 * updated. That case is handled below when
9039 * the stateid is identified as a REPLAY. The
9040 * second is the case where an error is
9041 * returned, like NFS4ERR_DENIED, and the
9042 * sequence number is updated but the stateid
9043 * is not updated. This second case is dealt
9044 * with here. So it may seem odd that the
9045 * stateid is okay but the sequence id is a
9046 * replay but it is okay.
9047 */
9048 switch (rfs4_check_lock_seqid(
9049 args->locker.locker4_u.lock_owner.lock_seqid,
9050 lsp, resop)) {
9051 case NFS4_CHKSEQ_REPLAY:
9052 if (resp->status != NFS4_OK) {
9053 /*
9054 * Here is our replay and need
9055 * to verify that the last
9056 * response was an error.
9057 */
9058 *cs->statusp = resp->status;
9059 goto end;
9060 }
9061 /*
9062 * This is done since the sequence id
9063 * looked like a replay but it didn't
9064 * pass our check so a BAD_SEQID is
9065 * returned as a result.
9066 */
9067 /*FALLTHROUGH*/
9068 case NFS4_CHKSEQ_BAD:
9069 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9070 goto end;
9071 case NFS4_CHKSEQ_OKAY:
9072 /* Everything looks okay move ahead */
9073 break;
9074 }
9075 break;
9076 case NFS4_CHECK_STATEID_OLD:
9077 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9078 goto end;
9079 case NFS4_CHECK_STATEID_BAD:
9080 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9081 goto end;
9082 case NFS4_CHECK_STATEID_EXPIRED:
9083 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9084 goto end;
9085 case NFS4_CHECK_STATEID_CLOSED:
9086 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9087 goto end;
9088 case NFS4_CHECK_STATEID_REPLAY:
9089 switch (rfs4_check_lock_seqid(
9090 args->locker.locker4_u.lock_owner.lock_seqid,
9091 lsp, resop)) {
9092 case NFS4_CHKSEQ_OKAY:
9093 /*
9094 * This is a replayed stateid; if
9095 * seqid matches the next expected,
9096 * then client is using wrong seqid.
9097 */
9098 case NFS4_CHKSEQ_BAD:
9099 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9100 goto end;
9101 case NFS4_CHKSEQ_REPLAY:
9102 rfs4_update_lease(lsp->rls_locker->rl_client);
9103 *cs->statusp = status = resp->status;
9104 goto end;
9105 }
9106 break;
9107 default:
9108 ASSERT(FALSE);
9109 break;
9110 }
9111
9112 rfs4_update_lock_sequence(lsp);
9113 rfs4_update_lease(lsp->rls_locker->rl_client);
9114 }
9115
9116 /*
9117 * NFS4 only allows locking on regular files, so
9118 * verify type of object.
9119 */
9120 if (cs->vp->v_type != VREG) {
9121 if (cs->vp->v_type == VDIR)
9122 status = NFS4ERR_ISDIR;
9123 else
9124 status = NFS4ERR_INVAL;
9125 goto out;
9126 }
9127
9128 cp = lsp->rls_state->rs_owner->ro_client;
9129
9130 if (rfs4_clnt_in_grace(cp) && !args->reclaim) {
9131 status = NFS4ERR_GRACE;
9132 goto out;
9133 }
9134
9135 if (rfs4_clnt_in_grace(cp) && args->reclaim && !cp->rc_can_reclaim) {
9136 status = NFS4ERR_NO_GRACE;
9137 goto out;
9138 }
9139
9140 if (!rfs4_clnt_in_grace(cp) && args->reclaim) {
9141 status = NFS4ERR_NO_GRACE;
9142 goto out;
9143 }
9144
9145 if (lsp->rls_state->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE)
9146 cs->deleg = TRUE;
9147
9148 status = rfs4_do_lock(lsp, args->locktype,
9149 args->offset, args->length, cs->cr, resop);
9150
9151 out:
9152 lsp->rls_skip_seqid_check = FALSE;
9153
9154 *cs->statusp = resp->status = status;
9155
9156 if (status == NFS4_OK) {
9157 resp->LOCK4res_u.lock_stateid = lsp->rls_lockid.stateid;
9158 lsp->rls_lock_completed = TRUE;
9159 }
9160 /*
9161 * Only update the "OPEN" response here if this was a new
9162 * lock_owner
9163 */
9164 if (sp)
9165 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
9166
9167 rfs4_update_lock_resp(lsp, resop);
9168
9169 end:
9170 if (lsp) {
9171 if (ls_sw_held)
9172 rfs4_sw_exit(&lsp->rls_sw);
9173 /*
9174 * If an sp obtained, then the lsp does not represent
9175 * a lock on the file struct.
9176 */
9177 if (sp != NULL)
9178 rfs4_lo_state_rele(lsp, FALSE);
9179 else
9180 rfs4_lo_state_rele(lsp, TRUE);
9181 }
9182 if (sp) {
9183 rfs4_sw_exit(&sp->rs_owner->ro_sw);
9184 rfs4_state_rele(sp);
9185 }
9186
9187 DTRACE_NFSV4_2(op__lock__done, struct compound_state *, cs,
9188 LOCK4res *, resp);
9189 }
9190
9191 /* free function for LOCK/LOCKT */
9192 static void
9193 lock_denied_free(nfs_resop4 *resop)
9194 {
9195 LOCK4denied *dp = NULL;
9196
9197 switch (resop->resop) {
9198 case OP_LOCK:
9199 if (resop->nfs_resop4_u.oplock.status == NFS4ERR_DENIED)
9200 dp = &resop->nfs_resop4_u.oplock.LOCK4res_u.denied;
9201 break;
9202 case OP_LOCKT:
9203 if (resop->nfs_resop4_u.oplockt.status == NFS4ERR_DENIED)
9204 dp = &resop->nfs_resop4_u.oplockt.denied;
9205 break;
9206 default:
9207 break;
9208 }
9209
9210 if (dp)
9211 kmem_free(dp->owner.owner_val, dp->owner.owner_len);
9212 }
9213
/*
 * LOCKU operation handler.
 *
 * Looks up the lock-owner state for the supplied lock stateid, verifies
 * that it refers to the same vnode as the current filehandle, validates
 * the stateid and lock seqid, and then passes the request to
 * rfs4_do_lock().  The final status is stored in both resp->status and
 * *cs->statusp.
 *
 * Exit paths:
 *   - early returns (before lsp->rls_sw is entered) fire the "done"
 *     probe themselves;
 *   - "end" drops rls_sw and the lsp reference without recording a reply;
 *   - "out" additionally records the status/stateid in the response and
 *     calls rfs4_update_lock_resp() before falling through to "end".
 */
/*ARGSUSED*/
void
rfs4_op_locku(nfs_argop4 *argop, nfs_resop4 *resop,
    struct svc_req *req, struct compound_state *cs)
{
	LOCKU4args *args = &argop->nfs_argop4_u.oplocku;
	LOCKU4res *resp = &resop->nfs_resop4_u.oplocku;
	nfsstat4 status;
	stateid4 *stateid = &args->lock_stateid;
	rfs4_lo_state_t *lsp;

	DTRACE_NFSV4_2(op__locku__start, struct compound_state *, cs,
	    LOCKU4args *, args);

	/* A current filehandle is required */
	if (cs->vp == NULL) {
		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
		DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
		    LOCKU4res *, resp);
		return;
	}

	/*
	 * Find the lock-owner state for this stateid.  The reference taken
	 * here is dropped with rfs4_lo_state_rele(lsp, TRUE) on every
	 * subsequent exit path.
	 */
	if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE)) != NFS4_OK) {
		*cs->statusp = resp->status = status;
		DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
		    LOCKU4res *, resp);
		return;
	}

	/* Ensure specified filehandle matches */
	if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
		rfs4_lo_state_rele(lsp, TRUE);
		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
		DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
		    LOCKU4res *, resp);
		return;
	}

	/* hold off other access to lsp while we tinker */
	rfs4_sw_enter(&lsp->rls_sw);

	switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
	case NFS4_CHECK_STATEID_OKAY:
		/* Current stateid: the lock seqid must be the expected one */
		if (rfs4_check_lock_seqid(args->seqid, lsp, resop)
		    != NFS4_CHKSEQ_OKAY) {
			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
			goto end;
		}
		break;
	case NFS4_CHECK_STATEID_OLD:
		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
		goto end;
	case NFS4_CHECK_STATEID_BAD:
		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
		goto end;
	case NFS4_CHECK_STATEID_EXPIRED:
		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
		goto end;
	case NFS4_CHECK_STATEID_CLOSED:
		/* A closed stateid is reported to the client as old */
		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
		goto end;
	case NFS4_CHECK_STATEID_REPLAY:
		switch (rfs4_check_lock_seqid(args->seqid, lsp, resop)) {
		case NFS4_CHKSEQ_OKAY:
				/*
				 * This is a replayed stateid; if
				 * seqid matches the next expected,
				 * then client is using wrong seqid.
				 */
		case NFS4_CHKSEQ_BAD:
			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
			goto end;
		case NFS4_CHKSEQ_REPLAY:
			/*
			 * Replayed request: renew the lease and return the
			 * status already present in resp (presumably the
			 * saved reply filled in by rfs4_check_lock_seqid();
			 * confirm against that routine).
			 */
			rfs4_update_lease(lsp->rls_locker->rl_client);
			*cs->statusp = status = resp->status;
			goto end;
		}
		break;
	default:
		ASSERT(FALSE);
		break;
	}

	/* The request is accepted as new: advance the seqid, renew lease */
	rfs4_update_lock_sequence(lsp);
	rfs4_update_lease(lsp->rls_locker->rl_client);

	/*
	 * NFS4 only allows locking on regular files, so
	 * verify type of object.
	 */
	if (cs->vp->v_type != VREG) {
		if (cs->vp->v_type == VDIR)
			status = NFS4ERR_ISDIR;
		else
			status = NFS4ERR_INVAL;
		goto out;
	}

	/* No unlocking while the client is still in its grace period */
	if (rfs4_clnt_in_grace(lsp->rls_state->rs_owner->ro_client)) {
		status = NFS4ERR_GRACE;
		goto out;
	}

	status = rfs4_do_lock(lsp, args->locktype,
	    args->offset, args->length, cs->cr, resop);

out:
	*cs->statusp = resp->status = status;

	/* Only a successful reply carries the lock stateid */
	if (status == NFS4_OK)
		resp->lock_stateid = lsp->rls_lockid.stateid;

	rfs4_update_lock_resp(lsp, resop);

end:
	rfs4_sw_exit(&lsp->rls_sw);
	rfs4_lo_state_rele(lsp, TRUE);

	DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
	    LOCKU4res *, resp);
}
9334
9335 /*
9336 * LOCKT is a best effort routine, the client can not be guaranteed that
9337 * the status return is still in effect by the time the reply is received.
9338 * They are numerous race conditions in this routine, but we are not required
9339 * and can not be accurate.
9340 */
9341 /*ARGSUSED*/
9342 void
9343 rfs4_op_lockt(nfs_argop4 *argop, nfs_resop4 *resop,
9344 struct svc_req *req, struct compound_state *cs)
9345 {
9346 LOCKT4args *args = &argop->nfs_argop4_u.oplockt;
9347 LOCKT4res *resp = &resop->nfs_resop4_u.oplockt;
9348 rfs4_lockowner_t *lo;
9349 rfs4_client_t *cp;
9350 bool_t create = FALSE;
9351 struct flock64 flk;
9352 int error;
9353 int flag = FREAD | FWRITE;
9354 int ltype;
9355 length4 posix_length;
9356 sysid_t sysid;
9357 pid_t pid;
9358
9359 DTRACE_NFSV4_2(op__lockt__start, struct compound_state *, cs,
9360 LOCKT4args *, args);
9361
9362 if (cs->vp == NULL) {
9363 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9364 goto out;
9365 }
9366
9367 /*
9368 * NFS4 only allows locking on regular files, so
9369 * verify type of object.
9370 */
9371 if (cs->vp->v_type != VREG) {
9372 if (cs->vp->v_type == VDIR)
9373 *cs->statusp = resp->status = NFS4ERR_ISDIR;
9374 else
9375 *cs->statusp = resp->status = NFS4ERR_INVAL;
9376 goto out;
9377 }
9378
9379 /*
9380 * Check out the clientid to ensure the server knows about it
9381 * so that we correctly inform the client of a server reboot.
9382 */
9383 if ((cp = rfs4_findclient_by_id(args->owner.clientid, FALSE))
9384 == NULL) {
9385 *cs->statusp = resp->status =
9386 rfs4_check_clientid(&args->owner.clientid, 0);
9387 goto out;
9388 }
9389 if (rfs4_lease_expired(cp)) {
9390 rfs4_client_close(cp);
9391 /*
9392 * Protocol doesn't allow returning NFS4ERR_STALE as
9393 * other operations do on this check so STALE_CLIENTID
9394 * is returned instead
9395 */
9396 *cs->statusp = resp->status = NFS4ERR_STALE_CLIENTID;
9397 goto out;
9398 }
9399
9400 if (rfs4_clnt_in_grace(cp) && !(cp->rc_can_reclaim)) {
9401 *cs->statusp = resp->status = NFS4ERR_GRACE;
9402 rfs4_client_rele(cp);
9403 goto out;
9404 }
9405 rfs4_client_rele(cp);
9406
9407 resp->status = NFS4_OK;
9408
9409 switch (args->locktype) {
9410 case READ_LT:
9411 case READW_LT:
9412 ltype = F_RDLCK;
9413 break;
9414 case WRITE_LT:
9415 case WRITEW_LT:
9416 ltype = F_WRLCK;
9417 break;
9418 }
9419
9420 posix_length = args->length;
9421 /* Check for zero length. To lock to end of file use all ones for V4 */
9422 if (posix_length == 0) {
9423 *cs->statusp = resp->status = NFS4ERR_INVAL;
9424 goto out;
9425 } else if (posix_length == (length4)(~0)) {
9426 posix_length = 0; /* Posix to end of file */
9427 }
9428
9429 /* Find or create a lockowner */
9430 lo = rfs4_findlockowner(&args->owner, &create);
9431
9432 if (lo) {
9433 pid = lo->rl_pid;
9434 if ((resp->status =
9435 rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
9436 goto err;
9437 } else {
9438 pid = 0;
9439 sysid = lockt_sysid;
9440 }
9441 retry:
9442 flk.l_type = ltype;
9443 flk.l_whence = 0; /* SEEK_SET */
9444 flk.l_start = args->offset;
9445 flk.l_len = posix_length;
9446 flk.l_sysid = sysid;
9447 flk.l_pid = pid;
9448 flag |= F_REMOTELOCK;
9449
9450 LOCK_PRINT(rfs4_debug, "rfs4_op_lockt", F_GETLK, &flk);
9451
9452 /* Note that length4 is uint64_t but l_len and l_start are off64_t */
9453 if (flk.l_len < 0 || flk.l_start < 0) {
9454 resp->status = NFS4ERR_INVAL;
9455 goto err;
9456 }
9457 error = VOP_FRLOCK(cs->vp, F_GETLK, &flk, flag, (u_offset_t)0,
9458 NULL, cs->cr, NULL);
9459
9460 /*
9461 * N.B. We map error values to nfsv4 errors. This is differrent
9462 * than puterrno4 routine.
9463 */
9464 switch (error) {
9465 case 0:
9466 if (flk.l_type == F_UNLCK)
9467 resp->status = NFS4_OK;
9468 else {
9469 if (lock_denied(&resp->denied, &flk) == NFS4ERR_EXPIRED)
9470 goto retry;
9471 resp->status = NFS4ERR_DENIED;
9472 }
9473 break;
9474 case EOVERFLOW:
9475 resp->status = NFS4ERR_INVAL;
9476 break;
9477 case EINVAL:
9478 resp->status = NFS4ERR_NOTSUPP;
9479 break;
9480 default:
9481 cmn_err(CE_WARN, "rfs4_op_lockt: unexpected errno (%d)",
9482 error);
9483 resp->status = NFS4ERR_SERVERFAULT;
9484 break;
9485 }
9486
9487 err:
9488 if (lo)
9489 rfs4_lockowner_rele(lo);
9490 *cs->statusp = resp->status;
9491 out:
9492 DTRACE_NFSV4_2(op__lockt__done, struct compound_state *, cs,
9493 LOCKT4res *, resp);
9494 }
9495
9496 int
9497 rfs4_share(rfs4_state_t *sp, uint32_t access, uint32_t deny)
9498 {
9499 int err;
9500 int cmd;
9501 vnode_t *vp;
9502 struct shrlock shr;
9503 struct shr_locowner shr_loco;
9504 int fflags = 0;
9505
9506 ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9507 ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9508
9509 if (sp->rs_closed)
9510 return (NFS4ERR_OLD_STATEID);
9511
9512 vp = sp->rs_finfo->rf_vp;
9513 ASSERT(vp);
9514
9515 shr.s_access = shr.s_deny = 0;
9516
9517 if (access & OPEN4_SHARE_ACCESS_READ) {
9518 fflags |= FREAD;
9519 shr.s_access |= F_RDACC;
9520 }
9521 if (access & OPEN4_SHARE_ACCESS_WRITE) {
9522 fflags |= FWRITE;
9523 shr.s_access |= F_WRACC;
9524 }
9525 ASSERT(shr.s_access);
9526
9527 if (deny & OPEN4_SHARE_DENY_READ)
9528 shr.s_deny |= F_RDDNY;
9529 if (deny & OPEN4_SHARE_DENY_WRITE)
9530 shr.s_deny |= F_WRDNY;
9531
9532 shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9533 shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9534 shr_loco.sl_pid = shr.s_pid;
9535 shr_loco.sl_id = shr.s_sysid;
9536 shr.s_owner = (caddr_t)&shr_loco;
9537 shr.s_own_len = sizeof (shr_loco);
9538
9539 cmd = nbl_need_check(vp) ? F_SHARE_NBMAND : F_SHARE;
9540
9541 err = VOP_SHRLOCK(vp, cmd, &shr, fflags, CRED(), NULL);
9542 if (err != 0) {
9543 if (err == EAGAIN)
9544 err = NFS4ERR_SHARE_DENIED;
9545 else
9546 err = puterrno4(err);
9547 return (err);
9548 }
9549
9550 sp->rs_share_access |= access;
9551 sp->rs_share_deny |= deny;
9552
9553 return (0);
9554 }
9555
9556 int
9557 rfs4_unshare(rfs4_state_t *sp)
9558 {
9559 int err;
9560 struct shrlock shr;
9561 struct shr_locowner shr_loco;
9562
9563 ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9564
9565 if (sp->rs_closed || sp->rs_share_access == 0)
9566 return (0);
9567
9568 ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9569 ASSERT(sp->rs_finfo->rf_vp);
9570
9571 shr.s_access = shr.s_deny = 0;
9572 shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9573 shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9574 shr_loco.sl_pid = shr.s_pid;
9575 shr_loco.sl_id = shr.s_sysid;
9576 shr.s_owner = (caddr_t)&shr_loco;
9577 shr.s_own_len = sizeof (shr_loco);
9578
9579 err = VOP_SHRLOCK(sp->rs_finfo->rf_vp, F_UNSHARE, &shr, 0, CRED(),
9580 NULL);
9581 if (err != 0) {
9582 err = puterrno4(err);
9583 return (err);
9584 }
9585
9586 sp->rs_share_access = 0;
9587 sp->rs_share_deny = 0;
9588
9589 return (0);
9590
9591 }
9592
9593 static int
9594 rdma_setup_read_data4(READ4args *args, READ4res *rok)
9595 {
9596 struct clist *wcl;
9597 count4 count = rok->data_len;
9598 int wlist_len;
9599
9600 wcl = args->wlist;
9601 if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
9602 return (FALSE);
9603 }
9604 wcl = args->wlist;
9605 rok->wlist_len = wlist_len;
9606 rok->wlist = wcl;
9607 return (TRUE);
9608 }
9609
/*
 * Tunable to disable server referrals.  When non-zero,
 * vn_is_nfs_reparse() reports B_FALSE for every vnode; the default of 0
 * leaves referrals enabled.
 */
int rfs4_no_referrals = 0;
9612
9613 /*
9614 * Find an NFS record in reparse point data.
9615 * Returns 0 for success and <0 or an errno value on failure.
9616 */
9617 int
9618 vn_find_nfs_record(vnode_t *vp, nvlist_t **nvlp, char **svcp, char **datap)
9619 {
9620 int err;
9621 char *stype, *val;
9622 nvlist_t *nvl;
9623 nvpair_t *curr;
9624
9625 if ((nvl = reparse_init()) == NULL)
9626 return (-1);
9627
9628 if ((err = reparse_vnode_parse(vp, nvl)) != 0) {
9629 reparse_free(nvl);
9630 return (err);
9631 }
9632
9633 curr = NULL;
9634 while ((curr = nvlist_next_nvpair(nvl, curr)) != NULL) {
9635 if ((stype = nvpair_name(curr)) == NULL) {
9636 reparse_free(nvl);
9637 return (-2);
9638 }
9639 if (strncasecmp(stype, "NFS", 3) == 0)
9640 break;
9641 }
9642
9643 if ((curr == NULL) ||
9644 (nvpair_value_string(curr, &val))) {
9645 reparse_free(nvl);
9646 return (-3);
9647 }
9648 *nvlp = nvl;
9649 *svcp = stype;
9650 *datap = val;
9651 return (0);
9652 }
9653
9654 int
9655 vn_is_nfs_reparse(vnode_t *vp, cred_t *cr)
9656 {
9657 nvlist_t *nvl;
9658 char *s, *d;
9659
9660 if (rfs4_no_referrals != 0)
9661 return (B_FALSE);
9662
9663 if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9664 return (B_FALSE);
9665
9666 if (vn_find_nfs_record(vp, &nvl, &s, &d) != 0)
9667 return (B_FALSE);
9668
9669 reparse_free(nvl);
9670
9671 return (B_TRUE);
9672 }
9673
9674 /*
9675 * There is a user-level copy of this routine in ref_subr.c.
9676 * Changes should be kept in sync.
9677 */
9678 static int
9679 nfs4_create_components(char *path, component4 *comp4)
9680 {
9681 int slen, plen, ncomp;
9682 char *ori_path, *nxtc, buf[MAXNAMELEN];
9683
9684 if (path == NULL)
9685 return (0);
9686
9687 plen = strlen(path) + 1; /* include the terminator */
9688 ori_path = path;
9689 ncomp = 0;
9690
9691 /* count number of components in the path */
9692 for (nxtc = path; nxtc < ori_path + plen; nxtc++) {
9693 if (*nxtc == '/' || *nxtc == '\0' || *nxtc == '\n') {
9694 if ((slen = nxtc - path) == 0) {
9695 path = nxtc + 1;
9696 continue;
9697 }
9698
9699 if (comp4 != NULL) {
9700 bcopy(path, buf, slen);
9701 buf[slen] = '\0';
9702 (void) str_to_utf8(buf, &comp4[ncomp]);
9703 }
9704
9705 ncomp++; /* 1 valid component */
9706 path = nxtc + 1;
9707 }
9708 if (*nxtc == '\0' || *nxtc == '\n')
9709 break;
9710 }
9711
9712 return (ncomp);
9713 }
9714
9715 /*
9716 * There is a user-level copy of this routine in ref_subr.c.
9717 * Changes should be kept in sync.
9718 */
9719 static int
9720 make_pathname4(char *path, pathname4 *pathname)
9721 {
9722 int ncomp;
9723 component4 *comp4;
9724
9725 if (pathname == NULL)
9726 return (0);
9727
9728 if (path == NULL) {
9729 pathname->pathname4_val = NULL;
9730 pathname->pathname4_len = 0;
9731 return (0);
9732 }
9733
9734 /* count number of components to alloc buffer */
9735 if ((ncomp = nfs4_create_components(path, NULL)) == 0) {
9736 pathname->pathname4_val = NULL;
9737 pathname->pathname4_len = 0;
9738 return (0);
9739 }
9740 comp4 = kmem_zalloc(ncomp * sizeof (component4), KM_SLEEP);
9741
9742 /* copy components into allocated buffer */
9743 ncomp = nfs4_create_components(path, comp4);
9744
9745 pathname->pathname4_val = comp4;
9746 pathname->pathname4_len = ncomp;
9747
9748 return (ncomp);
9749 }
9750
9751 #define xdr_fs_locations4 xdr_fattr4_fs_locations
9752
9753 fs_locations4 *
9754 fetch_referral(vnode_t *vp, cred_t *cr)
9755 {
9756 nvlist_t *nvl;
9757 char *stype, *sdata;
9758 fs_locations4 *result;
9759 char buf[1024];
9760 size_t bufsize;
9761 XDR xdr;
9762 int err;
9763
9764 /*
9765 * Check attrs to ensure it's a reparse point
9766 */
9767 if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9768 return (NULL);
9769
9770 /*
9771 * Look for an NFS record and get the type and data
9772 */
9773 if (vn_find_nfs_record(vp, &nvl, &stype, &sdata) != 0)
9774 return (NULL);
9775
9776 /*
9777 * With the type and data, upcall to get the referral
9778 */
9779 bufsize = sizeof (buf);
9780 bzero(buf, sizeof (buf));
9781 err = reparse_kderef((const char *)stype, (const char *)sdata,
9782 buf, &bufsize);
9783 reparse_free(nvl);
9784
9785 DTRACE_PROBE4(nfs4serv__func__referral__upcall,
9786 char *, stype, char *, sdata, char *, buf, int, err);
9787 if (err) {
9788 cmn_err(CE_NOTE,
9789 "reparsed daemon not running: unable to get referral (%d)",
9790 err);
9791 return (NULL);
9792 }
9793
9794 /*
9795 * We get an XDR'ed record back from the kderef call
9796 */
9797 xdrmem_create(&xdr, buf, bufsize, XDR_DECODE);
9798 result = kmem_alloc(sizeof (fs_locations4), KM_SLEEP);
9799 err = xdr_fs_locations4(&xdr, result);
9800 XDR_DESTROY(&xdr);
9801 if (err != TRUE) {
9802 DTRACE_PROBE1(nfs4serv__func__referral__upcall__xdrfail,
9803 int, err);
9804 return (NULL);
9805 }
9806
9807 /*
9808 * Look at path to recover fs_root, ignoring the leading '/'
9809 */
9810 (void) make_pathname4(vp->v_path, &result->fs_root);
9811
9812 return (result);
9813 }
9814
9815 char *
9816 build_symlink(vnode_t *vp, cred_t *cr, size_t *strsz)
9817 {
9818 fs_locations4 *fsl;
9819 fs_location4 *fs;
9820 char *server, *path, *symbuf;
9821 static char *prefix = "/net/";
9822 int i, size, npaths;
9823 uint_t len;
9824
9825 /* Get the referral */
9826 if ((fsl = fetch_referral(vp, cr)) == NULL)
9827 return (NULL);
9828
9829 /* Deal with only the first location and first server */
9830 fs = &fsl->locations_val[0];
9831 server = utf8_to_str(&fs->server_val[0], &len, NULL);
9832 if (server == NULL) {
9833 rfs4_free_fs_locations4(fsl);
9834 kmem_free(fsl, sizeof (fs_locations4));
9835 return (NULL);
9836 }
9837
9838 /* Figure out size for "/net/" + host + /path/path/path + NULL */
9839 size = strlen(prefix) + len;
9840 for (i = 0; i < fs->rootpath.pathname4_len; i++)
9841 size += fs->rootpath.pathname4_val[i].utf8string_len + 1;
9842
9843 /* Allocate the symlink buffer and fill it */
9844 symbuf = kmem_zalloc(size, KM_SLEEP);
9845 (void) strcat(symbuf, prefix);
9846 (void) strcat(symbuf, server);
9847 kmem_free(server, len);
9848
9849 npaths = 0;
9850 for (i = 0; i < fs->rootpath.pathname4_len; i++) {
9851 path = utf8_to_str(&fs->rootpath.pathname4_val[i], &len, NULL);
9852 if (path == NULL)
9853 continue;
9854 (void) strcat(symbuf, "/");
9855 (void) strcat(symbuf, path);
9856 npaths++;
9857 kmem_free(path, len);
9858 }
9859
9860 rfs4_free_fs_locations4(fsl);
9861 kmem_free(fsl, sizeof (fs_locations4));
9862
9863 if (strsz != NULL)
9864 *strsz = size;
9865 return (symbuf);
9866 }
9867
9868 /*
9869 * Check to see if we have a downrev Solaris client, so that we
9870 * can send it a symlink instead of a referral.
9871 */
9872 int
9873 client_is_downrev(struct svc_req *req)
9874 {
9875 struct sockaddr *ca;
9876 rfs4_clntip_t *ci;
9877 bool_t create = FALSE;
9878 int is_downrev;
9879
9880 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
9881 ASSERT(ca);
9882 ci = rfs4_find_clntip(ca, &create);
9883 if (ci == NULL)
9884 return (0);
9885 is_downrev = ci->ri_no_referrals;
9886 rfs4_dbe_rele(ci->ri_dbe);
9887 return (is_downrev);
9888 }