Print this page
Send zone's rootvp to untraverse()
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/fs/nfs/nfs4_srv.c
+++ new/usr/src/uts/common/fs/nfs/nfs4_srv.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24 24 */
25 25
26 26 /*
27 27 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
28 28 * All Rights Reserved
29 29 */
30 30
31 31 /*
32 32 * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
33 33 * Copyright 2019 Nexenta Systems, Inc.
34 34 * Copyright 2019 Nexenta by DDN, Inc.
35 35 */
36 36
37 37 #include <sys/param.h>
38 38 #include <sys/types.h>
39 39 #include <sys/systm.h>
40 40 #include <sys/cred.h>
41 41 #include <sys/buf.h>
42 42 #include <sys/vfs.h>
43 43 #include <sys/vfs_opreg.h>
44 44 #include <sys/vnode.h>
45 45 #include <sys/uio.h>
46 46 #include <sys/errno.h>
47 47 #include <sys/sysmacros.h>
48 48 #include <sys/statvfs.h>
49 49 #include <sys/kmem.h>
50 50 #include <sys/dirent.h>
51 51 #include <sys/cmn_err.h>
52 52 #include <sys/debug.h>
53 53 #include <sys/systeminfo.h>
54 54 #include <sys/flock.h>
55 55 #include <sys/pathname.h>
56 56 #include <sys/nbmlock.h>
57 57 #include <sys/share.h>
58 58 #include <sys/atomic.h>
59 59 #include <sys/policy.h>
60 60 #include <sys/fem.h>
61 61 #include <sys/sdt.h>
62 62 #include <sys/ddi.h>
63 63 #include <sys/zone.h>
64 64
65 65 #include <fs/fs_reparse.h>
66 66
67 67 #include <rpc/types.h>
68 68 #include <rpc/auth.h>
69 69 #include <rpc/rpcsec_gss.h>
70 70 #include <rpc/svc.h>
71 71
72 72 #include <nfs/nfs.h>
73 73 #include <nfs/nfssys.h>
74 74 #include <nfs/export.h>
75 75 #include <nfs/nfs_cmd.h>
76 76 #include <nfs/lm.h>
77 77 #include <nfs/nfs4.h>
78 78 #include <nfs/nfs4_drc.h>
79 79
80 80 #include <sys/strsubr.h>
81 81 #include <sys/strsun.h>
82 82
83 83 #include <inet/common.h>
84 84 #include <inet/ip.h>
85 85 #include <inet/ip6.h>
86 86
87 87 #include <sys/tsol/label.h>
88 88 #include <sys/tsol/tndb.h>
89 89
90 90 #define RFS4_MAXLOCK_TRIES 4 /* Try to get the lock this many times */
91 91 static int rfs4_maxlock_tries = RFS4_MAXLOCK_TRIES;
92 92 #define RFS4_LOCK_DELAY 10 /* Milliseconds */
93 93 static clock_t rfs4_lock_delay = RFS4_LOCK_DELAY;
94 94 extern struct svc_ops rdma_svc_ops;
95 95 extern int nfs_loaned_buffers;
96 96 /* End of Tunables */
97 97
98 98 static int rdma_setup_read_data4(READ4args *, READ4res *);
99 99
100 100 /*
101 101 * Used to bump the stateid4.seqid value and show changes in the stateid
102 102 */
103 103 #define next_stateid(sp) (++(sp)->bits.chgseq)
104 104
105 105 /*
106 106 * RFS4_MINLEN_ENTRY4: XDR-encoded size of smallest possible dirent.
107 107 * This is used to return NFS4ERR_TOOSMALL when clients specify
108 108 * maxcount that isn't large enough to hold the smallest possible
109 109 * XDR encoded dirent.
110 110 *
111 111 * sizeof cookie (8 bytes) +
112 112 * sizeof name_len (4 bytes) +
113 113 * sizeof smallest (padded) name (4 bytes) +
114 114 * sizeof bitmap4_len (12 bytes) + NOTE: we always encode len=2 bm4
115 115 * sizeof attrlist4_len (4 bytes) +
116 116 * sizeof next boolean (4 bytes)
117 117 *
118 118 * RFS4_MINLEN_RDDIR4: XDR-encoded size of READDIR op reply containing
119 119 * the smallest possible entry4 (assumes no attrs requested).
120 120 * sizeof nfsstat4 (4 bytes) +
121 121 * sizeof verifier4 (8 bytes) +
122 122 * sizeof entry4list bool (4 bytes) +
123 123 * sizeof entry4 (36 bytes) +
124 124 * sizeof eof bool (4 bytes)
125 125 *
126 126 * RFS4_MINLEN_RDDIR_BUF: minimum length of buffer server will provide to
127 127 * VOP_READDIR. Its value is the size of the maximum possible dirent
128 128 * for solaris. The DIRENT64_RECLEN macro returns the size of dirent
129 129 * required for a given name length. MAXNAMELEN is the maximum
130 130 * filename length allowed in Solaris. The first two DIRENT64_RECLEN()
131 131 * macros are to allow for . and .. entries -- just a minor tweak to try
132 132 * and guarantee that buffer we give to VOP_READDIR will be large enough
133 133 * to hold ., .., and the largest possible solaris dirent64.
134 134 */
135 135 #define RFS4_MINLEN_ENTRY4 36
136 136 #define RFS4_MINLEN_RDDIR4 (4 + NFS4_VERIFIER_SIZE + 4 + RFS4_MINLEN_ENTRY4 + 4)
137 137 #define RFS4_MINLEN_RDDIR_BUF \
138 138 (DIRENT64_RECLEN(1) + DIRENT64_RECLEN(2) + DIRENT64_RECLEN(MAXNAMELEN))
139 139
140 140 /*
141 141 * It would be better to pad to 4 bytes since that's what XDR would do,
142 142 * but the dirents UFS gives us are already padded to 8, so just take
143 143 * what we're given. Dircount is only a hint anyway. Currently the
144 144 * solaris kernel is ASCII only, so there's no point in calling the
145 145 * UTF8 functions.
146 146 *
147 147 * dirent64: name padded to provide 8 byte struct alignment
148 148 * d_ino(8) + d_off(8) + d_reclen(2) + d_name(namelen + null(1) + pad)
149 149 *
150 150 * cookie: uint64_t + utf8namelen: uint_t + utf8name padded to 8 bytes
151 151 *
152 152 */
153 153 #define DIRENT64_TO_DIRCOUNT(dp) \
154 154 (3 * BYTES_PER_XDR_UNIT + DIRENT64_NAMELEN((dp)->d_reclen))
155 155
156 156
157 157 static sysid_t lockt_sysid; /* dummy sysid for all LOCKT calls */
158 158
159 159 u_longlong_t nfs4_srv_caller_id;
160 160 uint_t nfs4_srv_vkey = 0;
161 161
162 162 void rfs4_init_compound_state(struct compound_state *);
163 163
164 164 static void nullfree(caddr_t);
165 165 static void rfs4_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
166 166 struct compound_state *);
167 167 static void rfs4_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
168 168 struct compound_state *);
169 169 static void rfs4_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
170 170 struct compound_state *);
171 171 static void rfs4_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
172 172 struct compound_state *);
173 173 static void rfs4_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
174 174 struct compound_state *);
175 175 static void rfs4_op_create_free(nfs_resop4 *resop);
176 176 static void rfs4_op_delegreturn(nfs_argop4 *, nfs_resop4 *,
177 177 struct svc_req *, struct compound_state *);
178 178 static void rfs4_op_delegpurge(nfs_argop4 *, nfs_resop4 *,
179 179 struct svc_req *, struct compound_state *);
180 180 static void rfs4_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
181 181 struct compound_state *);
182 182 static void rfs4_op_getattr_free(nfs_resop4 *);
183 183 static void rfs4_op_getfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
184 184 struct compound_state *);
185 185 static void rfs4_op_getfh_free(nfs_resop4 *);
186 186 static void rfs4_op_illegal(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
187 187 struct compound_state *);
188 188 static void rfs4_op_link(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
189 189 struct compound_state *);
190 190 static void rfs4_op_lock(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
191 191 struct compound_state *);
192 192 static void lock_denied_free(nfs_resop4 *);
193 193 static void rfs4_op_locku(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
194 194 struct compound_state *);
195 195 static void rfs4_op_lockt(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
196 196 struct compound_state *);
197 197 static void rfs4_op_lookup(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
198 198 struct compound_state *);
199 199 static void rfs4_op_lookupp(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
200 200 struct compound_state *);
201 201 static void rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop,
202 202 struct svc_req *req, struct compound_state *cs);
203 203 static void rfs4_op_nverify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
204 204 struct compound_state *);
205 205 static void rfs4_op_open(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
206 206 struct compound_state *);
207 207 static void rfs4_op_open_confirm(nfs_argop4 *, nfs_resop4 *,
208 208 struct svc_req *, struct compound_state *);
209 209 static void rfs4_op_open_downgrade(nfs_argop4 *, nfs_resop4 *,
210 210 struct svc_req *, struct compound_state *);
211 211 static void rfs4_op_putfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
212 212 struct compound_state *);
213 213 static void rfs4_op_putpubfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
214 214 struct compound_state *);
215 215 static void rfs4_op_putrootfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
216 216 struct compound_state *);
217 217 static void rfs4_op_read(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
218 218 struct compound_state *);
219 219 static void rfs4_op_read_free(nfs_resop4 *);
220 220 static void rfs4_op_readdir_free(nfs_resop4 *resop);
221 221 static void rfs4_op_readlink(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
222 222 struct compound_state *);
223 223 static void rfs4_op_readlink_free(nfs_resop4 *);
224 224 static void rfs4_op_release_lockowner(nfs_argop4 *, nfs_resop4 *,
225 225 struct svc_req *, struct compound_state *);
226 226 static void rfs4_op_remove(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
227 227 struct compound_state *);
228 228 static void rfs4_op_rename(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
229 229 struct compound_state *);
230 230 static void rfs4_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
231 231 struct compound_state *);
232 232 static void rfs4_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
233 233 struct compound_state *);
234 234 static void rfs4_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
235 235 struct compound_state *);
236 236 static void rfs4_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
237 237 struct compound_state *);
238 238 static void rfs4_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
239 239 struct compound_state *);
240 240 static void rfs4_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
241 241 struct compound_state *);
242 242 static void rfs4_op_setclientid(nfs_argop4 *, nfs_resop4 *,
243 243 struct svc_req *, struct compound_state *);
244 244 static void rfs4_op_setclientid_confirm(nfs_argop4 *, nfs_resop4 *,
245 245 struct svc_req *req, struct compound_state *);
246 246 static void rfs4_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
247 247 struct compound_state *);
248 248 static void rfs4_op_secinfo_free(nfs_resop4 *);
249 249
250 250 static nfsstat4 check_open_access(uint32_t, struct compound_state *,
251 251 struct svc_req *);
252 252 nfsstat4 rfs4_client_sysid(rfs4_client_t *, sysid_t *);
253 253 void rfs4_ss_clid(nfs4_srv_t *, rfs4_client_t *);
254 254
255 255
/*
 * translation table for attrs
 *
 * NOTE(review): the code that fills in and consumes this table is outside
 * this part of the file; the per-field notes below are inferred from the
 * field names and types only and should be confirmed against the users.
 */
struct nfs4_ntov_table {
	union nfs4_attr_u *na;	/* presumably the per-attribute values */
	uint8_t amap[NFS4_MAXNUM_ATTRS]; /* presumably maps na[] slots to attrs */
	int attrcnt;		/* presumably the number of entries in use */
	bool_t vfsstat;		/* presumably set when vfs stats are needed */
};
265 265
266 266 static void nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp);
267 267 static void nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
268 268 struct nfs4_svgetit_arg *sargp);
269 269
270 270 static nfsstat4 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp,
271 271 struct compound_state *cs, struct nfs4_svgetit_arg *sargp,
272 272 struct nfs4_ntov_table *ntovp, nfs4_attr_cmd_t cmd);
273 273
274 274 static void hanfsv4_failover(nfs4_srv_t *);
275 275
276 276 fem_t *deleg_rdops;
277 277 fem_t *deleg_wrops;
278 278
/*
 * NFS4 op dispatch table
 *
 * One rfsv4disp entry describes how a single NFSv4 operation is handled.
 * The function pointers are declared without prototypes: the rfs4_op_*
 * handlers declared in this file take (nfs_argop4 *, nfs_resop4 *,
 * struct svc_req *, struct compound_state *), while the result-free
 * routines take a single pointer to the result.
 */

struct rfsv4disp {
	void (*dis_proc)(); /* proc to call */
	void (*dis_resfree)(); /* frees space allocated by proc */
	int dis_flags; /* RPC_IDEMPOTENT, etc... */
};
288 288
/*
 * Dispatch table for NFSv4 operations.  Entries are positional: the
 * comment above each entry gives the operation number that corresponds
 * to that slot, so entries must not be reordered.  Slots 0-2 have no
 * operation of their own and are routed to rfs4_op_illegal.
 */
static struct rfsv4disp rfsv4disptab[] = {
	/*
	 * NFS VERSION 4
	 */

	/* RFS_NULL = 0 */
	{rfs4_op_illegal, nullfree, 0},

	/* UNUSED = 1 */
	{rfs4_op_illegal, nullfree, 0},

	/* UNUSED = 2 */
	{rfs4_op_illegal, nullfree, 0},

	/* OP_ACCESS = 3 */
	{rfs4_op_access, nullfree, RPC_IDEMPOTENT},

	/* OP_CLOSE = 4 */
	{rfs4_op_close, nullfree, 0},

	/* OP_COMMIT = 5 */
	{rfs4_op_commit, nullfree, RPC_IDEMPOTENT},

	/* OP_CREATE = 6 */
	{rfs4_op_create, nullfree, 0},

	/* OP_DELEGPURGE = 7 */
	{rfs4_op_delegpurge, nullfree, 0},

	/* OP_DELEGRETURN = 8 */
	{rfs4_op_delegreturn, nullfree, 0},

	/* OP_GETATTR = 9 */
	{rfs4_op_getattr, rfs4_op_getattr_free, RPC_IDEMPOTENT},

	/* OP_GETFH = 10 */
	{rfs4_op_getfh, rfs4_op_getfh_free, RPC_ALL},

	/* OP_LINK = 11 */
	{rfs4_op_link, nullfree, 0},

	/* OP_LOCK = 12 */
	{rfs4_op_lock, lock_denied_free, 0},

	/* OP_LOCKT = 13 */
	{rfs4_op_lockt, lock_denied_free, 0},

	/* OP_LOCKU = 14 */
	{rfs4_op_locku, nullfree, 0},

	/* OP_LOOKUP = 15 */
	{rfs4_op_lookup, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},

	/* OP_LOOKUPP = 16 */
	{rfs4_op_lookupp, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},

	/* OP_NVERIFY = 17 */
	{rfs4_op_nverify, nullfree, RPC_IDEMPOTENT},

	/* OP_OPEN = 18 */
	{rfs4_op_open, rfs4_free_reply, 0},

	/* OP_OPENATTR = 19 */
	{rfs4_op_openattr, nullfree, 0},

	/* OP_OPEN_CONFIRM = 20 */
	{rfs4_op_open_confirm, nullfree, 0},

	/* OP_OPEN_DOWNGRADE = 21 */
	{rfs4_op_open_downgrade, nullfree, 0},

	/* OP_PUTFH = 22 */
	{rfs4_op_putfh, nullfree, RPC_ALL},

	/* OP_PUTPUBFH = 23 */
	{rfs4_op_putpubfh, nullfree, RPC_ALL},

	/* OP_PUTROOTFH = 24 */
	{rfs4_op_putrootfh, nullfree, RPC_ALL},

	/* OP_READ = 25 */
	{rfs4_op_read, rfs4_op_read_free, RPC_IDEMPOTENT},

	/* OP_READDIR = 26 */
	{rfs4_op_readdir, rfs4_op_readdir_free, RPC_IDEMPOTENT},

	/* OP_READLINK = 27 */
	{rfs4_op_readlink, rfs4_op_readlink_free, RPC_IDEMPOTENT},

	/* OP_REMOVE = 28 */
	{rfs4_op_remove, nullfree, 0},

	/* OP_RENAME = 29 */
	{rfs4_op_rename, nullfree, 0},

	/* OP_RENEW = 30 */
	{rfs4_op_renew, nullfree, 0},

	/* OP_RESTOREFH = 31 */
	{rfs4_op_restorefh, nullfree, RPC_ALL},

	/* OP_SAVEFH = 32 */
	{rfs4_op_savefh, nullfree, RPC_ALL},

	/* OP_SECINFO = 33 */
	{rfs4_op_secinfo, rfs4_op_secinfo_free, 0},

	/* OP_SETATTR = 34 */
	{rfs4_op_setattr, nullfree, 0},

	/* OP_SETCLIENTID = 35 */
	{rfs4_op_setclientid, nullfree, 0},

	/* OP_SETCLIENTID_CONFIRM = 36 */
	{rfs4_op_setclientid_confirm, nullfree, 0},

	/* OP_VERIFY = 37 */
	{rfs4_op_verify, nullfree, RPC_IDEMPOTENT},

	/* OP_WRITE = 38 */
	{rfs4_op_write, nullfree, 0},

	/* OP_RELEASE_LOCKOWNER = 39 */
	{rfs4_op_release_lockowner, nullfree, 0},
};
414 414
415 415 static uint_t rfsv4disp_cnt = sizeof (rfsv4disptab) / sizeof (rfsv4disptab[0]);
416 416
417 417 #define OP_ILLEGAL_IDX (rfsv4disp_cnt)
418 418
#ifdef DEBUG

/* Debug knobs; only present in DEBUG kernels. */
int rfs4_fillone_debug = 0;
int rfs4_no_stub_access = 1;
int rfs4_rddir_debug = 0;

/*
 * Printable handler names, in the same order as rfsv4disptab.  The table
 * has one more entry than rfsv4disptab: the trailing "rfs4_op_illegal"
 * string sits at index OP_ILLEGAL_IDX (== rfsv4disp_cnt).
 */
static char *rfs4_op_string[] = {
	"rfs4_op_null",
	"rfs4_op_1 unused",
	"rfs4_op_2 unused",
	"rfs4_op_access",
	"rfs4_op_close",
	"rfs4_op_commit",
	"rfs4_op_create",
	"rfs4_op_delegpurge",
	"rfs4_op_delegreturn",
	"rfs4_op_getattr",
	"rfs4_op_getfh",
	"rfs4_op_link",
	"rfs4_op_lock",
	"rfs4_op_lockt",
	"rfs4_op_locku",
	"rfs4_op_lookup",
	"rfs4_op_lookupp",
	"rfs4_op_nverify",
	"rfs4_op_open",
	"rfs4_op_openattr",
	"rfs4_op_open_confirm",
	"rfs4_op_open_downgrade",
	"rfs4_op_putfh",
	"rfs4_op_putpubfh",
	"rfs4_op_putrootfh",
	"rfs4_op_read",
	"rfs4_op_readdir",
	"rfs4_op_readlink",
	"rfs4_op_remove",
	"rfs4_op_rename",
	"rfs4_op_renew",
	"rfs4_op_restorefh",
	"rfs4_op_savefh",
	"rfs4_op_secinfo",
	"rfs4_op_setattr",
	"rfs4_op_setclientid",
	"rfs4_op_setclient_confirm",
	"rfs4_op_verify",
	"rfs4_op_write",
	"rfs4_op_release_lockowner",
	"rfs4_op_illegal"
};
#endif
469 469
470 470 void rfs4_ss_chkclid(nfs4_srv_t *, rfs4_client_t *);
471 471
472 472 extern size_t strlcpy(char *dst, const char *src, size_t dstsize);
473 473
474 474 extern void rfs4_free_fs_locations4(fs_locations4 *);
475 475
476 476 #ifdef nextdp
477 477 #undef nextdp
478 478 #endif
479 479 #define nextdp(dp) ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
480 480
/*
 * FEM template for read delegations: pairs each monitored vnode operation
 * with its deleg_rd_* monitor routine.  rfs4_srvrinit() hands this to
 * fem_create() to build deleg_rdops.  The NULL, NULL pair ends the list.
 */
static const fs_operation_def_t nfs4_rd_deleg_tmpl[] = {
	VOPNAME_OPEN, { .femop_open = deleg_rd_open },
	VOPNAME_WRITE, { .femop_write = deleg_rd_write },
	VOPNAME_SETATTR, { .femop_setattr = deleg_rd_setattr },
	VOPNAME_RWLOCK, { .femop_rwlock = deleg_rd_rwlock },
	VOPNAME_SPACE, { .femop_space = deleg_rd_space },
	VOPNAME_SETSECATTR, { .femop_setsecattr = deleg_rd_setsecattr },
	VOPNAME_VNEVENT, { .femop_vnevent = deleg_rd_vnevent },
	NULL, NULL
};
/*
 * FEM template for write delegations: pairs each monitored vnode operation
 * with its deleg_wr_* monitor routine.  Unlike the read template it also
 * monitors VOPNAME_READ.  rfs4_srvrinit() hands this to fem_create() to
 * build deleg_wrops.  The NULL, NULL pair ends the list.
 */
static const fs_operation_def_t nfs4_wr_deleg_tmpl[] = {
	VOPNAME_OPEN, { .femop_open = deleg_wr_open },
	VOPNAME_READ, { .femop_read = deleg_wr_read },
	VOPNAME_WRITE, { .femop_write = deleg_wr_write },
	VOPNAME_SETATTR, { .femop_setattr = deleg_wr_setattr },
	VOPNAME_RWLOCK, { .femop_rwlock = deleg_wr_rwlock },
	VOPNAME_SPACE, { .femop_space = deleg_wr_space },
	VOPNAME_SETSECATTR, { .femop_setsecattr = deleg_wr_setsecattr },
	VOPNAME_VNEVENT, { .femop_vnevent = deleg_wr_vnevent },
	NULL, NULL
};

502 502
503 503 nfs4_srv_t *
504 504 nfs4_get_srv(void)
505 505 {
506 506 nfs_globals_t *ng = zone_getspecific(nfssrv_zone_key, curzone);
507 507 nfs4_srv_t *srv = ng->nfs4_srv;
508 508 ASSERT(srv != NULL);
509 509 return (srv);
510 510 }
511 511
512 512 void
513 513 rfs4_srv_zone_init(nfs_globals_t *ng)
514 514 {
515 515 nfs4_srv_t *nsrv4;
516 516 timespec32_t verf;
517 517
518 518 nsrv4 = kmem_zalloc(sizeof (*nsrv4), KM_SLEEP);
519 519
520 520 /*
521 521 * The following algorithm attempts to find a unique verifier
522 522 * to be used as the write verifier returned from the server
523 523 * to the client. It is important that this verifier change
524 524 * whenever the server reboots. Of secondary importance, it
525 525 * is important for the verifier to be unique between two
526 526 * different servers.
527 527 *
528 528 * Thus, an attempt is made to use the system hostid and the
529 529 * current time in seconds when the nfssrv kernel module is
530 530 * loaded. It is assumed that an NFS server will not be able
531 531 * to boot and then to reboot in less than a second. If the
532 532 * hostid has not been set, then the current high resolution
533 533 * time is used. This will ensure different verifiers each
534 534 * time the server reboots and minimize the chances that two
535 535 * different servers will have the same verifier.
536 536 * XXX - this is broken on LP64 kernels.
537 537 */
538 538 verf.tv_sec = (time_t)zone_get_hostid(NULL);
539 539 if (verf.tv_sec != 0) {
540 540 verf.tv_nsec = gethrestime_sec();
541 541 } else {
542 542 timespec_t tverf;
543 543
544 544 gethrestime(&tverf);
545 545 verf.tv_sec = (time_t)tverf.tv_sec;
546 546 verf.tv_nsec = tverf.tv_nsec;
547 547 }
548 548 nsrv4->write4verf = *(uint64_t *)&verf;
549 549
550 550 /* Used to manage create/destroy of server state */
551 551 nsrv4->nfs4_server_state = NULL;
552 552 nsrv4->nfs4_cur_servinst = NULL;
553 553 nsrv4->nfs4_deleg_policy = SRV_NEVER_DELEGATE;
554 554 mutex_init(&nsrv4->deleg_lock, NULL, MUTEX_DEFAULT, NULL);
555 555 mutex_init(&nsrv4->state_lock, NULL, MUTEX_DEFAULT, NULL);
556 556 mutex_init(&nsrv4->servinst_lock, NULL, MUTEX_DEFAULT, NULL);
557 557 rw_init(&nsrv4->deleg_policy_lock, NULL, RW_DEFAULT, NULL);
558 558
559 559 ng->nfs4_srv = nsrv4;
560 560 }
561 561
562 562 void
563 563 rfs4_srv_zone_fini(nfs_globals_t *ng)
564 564 {
565 565 nfs4_srv_t *nsrv4 = ng->nfs4_srv;
566 566
567 567 ng->nfs4_srv = NULL;
568 568
569 569 mutex_destroy(&nsrv4->deleg_lock);
570 570 mutex_destroy(&nsrv4->state_lock);
571 571 mutex_destroy(&nsrv4->servinst_lock);
572 572 rw_destroy(&nsrv4->deleg_policy_lock);
573 573
574 574 kmem_free(nsrv4, sizeof (*nsrv4));
575 575 }
576 576
577 577 void
578 578 rfs4_srvrinit(void)
579 579 {
580 580 extern void rfs4_attr_init();
581 581
582 582 rfs4_attr_init();
583 583
584 584 if (fem_create("deleg_rdops", nfs4_rd_deleg_tmpl, &deleg_rdops) != 0) {
585 585 rfs4_disable_delegation();
586 586 } else if (fem_create("deleg_wrops", nfs4_wr_deleg_tmpl,
587 587 &deleg_wrops) != 0) {
588 588 rfs4_disable_delegation();
589 589 fem_free(deleg_rdops);
590 590 }
591 591
592 592 nfs4_srv_caller_id = fs_new_caller_id();
593 593 lockt_sysid = lm_alloc_sysidt();
594 594 vsd_create(&nfs4_srv_vkey, NULL);
595 595 rfs4_state_g_init();
596 596 }
597 597
598 598 void
599 599 rfs4_srvrfini(void)
600 600 {
601 601 if (lockt_sysid != LM_NOSYSID) {
602 602 lm_free_sysidt(lockt_sysid);
603 603 lockt_sysid = LM_NOSYSID;
604 604 }
605 605
606 606 rfs4_state_g_fini();
607 607
608 608 fem_free(deleg_rdops);
609 609 fem_free(deleg_wrops);
610 610 }
611 611
612 612 void
613 613 rfs4_do_server_start(int server_upordown,
614 614 int srv_delegation, int cluster_booted)
615 615 {
616 616 nfs4_srv_t *nsrv4 = nfs4_get_srv();
617 617
618 618 /* Is this a warm start? */
619 619 if (server_upordown == NFS_SERVER_QUIESCED) {
620 620 cmn_err(CE_NOTE, "nfs4_srv: "
621 621 "server was previously quiesced; "
622 622 "existing NFSv4 state will be re-used");
623 623
624 624 /*
625 625 * HA-NFSv4: this is also the signal
626 626 * that a Resource Group failover has
627 627 * occurred.
628 628 */
629 629 if (cluster_booted)
630 630 hanfsv4_failover(nsrv4);
631 631 } else {
632 632 /* Cold start */
633 633 nsrv4->rfs4_start_time = 0;
634 634 rfs4_state_zone_init(nsrv4);
635 635 nsrv4->nfs4_drc = rfs4_init_drc(nfs4_drc_max,
636 636 nfs4_drc_hash);
637 637
638 638 /*
639 639 * The nfsd service was started with the -s option
640 640 * we need to pull in any state from the paths indicated.
641 641 */
642 642 if (curzone == global_zone && rfs4_dss_numnewpaths > 0) {
643 643 /* read in the stable storage state from these paths */
644 644 rfs4_dss_readstate(nsrv4, rfs4_dss_numnewpaths,
645 645 rfs4_dss_newpaths);
646 646 }
647 647 }
648 648
649 649 /* Check if delegation is to be enabled */
650 650 if (srv_delegation != FALSE)
651 651 rfs4_set_deleg_policy(nsrv4, SRV_NORMAL_DELEGATE);
652 652 }
653 653
654 654 void
655 655 rfs4_init_compound_state(struct compound_state *cs)
656 656 {
657 657 bzero(cs, sizeof (*cs));
658 658 cs->cont = TRUE;
659 659 cs->access = CS_ACCESS_DENIED;
660 660 cs->deleg = FALSE;
661 661 cs->mandlock = FALSE;
662 662 cs->fh.nfs_fh4_val = cs->fhbuf;
663 663 }
664 664
665 665 void
666 666 rfs4_grace_start(rfs4_servinst_t *sip)
667 667 {
668 668 rw_enter(&sip->rwlock, RW_WRITER);
669 669 sip->start_time = (time_t)TICK_TO_SEC(ddi_get_lbolt());
670 670 sip->grace_period = rfs4_grace_period;
671 671 rw_exit(&sip->rwlock);
672 672 }
673 673
674 674 /*
675 675 * returns true if the instance's grace period has never been started
676 676 */
677 677 int
678 678 rfs4_servinst_grace_new(rfs4_servinst_t *sip)
679 679 {
680 680 time_t start_time;
681 681
682 682 rw_enter(&sip->rwlock, RW_READER);
683 683 start_time = sip->start_time;
684 684 rw_exit(&sip->rwlock);
685 685
686 686 return (start_time == 0);
687 687 }
688 688
689 689 /*
690 690 * Indicates if server instance is within the
691 691 * grace period.
692 692 */
693 693 int
694 694 rfs4_servinst_in_grace(rfs4_servinst_t *sip)
695 695 {
696 696 time_t grace_expiry;
697 697
698 698 rw_enter(&sip->rwlock, RW_READER);
699 699 grace_expiry = sip->start_time + sip->grace_period;
700 700 rw_exit(&sip->rwlock);
701 701
702 702 return (((time_t)TICK_TO_SEC(ddi_get_lbolt())) < grace_expiry);
703 703 }
704 704
705 705 int
706 706 rfs4_clnt_in_grace(rfs4_client_t *cp)
707 707 {
708 708 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
709 709
710 710 return (rfs4_servinst_in_grace(cp->rc_server_instance));
711 711 }
712 712
713 713 /*
714 714 * reset all currently active grace periods
715 715 */
716 716 void
717 717 rfs4_grace_reset_all(nfs4_srv_t *nsrv4)
718 718 {
719 719 rfs4_servinst_t *sip;
720 720
721 721 mutex_enter(&nsrv4->servinst_lock);
722 722 for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev)
723 723 if (rfs4_servinst_in_grace(sip))
724 724 rfs4_grace_start(sip);
725 725 mutex_exit(&nsrv4->servinst_lock);
726 726 }
727 727
728 728 /*
729 729 * start any new instances' grace periods
730 730 */
731 731 void
732 732 rfs4_grace_start_new(nfs4_srv_t *nsrv4)
733 733 {
734 734 rfs4_servinst_t *sip;
735 735
736 736 mutex_enter(&nsrv4->servinst_lock);
737 737 for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev)
738 738 if (rfs4_servinst_grace_new(sip))
739 739 rfs4_grace_start(sip);
740 740 mutex_exit(&nsrv4->servinst_lock);
741 741 }
742 742
743 743 static rfs4_dss_path_t *
744 744 rfs4_dss_newpath(nfs4_srv_t *nsrv4, rfs4_servinst_t *sip,
745 745 char *path, unsigned index)
746 746 {
747 747 size_t len;
748 748 rfs4_dss_path_t *dss_path;
749 749
750 750 dss_path = kmem_alloc(sizeof (rfs4_dss_path_t), KM_SLEEP);
751 751
752 752 /*
753 753 * Take a copy of the string, since the original may be overwritten.
754 754 * Sadly, no strdup() in the kernel.
755 755 */
756 756 /* allow for NUL */
757 757 len = strlen(path) + 1;
758 758 dss_path->path = kmem_alloc(len, KM_SLEEP);
759 759 (void) strlcpy(dss_path->path, path, len);
760 760
761 761 /* associate with servinst */
762 762 dss_path->sip = sip;
763 763 dss_path->index = index;
764 764
765 765 /*
766 766 * Add to list of served paths.
767 767 * No locking required, as we're only ever called at startup.
768 768 */
769 769 if (nsrv4->dss_pathlist == NULL) {
770 770 /* this is the first dss_path_t */
771 771
772 772 /* needed for insque/remque */
773 773 dss_path->next = dss_path->prev = dss_path;
774 774
775 775 nsrv4->dss_pathlist = dss_path;
776 776 } else {
777 777 insque(dss_path, nsrv4->dss_pathlist);
778 778 }
779 779
780 780 return (dss_path);
781 781 }
782 782
783 783 /*
784 784 * Create a new server instance, and make it the currently active instance.
785 785 * Note that starting the grace period too early will reduce the clients'
786 786 * recovery window.
787 787 */
788 788 void
789 789 rfs4_servinst_create(nfs4_srv_t *nsrv4, int start_grace,
790 790 int dss_npaths, char **dss_paths)
791 791 {
792 792 unsigned i;
793 793 rfs4_servinst_t *sip;
794 794 rfs4_oldstate_t *oldstate;
795 795
796 796 sip = kmem_alloc(sizeof (rfs4_servinst_t), KM_SLEEP);
797 797 rw_init(&sip->rwlock, NULL, RW_DEFAULT, NULL);
798 798
799 799 sip->start_time = (time_t)0;
800 800 sip->grace_period = (time_t)0;
801 801 sip->next = NULL;
802 802 sip->prev = NULL;
803 803
804 804 rw_init(&sip->oldstate_lock, NULL, RW_DEFAULT, NULL);
805 805 /*
806 806 * This initial dummy entry is required to setup for insque/remque.
807 807 * It must be skipped over whenever the list is traversed.
808 808 */
809 809 oldstate = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP);
810 810 /* insque/remque require initial list entry to be self-terminated */
811 811 oldstate->next = oldstate;
812 812 oldstate->prev = oldstate;
813 813 sip->oldstate = oldstate;
814 814
815 815
816 816 sip->dss_npaths = dss_npaths;
817 817 sip->dss_paths = kmem_alloc(dss_npaths *
818 818 sizeof (rfs4_dss_path_t *), KM_SLEEP);
819 819
820 820 for (i = 0; i < dss_npaths; i++) {
821 821 sip->dss_paths[i] =
822 822 rfs4_dss_newpath(nsrv4, sip, dss_paths[i], i);
823 823 }
824 824
825 825 mutex_enter(&nsrv4->servinst_lock);
826 826 if (nsrv4->nfs4_cur_servinst != NULL) {
827 827 /* add to linked list */
828 828 sip->prev = nsrv4->nfs4_cur_servinst;
829 829 nsrv4->nfs4_cur_servinst->next = sip;
830 830 }
831 831 if (start_grace)
832 832 rfs4_grace_start(sip);
833 833 /* make the new instance "current" */
834 834 nsrv4->nfs4_cur_servinst = sip;
835 835
836 836 mutex_exit(&nsrv4->servinst_lock);
837 837 }
838 838
839 839 /*
840 840 * In future, we might add a rfs4_servinst_destroy(sip) but, for now, destroy
841 841 * all instances directly.
842 842 */
843 843 void
844 844 rfs4_servinst_destroy_all(nfs4_srv_t *nsrv4)
845 845 {
846 846 rfs4_servinst_t *sip, *prev, *current;
847 847 #ifdef DEBUG
848 848 int n = 0;
849 849 #endif
850 850
851 851 mutex_enter(&nsrv4->servinst_lock);
852 852 ASSERT(nsrv4->nfs4_cur_servinst != NULL);
853 853 current = nsrv4->nfs4_cur_servinst;
854 854 nsrv4->nfs4_cur_servinst = NULL;
855 855 for (sip = current; sip != NULL; sip = prev) {
856 856 prev = sip->prev;
857 857 rw_destroy(&sip->rwlock);
858 858 if (sip->oldstate)
859 859 kmem_free(sip->oldstate, sizeof (rfs4_oldstate_t));
860 860 if (sip->dss_paths) {
861 861 int i = sip->dss_npaths;
862 862
863 863 while (i > 0) {
864 864 i--;
865 865 if (sip->dss_paths[i] != NULL) {
866 866 char *path = sip->dss_paths[i]->path;
867 867
868 868 if (path != NULL) {
869 869 kmem_free(path,
870 870 strlen(path) + 1);
871 871 }
872 872 kmem_free(sip->dss_paths[i],
873 873 sizeof (rfs4_dss_path_t));
874 874 }
875 875 }
876 876 kmem_free(sip->dss_paths,
877 877 sip->dss_npaths * sizeof (rfs4_dss_path_t *));
878 878 }
879 879 kmem_free(sip, sizeof (rfs4_servinst_t));
880 880 #ifdef DEBUG
881 881 n++;
882 882 #endif
883 883 }
884 884 mutex_exit(&nsrv4->servinst_lock);
885 885 }
886 886
887 887 /*
888 888 * Assign the current server instance to a client_t.
889 889 * Should be called with cp->rc_dbe held.
890 890 */
891 891 void
892 892 rfs4_servinst_assign(nfs4_srv_t *nsrv4, rfs4_client_t *cp,
893 893 rfs4_servinst_t *sip)
894 894 {
895 895 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
896 896
897 897 /*
898 898 * The lock ensures that if the current instance is in the process
899 899 * of changing, we will see the new one.
900 900 */
901 901 mutex_enter(&nsrv4->servinst_lock);
902 902 cp->rc_server_instance = sip;
903 903 mutex_exit(&nsrv4->servinst_lock);
904 904 }
905 905
906 906 rfs4_servinst_t *
907 907 rfs4_servinst(rfs4_client_t *cp)
908 908 {
909 909 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
910 910
911 911 return (cp->rc_server_instance);
912 912 }
913 913
/*
 * Result-free routine that does nothing.  It is the dis_resfree entry of
 * every rfsv4disptab operation that lists it; the argument is ignored.
 */
/* ARGSUSED */
static void
nullfree(caddr_t resop)
{
}
919 919
920 920 /*
921 921 * This is a fall-through for invalid or not implemented (yet) ops
922 922 */
923 923 /* ARGSUSED */
924 924 static void
925 925 rfs4_op_inval(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
926 926 struct compound_state *cs)
927 927 {
928 928 *cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_INVAL;
929 929 }
930 930
931 931 /*
932 932 * Check if the security flavor, nfsnum, is in the flavor_list.
933 933 */
934 934 bool_t
935 935 in_flavor_list(int nfsnum, int *flavor_list, int count)
936 936 {
937 937 int i;
938 938
939 939 for (i = 0; i < count; i++) {
940 940 if (nfsnum == flavor_list[i])
941 941 return (TRUE);
942 942 }
943 943 return (FALSE);
944 944 }
945 945
946 946 /*
947 947 * Used by rfs4_op_secinfo to get the security information from the
948 948 * export structure associated with the component.
949 949 */
950 950 /* ARGSUSED */
951 951 static nfsstat4
952 952 do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
953 953 {
954 954 int error, different_export = 0;
955 955 vnode_t *dvp, *vp;
956 956 struct exportinfo *exi;
957 957 fid_t fid;
958 958 uint_t count, i;
959 959 secinfo4 *resok_val;
960 960 struct secinfo *secp;
961 961 seconfig_t *si;
962 962 bool_t did_traverse = FALSE;
963 963 int dotdot, walk;
964 964 nfs_export_t *ne = nfs_get_export();
965 965
|
↓ open down ↓ |
965 lines elided |
↑ open up ↑ |
966 966 dvp = cs->vp;
967 967 exi = cs->exi;
968 968 ASSERT(exi != NULL);
969 969 dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
970 970
971 971 /*
972 972 * If dotdotting, then need to check whether it's above the
973 973 * root of a filesystem, or above an export point.
974 974 */
975 975 if (dotdot) {
976 - ASSERT3U(exi->exi_zoneid, ==, curzone->zone_id);
976 + vnode_t *zone_rootvp = ne->exi_root->exi_vp;
977 +
978 + ASSERT3U(exi->exi_zoneid, ==, ne->exi_root->exi_zoneid);
977 979 /*
978 980 * If dotdotting at the root of a filesystem, then
979 981 * need to traverse back to the mounted-on filesystem
980 982 * and do the dotdot lookup there.
981 983 */
982 - if ((dvp->v_flag & VROOT) || VN_IS_CURZONEROOT(dvp)) {
984 + if ((dvp->v_flag & VROOT) || VN_CMP(dvp, zone_rootvp)) {
983 985
984 986 /*
985 987 * If at the system root, then can
986 988 * go up no further.
987 989 */
988 - if (VN_CMP(dvp, ZONE_ROOTVP()))
990 + if (VN_CMP(dvp, zone_rootvp))
989 991 return (puterrno4(ENOENT));
990 992
991 993 /*
992 994 * Traverse back to the mounted-on filesystem
993 995 */
994 - dvp = untraverse(dvp);
996 + dvp = untraverse(dvp, zone_rootvp);
995 997
996 998 /*
997 999 * Set the different_export flag so we remember
998 1000 * to pick up a new exportinfo entry for
999 1001 * this new filesystem.
1000 1002 */
1001 1003 different_export = 1;
1002 1004 } else {
1003 1005
1004 1006 /*
1005 1007 * If dotdotting above an export point then set
1006 1008 * the different_export to get new export info.
1007 1009 */
1008 1010 different_export = nfs_exported(exi, dvp);
1009 1011 }
1010 1012 }
1011 1013
1012 1014 /*
1013 1015 * Get the vnode for the component "nm".
1014 1016 */
1015 1017 error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cs->cr,
1016 1018 NULL, NULL, NULL);
1017 1019 if (error)
1018 1020 return (puterrno4(error));
1019 1021
1020 1022 /*
1021 1023 * If the vnode is in a pseudo filesystem, or if the security flavor
1022 1024 * used in the request is valid but not an explicitly shared flavor,
1023 1025 * or the access bit indicates that this is a limited access,
1024 1026 * check whether this vnode is visible.
1025 1027 */
1026 1028 if (!different_export &&
1027 1029 (PSEUDO(exi) || !is_exported_sec(cs->nfsflavor, exi) ||
1028 1030 cs->access & CS_ACCESS_LIMITED)) {
1029 1031 if (! nfs_visible(exi, vp, &different_export)) {
1030 1032 VN_RELE(vp);
1031 1033 return (puterrno4(ENOENT));
1032 1034 }
1033 1035 }
1034 1036
1035 1037 /*
1036 1038 * If it's a mountpoint, then traverse it.
1037 1039 */
1038 1040 if (vn_ismntpt(vp)) {
1039 1041 if ((error = traverse(&vp)) != 0) {
1040 1042 VN_RELE(vp);
1041 1043 return (puterrno4(error));
1042 1044 }
1043 1045 /* remember that we had to traverse mountpoint */
1044 1046 did_traverse = TRUE;
1045 1047 different_export = 1;
1046 1048 } else if (vp->v_vfsp != dvp->v_vfsp) {
1047 1049 /*
1048 1050 * If vp isn't a mountpoint and the vfs ptrs aren't the same,
1049 1051 * then vp is probably an LOFS object. We don't need the
1050 1052 * realvp, we just need to know that we might have crossed
1051 1053 * a server fs boundary and need to call checkexport4.
1052 1054 * (LOFS lookup hides server fs mountpoints, and actually calls
1053 1055 * traverse)
1054 1056 */
1055 1057 different_export = 1;
1056 1058 }
1057 1059
1058 1060 /*
1059 1061 * Get the export information for it.
1060 1062 */
1061 1063 if (different_export) {
1062 1064
1063 1065 bzero(&fid, sizeof (fid));
1064 1066 fid.fid_len = MAXFIDSZ;
1065 1067 error = vop_fid_pseudo(vp, &fid);
1066 1068 if (error) {
1067 1069 VN_RELE(vp);
1068 1070 return (puterrno4(error));
1069 1071 }
1070 1072
1071 1073 /* We'll need to reassign "exi". */
1072 1074 if (dotdot)
1073 1075 exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
1074 1076 else
1075 1077 exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
1076 1078
1077 1079 if (exi == NULL) {
1078 1080 if (did_traverse == TRUE) {
1079 1081 /*
1080 1082 * If this vnode is a mounted-on vnode,
1081 1083 * but the mounted-on file system is not
1082 1084 * exported, send back the secinfo for
1083 1085 * the exported node that the mounted-on
1084 1086 * vnode lives in.
1085 1087 */
1086 1088 exi = cs->exi;
1087 1089 } else {
1088 1090 VN_RELE(vp);
1089 1091 return (puterrno4(EACCES));
1090 1092 }
1091 1093 }
1092 1094 }
1093 1095 ASSERT(exi != NULL);
1094 1096
1095 1097
1096 1098 /*
1097 1099 * Create the secinfo result based on the security information
1098 1100 * from the exportinfo structure (exi).
1099 1101 *
1100 1102 * Return all flavors for a pseudo node.
1101 1103 * For a real export node, return the flavor that the client
1102 1104 * has access with.
1103 1105 */
1104 1106 ASSERT(RW_LOCK_HELD(&ne->exported_lock));
1105 1107 if (PSEUDO(exi)) {
1106 1108 count = exi->exi_export.ex_seccnt; /* total sec count */
1107 1109 resok_val = kmem_alloc(count * sizeof (secinfo4), KM_SLEEP);
1108 1110 secp = exi->exi_export.ex_secinfo;
1109 1111
1110 1112 for (i = 0; i < count; i++) {
1111 1113 si = &secp[i].s_secinfo;
1112 1114 resok_val[i].flavor = si->sc_rpcnum;
1113 1115 if (resok_val[i].flavor == RPCSEC_GSS) {
1114 1116 rpcsec_gss_info *info;
1115 1117
1116 1118 info = &resok_val[i].flavor_info;
1117 1119 info->qop = si->sc_qop;
1118 1120 info->service = (rpc_gss_svc_t)si->sc_service;
1119 1121
1120 1122 /* get oid opaque data */
1121 1123 info->oid.sec_oid4_len =
1122 1124 si->sc_gss_mech_type->length;
1123 1125 info->oid.sec_oid4_val = kmem_alloc(
1124 1126 si->sc_gss_mech_type->length, KM_SLEEP);
1125 1127 bcopy(
1126 1128 si->sc_gss_mech_type->elements,
1127 1129 info->oid.sec_oid4_val,
1128 1130 info->oid.sec_oid4_len);
1129 1131 }
1130 1132 }
1131 1133 resp->SECINFO4resok_len = count;
1132 1134 resp->SECINFO4resok_val = resok_val;
1133 1135 } else {
1134 1136 int ret_cnt = 0, k = 0;
1135 1137 int *flavor_list;
1136 1138
1137 1139 count = exi->exi_export.ex_seccnt; /* total sec count */
1138 1140 secp = exi->exi_export.ex_secinfo;
1139 1141
1140 1142 flavor_list = kmem_alloc(count * sizeof (int), KM_SLEEP);
1141 1143 /* find out which flavors to return */
1142 1144 for (i = 0; i < count; i ++) {
1143 1145 int access, flavor, perm;
1144 1146
1145 1147 flavor = secp[i].s_secinfo.sc_nfsnum;
1146 1148 perm = secp[i].s_flags;
1147 1149
1148 1150 access = nfsauth4_secinfo_access(exi, cs->req,
1149 1151 flavor, perm, cs->basecr);
1150 1152
1151 1153 if (! (access & NFSAUTH_DENIED) &&
1152 1154 ! (access & NFSAUTH_WRONGSEC)) {
1153 1155 flavor_list[ret_cnt] = flavor;
1154 1156 ret_cnt++;
1155 1157 }
1156 1158 }
1157 1159
1158 1160 /* Create the returning SECINFO value */
1159 1161 resok_val = kmem_alloc(ret_cnt * sizeof (secinfo4), KM_SLEEP);
1160 1162
1161 1163 for (i = 0; i < count; i++) {
1162 1164 /*
1163 1165 * If the flavor is in the flavor list,
1164 1166 * fill in resok_val.
1165 1167 */
1166 1168 si = &secp[i].s_secinfo;
1167 1169 if (in_flavor_list(si->sc_nfsnum,
1168 1170 flavor_list, ret_cnt)) {
1169 1171 resok_val[k].flavor = si->sc_rpcnum;
1170 1172 if (resok_val[k].flavor == RPCSEC_GSS) {
1171 1173 rpcsec_gss_info *info;
1172 1174
1173 1175 info = &resok_val[k].flavor_info;
1174 1176 info->qop = si->sc_qop;
1175 1177 info->service = (rpc_gss_svc_t)
1176 1178 si->sc_service;
1177 1179
1178 1180 /* get oid opaque data */
1179 1181 info->oid.sec_oid4_len =
1180 1182 si->sc_gss_mech_type->length;
1181 1183 info->oid.sec_oid4_val = kmem_alloc(
1182 1184 si->sc_gss_mech_type->length,
1183 1185 KM_SLEEP);
1184 1186 bcopy(si->sc_gss_mech_type->elements,
1185 1187 info->oid.sec_oid4_val,
1186 1188 info->oid.sec_oid4_len);
1187 1189 }
1188 1190 k++;
1189 1191 }
1190 1192 if (k >= ret_cnt)
1191 1193 break;
1192 1194 }
1193 1195 resp->SECINFO4resok_len = ret_cnt;
1194 1196 resp->SECINFO4resok_val = resok_val;
1195 1197 kmem_free(flavor_list, count * sizeof (int));
1196 1198 }
1197 1199
1198 1200 VN_RELE(vp);
1199 1201 return (NFS4_OK);
1200 1202 }
1201 1203
1202 1204 /*
1203 1205 * SECINFO (Operation 33): Obtain required security information on
1204 1206 * the component name in the format of (security-mechanism-oid, qop, service)
1205 1207 * triplets.
1206 1208 */
1207 1209 /* ARGSUSED */
1208 1210 static void
1209 1211 rfs4_op_secinfo(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1210 1212 struct compound_state *cs)
1211 1213 {
1212 1214 SECINFO4args *args = &argop->nfs_argop4_u.opsecinfo;
1213 1215 SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1214 1216 utf8string *utfnm = &args->name;
1215 1217 uint_t len;
1216 1218 char *nm;
1217 1219 struct sockaddr *ca;
1218 1220 char *name = NULL;
1219 1221 nfsstat4 status = NFS4_OK;
1220 1222
1221 1223 DTRACE_NFSV4_2(op__secinfo__start, struct compound_state *, cs,
1222 1224 SECINFO4args *, args);
1223 1225
1224 1226 /*
1225 1227 * Current file handle (cfh) should have been set before getting
1226 1228 * into this function. If not, return error.
1227 1229 */
1228 1230 if (cs->vp == NULL) {
1229 1231 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1230 1232 goto out;
1231 1233 }
1232 1234
1233 1235 if (cs->vp->v_type != VDIR) {
1234 1236 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1235 1237 goto out;
1236 1238 }
1237 1239
1238 1240 /*
1239 1241 * Verify the component name. If failed, error out, but
1240 1242 * do not error out if the component name is a "..".
1241 1243 * SECINFO will return its parents secinfo data for SECINFO "..".
1242 1244 */
1243 1245 status = utf8_dir_verify(utfnm);
1244 1246 if (status != NFS4_OK) {
1245 1247 if (utfnm->utf8string_len != 2 ||
1246 1248 utfnm->utf8string_val[0] != '.' ||
1247 1249 utfnm->utf8string_val[1] != '.') {
1248 1250 *cs->statusp = resp->status = status;
1249 1251 goto out;
1250 1252 }
1251 1253 }
1252 1254
1253 1255 nm = utf8_to_str(utfnm, &len, NULL);
1254 1256 if (nm == NULL) {
1255 1257 *cs->statusp = resp->status = NFS4ERR_INVAL;
1256 1258 goto out;
1257 1259 }
1258 1260
1259 1261 if (len > MAXNAMELEN) {
1260 1262 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1261 1263 kmem_free(nm, len);
1262 1264 goto out;
1263 1265 }
1264 1266
1265 1267 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1266 1268 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1267 1269 MAXPATHLEN + 1);
1268 1270
1269 1271 if (name == NULL) {
1270 1272 *cs->statusp = resp->status = NFS4ERR_INVAL;
1271 1273 kmem_free(nm, len);
1272 1274 goto out;
1273 1275 }
1274 1276
1275 1277
1276 1278 *cs->statusp = resp->status = do_rfs4_op_secinfo(cs, name, resp);
1277 1279
1278 1280 if (name != nm)
1279 1281 kmem_free(name, MAXPATHLEN + 1);
1280 1282 kmem_free(nm, len);
1281 1283
1282 1284 out:
1283 1285 DTRACE_NFSV4_2(op__secinfo__done, struct compound_state *, cs,
1284 1286 SECINFO4res *, resp);
1285 1287 }
1286 1288
1287 1289 /*
1288 1290 * Free SECINFO result.
1289 1291 */
1290 1292 /* ARGSUSED */
1291 1293 static void
1292 1294 rfs4_op_secinfo_free(nfs_resop4 *resop)
1293 1295 {
1294 1296 SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1295 1297 int count, i;
1296 1298 secinfo4 *resok_val;
1297 1299
1298 1300 /* If this is not an Ok result, nothing to free. */
1299 1301 if (resp->status != NFS4_OK) {
1300 1302 return;
1301 1303 }
1302 1304
1303 1305 count = resp->SECINFO4resok_len;
1304 1306 resok_val = resp->SECINFO4resok_val;
1305 1307
1306 1308 for (i = 0; i < count; i++) {
1307 1309 if (resok_val[i].flavor == RPCSEC_GSS) {
1308 1310 rpcsec_gss_info *info;
1309 1311
1310 1312 info = &resok_val[i].flavor_info;
1311 1313 kmem_free(info->oid.sec_oid4_val,
1312 1314 info->oid.sec_oid4_len);
1313 1315 }
1314 1316 }
1315 1317 kmem_free(resok_val, count * sizeof (secinfo4));
1316 1318 resp->SECINFO4resok_len = 0;
1317 1319 resp->SECINFO4resok_val = NULL;
1318 1320 }
1319 1321
1320 1322 /* ARGSUSED */
1321 1323 static void
1322 1324 rfs4_op_access(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1323 1325 struct compound_state *cs)
1324 1326 {
1325 1327 ACCESS4args *args = &argop->nfs_argop4_u.opaccess;
1326 1328 ACCESS4res *resp = &resop->nfs_resop4_u.opaccess;
1327 1329 int error;
1328 1330 vnode_t *vp;
1329 1331 struct vattr va;
1330 1332 int checkwriteperm;
1331 1333 cred_t *cr = cs->cr;
1332 1334 bslabel_t *clabel, *slabel;
1333 1335 ts_label_t *tslabel;
1334 1336 boolean_t admin_low_client;
1335 1337
1336 1338 DTRACE_NFSV4_2(op__access__start, struct compound_state *, cs,
1337 1339 ACCESS4args *, args);
1338 1340
1339 1341 #if 0 /* XXX allow access even if !cs->access. Eventually only pseudo fs */
1340 1342 if (cs->access == CS_ACCESS_DENIED) {
1341 1343 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1342 1344 goto out;
1343 1345 }
1344 1346 #endif
1345 1347 if (cs->vp == NULL) {
1346 1348 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1347 1349 goto out;
1348 1350 }
1349 1351
1350 1352 ASSERT(cr != NULL);
1351 1353
1352 1354 vp = cs->vp;
1353 1355
1354 1356 /*
1355 1357 * If the file system is exported read only, it is not appropriate
1356 1358 * to check write permissions for regular files and directories.
1357 1359 * Special files are interpreted by the client, so the underlying
1358 1360 * permissions are sent back to the client for interpretation.
1359 1361 */
1360 1362 if (rdonly4(req, cs) &&
1361 1363 (vp->v_type == VREG || vp->v_type == VDIR))
1362 1364 checkwriteperm = 0;
1363 1365 else
1364 1366 checkwriteperm = 1;
1365 1367
1366 1368 /*
1367 1369 * XXX
1368 1370 * We need the mode so that we can correctly determine access
1369 1371 * permissions relative to a mandatory lock file. Access to
1370 1372 * mandatory lock files is denied on the server, so it might
1371 1373 * as well be reflected to the server during the open.
1372 1374 */
1373 1375 va.va_mask = AT_MODE;
1374 1376 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1375 1377 if (error) {
1376 1378 *cs->statusp = resp->status = puterrno4(error);
1377 1379 goto out;
1378 1380 }
1379 1381 resp->access = 0;
1380 1382 resp->supported = 0;
1381 1383
1382 1384 if (is_system_labeled()) {
1383 1385 ASSERT(req->rq_label != NULL);
1384 1386 clabel = req->rq_label;
1385 1387 DTRACE_PROBE2(tx__rfs4__log__info__opaccess__clabel, char *,
1386 1388 "got client label from request(1)",
1387 1389 struct svc_req *, req);
1388 1390 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1389 1391 if ((tslabel = nfs_getflabel(vp, cs->exi)) == NULL) {
1390 1392 *cs->statusp = resp->status = puterrno4(EACCES);
1391 1393 goto out;
1392 1394 }
1393 1395 slabel = label2bslabel(tslabel);
1394 1396 DTRACE_PROBE3(tx__rfs4__log__info__opaccess__slabel,
1395 1397 char *, "got server label(1) for vp(2)",
1396 1398 bslabel_t *, slabel, vnode_t *, vp);
1397 1399
1398 1400 admin_low_client = B_FALSE;
1399 1401 } else
1400 1402 admin_low_client = B_TRUE;
1401 1403 }
1402 1404
1403 1405 if (args->access & ACCESS4_READ) {
1404 1406 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
1405 1407 if (!error && !MANDLOCK(vp, va.va_mode) &&
1406 1408 (!is_system_labeled() || admin_low_client ||
1407 1409 bldominates(clabel, slabel)))
1408 1410 resp->access |= ACCESS4_READ;
1409 1411 resp->supported |= ACCESS4_READ;
1410 1412 }
1411 1413 if ((args->access & ACCESS4_LOOKUP) && vp->v_type == VDIR) {
1412 1414 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1413 1415 if (!error && (!is_system_labeled() || admin_low_client ||
1414 1416 bldominates(clabel, slabel)))
1415 1417 resp->access |= ACCESS4_LOOKUP;
1416 1418 resp->supported |= ACCESS4_LOOKUP;
1417 1419 }
1418 1420 if (checkwriteperm &&
1419 1421 (args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND))) {
1420 1422 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1421 1423 if (!error && !MANDLOCK(vp, va.va_mode) &&
1422 1424 (!is_system_labeled() || admin_low_client ||
1423 1425 blequal(clabel, slabel)))
1424 1426 resp->access |=
1425 1427 (args->access & (ACCESS4_MODIFY | ACCESS4_EXTEND));
1426 1428 resp->supported |=
1427 1429 resp->access & (ACCESS4_MODIFY | ACCESS4_EXTEND);
1428 1430 }
1429 1431
1430 1432 if (checkwriteperm &&
1431 1433 (args->access & ACCESS4_DELETE) && vp->v_type == VDIR) {
1432 1434 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1433 1435 if (!error && (!is_system_labeled() || admin_low_client ||
1434 1436 blequal(clabel, slabel)))
1435 1437 resp->access |= ACCESS4_DELETE;
1436 1438 resp->supported |= ACCESS4_DELETE;
1437 1439 }
1438 1440 if (args->access & ACCESS4_EXECUTE && vp->v_type != VDIR) {
1439 1441 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1440 1442 if (!error && !MANDLOCK(vp, va.va_mode) &&
1441 1443 (!is_system_labeled() || admin_low_client ||
1442 1444 bldominates(clabel, slabel)))
1443 1445 resp->access |= ACCESS4_EXECUTE;
1444 1446 resp->supported |= ACCESS4_EXECUTE;
1445 1447 }
1446 1448
1447 1449 if (is_system_labeled() && !admin_low_client)
1448 1450 label_rele(tslabel);
1449 1451
1450 1452 *cs->statusp = resp->status = NFS4_OK;
1451 1453 out:
1452 1454 DTRACE_NFSV4_2(op__access__done, struct compound_state *, cs,
1453 1455 ACCESS4res *, resp);
1454 1456 }
1455 1457
1456 1458 /* ARGSUSED */
1457 1459 static void
1458 1460 rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1459 1461 struct compound_state *cs)
1460 1462 {
1461 1463 COMMIT4args *args = &argop->nfs_argop4_u.opcommit;
1462 1464 COMMIT4res *resp = &resop->nfs_resop4_u.opcommit;
1463 1465 int error;
1464 1466 vnode_t *vp = cs->vp;
1465 1467 cred_t *cr = cs->cr;
1466 1468 vattr_t va;
1467 1469 nfs4_srv_t *nsrv4;
1468 1470
1469 1471 DTRACE_NFSV4_2(op__commit__start, struct compound_state *, cs,
1470 1472 COMMIT4args *, args);
1471 1473
1472 1474 if (vp == NULL) {
1473 1475 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1474 1476 goto out;
1475 1477 }
1476 1478 if (cs->access == CS_ACCESS_DENIED) {
1477 1479 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1478 1480 goto out;
1479 1481 }
1480 1482
1481 1483 if (args->offset + args->count < args->offset) {
1482 1484 *cs->statusp = resp->status = NFS4ERR_INVAL;
1483 1485 goto out;
1484 1486 }
1485 1487
1486 1488 va.va_mask = AT_UID;
1487 1489 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1488 1490
1489 1491 /*
1490 1492 * If we can't get the attributes, then we can't do the
1491 1493 * right access checking. So, we'll fail the request.
1492 1494 */
1493 1495 if (error) {
1494 1496 *cs->statusp = resp->status = puterrno4(error);
1495 1497 goto out;
1496 1498 }
1497 1499 if (rdonly4(req, cs)) {
1498 1500 *cs->statusp = resp->status = NFS4ERR_ROFS;
1499 1501 goto out;
1500 1502 }
1501 1503
1502 1504 if (vp->v_type != VREG) {
1503 1505 if (vp->v_type == VDIR)
1504 1506 resp->status = NFS4ERR_ISDIR;
1505 1507 else
1506 1508 resp->status = NFS4ERR_INVAL;
1507 1509 *cs->statusp = resp->status;
1508 1510 goto out;
1509 1511 }
1510 1512
1511 1513 if (crgetuid(cr) != va.va_uid &&
1512 1514 (error = VOP_ACCESS(vp, VWRITE, 0, cs->cr, NULL))) {
1513 1515 *cs->statusp = resp->status = puterrno4(error);
1514 1516 goto out;
1515 1517 }
1516 1518
1517 1519 error = VOP_FSYNC(vp, FSYNC, cr, NULL);
1518 1520
1519 1521 if (error) {
1520 1522 *cs->statusp = resp->status = puterrno4(error);
1521 1523 goto out;
1522 1524 }
1523 1525
1524 1526 nsrv4 = nfs4_get_srv();
1525 1527 *cs->statusp = resp->status = NFS4_OK;
1526 1528 resp->writeverf = nsrv4->write4verf;
1527 1529 out:
1528 1530 DTRACE_NFSV4_2(op__commit__done, struct compound_state *, cs,
1529 1531 COMMIT4res *, resp);
1530 1532 }
1531 1533
1532 1534 /*
1533 1535 * rfs4_op_mknod is called from rfs4_op_create after all initial verification
1534 1536 * was completed. It does the nfsv4 create for special files.
1535 1537 */
1536 1538 /* ARGSUSED */
1537 1539 static vnode_t *
1538 1540 do_rfs4_op_mknod(CREATE4args *args, CREATE4res *resp, struct svc_req *req,
1539 1541 struct compound_state *cs, vattr_t *vap, char *nm)
1540 1542 {
1541 1543 int error;
1542 1544 cred_t *cr = cs->cr;
1543 1545 vnode_t *dvp = cs->vp;
1544 1546 vnode_t *vp = NULL;
1545 1547 int mode;
1546 1548 enum vcexcl excl;
1547 1549
1548 1550 switch (args->type) {
1549 1551 case NF4CHR:
1550 1552 case NF4BLK:
1551 1553 if (secpolicy_sys_devices(cr) != 0) {
1552 1554 *cs->statusp = resp->status = NFS4ERR_PERM;
1553 1555 return (NULL);
1554 1556 }
1555 1557 if (args->type == NF4CHR)
1556 1558 vap->va_type = VCHR;
1557 1559 else
1558 1560 vap->va_type = VBLK;
1559 1561 vap->va_rdev = makedevice(args->ftype4_u.devdata.specdata1,
1560 1562 args->ftype4_u.devdata.specdata2);
1561 1563 vap->va_mask |= AT_RDEV;
1562 1564 break;
1563 1565 case NF4SOCK:
1564 1566 vap->va_type = VSOCK;
1565 1567 break;
1566 1568 case NF4FIFO:
1567 1569 vap->va_type = VFIFO;
1568 1570 break;
1569 1571 default:
1570 1572 *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1571 1573 return (NULL);
1572 1574 }
1573 1575
1574 1576 /*
1575 1577 * Must specify the mode.
1576 1578 */
1577 1579 if (!(vap->va_mask & AT_MODE)) {
1578 1580 *cs->statusp = resp->status = NFS4ERR_INVAL;
1579 1581 return (NULL);
1580 1582 }
1581 1583
1582 1584 excl = EXCL;
1583 1585
1584 1586 mode = 0;
1585 1587
1586 1588 error = VOP_CREATE(dvp, nm, vap, excl, mode, &vp, cr, 0, NULL, NULL);
1587 1589 if (error) {
1588 1590 *cs->statusp = resp->status = puterrno4(error);
1589 1591 return (NULL);
1590 1592 }
1591 1593 return (vp);
1592 1594 }
1593 1595
1594 1596 /*
1595 1597 * nfsv4 create is used to create non-regular files. For regular files,
1596 1598 * use nfsv4 open.
1597 1599 */
1598 1600 /* ARGSUSED */
1599 1601 static void
1600 1602 rfs4_op_create(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1601 1603 struct compound_state *cs)
1602 1604 {
1603 1605 CREATE4args *args = &argop->nfs_argop4_u.opcreate;
1604 1606 CREATE4res *resp = &resop->nfs_resop4_u.opcreate;
1605 1607 int error;
1606 1608 struct vattr bva, iva, iva2, ava, *vap;
1607 1609 cred_t *cr = cs->cr;
1608 1610 vnode_t *dvp = cs->vp;
1609 1611 vnode_t *vp = NULL;
1610 1612 vnode_t *realvp;
1611 1613 char *nm, *lnm;
1612 1614 uint_t len, llen;
1613 1615 int syncval = 0;
1614 1616 struct nfs4_svgetit_arg sarg;
1615 1617 struct nfs4_ntov_table ntov;
1616 1618 struct statvfs64 sb;
1617 1619 nfsstat4 status;
1618 1620 struct sockaddr *ca;
1619 1621 char *name = NULL;
1620 1622 char *lname = NULL;
1621 1623
1622 1624 DTRACE_NFSV4_2(op__create__start, struct compound_state *, cs,
1623 1625 CREATE4args *, args);
1624 1626
1625 1627 resp->attrset = 0;
1626 1628
1627 1629 if (dvp == NULL) {
1628 1630 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1629 1631 goto out;
1630 1632 }
1631 1633
1632 1634 /*
1633 1635 * If there is an unshared filesystem mounted on this vnode,
1634 1636 * do not allow to create an object in this directory.
1635 1637 */
1636 1638 if (vn_ismntpt(dvp)) {
1637 1639 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1638 1640 goto out;
1639 1641 }
1640 1642
1641 1643 /* Verify that type is correct */
1642 1644 switch (args->type) {
1643 1645 case NF4LNK:
1644 1646 case NF4BLK:
1645 1647 case NF4CHR:
1646 1648 case NF4SOCK:
1647 1649 case NF4FIFO:
1648 1650 case NF4DIR:
1649 1651 break;
1650 1652 default:
1651 1653 *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1652 1654 goto out;
1653 1655 };
1654 1656
1655 1657 if (cs->access == CS_ACCESS_DENIED) {
1656 1658 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1657 1659 goto out;
1658 1660 }
1659 1661 if (dvp->v_type != VDIR) {
1660 1662 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1661 1663 goto out;
1662 1664 }
1663 1665 status = utf8_dir_verify(&args->objname);
1664 1666 if (status != NFS4_OK) {
1665 1667 *cs->statusp = resp->status = status;
1666 1668 goto out;
1667 1669 }
1668 1670
1669 1671 if (rdonly4(req, cs)) {
1670 1672 *cs->statusp = resp->status = NFS4ERR_ROFS;
1671 1673 goto out;
1672 1674 }
1673 1675
1674 1676 /*
1675 1677 * Name of newly created object
1676 1678 */
1677 1679 nm = utf8_to_fn(&args->objname, &len, NULL);
1678 1680 if (nm == NULL) {
1679 1681 *cs->statusp = resp->status = NFS4ERR_INVAL;
1680 1682 goto out;
1681 1683 }
1682 1684
1683 1685 if (len > MAXNAMELEN) {
1684 1686 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1685 1687 kmem_free(nm, len);
1686 1688 goto out;
1687 1689 }
1688 1690
1689 1691 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1690 1692 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1691 1693 MAXPATHLEN + 1);
1692 1694
1693 1695 if (name == NULL) {
1694 1696 *cs->statusp = resp->status = NFS4ERR_INVAL;
1695 1697 kmem_free(nm, len);
1696 1698 goto out;
1697 1699 }
1698 1700
1699 1701 resp->attrset = 0;
1700 1702
1701 1703 sarg.sbp = &sb;
1702 1704 sarg.is_referral = B_FALSE;
1703 1705 nfs4_ntov_table_init(&ntov);
1704 1706
1705 1707 status = do_rfs4_set_attrs(&resp->attrset,
1706 1708 &args->createattrs, cs, &sarg, &ntov, NFS4ATTR_SETIT);
1707 1709
1708 1710 if (sarg.vap->va_mask == 0 && status == NFS4_OK)
1709 1711 status = NFS4ERR_INVAL;
1710 1712
1711 1713 if (status != NFS4_OK) {
1712 1714 *cs->statusp = resp->status = status;
1713 1715 if (name != nm)
1714 1716 kmem_free(name, MAXPATHLEN + 1);
1715 1717 kmem_free(nm, len);
1716 1718 nfs4_ntov_table_free(&ntov, &sarg);
1717 1719 resp->attrset = 0;
1718 1720 goto out;
1719 1721 }
1720 1722
1721 1723 /* Get "before" change value */
1722 1724 bva.va_mask = AT_CTIME|AT_SEQ|AT_MODE;
1723 1725 error = VOP_GETATTR(dvp, &bva, 0, cr, NULL);
1724 1726 if (error) {
1725 1727 *cs->statusp = resp->status = puterrno4(error);
1726 1728 if (name != nm)
1727 1729 kmem_free(name, MAXPATHLEN + 1);
1728 1730 kmem_free(nm, len);
1729 1731 nfs4_ntov_table_free(&ntov, &sarg);
1730 1732 resp->attrset = 0;
1731 1733 goto out;
1732 1734 }
1733 1735 NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bva.va_ctime)
1734 1736
1735 1737 vap = sarg.vap;
1736 1738
1737 1739 /*
1738 1740 * Set the default initial values for attributes when the parent
1739 1741 * directory does not have the VSUID/VSGID bit set and they have
1740 1742 * not been specified in createattrs.
1741 1743 */
1742 1744 if (!(bva.va_mode & VSUID) && (vap->va_mask & AT_UID) == 0) {
1743 1745 vap->va_uid = crgetuid(cr);
1744 1746 vap->va_mask |= AT_UID;
1745 1747 }
1746 1748 if (!(bva.va_mode & VSGID) && (vap->va_mask & AT_GID) == 0) {
1747 1749 vap->va_gid = crgetgid(cr);
1748 1750 vap->va_mask |= AT_GID;
1749 1751 }
1750 1752
1751 1753 vap->va_mask |= AT_TYPE;
1752 1754 switch (args->type) {
1753 1755 case NF4DIR:
1754 1756 vap->va_type = VDIR;
1755 1757 if ((vap->va_mask & AT_MODE) == 0) {
1756 1758 vap->va_mode = 0700; /* default: owner rwx only */
1757 1759 vap->va_mask |= AT_MODE;
1758 1760 }
1759 1761 error = VOP_MKDIR(dvp, name, vap, &vp, cr, NULL, 0, NULL);
1760 1762 if (error)
1761 1763 break;
1762 1764
1763 1765 /*
1764 1766 * Get the initial "after" sequence number, if it fails,
1765 1767 * set to zero
1766 1768 */
1767 1769 iva.va_mask = AT_SEQ;
1768 1770 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1769 1771 iva.va_seq = 0;
1770 1772 break;
1771 1773 case NF4LNK:
1772 1774 vap->va_type = VLNK;
1773 1775 if ((vap->va_mask & AT_MODE) == 0) {
1774 1776 vap->va_mode = 0700; /* default: owner rwx only */
1775 1777 vap->va_mask |= AT_MODE;
1776 1778 }
1777 1779
1778 1780 /*
1779 1781 * symlink names must be treated as data
1780 1782 */
1781 1783 lnm = utf8_to_str((utf8string *)&args->ftype4_u.linkdata,
1782 1784 &llen, NULL);
1783 1785
1784 1786 if (lnm == NULL) {
1785 1787 *cs->statusp = resp->status = NFS4ERR_INVAL;
1786 1788 if (name != nm)
1787 1789 kmem_free(name, MAXPATHLEN + 1);
1788 1790 kmem_free(nm, len);
1789 1791 nfs4_ntov_table_free(&ntov, &sarg);
1790 1792 resp->attrset = 0;
1791 1793 goto out;
1792 1794 }
1793 1795
1794 1796 if (llen > MAXPATHLEN) {
1795 1797 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1796 1798 if (name != nm)
1797 1799 kmem_free(name, MAXPATHLEN + 1);
1798 1800 kmem_free(nm, len);
1799 1801 kmem_free(lnm, llen);
1800 1802 nfs4_ntov_table_free(&ntov, &sarg);
1801 1803 resp->attrset = 0;
1802 1804 goto out;
1803 1805 }
1804 1806
1805 1807 lname = nfscmd_convname(ca, cs->exi, lnm,
1806 1808 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
1807 1809
1808 1810 if (lname == NULL) {
1809 1811 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
1810 1812 if (name != nm)
1811 1813 kmem_free(name, MAXPATHLEN + 1);
1812 1814 kmem_free(nm, len);
1813 1815 kmem_free(lnm, llen);
1814 1816 nfs4_ntov_table_free(&ntov, &sarg);
1815 1817 resp->attrset = 0;
1816 1818 goto out;
1817 1819 }
1818 1820
1819 1821 error = VOP_SYMLINK(dvp, name, vap, lname, cr, NULL, 0);
1820 1822 if (lname != lnm)
1821 1823 kmem_free(lname, MAXPATHLEN + 1);
1822 1824 kmem_free(lnm, llen);
1823 1825 if (error)
1824 1826 break;
1825 1827
1826 1828 /*
1827 1829 * Get the initial "after" sequence number, if it fails,
1828 1830 * set to zero
1829 1831 */
1830 1832 iva.va_mask = AT_SEQ;
1831 1833 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1832 1834 iva.va_seq = 0;
1833 1835
1834 1836 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
1835 1837 NULL, NULL, NULL);
1836 1838 if (error)
1837 1839 break;
1838 1840
1839 1841 /*
1840 1842 * va_seq is not safe over VOP calls, check it again
1841 1843 * if it has changed zero out iva to force atomic = FALSE.
1842 1844 */
1843 1845 iva2.va_mask = AT_SEQ;
1844 1846 if (VOP_GETATTR(dvp, &iva2, 0, cs->cr, NULL) ||
1845 1847 iva2.va_seq != iva.va_seq)
1846 1848 iva.va_seq = 0;
1847 1849 break;
1848 1850 default:
1849 1851 /*
1850 1852 * probably a special file.
1851 1853 */
1852 1854 if ((vap->va_mask & AT_MODE) == 0) {
1853 1855 vap->va_mode = 0600; /* default: owner rw only */
1854 1856 vap->va_mask |= AT_MODE;
1855 1857 }
1856 1858 syncval = FNODSYNC;
1857 1859 /*
1858 1860 * We know this will only generate one VOP call
1859 1861 */
1860 1862 vp = do_rfs4_op_mknod(args, resp, req, cs, vap, name);
1861 1863
1862 1864 if (vp == NULL) {
1863 1865 if (name != nm)
1864 1866 kmem_free(name, MAXPATHLEN + 1);
1865 1867 kmem_free(nm, len);
1866 1868 nfs4_ntov_table_free(&ntov, &sarg);
1867 1869 resp->attrset = 0;
1868 1870 goto out;
1869 1871 }
1870 1872
1871 1873 /*
1872 1874 * Get the initial "after" sequence number, if it fails,
1873 1875 * set to zero
1874 1876 */
1875 1877 iva.va_mask = AT_SEQ;
1876 1878 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1877 1879 iva.va_seq = 0;
1878 1880
1879 1881 break;
1880 1882 }
1881 1883 if (name != nm)
1882 1884 kmem_free(name, MAXPATHLEN + 1);
1883 1885 kmem_free(nm, len);
1884 1886
1885 1887 if (error) {
1886 1888 *cs->statusp = resp->status = puterrno4(error);
1887 1889 }
1888 1890
1889 1891 /*
1890 1892 * Force modified data and metadata out to stable storage.
1891 1893 */
1892 1894 (void) VOP_FSYNC(dvp, 0, cr, NULL);
1893 1895
1894 1896 if (resp->status != NFS4_OK) {
1895 1897 if (vp != NULL)
1896 1898 VN_RELE(vp);
1897 1899 nfs4_ntov_table_free(&ntov, &sarg);
1898 1900 resp->attrset = 0;
1899 1901 goto out;
1900 1902 }
1901 1903
1902 1904 /*
1903 1905 * Finish setup of cinfo response, "before" value already set.
1904 1906 * Get "after" change value, if it fails, simply return the
1905 1907 * before value.
1906 1908 */
1907 1909 ava.va_mask = AT_CTIME|AT_SEQ;
1908 1910 if (VOP_GETATTR(dvp, &ava, 0, cr, NULL)) {
1909 1911 ava.va_ctime = bva.va_ctime;
1910 1912 ava.va_seq = 0;
1911 1913 }
1912 1914 NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, ava.va_ctime);
1913 1915
1914 1916 /*
1915 1917 * True verification that object was created with correct
1916 1918 * attrs is impossible. The attrs could have been changed
1917 1919 * immediately after object creation. If attributes did
1918 1920 * not verify, the only recourse for the server is to
1919 1921 * destroy the object. Maybe if some attrs (like gid)
1920 1922 * are set incorrectly, the object should be destroyed;
1921 1923 * however, seems bad as a default policy. Do we really
1922 1924 * want to destroy an object over one of the times not
1923 1925 * verifying correctly? For these reasons, the server
1924 1926 * currently sets bits in attrset for createattrs
1925 1927 * that were set; however, no verification is done.
1926 1928 *
1927 1929 * vmask_to_nmask accounts for vattr bits set on create
1928 1930 * [do_rfs4_set_attrs() only sets resp bits for
1929 1931 * non-vattr/vfs bits.]
1930 1932 * Mask off any bits set by default so as not to return
1931 1933 * more attrset bits than were requested in createattrs
1932 1934 */
1933 1935 nfs4_vmask_to_nmask(sarg.vap->va_mask, &resp->attrset);
1934 1936 resp->attrset &= args->createattrs.attrmask;
1935 1937 nfs4_ntov_table_free(&ntov, &sarg);
1936 1938
1937 1939 error = makefh4(&cs->fh, vp, cs->exi);
1938 1940 if (error) {
1939 1941 *cs->statusp = resp->status = puterrno4(error);
1940 1942 }
1941 1943
1942 1944 /*
1943 1945 * The cinfo.atomic = TRUE only if we got no errors, we have
1944 1946 * non-zero va_seq's, and it has incremented by exactly one
1945 1947 * during the creation and it didn't change during the VOP_LOOKUP
1946 1948 * or VOP_FSYNC.
1947 1949 */
1948 1950 if (!error && bva.va_seq && iva.va_seq && ava.va_seq &&
1949 1951 iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
1950 1952 resp->cinfo.atomic = TRUE;
1951 1953 else
1952 1954 resp->cinfo.atomic = FALSE;
1953 1955
1954 1956 /*
1955 1957 * Force modified metadata out to stable storage.
1956 1958 *
1957 1959 * if a underlying vp exists, pass it to VOP_FSYNC
1958 1960 */
1959 1961 if (VOP_REALVP(vp, &realvp, NULL) == 0)
1960 1962 (void) VOP_FSYNC(realvp, syncval, cr, NULL);
1961 1963 else
1962 1964 (void) VOP_FSYNC(vp, syncval, cr, NULL);
1963 1965
1964 1966 if (resp->status != NFS4_OK) {
1965 1967 VN_RELE(vp);
1966 1968 goto out;
1967 1969 }
1968 1970 if (cs->vp)
1969 1971 VN_RELE(cs->vp);
1970 1972
1971 1973 cs->vp = vp;
1972 1974 *cs->statusp = resp->status = NFS4_OK;
1973 1975 out:
1974 1976 DTRACE_NFSV4_2(op__create__done, struct compound_state *, cs,
1975 1977 CREATE4res *, resp);
1976 1978 }
1977 1979
1978 1980 /*ARGSUSED*/
1979 1981 static void
1980 1982 rfs4_op_delegpurge(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1981 1983 struct compound_state *cs)
1982 1984 {
1983 1985 DTRACE_NFSV4_2(op__delegpurge__start, struct compound_state *, cs,
1984 1986 DELEGPURGE4args *, &argop->nfs_argop4_u.opdelegpurge);
1985 1987
1986 1988 rfs4_op_inval(argop, resop, req, cs);
1987 1989
1988 1990 DTRACE_NFSV4_2(op__delegpurge__done, struct compound_state *, cs,
1989 1991 DELEGPURGE4res *, &resop->nfs_resop4_u.opdelegpurge);
1990 1992 }
1991 1993
1992 1994 /*ARGSUSED*/
1993 1995 static void
1994 1996 rfs4_op_delegreturn(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1995 1997 struct compound_state *cs)
1996 1998 {
1997 1999 DELEGRETURN4args *args = &argop->nfs_argop4_u.opdelegreturn;
1998 2000 DELEGRETURN4res *resp = &resop->nfs_resop4_u.opdelegreturn;
1999 2001 rfs4_deleg_state_t *dsp;
2000 2002 nfsstat4 status;
2001 2003
2002 2004 DTRACE_NFSV4_2(op__delegreturn__start, struct compound_state *, cs,
2003 2005 DELEGRETURN4args *, args);
2004 2006
2005 2007 status = rfs4_get_deleg_state(&args->deleg_stateid, &dsp);
2006 2008 resp->status = *cs->statusp = status;
2007 2009 if (status != NFS4_OK)
2008 2010 goto out;
2009 2011
2010 2012 /* Ensure specified filehandle matches */
2011 2013 if (cs->vp != dsp->rds_finfo->rf_vp) {
2012 2014 resp->status = *cs->statusp = NFS4ERR_BAD_STATEID;
2013 2015 } else
2014 2016 rfs4_return_deleg(dsp, FALSE);
2015 2017
2016 2018 rfs4_update_lease(dsp->rds_client);
2017 2019
2018 2020 rfs4_deleg_state_rele(dsp);
2019 2021 out:
2020 2022 DTRACE_NFSV4_2(op__delegreturn__done, struct compound_state *, cs,
2021 2023 DELEGRETURN4res *, resp);
2022 2024 }
2023 2025
2024 2026 /*
2025 2027 * Check to see if a given "flavor" is an explicitly shared flavor.
2026 2028 * The assumption of this routine is the "flavor" is already a valid
2027 2029 * flavor in the secinfo list of "exi".
2028 2030 *
2029 2031 * e.g.
2030 2032 * # share -o sec=flavor1 /export
2031 2033 * # share -o sec=flavor2 /export/home
2032 2034 *
2033 2035 * flavor2 is not an explicitly shared flavor for /export,
2034 2036 * however it is in the secinfo list for /export thru the
2035 2037 * server namespace setup.
2036 2038 */
2037 2039 int
2038 2040 is_exported_sec(int flavor, struct exportinfo *exi)
2039 2041 {
2040 2042 int i;
2041 2043 struct secinfo *sp;
2042 2044
2043 2045 sp = exi->exi_export.ex_secinfo;
2044 2046 for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
2045 2047 if (flavor == sp[i].s_secinfo.sc_nfsnum ||
2046 2048 sp[i].s_secinfo.sc_nfsnum == AUTH_NONE) {
2047 2049 return (SEC_REF_EXPORTED(&sp[i]));
2048 2050 }
2049 2051 }
2050 2052
2051 2053 /* Should not reach this point based on the assumption */
2052 2054 return (0);
2053 2055 }
2054 2056
2055 2057 /*
2056 2058 * Check if the security flavor used in the request matches what is
2057 2059 * required at the export point or at the root pseudo node (exi_root).
2058 2060 *
2059 2061 * returns 1 if there's a match or if exported with AUTH_NONE; 0 otherwise.
2060 2062 *
2061 2063 */
2062 2064 static int
2063 2065 secinfo_match_or_authnone(struct compound_state *cs)
2064 2066 {
2065 2067 int i;
2066 2068 struct secinfo *sp;
2067 2069
2068 2070 /*
2069 2071 * Check cs->nfsflavor (from the request) against
2070 2072 * the current export data in cs->exi.
2071 2073 */
2072 2074 sp = cs->exi->exi_export.ex_secinfo;
2073 2075 for (i = 0; i < cs->exi->exi_export.ex_seccnt; i++) {
2074 2076 if (cs->nfsflavor == sp[i].s_secinfo.sc_nfsnum ||
2075 2077 sp[i].s_secinfo.sc_nfsnum == AUTH_NONE)
2076 2078 return (1);
2077 2079 }
2078 2080
2079 2081 return (0);
2080 2082 }
2081 2083
2082 2084 /*
2083 2085 * Check the access authority for the client and return the correct error.
2084 2086 */
2085 2087 nfsstat4
2086 2088 call_checkauth4(struct compound_state *cs, struct svc_req *req)
2087 2089 {
2088 2090 int authres;
2089 2091
2090 2092 /*
2091 2093 * First, check if the security flavor used in the request
2092 2094 * are among the flavors set in the server namespace.
2093 2095 */
2094 2096 if (!secinfo_match_or_authnone(cs)) {
2095 2097 *cs->statusp = NFS4ERR_WRONGSEC;
2096 2098 return (*cs->statusp);
2097 2099 }
2098 2100
2099 2101 authres = checkauth4(cs, req);
2100 2102
2101 2103 if (authres > 0) {
2102 2104 *cs->statusp = NFS4_OK;
2103 2105 if (! (cs->access & CS_ACCESS_LIMITED))
2104 2106 cs->access = CS_ACCESS_OK;
2105 2107 } else if (authres == 0) {
2106 2108 *cs->statusp = NFS4ERR_ACCESS;
2107 2109 } else if (authres == -2) {
2108 2110 *cs->statusp = NFS4ERR_WRONGSEC;
2109 2111 } else {
2110 2112 *cs->statusp = NFS4ERR_DELAY;
2111 2113 }
2112 2114 return (*cs->statusp);
2113 2115 }
2114 2116
2115 2117 /*
2116 2118 * bitmap4_to_attrmask is called by getattr and readdir.
2117 2119 * It sets up the vattr mask and determines whether vfsstat call is needed
2118 2120 * based on the input bitmap.
2119 2121 * Returns nfsv4 status.
2120 2122 */
2121 2123 static nfsstat4
2122 2124 bitmap4_to_attrmask(bitmap4 breq, struct nfs4_svgetit_arg *sargp)
2123 2125 {
2124 2126 int i;
2125 2127 uint_t va_mask;
2126 2128 struct statvfs64 *sbp = sargp->sbp;
2127 2129
2128 2130 sargp->sbp = NULL;
2129 2131 sargp->flag = 0;
2130 2132 sargp->rdattr_error = NFS4_OK;
2131 2133 sargp->mntdfid_set = FALSE;
2132 2134 if (sargp->cs->vp)
2133 2135 sargp->xattr = get_fh4_flag(&sargp->cs->fh,
2134 2136 FH4_ATTRDIR | FH4_NAMEDATTR);
2135 2137 else
2136 2138 sargp->xattr = 0;
2137 2139
2138 2140 /*
2139 2141 * Set rdattr_error_req to true if return error per
2140 2142 * failed entry rather than fail the readdir.
2141 2143 */
2142 2144 if (breq & FATTR4_RDATTR_ERROR_MASK)
2143 2145 sargp->rdattr_error_req = 1;
2144 2146 else
2145 2147 sargp->rdattr_error_req = 0;
2146 2148
2147 2149 /*
2148 2150 * generate the va_mask
2149 2151 * Handle the easy cases first
2150 2152 */
2151 2153 switch (breq) {
2152 2154 case NFS4_NTOV_ATTR_MASK:
2153 2155 sargp->vap->va_mask = NFS4_NTOV_ATTR_AT_MASK;
2154 2156 return (NFS4_OK);
2155 2157
2156 2158 case NFS4_FS_ATTR_MASK:
2157 2159 sargp->vap->va_mask = NFS4_FS_ATTR_AT_MASK;
2158 2160 sargp->sbp = sbp;
2159 2161 return (NFS4_OK);
2160 2162
2161 2163 case NFS4_NTOV_ATTR_CACHE_MASK:
2162 2164 sargp->vap->va_mask = NFS4_NTOV_ATTR_CACHE_AT_MASK;
2163 2165 return (NFS4_OK);
2164 2166
2165 2167 case FATTR4_LEASE_TIME_MASK:
2166 2168 sargp->vap->va_mask = 0;
2167 2169 return (NFS4_OK);
2168 2170
2169 2171 default:
2170 2172 va_mask = 0;
2171 2173 for (i = 0; i < nfs4_ntov_map_size; i++) {
2172 2174 if ((breq & nfs4_ntov_map[i].fbit) &&
2173 2175 nfs4_ntov_map[i].vbit)
2174 2176 va_mask |= nfs4_ntov_map[i].vbit;
2175 2177 }
2176 2178
2177 2179 /*
2178 2180 * Check is vfsstat is needed
2179 2181 */
2180 2182 if (breq & NFS4_FS_ATTR_MASK)
2181 2183 sargp->sbp = sbp;
2182 2184
2183 2185 sargp->vap->va_mask = va_mask;
2184 2186 return (NFS4_OK);
2185 2187 }
2186 2188 /* NOTREACHED */
2187 2189 }
2188 2190
2189 2191 /*
2190 2192 * bitmap4_get_sysattrs is called by getattr and readdir.
2191 2193 * It calls both VOP_GETATTR and VFS_STATVFS calls to get the attrs.
2192 2194 * Returns nfsv4 status.
2193 2195 */
2194 2196 static nfsstat4
2195 2197 bitmap4_get_sysattrs(struct nfs4_svgetit_arg *sargp)
2196 2198 {
2197 2199 int error;
2198 2200 struct compound_state *cs = sargp->cs;
2199 2201 vnode_t *vp = cs->vp;
2200 2202
2201 2203 if (sargp->sbp != NULL) {
2202 2204 if (error = VFS_STATVFS(vp->v_vfsp, sargp->sbp)) {
2203 2205 sargp->sbp = NULL; /* to identify error */
2204 2206 return (puterrno4(error));
2205 2207 }
2206 2208 }
2207 2209
2208 2210 return (rfs4_vop_getattr(vp, sargp->vap, 0, cs->cr));
2209 2211 }
2210 2212
2211 2213 static void
2212 2214 nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp)
2213 2215 {
2214 2216 ntovp->na = kmem_zalloc(sizeof (union nfs4_attr_u) * nfs4_ntov_map_size,
2215 2217 KM_SLEEP);
2216 2218 ntovp->attrcnt = 0;
2217 2219 ntovp->vfsstat = FALSE;
2218 2220 }
2219 2221
2220 2222 static void
2221 2223 nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
2222 2224 struct nfs4_svgetit_arg *sargp)
2223 2225 {
2224 2226 int i;
2225 2227 union nfs4_attr_u *na;
2226 2228 uint8_t *amap;
2227 2229
2228 2230 /*
2229 2231 * XXX Should do the same checks for whether the bit is set
2230 2232 */
2231 2233 for (i = 0, na = ntovp->na, amap = ntovp->amap;
2232 2234 i < ntovp->attrcnt; i++, na++, amap++) {
2233 2235 (void) (*nfs4_ntov_map[*amap].sv_getit)(
2234 2236 NFS4ATTR_FREEIT, sargp, na);
2235 2237 }
2236 2238 if ((sargp->op == NFS4ATTR_SETIT) || (sargp->op == NFS4ATTR_VERIT)) {
2237 2239 /*
2238 2240 * xdr_free for getattr will be done later
2239 2241 */
2240 2242 for (i = 0, na = ntovp->na, amap = ntovp->amap;
2241 2243 i < ntovp->attrcnt; i++, na++, amap++) {
2242 2244 xdr_free(nfs4_ntov_map[*amap].xfunc, (caddr_t)na);
2243 2245 }
2244 2246 }
2245 2247 kmem_free(ntovp->na, sizeof (union nfs4_attr_u) * nfs4_ntov_map_size);
2246 2248 }
2247 2249
/*
 * do_rfs4_op_getattr gets the system attrs and converts into fattr4.
 *
 *	breq	- bitmap of the attributes the client asked for
 *	fattrp	- result: attrmask receives one bit per attribute that was
 *		  fetched successfully; attrlist4/attrlist4_len receive the
 *		  XDR-encoded values (allocated here, see comment below)
 *	sargp	- get/set argument block; its op is forced to
 *		  NFS4ATTR_GETIT and its flag is cleared
 *
 * Returns:
 *	NFS4_OK			on success (including an empty request)
 *	NFS4ERR_INVAL		if a write-only attribute is requested
 *	puterrno4(error)	if fetching an attribute fails and the caller
 *				did not ask for per-entry rdattr_error
 *	NFS4ERR_SERVERFAULT	if XDR encoding of an attribute fails
 */
static nfsstat4
do_rfs4_op_getattr(bitmap4 breq, fattr4 *fattrp,
    struct nfs4_svgetit_arg *sargp)
{
	int error = 0;
	int i, k;
	struct nfs4_ntov_table ntov;
	XDR xdr;
	ulong_t xdr_size;
	char *xdr_attrs;
	nfsstat4 status = NFS4_OK;
	/* remembered so we can tell whether rdattr_error changes below */
	nfsstat4 prev_rdattr_error = sargp->rdattr_error;
	union nfs4_attr_u *na;
	uint8_t *amap;

	sargp->op = NFS4ATTR_GETIT;
	sargp->flag = 0;

	fattrp->attrmask = 0;
	/* if no bits requested, then return empty fattr4 */
	if (breq == 0) {
		fattrp->attrlist4_len = 0;
		fattrp->attrlist4 = NULL;
		return (NFS4_OK);
	}

	/*
	 * return NFS4ERR_INVAL when client requests write-only attrs
	 */
	if (breq & (FATTR4_TIME_ACCESS_SET_MASK | FATTR4_TIME_MODIFY_SET_MASK))
		return (NFS4ERR_INVAL);

	/* from here on every exit must go through nfs4_ntov_table_free() */
	nfs4_ntov_table_init(&ntov);
	na = ntov.na;
	amap = ntov.amap;

	/*
	 * Walk the attribute map; for every requested and supported
	 * attribute fetch its value into the next free slot of the table.
	 * na/amap only advance when a value was actually stored.
	 */
	for (i = 0; i < nfs4_ntov_map_size; i++) {
		if (breq & nfs4_ntov_map[i].fbit) {
			if ((*nfs4_ntov_map[i].sv_getit)(
			    NFS4ATTR_SUPPORTED, sargp, NULL) == 0) {

				error = (*nfs4_ntov_map[i].sv_getit)(
				    NFS4ATTR_GETIT, sargp, na);

				/*
				 * Possible error values:
				 * >0 if sv_getit failed to
				 * get the attr; 0 if succeeded;
				 * <0 if rdattr_error and the
				 * attribute cannot be returned.
				 */
				/*
				 * NOTE(review): on this early exit
				 * fattrp->attrlist4 and attrlist4_len are
				 * not assigned by this function.
				 */
				if (error && !(sargp->rdattr_error_req))
					goto done;
				/*
				 * With rdattr_error requested, a failure
				 * only affects this attribute: it is left
				 * out of the reply and the first positive
				 * error is recorded in rdattr_error.
				 */
				if (error == 0) {
					fattrp->attrmask |=
					    nfs4_ntov_map[i].fbit;
					*amap++ =
					    (uint8_t)nfs4_ntov_map[i].nval;
					na++;
					(ntov.attrcnt)++;
				} else if ((error > 0) &&
				    (sargp->rdattr_error == NFS4_OK)) {
					sargp->rdattr_error = puterrno4(error);
				}
				/* failure was absorbed; do not report it */
				error = 0;
			}
		}
	}

	/*
	 * If rdattr_error was set after the return value for it was assigned,
	 * update it.
	 *
	 * The scan skips entries numbered below FATTR4_RDATTR_ERROR and
	 * stops at the first entry that is not; presumably this relies on
	 * amap being filled in ascending attribute-number order and on
	 * nfs4_ntov_map being indexable by attribute number -- confirm.
	 */
	if (prev_rdattr_error != sargp->rdattr_error) {
		na = ntov.na;
		amap = ntov.amap;
		for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
			k = *amap;
			if (k < FATTR4_RDATTR_ERROR) {
				continue;
			}
			if ((k == FATTR4_RDATTR_ERROR) &&
			    ((*nfs4_ntov_map[k].sv_getit)(
			    NFS4ATTR_SUPPORTED, sargp, NULL) == 0)) {

				(void) (*nfs4_ntov_map[k].sv_getit)(
				    NFS4ATTR_GETIT, sargp, na);
			}
			break;
		}
	}

	/* First pass over the collected values: total encoded size. */
	xdr_size = 0;
	na = ntov.na;
	amap = ntov.amap;
	for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
		xdr_size += xdr_sizeof(nfs4_ntov_map[*amap].xfunc, na);
	}

	fattrp->attrlist4_len = xdr_size;
	if (xdr_size) {
		/* freed by rfs4_op_getattr_free() */
		fattrp->attrlist4 = xdr_attrs = kmem_zalloc(xdr_size, KM_SLEEP);

		xdrmem_create(&xdr, xdr_attrs, xdr_size, XDR_ENCODE);

		/* Second pass: encode each value into the reply buffer. */
		na = ntov.na;
		amap = ntov.amap;
		for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
			if (!(*nfs4_ntov_map[*amap].xfunc)(&xdr, na)) {
				DTRACE_PROBE1(nfss__e__getattr4_encfail,
				    int, *amap);
				/* buffer stays attached to fattrp */
				status = NFS4ERR_SERVERFAULT;
				break;
			}
		}
		/* xdrmem_destroy(&xdrs); */	/* NO-OP */
	} else {
		fattrp->attrlist4 = NULL;
	}
done:

	nfs4_ntov_table_free(&ntov, sargp);

	/* only non-zero when we arrived here via the early "goto done" */
	if (error != 0)
		status = puterrno4(error);

	return (status);
}
2386 2388
2387 2389 /* ARGSUSED */
2388 2390 static void
2389 2391 rfs4_op_getattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2390 2392 struct compound_state *cs)
2391 2393 {
2392 2394 GETATTR4args *args = &argop->nfs_argop4_u.opgetattr;
2393 2395 GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2394 2396 struct nfs4_svgetit_arg sarg;
2395 2397 struct statvfs64 sb;
2396 2398 nfsstat4 status;
2397 2399
2398 2400 DTRACE_NFSV4_2(op__getattr__start, struct compound_state *, cs,
2399 2401 GETATTR4args *, args);
2400 2402
2401 2403 if (cs->vp == NULL) {
2402 2404 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2403 2405 goto out;
2404 2406 }
2405 2407
2406 2408 if (cs->access == CS_ACCESS_DENIED) {
2407 2409 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2408 2410 goto out;
2409 2411 }
2410 2412
2411 2413 sarg.sbp = &sb;
2412 2414 sarg.cs = cs;
2413 2415 sarg.is_referral = B_FALSE;
2414 2416
2415 2417 status = bitmap4_to_attrmask(args->attr_request, &sarg);
2416 2418 if (status == NFS4_OK) {
2417 2419
2418 2420 status = bitmap4_get_sysattrs(&sarg);
2419 2421 if (status == NFS4_OK) {
2420 2422
2421 2423 /* Is this a referral? */
2422 2424 if (vn_is_nfs_reparse(cs->vp, cs->cr)) {
2423 2425 /* Older V4 Solaris client sees a link */
2424 2426 if (client_is_downrev(req))
2425 2427 sarg.vap->va_type = VLNK;
2426 2428 else
2427 2429 sarg.is_referral = B_TRUE;
2428 2430 }
2429 2431
2430 2432 status = do_rfs4_op_getattr(args->attr_request,
2431 2433 &resp->obj_attributes, &sarg);
2432 2434 }
2433 2435 }
2434 2436 *cs->statusp = resp->status = status;
2435 2437 out:
2436 2438 DTRACE_NFSV4_2(op__getattr__done, struct compound_state *, cs,
2437 2439 GETATTR4res *, resp);
2438 2440 }
2439 2441
2440 2442 static void
2441 2443 rfs4_op_getattr_free(nfs_resop4 *resop)
2442 2444 {
2443 2445 GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2444 2446
2445 2447 nfs4_fattr4_free(&resp->obj_attributes);
2446 2448 }
2447 2449
2448 2450 /* ARGSUSED */
2449 2451 static void
2450 2452 rfs4_op_getfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2451 2453 struct compound_state *cs)
2452 2454 {
2453 2455 GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2454 2456
2455 2457 DTRACE_NFSV4_1(op__getfh__start, struct compound_state *, cs);
2456 2458
2457 2459 if (cs->vp == NULL) {
2458 2460 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2459 2461 goto out;
2460 2462 }
2461 2463 if (cs->access == CS_ACCESS_DENIED) {
2462 2464 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2463 2465 goto out;
2464 2466 }
2465 2467
2466 2468 /* check for reparse point at the share point */
2467 2469 if (cs->exi->exi_moved || vn_is_nfs_reparse(cs->exi->exi_vp, cs->cr)) {
2468 2470 /* it's all bad */
2469 2471 cs->exi->exi_moved = 1;
2470 2472 *cs->statusp = resp->status = NFS4ERR_MOVED;
2471 2473 DTRACE_PROBE2(nfs4serv__func__referral__shared__moved,
2472 2474 vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2473 2475 return;
2474 2476 }
2475 2477
2476 2478 /* check for reparse point at vp */
2477 2479 if (vn_is_nfs_reparse(cs->vp, cs->cr) && !client_is_downrev(req)) {
2478 2480 /* it's not all bad */
2479 2481 *cs->statusp = resp->status = NFS4ERR_MOVED;
2480 2482 DTRACE_PROBE2(nfs4serv__func__referral__moved,
2481 2483 vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2482 2484 return;
2483 2485 }
2484 2486
2485 2487 resp->object.nfs_fh4_val =
2486 2488 kmem_alloc(cs->fh.nfs_fh4_len, KM_SLEEP);
2487 2489 nfs_fh4_copy(&cs->fh, &resp->object);
2488 2490 *cs->statusp = resp->status = NFS4_OK;
2489 2491 out:
2490 2492 DTRACE_NFSV4_2(op__getfh__done, struct compound_state *, cs,
2491 2493 GETFH4res *, resp);
2492 2494 }
2493 2495
2494 2496 static void
2495 2497 rfs4_op_getfh_free(nfs_resop4 *resop)
2496 2498 {
2497 2499 GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2498 2500
2499 2501 if (resp->status == NFS4_OK &&
2500 2502 resp->object.nfs_fh4_val != NULL) {
2501 2503 kmem_free(resp->object.nfs_fh4_val, resp->object.nfs_fh4_len);
2502 2504 resp->object.nfs_fh4_val = NULL;
2503 2505 resp->object.nfs_fh4_len = 0;
2504 2506 }
2505 2507 }
2506 2508
2507 2509 /*
2508 2510 * illegal: args: void
2509 2511 * res : status (NFS4ERR_OP_ILLEGAL)
2510 2512 */
2511 2513 /* ARGSUSED */
2512 2514 static void
2513 2515 rfs4_op_illegal(nfs_argop4 *argop, nfs_resop4 *resop,
2514 2516 struct svc_req *req, struct compound_state *cs)
2515 2517 {
2516 2518 ILLEGAL4res *resp = &resop->nfs_resop4_u.opillegal;
2517 2519
2518 2520 resop->resop = OP_ILLEGAL;
2519 2521 *cs->statusp = resp->status = NFS4ERR_OP_ILLEGAL;
2520 2522 }
2521 2523
/*
 * link: args: SAVED_FH: file, CURRENT_FH: target directory
 *	 res: status. If success - CURRENT_FH unchanged, return change_info
 *
 * Status values set directly by this function:
 *	NFS4ERR_NOFILEHANDLE	saved or current filehandle missing
 *	NFS4ERR_ACCESS		target directory is a mount point, or access
 *				to the current filehandle was denied
 *	NFS4ERR_ISDIR		source object is a directory
 *	NFS4ERR_NOTDIR		target is not a directory
 *	NFS4ERR_XDEV		source and target are in different exports
 *	NFS4ERR_INVAL		new name could not be converted
 *	NFS4ERR_NAMETOOLONG	converted name is longer than MAXNAMELEN
 *	NFS4ERR_ROFS		rdonly4() reports a read-only export
 * Other failures are reported as the status from utf8_dir_verify() or as
 * puterrno4() of the failing VOP call.
 */
/* ARGSUSED */
static void
rfs4_op_link(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
    struct compound_state *cs)
{
	LINK4args *args = &argop->nfs_argop4_u.oplink;
	LINK4res *resp = &resop->nfs_resop4_u.oplink;
	int error;
	vnode_t *vp;
	vnode_t *dvp;
	/* directory attrs: before, immediately after, and after the fsync */
	struct vattr bdva, idva, adva;
	char *nm;		/* name as produced by utf8_to_fn() */
	uint_t len;		/* allocation size of nm */
	struct sockaddr *ca;
	char *name = NULL;	/* name after nfscmd_convname() */
	nfsstat4 status;

	DTRACE_NFSV4_2(op__link__start, struct compound_state *, cs,
	    LINK4args *, args);

	/* SAVED_FH: source object */
	vp = cs->saved_vp;
	if (vp == NULL) {
		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
		goto out;
	}

	/* CURRENT_FH: target directory */
	dvp = cs->vp;
	if (dvp == NULL) {
		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
		goto out;
	}

	/*
	 * If there is a non-shared filesystem mounted on this vnode,
	 * do not allow to link any file in this directory.
	 */
	if (vn_ismntpt(dvp)) {
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		goto out;
	}

	if (cs->access == CS_ACCESS_DENIED) {
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		goto out;
	}

	/* Check source object's type validity */
	if (vp->v_type == VDIR) {
		*cs->statusp = resp->status = NFS4ERR_ISDIR;
		goto out;
	}

	/* Check target directory's type */
	if (dvp->v_type != VDIR) {
		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
		goto out;
	}

	/* source and target must belong to the same export */
	if (cs->saved_exi != cs->exi) {
		*cs->statusp = resp->status = NFS4ERR_XDEV;
		goto out;
	}

	status = utf8_dir_verify(&args->newname);
	if (status != NFS4_OK) {
		*cs->statusp = resp->status = status;
		goto out;
	}

	/* from here on nm must be freed on every exit path */
	nm = utf8_to_fn(&args->newname, &len, NULL);
	if (nm == NULL) {
		*cs->statusp = resp->status = NFS4ERR_INVAL;
		goto out;
	}

	if (len > MAXNAMELEN) {
		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
		kmem_free(nm, len);
		goto out;
	}

	if (rdonly4(req, cs)) {
		*cs->statusp = resp->status = NFS4ERR_ROFS;
		kmem_free(nm, len);
		goto out;
	}

	/* Get "before" change value */
	bdva.va_mask = AT_CTIME|AT_SEQ;
	error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
	if (error) {
		*cs->statusp = resp->status = puterrno4(error);
		kmem_free(nm, len);
		goto out;
	}

	/*
	 * Convert the name for this caller/export.  The result is either
	 * nm itself or a separate buffer; the cleanup after VOP_LINK frees
	 * it as a MAXPATHLEN + 1 allocation only when it differs from nm.
	 */
	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
	name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
	    MAXPATHLEN + 1);

	if (name == NULL) {
		*cs->statusp = resp->status = NFS4ERR_INVAL;
		kmem_free(nm, len);
		goto out;
	}

	NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)

	error = VOP_LINK(dvp, vp, name, cs->cr, NULL, 0);

	/* the names are no longer needed, whatever VOP_LINK returned */
	if (nm != name)
		kmem_free(name, MAXPATHLEN + 1);
	kmem_free(nm, len);

	/*
	 * Get the initial "after" sequence number, if it fails, set to zero
	 */
	idva.va_mask = AT_SEQ;
	if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
		idva.va_seq = 0;

	/*
	 * Force modified data and metadata out to stable storage.
	 * This is done before the VOP_LINK result is examined.
	 */
	(void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
	(void) VOP_FSYNC(dvp, 0, cs->cr, NULL);

	if (error) {
		*cs->statusp = resp->status = puterrno4(error);
		goto out;
	}

	/*
	 * Get "after" change value, if it fails, simply return the
	 * before value.
	 */
	adva.va_mask = AT_CTIME|AT_SEQ;
	if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
		adva.va_ctime = bdva.va_ctime;
		adva.va_seq = 0;
	}

	NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)

	/*
	 * The cinfo.atomic = TRUE only if we have
	 * non-zero va_seq's, and it has incremented by exactly one
	 * during the VOP_LINK and it didn't change during the VOP_FSYNC.
	 */
	if (bdva.va_seq && idva.va_seq && adva.va_seq &&
	    idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
		resp->cinfo.atomic = TRUE;
	else
		resp->cinfo.atomic = FALSE;

	*cs->statusp = resp->status = NFS4_OK;
out:
	DTRACE_NFSV4_2(op__link__done, struct compound_state *, cs,
	    LINK4res *, resp);
}
2688 2690
2689 2691 /*
2690 2692 * Used by rfs4_op_lookup and rfs4_op_lookupp to do the actual work.
2691 2693 */
2692 2694
2693 2695 /* ARGSUSED */
2694 2696 static nfsstat4
2695 2697 do_rfs4_op_lookup(char *nm, struct svc_req *req, struct compound_state *cs)
2696 2698 {
2697 2699 int error;
2698 2700 int different_export = 0;
2699 2701 vnode_t *vp, *pre_tvp = NULL, *oldvp = NULL;
2700 2702 struct exportinfo *exi = NULL, *pre_exi = NULL;
2701 2703 nfsstat4 stat;
2702 2704 fid_t fid;
2703 2705 int attrdir, dotdot, walk;
2704 2706 bool_t is_newvp = FALSE;
2705 2707
2706 2708 if (cs->vp->v_flag & V_XATTRDIR) {
2707 2709 attrdir = 1;
2708 2710 ASSERT(get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2709 2711 } else {
2710 2712 attrdir = 0;
2711 2713 ASSERT(! get_fh4_flag(&cs->fh, FH4_ATTRDIR));
|
↓ open down ↓ |
1707 lines elided |
↑ open up ↑ |
2712 2714 }
2713 2715
2714 2716 dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
2715 2717
2716 2718 /*
2717 2719 * If dotdotting, then need to check whether it's
2718 2720 * above the root of a filesystem, or above an
2719 2721 * export point.
2720 2722 */
2721 2723 if (dotdot) {
2724 + vnode_t *zone_rootvp;
2725 +
2722 2726 ASSERT(cs->exi != NULL);
2723 - ASSERT3U(cs->exi->exi_zoneid, ==, curzone->zone_id);
2727 + zone_rootvp = cs->exi->exi_ne->exi_root->exi_vp;
2724 2728 /*
2725 2729 * If dotdotting at the root of a filesystem, then
2726 2730 * need to traverse back to the mounted-on filesystem
2727 2731 * and do the dotdot lookup there.
2728 2732 */
2729 - if ((cs->vp->v_flag & VROOT) || VN_IS_CURZONEROOT(cs->vp)) {
2733 + if ((cs->vp->v_flag & VROOT) || VN_CMP(cs->vp, zone_rootvp)) {
2730 2734
2731 2735 /*
2732 2736 * If at the system root, then can
2733 2737 * go up no further.
2734 2738 */
2735 - if (VN_CMP(cs->vp, ZONE_ROOTVP()))
2739 + if (VN_CMP(cs->vp, zone_rootvp))
2736 2740 return (puterrno4(ENOENT));
2737 2741
2738 2742 /*
2739 2743 * Traverse back to the mounted-on filesystem
2740 2744 */
2741 - cs->vp = untraverse(cs->vp);
2745 + cs->vp = untraverse(cs->vp, zone_rootvp);
2742 2746
2743 2747 /*
2744 2748 * Set the different_export flag so we remember
2745 2749 * to pick up a new exportinfo entry for
2746 2750 * this new filesystem.
2747 2751 */
2748 2752 different_export = 1;
2749 2753 } else {
2750 2754
2751 2755 /*
2752 2756 * If dotdotting above an export point then set
2753 2757 * the different_export to get new export info.
2754 2758 */
2755 2759 different_export = nfs_exported(cs->exi, cs->vp);
2756 2760 }
2757 2761 }
2758 2762
2759 2763 error = VOP_LOOKUP(cs->vp, nm, &vp, NULL, 0, NULL, cs->cr,
2760 2764 NULL, NULL, NULL);
2761 2765 if (error)
2762 2766 return (puterrno4(error));
2763 2767
2764 2768 /*
2765 2769 * If the vnode is in a pseudo filesystem, check whether it is visible.
2766 2770 *
2767 2771 * XXX if the vnode is a symlink and it is not visible in
2768 2772 * a pseudo filesystem, return ENOENT (not following symlink).
2769 2773 * V4 client can not mount such symlink. This is a regression
2770 2774 * from V2/V3.
2771 2775 *
2772 2776 * In the same exported filesystem, if the security flavor used
2773 2777 * is not an explicitly shared flavor, limit the view to the visible
2774 2778 * list entries only. This is not a WRONGSEC case because it's already
2775 2779 * checked via PUTROOTFH/PUTPUBFH or PUTFH.
2776 2780 */
2777 2781 if (!different_export &&
2778 2782 (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
2779 2783 cs->access & CS_ACCESS_LIMITED)) {
2780 2784 if (! nfs_visible(cs->exi, vp, &different_export)) {
2781 2785 VN_RELE(vp);
2782 2786 return (puterrno4(ENOENT));
2783 2787 }
2784 2788 }
2785 2789
2786 2790 /*
2787 2791 * If it's a mountpoint, then traverse it.
2788 2792 */
2789 2793 if (vn_ismntpt(vp)) {
2790 2794 pre_exi = cs->exi; /* save pre-traversed exportinfo */
2791 2795 pre_tvp = vp; /* save pre-traversed vnode */
2792 2796
2793 2797 /*
2794 2798 * hold pre_tvp to counteract rele by traverse. We will
2795 2799 * need pre_tvp below if checkexport4 fails
2796 2800 */
2797 2801 VN_HOLD(pre_tvp);
2798 2802 if ((error = traverse(&vp)) != 0) {
2799 2803 VN_RELE(vp);
2800 2804 VN_RELE(pre_tvp);
2801 2805 return (puterrno4(error));
2802 2806 }
2803 2807 different_export = 1;
2804 2808 } else if (vp->v_vfsp != cs->vp->v_vfsp) {
2805 2809 /*
2806 2810 * The vfsp comparison is to handle the case where
2807 2811 * a LOFS mount is shared. lo_lookup traverses mount points,
2808 2812 * and NFS is unaware of local fs transitions because
2809 2813 * v_vfsmountedhere isn't set. For this special LOFS case,
2810 2814 * the dir and the obj returned by lookup will have different
2811 2815 * vfs ptrs.
2812 2816 */
2813 2817 different_export = 1;
2814 2818 }
2815 2819
2816 2820 if (different_export) {
2817 2821
2818 2822 bzero(&fid, sizeof (fid));
2819 2823 fid.fid_len = MAXFIDSZ;
2820 2824 error = vop_fid_pseudo(vp, &fid);
2821 2825 if (error) {
2822 2826 VN_RELE(vp);
2823 2827 if (pre_tvp)
2824 2828 VN_RELE(pre_tvp);
2825 2829 return (puterrno4(error));
2826 2830 }
2827 2831
2828 2832 if (dotdot)
2829 2833 exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
2830 2834 else
2831 2835 exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
2832 2836
2833 2837 if (exi == NULL) {
2834 2838 if (pre_tvp) {
2835 2839 /*
2836 2840 * If this vnode is a mounted-on vnode,
2837 2841 * but the mounted-on file system is not
2838 2842 * exported, send back the filehandle for
2839 2843 * the mounted-on vnode, not the root of
2840 2844 * the mounted-on file system.
2841 2845 */
2842 2846 VN_RELE(vp);
2843 2847 vp = pre_tvp;
2844 2848 exi = pre_exi;
2845 2849 } else {
2846 2850 VN_RELE(vp);
2847 2851 return (puterrno4(EACCES));
2848 2852 }
2849 2853 } else if (pre_tvp) {
2850 2854 /* we're done with pre_tvp now. release extra hold */
2851 2855 VN_RELE(pre_tvp);
2852 2856 }
2853 2857
2854 2858 cs->exi = exi;
2855 2859
2856 2860 /*
2857 2861 * Now we do a checkauth4. The reason is that
2858 2862 * this client/user may not have access to the new
2859 2863 * exported file system, and if they do,
2860 2864 * the client/user may be mapped to a different uid.
2861 2865 *
2862 2866 * We start with a new cr, because the checkauth4 done
2863 2867 * in the PUT*FH operation overwrote the cred's uid,
2864 2868 * gid, etc, and we want the real thing before calling
2865 2869 * checkauth4()
2866 2870 */
2867 2871 crfree(cs->cr);
2868 2872 cs->cr = crdup(cs->basecr);
2869 2873
2870 2874 oldvp = cs->vp;
2871 2875 cs->vp = vp;
2872 2876 is_newvp = TRUE;
2873 2877
2874 2878 stat = call_checkauth4(cs, req);
2875 2879 if (stat != NFS4_OK) {
2876 2880 VN_RELE(cs->vp);
2877 2881 cs->vp = oldvp;
2878 2882 return (stat);
2879 2883 }
2880 2884 }
2881 2885
2882 2886 /*
2883 2887 * After various NFS checks, do a label check on the path
2884 2888 * component. The label on this path should either be the
2885 2889 * global zone's label or a zone's label. We are only
2886 2890 * interested in the zone's label because exported files
2887 2891 * in global zone are accessible (though read-only) to
2888 2892 * clients. The exportability/visibility check is already
2889 2893 * done before reaching this code.
2890 2894 */
2891 2895 if (is_system_labeled()) {
2892 2896 bslabel_t *clabel;
2893 2897
2894 2898 ASSERT(req->rq_label != NULL);
2895 2899 clabel = req->rq_label;
2896 2900 DTRACE_PROBE2(tx__rfs4__log__info__oplookup__clabel, char *,
2897 2901 "got client label from request(1)", struct svc_req *, req);
2898 2902
2899 2903 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2900 2904 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
2901 2905 cs->exi)) {
2902 2906 error = EACCES;
2903 2907 goto err_out;
2904 2908 }
2905 2909 } else {
2906 2910 /*
2907 2911 * We grant access to admin_low label clients
2908 2912 * only if the client is trusted, i.e. also
2909 2913 * running Solaris Trusted Extension.
2910 2914 */
2911 2915 struct sockaddr *ca;
2912 2916 int addr_type;
2913 2917 void *ipaddr;
2914 2918 tsol_tpc_t *tp;
2915 2919
2916 2920 ca = (struct sockaddr *)svc_getrpccaller(
2917 2921 req->rq_xprt)->buf;
2918 2922 if (ca->sa_family == AF_INET) {
2919 2923 addr_type = IPV4_VERSION;
2920 2924 ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
2921 2925 } else if (ca->sa_family == AF_INET6) {
2922 2926 addr_type = IPV6_VERSION;
2923 2927 ipaddr = &((struct sockaddr_in6 *)
2924 2928 ca)->sin6_addr;
2925 2929 }
2926 2930 tp = find_tpc(ipaddr, addr_type, B_FALSE);
2927 2931 if (tp == NULL || tp->tpc_tp.tp_doi !=
2928 2932 l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
2929 2933 SUN_CIPSO) {
2930 2934 if (tp != NULL)
2931 2935 TPC_RELE(tp);
2932 2936 error = EACCES;
2933 2937 goto err_out;
2934 2938 }
2935 2939 TPC_RELE(tp);
2936 2940 }
2937 2941 }
2938 2942
2939 2943 error = makefh4(&cs->fh, vp, cs->exi);
2940 2944
2941 2945 err_out:
2942 2946 if (error) {
2943 2947 if (is_newvp) {
2944 2948 VN_RELE(cs->vp);
2945 2949 cs->vp = oldvp;
2946 2950 } else
2947 2951 VN_RELE(vp);
2948 2952 return (puterrno4(error));
2949 2953 }
2950 2954
2951 2955 if (!is_newvp) {
2952 2956 if (cs->vp)
2953 2957 VN_RELE(cs->vp);
2954 2958 cs->vp = vp;
2955 2959 } else if (oldvp)
2956 2960 VN_RELE(oldvp);
2957 2961
2958 2962 /*
2959 2963 * if did lookup on attrdir and didn't lookup .., set named
2960 2964 * attr fh flag
2961 2965 */
2962 2966 if (attrdir && ! dotdot)
2963 2967 set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
2964 2968
2965 2969 /* Assume false for now, open proc will set this */
2966 2970 cs->mandlock = FALSE;
2967 2971
2968 2972 return (NFS4_OK);
2969 2973 }
2970 2974
2971 2975 /* ARGSUSED */
2972 2976 static void
2973 2977 rfs4_op_lookup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2974 2978 struct compound_state *cs)
2975 2979 {
2976 2980 LOOKUP4args *args = &argop->nfs_argop4_u.oplookup;
2977 2981 LOOKUP4res *resp = &resop->nfs_resop4_u.oplookup;
2978 2982 char *nm;
2979 2983 uint_t len;
2980 2984 struct sockaddr *ca;
2981 2985 char *name = NULL;
2982 2986 nfsstat4 status;
2983 2987
2984 2988 DTRACE_NFSV4_2(op__lookup__start, struct compound_state *, cs,
2985 2989 LOOKUP4args *, args);
2986 2990
2987 2991 if (cs->vp == NULL) {
2988 2992 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2989 2993 goto out;
2990 2994 }
2991 2995
2992 2996 if (cs->vp->v_type == VLNK) {
2993 2997 *cs->statusp = resp->status = NFS4ERR_SYMLINK;
2994 2998 goto out;
2995 2999 }
2996 3000
2997 3001 if (cs->vp->v_type != VDIR) {
2998 3002 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2999 3003 goto out;
3000 3004 }
3001 3005
3002 3006 status = utf8_dir_verify(&args->objname);
3003 3007 if (status != NFS4_OK) {
3004 3008 *cs->statusp = resp->status = status;
3005 3009 goto out;
3006 3010 }
3007 3011
3008 3012 nm = utf8_to_str(&args->objname, &len, NULL);
3009 3013 if (nm == NULL) {
3010 3014 *cs->statusp = resp->status = NFS4ERR_INVAL;
3011 3015 goto out;
3012 3016 }
3013 3017
3014 3018 if (len > MAXNAMELEN) {
3015 3019 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
3016 3020 kmem_free(nm, len);
3017 3021 goto out;
3018 3022 }
3019 3023
3020 3024 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3021 3025 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
3022 3026 MAXPATHLEN + 1);
3023 3027
3024 3028 if (name == NULL) {
3025 3029 *cs->statusp = resp->status = NFS4ERR_INVAL;
3026 3030 kmem_free(nm, len);
3027 3031 goto out;
3028 3032 }
3029 3033
3030 3034 *cs->statusp = resp->status = do_rfs4_op_lookup(name, req, cs);
3031 3035
3032 3036 if (name != nm)
3033 3037 kmem_free(name, MAXPATHLEN + 1);
3034 3038 kmem_free(nm, len);
3035 3039
3036 3040 out:
3037 3041 DTRACE_NFSV4_2(op__lookup__done, struct compound_state *, cs,
3038 3042 LOOKUP4res *, resp);
3039 3043 }
3040 3044
3041 3045 /* ARGSUSED */
3042 3046 static void
3043 3047 rfs4_op_lookupp(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3044 3048 struct compound_state *cs)
3045 3049 {
3046 3050 LOOKUPP4res *resp = &resop->nfs_resop4_u.oplookupp;
3047 3051
3048 3052 DTRACE_NFSV4_1(op__lookupp__start, struct compound_state *, cs);
3049 3053
3050 3054 if (cs->vp == NULL) {
3051 3055 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3052 3056 goto out;
3053 3057 }
3054 3058
3055 3059 if (cs->vp->v_type != VDIR) {
3056 3060 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
3057 3061 goto out;
3058 3062 }
3059 3063
3060 3064 *cs->statusp = resp->status = do_rfs4_op_lookup("..", req, cs);
3061 3065
3062 3066 /*
3063 3067 * From NFSV4 Specification, LOOKUPP should not check for
3064 3068 * NFS4ERR_WRONGSEC. Retrun NFS4_OK instead.
3065 3069 */
3066 3070 if (resp->status == NFS4ERR_WRONGSEC) {
3067 3071 *cs->statusp = resp->status = NFS4_OK;
3068 3072 }
3069 3073
3070 3074 out:
3071 3075 DTRACE_NFSV4_2(op__lookupp__done, struct compound_state *, cs,
3072 3076 LOOKUPP4res *, resp);
3073 3077 }
3074 3078
3075 3079
3076 3080 /*ARGSUSED2*/
3077 3081 static void
3078 3082 rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3079 3083 struct compound_state *cs)
3080 3084 {
3081 3085 OPENATTR4args *args = &argop->nfs_argop4_u.opopenattr;
3082 3086 OPENATTR4res *resp = &resop->nfs_resop4_u.opopenattr;
3083 3087 vnode_t *avp = NULL;
3084 3088 int lookup_flags = LOOKUP_XATTR, error;
3085 3089 int exp_ro = 0;
3086 3090
3087 3091 DTRACE_NFSV4_2(op__openattr__start, struct compound_state *, cs,
3088 3092 OPENATTR4args *, args);
3089 3093
3090 3094 if (cs->vp == NULL) {
3091 3095 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3092 3096 goto out;
3093 3097 }
3094 3098
3095 3099 if ((cs->vp->v_vfsp->vfs_flag & VFS_XATTR) == 0 &&
3096 3100 !vfs_has_feature(cs->vp->v_vfsp, VFSFT_SYSATTR_VIEWS)) {
3097 3101 *cs->statusp = resp->status = puterrno4(ENOTSUP);
3098 3102 goto out;
3099 3103 }
3100 3104
3101 3105 /*
3102 3106 * If file system supports passing ACE mask to VOP_ACCESS then
3103 3107 * check for ACE_READ_NAMED_ATTRS, otherwise do legacy checks
3104 3108 */
3105 3109
3106 3110 if (vfs_has_feature(cs->vp->v_vfsp, VFSFT_ACEMASKONACCESS))
3107 3111 error = VOP_ACCESS(cs->vp, ACE_READ_NAMED_ATTRS,
3108 3112 V_ACE_MASK, cs->cr, NULL);
3109 3113 else
3110 3114 error = ((VOP_ACCESS(cs->vp, VREAD, 0, cs->cr, NULL) != 0) &&
3111 3115 (VOP_ACCESS(cs->vp, VWRITE, 0, cs->cr, NULL) != 0) &&
3112 3116 (VOP_ACCESS(cs->vp, VEXEC, 0, cs->cr, NULL) != 0));
3113 3117
3114 3118 if (error) {
3115 3119 *cs->statusp = resp->status = puterrno4(EACCES);
3116 3120 goto out;
3117 3121 }
3118 3122
3119 3123 /*
3120 3124 * The CREATE_XATTR_DIR VOP flag cannot be specified if
3121 3125 * the file system is exported read-only -- regardless of
3122 3126 * createdir flag. Otherwise the attrdir would be created
3123 3127 * (assuming server fs isn't mounted readonly locally). If
3124 3128 * VOP_LOOKUP returns ENOENT in this case, the error will
3125 3129 * be translated into EROFS. ENOSYS is mapped to ENOTSUP
3126 3130 * because specfs has no VOP_LOOKUP op, so the macro would
3127 3131 * return ENOSYS. EINVAL is returned by all (current)
3128 3132 * Solaris file system implementations when any of their
3129 3133 * restrictions are violated (xattr(dir) can't have xattrdir).
3130 3134 * Returning NOTSUPP is more appropriate in this case
3131 3135 * because the object will never be able to have an attrdir.
3132 3136 */
3133 3137 if (args->createdir && ! (exp_ro = rdonly4(req, cs)))
3134 3138 lookup_flags |= CREATE_XATTR_DIR;
3135 3139
3136 3140 error = VOP_LOOKUP(cs->vp, "", &avp, NULL, lookup_flags, NULL, cs->cr,
3137 3141 NULL, NULL, NULL);
3138 3142
3139 3143 if (error) {
3140 3144 if (error == ENOENT && args->createdir && exp_ro)
3141 3145 *cs->statusp = resp->status = puterrno4(EROFS);
3142 3146 else if (error == EINVAL || error == ENOSYS)
3143 3147 *cs->statusp = resp->status = puterrno4(ENOTSUP);
3144 3148 else
3145 3149 *cs->statusp = resp->status = puterrno4(error);
3146 3150 goto out;
3147 3151 }
3148 3152
3149 3153 ASSERT(avp->v_flag & V_XATTRDIR);
3150 3154
3151 3155 error = makefh4(&cs->fh, avp, cs->exi);
3152 3156
3153 3157 if (error) {
3154 3158 VN_RELE(avp);
3155 3159 *cs->statusp = resp->status = puterrno4(error);
3156 3160 goto out;
3157 3161 }
3158 3162
3159 3163 VN_RELE(cs->vp);
3160 3164 cs->vp = avp;
3161 3165
3162 3166 /*
3163 3167 * There is no requirement for an attrdir fh flag
3164 3168 * because the attrdir has a vnode flag to distinguish
3165 3169 * it from regular (non-xattr) directories. The
3166 3170 * FH4_ATTRDIR flag is set for future sanity checks.
3167 3171 */
3168 3172 set_fh4_flag(&cs->fh, FH4_ATTRDIR);
3169 3173 *cs->statusp = resp->status = NFS4_OK;
3170 3174
3171 3175 out:
3172 3176 DTRACE_NFSV4_2(op__openattr__done, struct compound_state *, cs,
3173 3177 OPENATTR4res *, resp);
3174 3178 }
3175 3179
3176 3180 static int
3177 3181 do_io(int direction, vnode_t *vp, struct uio *uio, int ioflag, cred_t *cred,
3178 3182 caller_context_t *ct)
3179 3183 {
3180 3184 int error;
3181 3185 int i;
3182 3186 clock_t delaytime;
3183 3187
3184 3188 delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
3185 3189
3186 3190 /*
3187 3191 * Don't block on mandatory locks. If this routine returns
3188 3192 * EAGAIN, the caller should return NFS4ERR_LOCKED.
3189 3193 */
3190 3194 uio->uio_fmode = FNONBLOCK;
3191 3195
3192 3196 for (i = 0; i < rfs4_maxlock_tries; i++) {
3193 3197
3194 3198
3195 3199 if (direction == FREAD) {
3196 3200 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, ct);
3197 3201 error = VOP_READ(vp, uio, ioflag, cred, ct);
3198 3202 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, ct);
3199 3203 } else {
3200 3204 (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, ct);
3201 3205 error = VOP_WRITE(vp, uio, ioflag, cred, ct);
3202 3206 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, ct);
3203 3207 }
3204 3208
3205 3209 if (error != EAGAIN)
3206 3210 break;
3207 3211
3208 3212 if (i < rfs4_maxlock_tries - 1) {
3209 3213 delay(delaytime);
3210 3214 delaytime *= 2;
3211 3215 }
3212 3216 }
3213 3217
3214 3218 return (error);
3215 3219 }
3216 3220
3217 3221 /* ARGSUSED */
3218 3222 static void
3219 3223 rfs4_op_read(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3220 3224 struct compound_state *cs)
3221 3225 {
3222 3226 READ4args *args = &argop->nfs_argop4_u.opread;
3223 3227 READ4res *resp = &resop->nfs_resop4_u.opread;
3224 3228 int error;
3225 3229 int verror;
3226 3230 vnode_t *vp;
3227 3231 struct vattr va;
3228 3232 struct iovec iov, *iovp = NULL;
3229 3233 int iovcnt;
3230 3234 struct uio uio;
3231 3235 u_offset_t offset;
3232 3236 bool_t *deleg = &cs->deleg;
3233 3237 nfsstat4 stat;
3234 3238 int in_crit = 0;
3235 3239 mblk_t *mp = NULL;
3236 3240 int alloc_err = 0;
3237 3241 int rdma_used = 0;
3238 3242 int loaned_buffers;
3239 3243 caller_context_t ct;
3240 3244 struct uio *uiop;
3241 3245
3242 3246 DTRACE_NFSV4_2(op__read__start, struct compound_state *, cs,
3243 3247 READ4args, args);
3244 3248
3245 3249 vp = cs->vp;
3246 3250 if (vp == NULL) {
3247 3251 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3248 3252 goto out;
3249 3253 }
3250 3254 if (cs->access == CS_ACCESS_DENIED) {
3251 3255 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3252 3256 goto out;
3253 3257 }
3254 3258
3255 3259 if ((stat = rfs4_check_stateid(FREAD, vp, &args->stateid, FALSE,
3256 3260 deleg, TRUE, &ct)) != NFS4_OK) {
3257 3261 *cs->statusp = resp->status = stat;
3258 3262 goto out;
3259 3263 }
3260 3264
3261 3265 /*
3262 3266 * Enter the critical region before calling VOP_RWLOCK
3263 3267 * to avoid a deadlock with write requests.
3264 3268 */
3265 3269 if (nbl_need_check(vp)) {
3266 3270 nbl_start_crit(vp, RW_READER);
3267 3271 in_crit = 1;
3268 3272 if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
3269 3273 &ct)) {
3270 3274 *cs->statusp = resp->status = NFS4ERR_LOCKED;
3271 3275 goto out;
3272 3276 }
3273 3277 }
3274 3278
3275 3279 if (args->wlist) {
3276 3280 if (args->count > clist_len(args->wlist)) {
3277 3281 *cs->statusp = resp->status = NFS4ERR_INVAL;
3278 3282 goto out;
3279 3283 }
3280 3284 rdma_used = 1;
3281 3285 }
3282 3286
3283 3287 /* use loaned buffers for TCP */
3284 3288 loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;
3285 3289
3286 3290 va.va_mask = AT_MODE|AT_SIZE|AT_UID;
3287 3291 verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3288 3292
3289 3293 /*
3290 3294 * If we can't get the attributes, then we can't do the
3291 3295 * right access checking. So, we'll fail the request.
3292 3296 */
3293 3297 if (verror) {
3294 3298 *cs->statusp = resp->status = puterrno4(verror);
3295 3299 goto out;
3296 3300 }
3297 3301
3298 3302 if (vp->v_type != VREG) {
3299 3303 *cs->statusp = resp->status =
3300 3304 ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
3301 3305 goto out;
3302 3306 }
3303 3307
3304 3308 if (crgetuid(cs->cr) != va.va_uid &&
3305 3309 (error = VOP_ACCESS(vp, VREAD, 0, cs->cr, &ct)) &&
3306 3310 (error = VOP_ACCESS(vp, VEXEC, 0, cs->cr, &ct))) {
3307 3311 *cs->statusp = resp->status = puterrno4(error);
3308 3312 goto out;
3309 3313 }
3310 3314
3311 3315 if (MANDLOCK(vp, va.va_mode)) { /* XXX - V4 supports mand locking */
3312 3316 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3313 3317 goto out;
3314 3318 }
3315 3319
3316 3320 offset = args->offset;
3317 3321 if (offset >= va.va_size) {
3318 3322 *cs->statusp = resp->status = NFS4_OK;
3319 3323 resp->eof = TRUE;
3320 3324 resp->data_len = 0;
3321 3325 resp->data_val = NULL;
3322 3326 resp->mblk = NULL;
3323 3327 /* RDMA */
3324 3328 resp->wlist = args->wlist;
3325 3329 resp->wlist_len = resp->data_len;
3326 3330 *cs->statusp = resp->status = NFS4_OK;
3327 3331 if (resp->wlist)
3328 3332 clist_zero_len(resp->wlist);
3329 3333 goto out;
3330 3334 }
3331 3335
3332 3336 if (args->count == 0) {
3333 3337 *cs->statusp = resp->status = NFS4_OK;
3334 3338 resp->eof = FALSE;
3335 3339 resp->data_len = 0;
3336 3340 resp->data_val = NULL;
3337 3341 resp->mblk = NULL;
3338 3342 /* RDMA */
3339 3343 resp->wlist = args->wlist;
3340 3344 resp->wlist_len = resp->data_len;
3341 3345 if (resp->wlist)
3342 3346 clist_zero_len(resp->wlist);
3343 3347 goto out;
3344 3348 }
3345 3349
3346 3350 /*
3347 3351 * Do not allocate memory more than maximum allowed
3348 3352 * transfer size
3349 3353 */
3350 3354 if (args->count > rfs4_tsize(req))
3351 3355 args->count = rfs4_tsize(req);
3352 3356
3353 3357 if (loaned_buffers) {
3354 3358 uiop = (uio_t *)rfs_setup_xuio(vp);
3355 3359 ASSERT(uiop != NULL);
3356 3360 uiop->uio_segflg = UIO_SYSSPACE;
3357 3361 uiop->uio_loffset = args->offset;
3358 3362 uiop->uio_resid = args->count;
3359 3363
3360 3364 /* Jump to do the read if successful */
3361 3365 if (!VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cs->cr, &ct)) {
3362 3366 /*
3363 3367 * Need to hold the vnode until after VOP_RETZCBUF()
3364 3368 * is called.
3365 3369 */
3366 3370 VN_HOLD(vp);
3367 3371 goto doio_read;
3368 3372 }
3369 3373
3370 3374 DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
3371 3375 uiop->uio_loffset, int, uiop->uio_resid);
3372 3376
3373 3377 uiop->uio_extflg = 0;
3374 3378
3375 3379 /* failure to setup for zero copy */
3376 3380 rfs_free_xuio((void *)uiop);
3377 3381 loaned_buffers = 0;
3378 3382 }
3379 3383
3380 3384 /*
3381 3385 * If returning data via RDMA Write, then grab the chunk list. If we
3382 3386 * aren't returning READ data w/RDMA_WRITE, then grab a mblk.
3383 3387 */
3384 3388 if (rdma_used) {
3385 3389 mp = NULL;
3386 3390 (void) rdma_get_wchunk(req, &iov, args->wlist);
3387 3391 uio.uio_iov = &iov;
3388 3392 uio.uio_iovcnt = 1;
3389 3393 } else {
3390 3394 /*
3391 3395 * mp will contain the data to be sent out in the read reply.
3392 3396 * It will be freed after the reply has been sent.
3393 3397 */
3394 3398 mp = rfs_read_alloc(args->count, &iovp, &iovcnt);
3395 3399 ASSERT(mp != NULL);
3396 3400 ASSERT(alloc_err == 0);
3397 3401 uio.uio_iov = iovp;
3398 3402 uio.uio_iovcnt = iovcnt;
3399 3403 }
3400 3404
3401 3405 uio.uio_segflg = UIO_SYSSPACE;
3402 3406 uio.uio_extflg = UIO_COPY_CACHED;
3403 3407 uio.uio_loffset = args->offset;
3404 3408 uio.uio_resid = args->count;
3405 3409 uiop = &uio;
3406 3410
3407 3411 doio_read:
3408 3412 error = do_io(FREAD, vp, uiop, 0, cs->cr, &ct);
3409 3413
3410 3414 va.va_mask = AT_SIZE;
3411 3415 verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3412 3416
3413 3417 if (error) {
3414 3418 if (mp)
3415 3419 freemsg(mp);
3416 3420 *cs->statusp = resp->status = puterrno4(error);
3417 3421 goto out;
3418 3422 }
3419 3423
3420 3424 /* make mblk using zc buffers */
3421 3425 if (loaned_buffers) {
3422 3426 mp = uio_to_mblk(uiop);
3423 3427 ASSERT(mp != NULL);
3424 3428 }
3425 3429
3426 3430 *cs->statusp = resp->status = NFS4_OK;
3427 3431
3428 3432 ASSERT(uiop->uio_resid >= 0);
3429 3433 resp->data_len = args->count - uiop->uio_resid;
3430 3434 if (mp) {
3431 3435 resp->data_val = (char *)mp->b_datap->db_base;
3432 3436 rfs_rndup_mblks(mp, resp->data_len, loaned_buffers);
3433 3437 } else {
3434 3438 resp->data_val = (caddr_t)iov.iov_base;
3435 3439 }
3436 3440
3437 3441 resp->mblk = mp;
3438 3442
3439 3443 if (!verror && offset + resp->data_len == va.va_size)
3440 3444 resp->eof = TRUE;
3441 3445 else
3442 3446 resp->eof = FALSE;
3443 3447
3444 3448 if (rdma_used) {
3445 3449 if (!rdma_setup_read_data4(args, resp)) {
3446 3450 *cs->statusp = resp->status = NFS4ERR_INVAL;
3447 3451 }
3448 3452 } else {
3449 3453 resp->wlist = NULL;
3450 3454 }
3451 3455
3452 3456 out:
3453 3457 if (in_crit)
3454 3458 nbl_end_crit(vp);
3455 3459
3456 3460 if (iovp != NULL)
3457 3461 kmem_free(iovp, iovcnt * sizeof (struct iovec));
3458 3462
3459 3463 DTRACE_NFSV4_2(op__read__done, struct compound_state *, cs,
3460 3464 READ4res *, resp);
3461 3465 }
3462 3466
3463 3467 static void
3464 3468 rfs4_op_read_free(nfs_resop4 *resop)
3465 3469 {
3466 3470 READ4res *resp = &resop->nfs_resop4_u.opread;
3467 3471
3468 3472 if (resp->status == NFS4_OK && resp->mblk != NULL) {
3469 3473 freemsg(resp->mblk);
3470 3474 resp->mblk = NULL;
3471 3475 resp->data_val = NULL;
3472 3476 resp->data_len = 0;
3473 3477 }
3474 3478 }
3475 3479
3476 3480 static void
3477 3481 rfs4_op_readdir_free(nfs_resop4 * resop)
3478 3482 {
3479 3483 READDIR4res *resp = &resop->nfs_resop4_u.opreaddir;
3480 3484
3481 3485 if (resp->status == NFS4_OK && resp->mblk != NULL) {
3482 3486 freeb(resp->mblk);
3483 3487 resp->mblk = NULL;
3484 3488 resp->data_len = 0;
3485 3489 }
3486 3490 }
3487 3491
3488 3492
3489 3493 /* ARGSUSED */
3490 3494 static void
3491 3495 rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3492 3496 struct compound_state *cs)
3493 3497 {
3494 3498 PUTPUBFH4res *resp = &resop->nfs_resop4_u.opputpubfh;
3495 3499 int error;
3496 3500 vnode_t *vp;
3497 3501 struct exportinfo *exi, *sav_exi;
3498 3502 nfs_fh4_fmt_t *fh_fmtp;
3499 3503 nfs_export_t *ne = nfs_get_export();
3500 3504
3501 3505 DTRACE_NFSV4_1(op__putpubfh__start, struct compound_state *, cs);
3502 3506
3503 3507 if (cs->vp) {
3504 3508 VN_RELE(cs->vp);
3505 3509 cs->vp = NULL;
3506 3510 }
3507 3511
3508 3512 if (cs->cr)
3509 3513 crfree(cs->cr);
3510 3514
3511 3515 cs->cr = crdup(cs->basecr);
3512 3516
3513 3517 vp = ne->exi_public->exi_vp;
3514 3518 if (vp == NULL) {
3515 3519 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3516 3520 goto out;
3517 3521 }
3518 3522
3519 3523 error = makefh4(&cs->fh, vp, ne->exi_public);
3520 3524 if (error != 0) {
3521 3525 *cs->statusp = resp->status = puterrno4(error);
3522 3526 goto out;
3523 3527 }
3524 3528 sav_exi = cs->exi;
3525 3529 if (ne->exi_public == ne->exi_root) {
3526 3530 /*
3527 3531 * No filesystem is actually shared public, so we default
3528 3532 * to exi_root. In this case, we must check whether root
3529 3533 * is exported.
3530 3534 */
3531 3535 fh_fmtp = (nfs_fh4_fmt_t *)cs->fh.nfs_fh4_val;
3532 3536
3533 3537 /*
3534 3538 * if root filesystem is exported, the exportinfo struct that we
3535 3539 * should use is what checkexport4 returns, because root_exi is
3536 3540 * actually a mostly empty struct.
3537 3541 */
3538 3542 exi = checkexport4(&fh_fmtp->fh4_fsid,
3539 3543 (fid_t *)&fh_fmtp->fh4_xlen, NULL);
3540 3544 cs->exi = ((exi != NULL) ? exi : ne->exi_public);
3541 3545 } else {
3542 3546 /*
3543 3547 * it's a properly shared filesystem
3544 3548 */
3545 3549 cs->exi = ne->exi_public;
3546 3550 }
3547 3551
3548 3552 if (is_system_labeled()) {
3549 3553 bslabel_t *clabel;
3550 3554
3551 3555 ASSERT(req->rq_label != NULL);
3552 3556 clabel = req->rq_label;
3553 3557 DTRACE_PROBE2(tx__rfs4__log__info__opputpubfh__clabel, char *,
3554 3558 "got client label from request(1)",
3555 3559 struct svc_req *, req);
3556 3560 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3557 3561 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3558 3562 cs->exi)) {
3559 3563 *cs->statusp = resp->status =
3560 3564 NFS4ERR_SERVERFAULT;
3561 3565 goto out;
3562 3566 }
3563 3567 }
3564 3568 }
3565 3569
3566 3570 VN_HOLD(vp);
3567 3571 cs->vp = vp;
3568 3572
3569 3573 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3570 3574 VN_RELE(cs->vp);
3571 3575 cs->vp = NULL;
3572 3576 cs->exi = sav_exi;
3573 3577 goto out;
3574 3578 }
3575 3579
3576 3580 *cs->statusp = resp->status = NFS4_OK;
3577 3581 out:
3578 3582 DTRACE_NFSV4_2(op__putpubfh__done, struct compound_state *, cs,
3579 3583 PUTPUBFH4res *, resp);
3580 3584 }
3581 3585
3582 3586 /*
3583 3587 * XXX - issue with put*fh operations. Suppose /export/home is exported.
3584 3588 * Suppose an NFS client goes to mount /export/home/joe. If /export, home,
3585 3589 * or joe have restrictive search permissions, then we shouldn't let
3586 3590 * the client get a file handle. This is easy to enforce. However, we
3587 3591 * don't know what security flavor should be used until we resolve the
3588 3592 * path name. Another complication is uid mapping. If root is
3589 3593 * the user, then it will be mapped to the anonymous user by default,
3590 3594 * but we won't know that till we've resolved the path name. And we won't
3591 3595 * know what the anonymous user is.
3592 3596 * Luckily, SECINFO is specified to take a full filename.
3593 3597 * So what we will have to do in rfs4_op_lookup is check that flavor of
3594 3598 * the target object matches that of the request, and if root was the
3595 3599 * caller, check for the root= and anon= options, and if necessary,
3596 3600 * repeat the lookup using the right cred_t. But that's not done yet.
3597 3601 */
3598 3602 /* ARGSUSED */
3599 3603 static void
3600 3604 rfs4_op_putfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3601 3605 struct compound_state *cs)
3602 3606 {
3603 3607 PUTFH4args *args = &argop->nfs_argop4_u.opputfh;
3604 3608 PUTFH4res *resp = &resop->nfs_resop4_u.opputfh;
3605 3609 nfs_fh4_fmt_t *fh_fmtp;
3606 3610
3607 3611 DTRACE_NFSV4_2(op__putfh__start, struct compound_state *, cs,
3608 3612 PUTFH4args *, args);
3609 3613
3610 3614 if (cs->vp) {
3611 3615 VN_RELE(cs->vp);
3612 3616 cs->vp = NULL;
3613 3617 }
3614 3618
3615 3619 if (cs->cr) {
3616 3620 crfree(cs->cr);
3617 3621 cs->cr = NULL;
3618 3622 }
3619 3623
3620 3624
3621 3625 if (args->object.nfs_fh4_len < NFS_FH4_LEN) {
3622 3626 *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
3623 3627 goto out;
3624 3628 }
3625 3629
3626 3630 fh_fmtp = (nfs_fh4_fmt_t *)args->object.nfs_fh4_val;
3627 3631 cs->exi = checkexport4(&fh_fmtp->fh4_fsid, (fid_t *)&fh_fmtp->fh4_xlen,
3628 3632 NULL);
3629 3633
3630 3634 if (cs->exi == NULL) {
3631 3635 *cs->statusp = resp->status = NFS4ERR_STALE;
3632 3636 goto out;
3633 3637 }
3634 3638
3635 3639 cs->cr = crdup(cs->basecr);
3636 3640
3637 3641 ASSERT(cs->cr != NULL);
3638 3642
3639 3643 if (! (cs->vp = nfs4_fhtovp(&args->object, cs->exi, &resp->status))) {
3640 3644 *cs->statusp = resp->status;
3641 3645 goto out;
3642 3646 }
3643 3647
3644 3648 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3645 3649 VN_RELE(cs->vp);
3646 3650 cs->vp = NULL;
3647 3651 goto out;
3648 3652 }
3649 3653
3650 3654 nfs_fh4_copy(&args->object, &cs->fh);
3651 3655 *cs->statusp = resp->status = NFS4_OK;
3652 3656 cs->deleg = FALSE;
3653 3657
3654 3658 out:
3655 3659 DTRACE_NFSV4_2(op__putfh__done, struct compound_state *, cs,
3656 3660 PUTFH4res *, resp);
3657 3661 }
3658 3662
3659 3663 /* ARGSUSED */
3660 3664 static void
3661 3665 rfs4_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3662 3666 struct compound_state *cs)
3663 3667 {
3664 3668 PUTROOTFH4res *resp = &resop->nfs_resop4_u.opputrootfh;
3665 3669 int error;
3666 3670 fid_t fid;
3667 3671 struct exportinfo *exi, *sav_exi;
3668 3672
3669 3673 DTRACE_NFSV4_1(op__putrootfh__start, struct compound_state *, cs);
3670 3674
3671 3675 if (cs->vp) {
3672 3676 VN_RELE(cs->vp);
3673 3677 cs->vp = NULL;
3674 3678 }
3675 3679
3676 3680 if (cs->cr)
3677 3681 crfree(cs->cr);
3678 3682
3679 3683 cs->cr = crdup(cs->basecr);
3680 3684
3681 3685 /*
3682 3686 * Using rootdir, the system root vnode,
3683 3687 * get its fid.
3684 3688 */
3685 3689 bzero(&fid, sizeof (fid));
3686 3690 fid.fid_len = MAXFIDSZ;
3687 3691 error = vop_fid_pseudo(ZONE_ROOTVP(), &fid);
3688 3692 if (error != 0) {
3689 3693 *cs->statusp = resp->status = puterrno4(error);
3690 3694 goto out;
3691 3695 }
3692 3696
3693 3697 /*
3694 3698 * Then use the root fsid & fid it to find out if it's exported
3695 3699 *
3696 3700 * If the server root isn't exported directly, then
3697 3701 * it should at least be a pseudo export based on
3698 3702 * one or more exports further down in the server's
3699 3703 * file tree.
3700 3704 */
3701 3705 exi = checkexport4(&ZONE_ROOTVP()->v_vfsp->vfs_fsid, &fid, NULL);
3702 3706 if (exi == NULL || exi->exi_export.ex_flags & EX_PUBLIC) {
3703 3707 NFS4_DEBUG(rfs4_debug,
3704 3708 (CE_WARN, "rfs4_op_putrootfh: export check failure"));
3705 3709 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3706 3710 goto out;
3707 3711 }
3708 3712
3709 3713 /*
3710 3714 * Now make a filehandle based on the root
3711 3715 * export and root vnode.
3712 3716 */
3713 3717 error = makefh4(&cs->fh, ZONE_ROOTVP(), exi);
3714 3718 if (error != 0) {
3715 3719 *cs->statusp = resp->status = puterrno4(error);
3716 3720 goto out;
3717 3721 }
3718 3722
3719 3723 sav_exi = cs->exi;
3720 3724 cs->exi = exi;
3721 3725
3722 3726 VN_HOLD(ZONE_ROOTVP());
3723 3727 cs->vp = ZONE_ROOTVP();
3724 3728
3725 3729 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3726 3730 VN_RELE(cs->vp);
3727 3731 cs->vp = NULL;
3728 3732 cs->exi = sav_exi;
3729 3733 goto out;
3730 3734 }
3731 3735
3732 3736 *cs->statusp = resp->status = NFS4_OK;
3733 3737 cs->deleg = FALSE;
3734 3738 out:
3735 3739 DTRACE_NFSV4_2(op__putrootfh__done, struct compound_state *, cs,
3736 3740 PUTROOTFH4res *, resp);
3737 3741 }
3738 3742
3739 3743 /*
3740 3744 * readlink: args: CURRENT_FH.
3741 3745 * res: status. If success - CURRENT_FH unchanged, return linktext.
3742 3746 */
3743 3747
3744 3748 /* ARGSUSED */
3745 3749 static void
3746 3750 rfs4_op_readlink(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3747 3751 struct compound_state *cs)
3748 3752 {
3749 3753 READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3750 3754 int error;
3751 3755 vnode_t *vp;
3752 3756 struct iovec iov;
3753 3757 struct vattr va;
3754 3758 struct uio uio;
3755 3759 char *data;
3756 3760 struct sockaddr *ca;
3757 3761 char *name = NULL;
3758 3762 int is_referral;
3759 3763
3760 3764 DTRACE_NFSV4_1(op__readlink__start, struct compound_state *, cs);
3761 3765
3762 3766 /* CURRENT_FH: directory */
3763 3767 vp = cs->vp;
3764 3768 if (vp == NULL) {
3765 3769 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3766 3770 goto out;
3767 3771 }
3768 3772
3769 3773 if (cs->access == CS_ACCESS_DENIED) {
3770 3774 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3771 3775 goto out;
3772 3776 }
3773 3777
3774 3778 /* Is it a referral? */
3775 3779 if (vn_is_nfs_reparse(vp, cs->cr) && client_is_downrev(req)) {
3776 3780
3777 3781 is_referral = 1;
3778 3782
3779 3783 } else {
3780 3784
3781 3785 is_referral = 0;
3782 3786
3783 3787 if (vp->v_type == VDIR) {
3784 3788 *cs->statusp = resp->status = NFS4ERR_ISDIR;
3785 3789 goto out;
3786 3790 }
3787 3791
3788 3792 if (vp->v_type != VLNK) {
3789 3793 *cs->statusp = resp->status = NFS4ERR_INVAL;
3790 3794 goto out;
3791 3795 }
3792 3796
3793 3797 }
3794 3798
3795 3799 va.va_mask = AT_MODE;
3796 3800 error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
3797 3801 if (error) {
3798 3802 *cs->statusp = resp->status = puterrno4(error);
3799 3803 goto out;
3800 3804 }
3801 3805
3802 3806 if (MANDLOCK(vp, va.va_mode)) {
3803 3807 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3804 3808 goto out;
3805 3809 }
3806 3810
3807 3811 data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
3808 3812
3809 3813 if (is_referral) {
3810 3814 char *s;
3811 3815 size_t strsz;
3812 3816
3813 3817 /* Get an artificial symlink based on a referral */
3814 3818 s = build_symlink(vp, cs->cr, &strsz);
3815 3819 global_svstat_ptr[4][NFS_REFERLINKS].value.ui64++;
3816 3820 DTRACE_PROBE2(nfs4serv__func__referral__reflink,
3817 3821 vnode_t *, vp, char *, s);
3818 3822 if (s == NULL)
3819 3823 error = EINVAL;
3820 3824 else {
3821 3825 error = 0;
3822 3826 (void) strlcpy(data, s, MAXPATHLEN + 1);
3823 3827 kmem_free(s, strsz);
3824 3828 }
3825 3829
3826 3830 } else {
3827 3831
3828 3832 iov.iov_base = data;
3829 3833 iov.iov_len = MAXPATHLEN;
3830 3834 uio.uio_iov = &iov;
3831 3835 uio.uio_iovcnt = 1;
3832 3836 uio.uio_segflg = UIO_SYSSPACE;
3833 3837 uio.uio_extflg = UIO_COPY_CACHED;
3834 3838 uio.uio_loffset = 0;
3835 3839 uio.uio_resid = MAXPATHLEN;
3836 3840
3837 3841 error = VOP_READLINK(vp, &uio, cs->cr, NULL);
3838 3842
3839 3843 if (!error)
3840 3844 *(data + MAXPATHLEN - uio.uio_resid) = '\0';
3841 3845 }
3842 3846
3843 3847 if (error) {
3844 3848 kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3845 3849 *cs->statusp = resp->status = puterrno4(error);
3846 3850 goto out;
3847 3851 }
3848 3852
3849 3853 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3850 3854 name = nfscmd_convname(ca, cs->exi, data, NFSCMD_CONV_OUTBOUND,
3851 3855 MAXPATHLEN + 1);
3852 3856
3853 3857 if (name == NULL) {
3854 3858 /*
3855 3859 * Even though the conversion failed, we return
3856 3860 * something. We just don't translate it.
3857 3861 */
3858 3862 name = data;
3859 3863 }
3860 3864
3861 3865 /*
3862 3866 * treat link name as data
3863 3867 */
3864 3868 (void) str_to_utf8(name, (utf8string *)&resp->link);
3865 3869
3866 3870 if (name != data)
3867 3871 kmem_free(name, MAXPATHLEN + 1);
3868 3872 kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3869 3873 *cs->statusp = resp->status = NFS4_OK;
3870 3874
3871 3875 out:
3872 3876 DTRACE_NFSV4_2(op__readlink__done, struct compound_state *, cs,
3873 3877 READLINK4res *, resp);
3874 3878 }
3875 3879
3876 3880 static void
3877 3881 rfs4_op_readlink_free(nfs_resop4 *resop)
3878 3882 {
3879 3883 READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3880 3884 utf8string *symlink = (utf8string *)&resp->link;
3881 3885
3882 3886 if (symlink->utf8string_val) {
3883 3887 UTF8STRING_FREE(*symlink)
3884 3888 }
3885 3889 }
3886 3890
3887 3891 /*
3888 3892 * release_lockowner:
3889 3893 * Release any state associated with the supplied
3890 3894 * lockowner. Note if any lo_state is holding locks we will not
3891 3895 * rele that lo_state and thus the lockowner will not be destroyed.
3892 3896 * A client using lock after the lock owner stateid has been released
3893 3897 * will suffer the consequence of NFS4ERR_BAD_STATEID and would have
3894 3898 * to reissue the lock with new_lock_owner set to TRUE.
3895 3899 * args: lock_owner
3896 3900 * res: status
3897 3901 */
3898 3902 /* ARGSUSED */
3899 3903 static void
3900 3904 rfs4_op_release_lockowner(nfs_argop4 *argop, nfs_resop4 *resop,
3901 3905 struct svc_req *req, struct compound_state *cs)
3902 3906 {
3903 3907 RELEASE_LOCKOWNER4args *ap = &argop->nfs_argop4_u.oprelease_lockowner;
3904 3908 RELEASE_LOCKOWNER4res *resp = &resop->nfs_resop4_u.oprelease_lockowner;
3905 3909 rfs4_lockowner_t *lo;
3906 3910 rfs4_openowner_t *oo;
3907 3911 rfs4_state_t *sp;
3908 3912 rfs4_lo_state_t *lsp;
3909 3913 rfs4_client_t *cp;
3910 3914 bool_t create = FALSE;
3911 3915 locklist_t *llist;
3912 3916 sysid_t sysid;
3913 3917
3914 3918 DTRACE_NFSV4_2(op__release__lockowner__start, struct compound_state *,
3915 3919 cs, RELEASE_LOCKOWNER4args *, ap);
3916 3920
3917 3921 /* Make sure there is a clientid around for this request */
3918 3922 cp = rfs4_findclient_by_id(ap->lock_owner.clientid, FALSE);
3919 3923
3920 3924 if (cp == NULL) {
3921 3925 *cs->statusp = resp->status =
3922 3926 rfs4_check_clientid(&ap->lock_owner.clientid, 0);
3923 3927 goto out;
3924 3928 }
3925 3929 rfs4_client_rele(cp);
3926 3930
3927 3931 lo = rfs4_findlockowner(&ap->lock_owner, &create);
3928 3932 if (lo == NULL) {
3929 3933 *cs->statusp = resp->status = NFS4_OK;
3930 3934 goto out;
3931 3935 }
3932 3936 ASSERT(lo->rl_client != NULL);
3933 3937
3934 3938 /*
3935 3939 * Check for EXPIRED client. If so will reap state with in a lease
3936 3940 * period or on next set_clientid_confirm step
3937 3941 */
3938 3942 if (rfs4_lease_expired(lo->rl_client)) {
3939 3943 rfs4_lockowner_rele(lo);
3940 3944 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
3941 3945 goto out;
3942 3946 }
3943 3947
3944 3948 /*
3945 3949 * If no sysid has been assigned, then no locks exist; just return.
3946 3950 */
3947 3951 rfs4_dbe_lock(lo->rl_client->rc_dbe);
3948 3952 if (lo->rl_client->rc_sysidt == LM_NOSYSID) {
3949 3953 rfs4_lockowner_rele(lo);
3950 3954 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3951 3955 goto out;
3952 3956 }
3953 3957
3954 3958 sysid = lo->rl_client->rc_sysidt;
3955 3959 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3956 3960
3957 3961 /*
3958 3962 * Mark the lockowner invalid.
3959 3963 */
3960 3964 rfs4_dbe_hide(lo->rl_dbe);
3961 3965
3962 3966 /*
3963 3967 * sysid-pid pair should now not be used since the lockowner is
3964 3968 * invalid. If the client were to instantiate the lockowner again
3965 3969 * it would be assigned a new pid. Thus we can get the list of
3966 3970 * current locks.
3967 3971 */
3968 3972
3969 3973 llist = flk_get_active_locks(sysid, lo->rl_pid);
3970 3974 /* If we are still holding locks fail */
3971 3975 if (llist != NULL) {
3972 3976
3973 3977 *cs->statusp = resp->status = NFS4ERR_LOCKS_HELD;
3974 3978
3975 3979 flk_free_locklist(llist);
3976 3980 /*
3977 3981 * We need to unhide the lockowner so the client can
3978 3982 * try it again. The bad thing here is if the client
3979 3983 * has a logic error that took it here in the first place
3980 3984 * they probably have lost accounting of the locks that it
3981 3985 * is holding. So we may have dangling state until the
3982 3986 * open owner state is reaped via close. One scenario
3983 3987 * that could possibly occur is that the client has
3984 3988 * sent the unlock request(s) in separate threads
3985 3989 * and has not waited for the replies before sending the
3986 3990 * RELEASE_LOCKOWNER request. Presumably, it would expect
3987 3991 * and deal appropriately with NFS4ERR_LOCKS_HELD, by
3988 3992 * reissuing the request.
3989 3993 */
3990 3994 rfs4_dbe_unhide(lo->rl_dbe);
3991 3995 rfs4_lockowner_rele(lo);
3992 3996 goto out;
3993 3997 }
3994 3998
3995 3999 /*
3996 4000 * For the corresponding client we need to check each open
3997 4001 * owner for any opens that have lockowner state associated
3998 4002 * with this lockowner.
3999 4003 */
4000 4004
4001 4005 rfs4_dbe_lock(lo->rl_client->rc_dbe);
4002 4006 for (oo = list_head(&lo->rl_client->rc_openownerlist); oo != NULL;
4003 4007 oo = list_next(&lo->rl_client->rc_openownerlist, oo)) {
4004 4008
4005 4009 rfs4_dbe_lock(oo->ro_dbe);
4006 4010 for (sp = list_head(&oo->ro_statelist); sp != NULL;
4007 4011 sp = list_next(&oo->ro_statelist, sp)) {
4008 4012
4009 4013 rfs4_dbe_lock(sp->rs_dbe);
4010 4014 for (lsp = list_head(&sp->rs_lostatelist);
4011 4015 lsp != NULL;
4012 4016 lsp = list_next(&sp->rs_lostatelist, lsp)) {
4013 4017 if (lsp->rls_locker == lo) {
4014 4018 rfs4_dbe_lock(lsp->rls_dbe);
4015 4019 rfs4_dbe_invalidate(lsp->rls_dbe);
4016 4020 rfs4_dbe_unlock(lsp->rls_dbe);
4017 4021 }
4018 4022 }
4019 4023 rfs4_dbe_unlock(sp->rs_dbe);
4020 4024 }
4021 4025 rfs4_dbe_unlock(oo->ro_dbe);
4022 4026 }
4023 4027 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
4024 4028
4025 4029 rfs4_lockowner_rele(lo);
4026 4030
4027 4031 *cs->statusp = resp->status = NFS4_OK;
4028 4032
4029 4033 out:
4030 4034 DTRACE_NFSV4_2(op__release__lockowner__done, struct compound_state *,
4031 4035 cs, RELEASE_LOCKOWNER4res *, resp);
4032 4036 }
4033 4037
4034 4038 /*
4035 4039 * short utility function to lookup a file and recall the delegation
4036 4040 */
4037 4041 static rfs4_file_t *
4038 4042 rfs4_lookup_and_findfile(vnode_t *dvp, char *nm, vnode_t **vpp,
4039 4043 int *lkup_error, cred_t *cr)
4040 4044 {
4041 4045 vnode_t *vp;
4042 4046 rfs4_file_t *fp = NULL;
4043 4047 bool_t fcreate = FALSE;
4044 4048 int error;
4045 4049
4046 4050 if (vpp)
4047 4051 *vpp = NULL;
4048 4052
4049 4053 if ((error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cr, NULL, NULL,
4050 4054 NULL)) == 0) {
4051 4055 if (vp->v_type == VREG)
4052 4056 fp = rfs4_findfile(vp, NULL, &fcreate);
4053 4057 if (vpp)
4054 4058 *vpp = vp;
4055 4059 else
4056 4060 VN_RELE(vp);
4057 4061 }
4058 4062
4059 4063 if (lkup_error)
4060 4064 *lkup_error = error;
4061 4065
4062 4066 return (fp);
4063 4067 }
4064 4068
4065 4069 /*
4066 4070 * remove: args: CURRENT_FH: directory; name.
4067 4071 * res: status. If success - CURRENT_FH unchanged, return change_info
4068 4072 * for directory.
4069 4073 */
4070 4074 /* ARGSUSED */
4071 4075 static void
4072 4076 rfs4_op_remove(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4073 4077 struct compound_state *cs)
4074 4078 {
4075 4079 REMOVE4args *args = &argop->nfs_argop4_u.opremove;
4076 4080 REMOVE4res *resp = &resop->nfs_resop4_u.opremove;
4077 4081 int error;
4078 4082 vnode_t *dvp, *vp;
4079 4083 struct vattr bdva, idva, adva;
4080 4084 char *nm;
4081 4085 uint_t len;
4082 4086 rfs4_file_t *fp;
4083 4087 int in_crit = 0;
4084 4088 bslabel_t *clabel;
4085 4089 struct sockaddr *ca;
4086 4090 char *name = NULL;
4087 4091 nfsstat4 status;
4088 4092
4089 4093 DTRACE_NFSV4_2(op__remove__start, struct compound_state *, cs,
4090 4094 REMOVE4args *, args);
4091 4095
4092 4096 /* CURRENT_FH: directory */
4093 4097 dvp = cs->vp;
4094 4098 if (dvp == NULL) {
4095 4099 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4096 4100 goto out;
4097 4101 }
4098 4102
4099 4103 if (cs->access == CS_ACCESS_DENIED) {
4100 4104 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4101 4105 goto out;
4102 4106 }
4103 4107
4104 4108 /*
4105 4109 * If there is an unshared filesystem mounted on this vnode,
4106 4110 * Do not allow to remove anything in this directory.
4107 4111 */
4108 4112 if (vn_ismntpt(dvp)) {
4109 4113 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4110 4114 goto out;
4111 4115 }
4112 4116
4113 4117 if (dvp->v_type != VDIR) {
4114 4118 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4115 4119 goto out;
4116 4120 }
4117 4121
4118 4122 status = utf8_dir_verify(&args->target);
4119 4123 if (status != NFS4_OK) {
4120 4124 *cs->statusp = resp->status = status;
4121 4125 goto out;
4122 4126 }
4123 4127
4124 4128 /*
4125 4129 * Lookup the file so that we can check if it's a directory
4126 4130 */
4127 4131 nm = utf8_to_fn(&args->target, &len, NULL);
4128 4132 if (nm == NULL) {
4129 4133 *cs->statusp = resp->status = NFS4ERR_INVAL;
4130 4134 goto out;
4131 4135 }
4132 4136
4133 4137 if (len > MAXNAMELEN) {
4134 4138 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4135 4139 kmem_free(nm, len);
4136 4140 goto out;
4137 4141 }
4138 4142
4139 4143 if (rdonly4(req, cs)) {
4140 4144 *cs->statusp = resp->status = NFS4ERR_ROFS;
4141 4145 kmem_free(nm, len);
4142 4146 goto out;
4143 4147 }
4144 4148
4145 4149 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4146 4150 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
4147 4151 MAXPATHLEN + 1);
4148 4152
4149 4153 if (name == NULL) {
4150 4154 *cs->statusp = resp->status = NFS4ERR_INVAL;
4151 4155 kmem_free(nm, len);
4152 4156 goto out;
4153 4157 }
4154 4158
4155 4159 /*
4156 4160 * Lookup the file to determine type and while we are see if
4157 4161 * there is a file struct around and check for delegation.
4158 4162 * We don't need to acquire va_seq before this lookup, if
4159 4163 * it causes an update, cinfo.before will not match, which will
4160 4164 * trigger a cache flush even if atomic is TRUE.
4161 4165 */
4162 4166 if (fp = rfs4_lookup_and_findfile(dvp, name, &vp, &error, cs->cr)) {
4163 4167 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4164 4168 NULL)) {
4165 4169 VN_RELE(vp);
4166 4170 rfs4_file_rele(fp);
4167 4171 *cs->statusp = resp->status = NFS4ERR_DELAY;
4168 4172 if (nm != name)
4169 4173 kmem_free(name, MAXPATHLEN + 1);
4170 4174 kmem_free(nm, len);
4171 4175 goto out;
4172 4176 }
4173 4177 }
4174 4178
4175 4179 /* Didn't find anything to remove */
4176 4180 if (vp == NULL) {
4177 4181 *cs->statusp = resp->status = error;
4178 4182 if (nm != name)
4179 4183 kmem_free(name, MAXPATHLEN + 1);
4180 4184 kmem_free(nm, len);
4181 4185 goto out;
4182 4186 }
4183 4187
4184 4188 if (nbl_need_check(vp)) {
4185 4189 nbl_start_crit(vp, RW_READER);
4186 4190 in_crit = 1;
4187 4191 if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0, NULL)) {
4188 4192 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4189 4193 if (nm != name)
4190 4194 kmem_free(name, MAXPATHLEN + 1);
4191 4195 kmem_free(nm, len);
4192 4196 nbl_end_crit(vp);
4193 4197 VN_RELE(vp);
4194 4198 if (fp) {
4195 4199 rfs4_clear_dont_grant(fp);
4196 4200 rfs4_file_rele(fp);
4197 4201 }
4198 4202 goto out;
4199 4203 }
4200 4204 }
4201 4205
4202 4206 /* check label before allowing removal */
4203 4207 if (is_system_labeled()) {
4204 4208 ASSERT(req->rq_label != NULL);
4205 4209 clabel = req->rq_label;
4206 4210 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
4207 4211 "got client label from request(1)",
4208 4212 struct svc_req *, req);
4209 4213 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4210 4214 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4211 4215 cs->exi)) {
4212 4216 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4213 4217 if (name != nm)
4214 4218 kmem_free(name, MAXPATHLEN + 1);
4215 4219 kmem_free(nm, len);
4216 4220 if (in_crit)
4217 4221 nbl_end_crit(vp);
4218 4222 VN_RELE(vp);
4219 4223 if (fp) {
4220 4224 rfs4_clear_dont_grant(fp);
4221 4225 rfs4_file_rele(fp);
4222 4226 }
4223 4227 goto out;
4224 4228 }
4225 4229 }
4226 4230 }
4227 4231
4228 4232 /* Get dir "before" change value */
4229 4233 bdva.va_mask = AT_CTIME|AT_SEQ;
4230 4234 error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
4231 4235 if (error) {
4232 4236 *cs->statusp = resp->status = puterrno4(error);
4233 4237 if (nm != name)
4234 4238 kmem_free(name, MAXPATHLEN + 1);
4235 4239 kmem_free(nm, len);
4236 4240 if (in_crit)
4237 4241 nbl_end_crit(vp);
4238 4242 VN_RELE(vp);
4239 4243 if (fp) {
4240 4244 rfs4_clear_dont_grant(fp);
4241 4245 rfs4_file_rele(fp);
4242 4246 }
4243 4247 goto out;
4244 4248 }
4245 4249 NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
4246 4250
4247 4251 /* Actually do the REMOVE operation */
4248 4252 if (vp->v_type == VDIR) {
4249 4253 /*
4250 4254 * Can't remove a directory that has a mounted-on filesystem.
4251 4255 */
4252 4256 if (vn_ismntpt(vp)) {
4253 4257 error = EACCES;
4254 4258 } else {
4255 4259 /*
4256 4260 * System V defines rmdir to return EEXIST,
4257 4261 * not ENOTEMPTY, if the directory is not
4258 4262 * empty. A System V NFS server needs to map
4259 4263 * NFS4ERR_EXIST to NFS4ERR_NOTEMPTY to
4260 4264 * transmit over the wire.
4261 4265 */
4262 4266 if ((error = VOP_RMDIR(dvp, name, ZONE_ROOTVP(), cs->cr,
4263 4267 NULL, 0)) == EEXIST)
4264 4268 error = ENOTEMPTY;
4265 4269 }
4266 4270 } else {
4267 4271 if ((error = VOP_REMOVE(dvp, name, cs->cr, NULL, 0)) == 0 &&
4268 4272 fp != NULL) {
4269 4273 struct vattr va;
4270 4274 vnode_t *tvp;
4271 4275
4272 4276 rfs4_dbe_lock(fp->rf_dbe);
4273 4277 tvp = fp->rf_vp;
4274 4278 if (tvp)
4275 4279 VN_HOLD(tvp);
4276 4280 rfs4_dbe_unlock(fp->rf_dbe);
4277 4281
4278 4282 if (tvp) {
4279 4283 /*
4280 4284 * This is va_seq safe because we are not
4281 4285 * manipulating dvp.
4282 4286 */
4283 4287 va.va_mask = AT_NLINK;
4284 4288 if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4285 4289 va.va_nlink == 0) {
4286 4290 /* Remove state on file remove */
4287 4291 if (in_crit) {
4288 4292 nbl_end_crit(vp);
4289 4293 in_crit = 0;
4290 4294 }
4291 4295 rfs4_close_all_state(fp);
4292 4296 }
4293 4297 VN_RELE(tvp);
4294 4298 }
4295 4299 }
4296 4300 }
4297 4301
4298 4302 if (in_crit)
4299 4303 nbl_end_crit(vp);
4300 4304 VN_RELE(vp);
4301 4305
4302 4306 if (fp) {
4303 4307 rfs4_clear_dont_grant(fp);
4304 4308 rfs4_file_rele(fp);
4305 4309 }
4306 4310 if (nm != name)
4307 4311 kmem_free(name, MAXPATHLEN + 1);
4308 4312 kmem_free(nm, len);
4309 4313
4310 4314 if (error) {
4311 4315 *cs->statusp = resp->status = puterrno4(error);
4312 4316 goto out;
4313 4317 }
4314 4318
4315 4319 /*
4316 4320 * Get the initial "after" sequence number, if it fails, set to zero
4317 4321 */
4318 4322 idva.va_mask = AT_SEQ;
4319 4323 if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
4320 4324 idva.va_seq = 0;
4321 4325
4322 4326 /*
4323 4327 * Force modified data and metadata out to stable storage.
4324 4328 */
4325 4329 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
4326 4330
4327 4331 /*
4328 4332 * Get "after" change value, if it fails, simply return the
4329 4333 * before value.
4330 4334 */
4331 4335 adva.va_mask = AT_CTIME|AT_SEQ;
4332 4336 if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
4333 4337 adva.va_ctime = bdva.va_ctime;
4334 4338 adva.va_seq = 0;
4335 4339 }
4336 4340
4337 4341 NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
4338 4342
4339 4343 /*
4340 4344 * The cinfo.atomic = TRUE only if we have
4341 4345 * non-zero va_seq's, and it has incremented by exactly one
4342 4346 * during the VOP_REMOVE/RMDIR and it didn't change during
4343 4347 * the VOP_FSYNC.
4344 4348 */
4345 4349 if (bdva.va_seq && idva.va_seq && adva.va_seq &&
4346 4350 idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
4347 4351 resp->cinfo.atomic = TRUE;
4348 4352 else
4349 4353 resp->cinfo.atomic = FALSE;
4350 4354
4351 4355 *cs->statusp = resp->status = NFS4_OK;
4352 4356
4353 4357 out:
4354 4358 DTRACE_NFSV4_2(op__remove__done, struct compound_state *, cs,
4355 4359 REMOVE4res *, resp);
4356 4360 }
4357 4361
4358 4362 /*
4359 4363 * rename: args: SAVED_FH: from directory, CURRENT_FH: target directory,
4360 4364 * oldname and newname.
4361 4365 * res: status. If success - CURRENT_FH unchanged, return change_info
4362 4366 * for both from and target directories.
4363 4367 */
4364 4368 /* ARGSUSED */
4365 4369 static void
4366 4370 rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4367 4371 struct compound_state *cs)
4368 4372 {
4369 4373 RENAME4args *args = &argop->nfs_argop4_u.oprename;
4370 4374 RENAME4res *resp = &resop->nfs_resop4_u.oprename;
4371 4375 int error;
4372 4376 vnode_t *odvp;
4373 4377 vnode_t *ndvp;
4374 4378 vnode_t *srcvp, *targvp, *tvp;
4375 4379 struct vattr obdva, oidva, oadva;
4376 4380 struct vattr nbdva, nidva, nadva;
4377 4381 char *onm, *nnm;
4378 4382 uint_t olen, nlen;
4379 4383 rfs4_file_t *fp, *sfp;
4380 4384 int in_crit_src, in_crit_targ;
4381 4385 int fp_rele_grant_hold, sfp_rele_grant_hold;
4382 4386 int unlinked;
4383 4387 bslabel_t *clabel;
4384 4388 struct sockaddr *ca;
4385 4389 char *converted_onm = NULL;
4386 4390 char *converted_nnm = NULL;
4387 4391 nfsstat4 status;
4388 4392
4389 4393 DTRACE_NFSV4_2(op__rename__start, struct compound_state *, cs,
4390 4394 RENAME4args *, args);
4391 4395
4392 4396 fp = sfp = NULL;
4393 4397 srcvp = targvp = tvp = NULL;
4394 4398 in_crit_src = in_crit_targ = 0;
4395 4399 fp_rele_grant_hold = sfp_rele_grant_hold = 0;
4396 4400 unlinked = 0;
4397 4401
4398 4402 /* CURRENT_FH: target directory */
4399 4403 ndvp = cs->vp;
4400 4404 if (ndvp == NULL) {
4401 4405 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4402 4406 goto out;
4403 4407 }
4404 4408
4405 4409 /* SAVED_FH: from directory */
4406 4410 odvp = cs->saved_vp;
4407 4411 if (odvp == NULL) {
4408 4412 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4409 4413 goto out;
4410 4414 }
4411 4415
4412 4416 if (cs->access == CS_ACCESS_DENIED) {
4413 4417 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4414 4418 goto out;
4415 4419 }
4416 4420
4417 4421 /*
4418 4422 * If there is an unshared filesystem mounted on this vnode,
4419 4423 * do not allow to rename objects in this directory.
4420 4424 */
4421 4425 if (vn_ismntpt(odvp)) {
4422 4426 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4423 4427 goto out;
4424 4428 }
4425 4429
4426 4430 /*
4427 4431 * If there is an unshared filesystem mounted on this vnode,
4428 4432 * do not allow to rename to this directory.
4429 4433 */
4430 4434 if (vn_ismntpt(ndvp)) {
4431 4435 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4432 4436 goto out;
4433 4437 }
4434 4438
4435 4439 if (odvp->v_type != VDIR || ndvp->v_type != VDIR) {
4436 4440 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4437 4441 goto out;
4438 4442 }
4439 4443
4440 4444 if (cs->saved_exi != cs->exi) {
4441 4445 *cs->statusp = resp->status = NFS4ERR_XDEV;
4442 4446 goto out;
4443 4447 }
4444 4448
4445 4449 status = utf8_dir_verify(&args->oldname);
4446 4450 if (status != NFS4_OK) {
4447 4451 *cs->statusp = resp->status = status;
4448 4452 goto out;
4449 4453 }
4450 4454
4451 4455 status = utf8_dir_verify(&args->newname);
4452 4456 if (status != NFS4_OK) {
4453 4457 *cs->statusp = resp->status = status;
4454 4458 goto out;
4455 4459 }
4456 4460
4457 4461 onm = utf8_to_fn(&args->oldname, &olen, NULL);
4458 4462 if (onm == NULL) {
4459 4463 *cs->statusp = resp->status = NFS4ERR_INVAL;
4460 4464 goto out;
4461 4465 }
4462 4466 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4463 4467 nlen = MAXPATHLEN + 1;
4464 4468 converted_onm = nfscmd_convname(ca, cs->exi, onm, NFSCMD_CONV_INBOUND,
4465 4469 nlen);
4466 4470
4467 4471 if (converted_onm == NULL) {
4468 4472 *cs->statusp = resp->status = NFS4ERR_INVAL;
4469 4473 kmem_free(onm, olen);
4470 4474 goto out;
4471 4475 }
4472 4476
4473 4477 nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4474 4478 if (nnm == NULL) {
4475 4479 *cs->statusp = resp->status = NFS4ERR_INVAL;
4476 4480 if (onm != converted_onm)
4477 4481 kmem_free(converted_onm, MAXPATHLEN + 1);
4478 4482 kmem_free(onm, olen);
4479 4483 goto out;
4480 4484 }
4481 4485 converted_nnm = nfscmd_convname(ca, cs->exi, nnm, NFSCMD_CONV_INBOUND,
4482 4486 MAXPATHLEN + 1);
4483 4487
4484 4488 if (converted_nnm == NULL) {
4485 4489 *cs->statusp = resp->status = NFS4ERR_INVAL;
4486 4490 kmem_free(nnm, nlen);
4487 4491 nnm = NULL;
4488 4492 if (onm != converted_onm)
4489 4493 kmem_free(converted_onm, MAXPATHLEN + 1);
4490 4494 kmem_free(onm, olen);
4491 4495 goto out;
4492 4496 }
4493 4497
4494 4498
4495 4499 if (olen > MAXNAMELEN || nlen > MAXNAMELEN) {
4496 4500 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4497 4501 kmem_free(onm, olen);
4498 4502 kmem_free(nnm, nlen);
4499 4503 goto out;
4500 4504 }
4501 4505
4502 4506
4503 4507 if (rdonly4(req, cs)) {
4504 4508 *cs->statusp = resp->status = NFS4ERR_ROFS;
4505 4509 if (onm != converted_onm)
4506 4510 kmem_free(converted_onm, MAXPATHLEN + 1);
4507 4511 kmem_free(onm, olen);
4508 4512 if (nnm != converted_nnm)
4509 4513 kmem_free(converted_nnm, MAXPATHLEN + 1);
4510 4514 kmem_free(nnm, nlen);
4511 4515 goto out;
4512 4516 }
4513 4517
4514 4518 /* check label of the target dir */
4515 4519 if (is_system_labeled()) {
4516 4520 ASSERT(req->rq_label != NULL);
4517 4521 clabel = req->rq_label;
4518 4522 DTRACE_PROBE2(tx__rfs4__log__info__oprename__clabel, char *,
4519 4523 "got client label from request(1)",
4520 4524 struct svc_req *, req);
4521 4525 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4522 4526 if (!do_rfs_label_check(clabel, ndvp,
4523 4527 EQUALITY_CHECK, cs->exi)) {
4524 4528 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4525 4529 goto err_out;
4526 4530 }
4527 4531 }
4528 4532 }
4529 4533
4530 4534 /*
4531 4535 * Is the source a file and have a delegation?
4532 4536 * We don't need to acquire va_seq before these lookups, if
4533 4537 * it causes an update, cinfo.before will not match, which will
4534 4538 * trigger a cache flush even if atomic is TRUE.
4535 4539 */
4536 4540 if (sfp = rfs4_lookup_and_findfile(odvp, converted_onm, &srcvp,
4537 4541 &error, cs->cr)) {
4538 4542 if (rfs4_check_delegated_byfp(FWRITE, sfp, TRUE, TRUE, TRUE,
4539 4543 NULL)) {
4540 4544 *cs->statusp = resp->status = NFS4ERR_DELAY;
4541 4545 goto err_out;
4542 4546 }
4543 4547 }
4544 4548
4545 4549 if (srcvp == NULL) {
4546 4550 *cs->statusp = resp->status = puterrno4(error);
4547 4551 if (onm != converted_onm)
4548 4552 kmem_free(converted_onm, MAXPATHLEN + 1);
4549 4553 kmem_free(onm, olen);
4550 4554 if (nnm != converted_nnm)
4551 4555 kmem_free(converted_nnm, MAXPATHLEN + 1);
4552 4556 kmem_free(nnm, nlen);
4553 4557 goto out;
4554 4558 }
4555 4559
4556 4560 sfp_rele_grant_hold = 1;
4557 4561
4558 4562 /* Does the destination exist and a file and have a delegation? */
4559 4563 if (fp = rfs4_lookup_and_findfile(ndvp, converted_nnm, &targvp,
4560 4564 NULL, cs->cr)) {
4561 4565 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4562 4566 NULL)) {
4563 4567 *cs->statusp = resp->status = NFS4ERR_DELAY;
4564 4568 goto err_out;
4565 4569 }
4566 4570 }
4567 4571 fp_rele_grant_hold = 1;
4568 4572
4569 4573 /* Check for NBMAND lock on both source and target */
4570 4574 if (nbl_need_check(srcvp)) {
4571 4575 nbl_start_crit(srcvp, RW_READER);
4572 4576 in_crit_src = 1;
4573 4577 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
4574 4578 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4575 4579 goto err_out;
4576 4580 }
4577 4581 }
4578 4582
4579 4583 if (targvp && nbl_need_check(targvp)) {
4580 4584 nbl_start_crit(targvp, RW_READER);
4581 4585 in_crit_targ = 1;
4582 4586 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
4583 4587 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4584 4588 goto err_out;
4585 4589 }
4586 4590 }
4587 4591
4588 4592 /* Get source "before" change value */
4589 4593 obdva.va_mask = AT_CTIME|AT_SEQ;
4590 4594 error = VOP_GETATTR(odvp, &obdva, 0, cs->cr, NULL);
4591 4595 if (!error) {
4592 4596 nbdva.va_mask = AT_CTIME|AT_SEQ;
4593 4597 error = VOP_GETATTR(ndvp, &nbdva, 0, cs->cr, NULL);
4594 4598 }
4595 4599 if (error) {
4596 4600 *cs->statusp = resp->status = puterrno4(error);
4597 4601 goto err_out;
4598 4602 }
4599 4603
4600 4604 NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.before, obdva.va_ctime)
4601 4605 NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.before, nbdva.va_ctime)
4602 4606
4603 4607 error = VOP_RENAME(odvp, converted_onm, ndvp, converted_nnm, cs->cr,
4604 4608 NULL, 0);
4605 4609
4606 4610 /*
4607 4611 * If target existed and was unlinked by VOP_RENAME, state will need
4608 4612 * closed. To avoid deadlock, rfs4_close_all_state will be done after
4609 4613 * any necessary nbl_end_crit on srcvp and tgtvp.
4610 4614 */
4611 4615 if (error == 0 && fp != NULL) {
4612 4616 rfs4_dbe_lock(fp->rf_dbe);
4613 4617 tvp = fp->rf_vp;
4614 4618 if (tvp)
4615 4619 VN_HOLD(tvp);
4616 4620 rfs4_dbe_unlock(fp->rf_dbe);
4617 4621
4618 4622 if (tvp) {
4619 4623 struct vattr va;
4620 4624 va.va_mask = AT_NLINK;
4621 4625
4622 4626 if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4623 4627 va.va_nlink == 0) {
4624 4628 unlinked = 1;
4625 4629
4626 4630 /* DEBUG data */
4627 4631 if ((srcvp == targvp) || (tvp != targvp)) {
4628 4632 cmn_err(CE_WARN, "rfs4_op_rename: "
4629 4633 "srcvp %p, targvp: %p, tvp: %p",
4630 4634 (void *)srcvp, (void *)targvp,
4631 4635 (void *)tvp);
4632 4636 }
4633 4637 } else {
4634 4638 VN_RELE(tvp);
4635 4639 }
4636 4640 }
4637 4641 }
4638 4642 if (error == 0)
4639 4643 vn_renamepath(ndvp, srcvp, nnm, nlen - 1);
4640 4644
4641 4645 if (in_crit_src)
4642 4646 nbl_end_crit(srcvp);
4643 4647 if (srcvp)
4644 4648 VN_RELE(srcvp);
4645 4649 if (in_crit_targ)
4646 4650 nbl_end_crit(targvp);
4647 4651 if (targvp)
4648 4652 VN_RELE(targvp);
4649 4653
4650 4654 if (unlinked) {
4651 4655 ASSERT(fp != NULL);
4652 4656 ASSERT(tvp != NULL);
4653 4657
4654 4658 /* DEBUG data */
4655 4659 if (RW_READ_HELD(&tvp->v_nbllock)) {
4656 4660 cmn_err(CE_WARN, "rfs4_op_rename: "
4657 4661 "RW_READ_HELD(%p)", (void *)tvp);
4658 4662 }
4659 4663
4660 4664 /* The file is gone and so should the state */
4661 4665 rfs4_close_all_state(fp);
4662 4666 VN_RELE(tvp);
4663 4667 }
4664 4668
4665 4669 if (sfp) {
4666 4670 rfs4_clear_dont_grant(sfp);
4667 4671 rfs4_file_rele(sfp);
4668 4672 }
4669 4673 if (fp) {
4670 4674 rfs4_clear_dont_grant(fp);
4671 4675 rfs4_file_rele(fp);
4672 4676 }
4673 4677
4674 4678 if (converted_onm != onm)
4675 4679 kmem_free(converted_onm, MAXPATHLEN + 1);
4676 4680 kmem_free(onm, olen);
4677 4681 if (converted_nnm != nnm)
4678 4682 kmem_free(converted_nnm, MAXPATHLEN + 1);
4679 4683 kmem_free(nnm, nlen);
4680 4684
4681 4685 /*
4682 4686 * Get the initial "after" sequence number, if it fails, set to zero
4683 4687 */
4684 4688 oidva.va_mask = AT_SEQ;
4685 4689 if (VOP_GETATTR(odvp, &oidva, 0, cs->cr, NULL))
4686 4690 oidva.va_seq = 0;
4687 4691
4688 4692 nidva.va_mask = AT_SEQ;
4689 4693 if (VOP_GETATTR(ndvp, &nidva, 0, cs->cr, NULL))
4690 4694 nidva.va_seq = 0;
4691 4695
4692 4696 /*
4693 4697 * Force modified data and metadata out to stable storage.
4694 4698 */
4695 4699 (void) VOP_FSYNC(odvp, 0, cs->cr, NULL);
4696 4700 (void) VOP_FSYNC(ndvp, 0, cs->cr, NULL);
4697 4701
4698 4702 if (error) {
4699 4703 *cs->statusp = resp->status = puterrno4(error);
4700 4704 goto out;
4701 4705 }
4702 4706
4703 4707 /*
4704 4708 * Get "after" change values, if it fails, simply return the
4705 4709 * before value.
4706 4710 */
4707 4711 oadva.va_mask = AT_CTIME|AT_SEQ;
4708 4712 if (VOP_GETATTR(odvp, &oadva, 0, cs->cr, NULL)) {
4709 4713 oadva.va_ctime = obdva.va_ctime;
4710 4714 oadva.va_seq = 0;
4711 4715 }
4712 4716
4713 4717 nadva.va_mask = AT_CTIME|AT_SEQ;
4714 4718 if (VOP_GETATTR(odvp, &nadva, 0, cs->cr, NULL)) {
4715 4719 nadva.va_ctime = nbdva.va_ctime;
4716 4720 nadva.va_seq = 0;
4717 4721 }
4718 4722
4719 4723 NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.after, oadva.va_ctime)
4720 4724 NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.after, nadva.va_ctime)
4721 4725
4722 4726 /*
4723 4727 * The cinfo.atomic = TRUE only if we have
4724 4728 * non-zero va_seq's, and it has incremented by exactly one
4725 4729 * during the VOP_RENAME and it didn't change during the VOP_FSYNC.
4726 4730 */
4727 4731 if (obdva.va_seq && oidva.va_seq && oadva.va_seq &&
4728 4732 oidva.va_seq == (obdva.va_seq + 1) && oidva.va_seq == oadva.va_seq)
4729 4733 resp->source_cinfo.atomic = TRUE;
4730 4734 else
4731 4735 resp->source_cinfo.atomic = FALSE;
4732 4736
4733 4737 if (nbdva.va_seq && nidva.va_seq && nadva.va_seq &&
4734 4738 nidva.va_seq == (nbdva.va_seq + 1) && nidva.va_seq == nadva.va_seq)
4735 4739 resp->target_cinfo.atomic = TRUE;
4736 4740 else
4737 4741 resp->target_cinfo.atomic = FALSE;
4738 4742
4739 4743 #ifdef VOLATILE_FH_TEST
4740 4744 {
4741 4745 extern void add_volrnm_fh(struct exportinfo *, vnode_t *);
4742 4746
4743 4747 /*
4744 4748 * Add the renamed file handle to the volatile rename list
4745 4749 */
4746 4750 if (cs->exi->exi_export.ex_flags & EX_VOLRNM) {
4747 4751 /* file handles may expire on rename */
4748 4752 vnode_t *vp;
4749 4753
4750 4754 nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4751 4755 /*
4752 4756 * Already know that nnm will be a valid string
4753 4757 */
4754 4758 error = VOP_LOOKUP(ndvp, nnm, &vp, NULL, 0, NULL, cs->cr,
4755 4759 NULL, NULL, NULL);
4756 4760 kmem_free(nnm, nlen);
4757 4761 if (!error) {
4758 4762 add_volrnm_fh(cs->exi, vp);
4759 4763 VN_RELE(vp);
4760 4764 }
4761 4765 }
4762 4766 }
4763 4767 #endif /* VOLATILE_FH_TEST */
4764 4768
4765 4769 *cs->statusp = resp->status = NFS4_OK;
4766 4770 out:
4767 4771 DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4768 4772 RENAME4res *, resp);
4769 4773 return;
4770 4774
4771 4775 err_out:
4772 4776 if (onm != converted_onm)
4773 4777 kmem_free(converted_onm, MAXPATHLEN + 1);
4774 4778 if (onm != NULL)
4775 4779 kmem_free(onm, olen);
4776 4780 if (nnm != converted_nnm)
4777 4781 kmem_free(converted_nnm, MAXPATHLEN + 1);
4778 4782 if (nnm != NULL)
4779 4783 kmem_free(nnm, nlen);
4780 4784
4781 4785 if (in_crit_src) nbl_end_crit(srcvp);
4782 4786 if (in_crit_targ) nbl_end_crit(targvp);
4783 4787 if (targvp) VN_RELE(targvp);
4784 4788 if (srcvp) VN_RELE(srcvp);
4785 4789 if (sfp) {
4786 4790 if (sfp_rele_grant_hold) rfs4_clear_dont_grant(sfp);
4787 4791 rfs4_file_rele(sfp);
4788 4792 }
4789 4793 if (fp) {
4790 4794 if (fp_rele_grant_hold) rfs4_clear_dont_grant(fp);
4791 4795 rfs4_file_rele(fp);
4792 4796 }
4793 4797
4794 4798 DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4795 4799 RENAME4res *, resp);
4796 4800 }
4797 4801
4798 4802 /* ARGSUSED */
4799 4803 static void
4800 4804 rfs4_op_renew(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4801 4805 struct compound_state *cs)
4802 4806 {
4803 4807 RENEW4args *args = &argop->nfs_argop4_u.oprenew;
4804 4808 RENEW4res *resp = &resop->nfs_resop4_u.oprenew;
4805 4809 rfs4_client_t *cp;
4806 4810
4807 4811 DTRACE_NFSV4_2(op__renew__start, struct compound_state *, cs,
4808 4812 RENEW4args *, args);
4809 4813
4810 4814 if ((cp = rfs4_findclient_by_id(args->clientid, FALSE)) == NULL) {
4811 4815 *cs->statusp = resp->status =
4812 4816 rfs4_check_clientid(&args->clientid, 0);
4813 4817 goto out;
4814 4818 }
4815 4819
4816 4820 if (rfs4_lease_expired(cp)) {
4817 4821 rfs4_client_rele(cp);
4818 4822 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
4819 4823 goto out;
4820 4824 }
4821 4825
4822 4826 rfs4_update_lease(cp);
4823 4827
4824 4828 mutex_enter(cp->rc_cbinfo.cb_lock);
4825 4829 if (cp->rc_cbinfo.cb_notified_of_cb_path_down == FALSE) {
4826 4830 cp->rc_cbinfo.cb_notified_of_cb_path_down = TRUE;
4827 4831 *cs->statusp = resp->status = NFS4ERR_CB_PATH_DOWN;
4828 4832 } else {
4829 4833 *cs->statusp = resp->status = NFS4_OK;
4830 4834 }
4831 4835 mutex_exit(cp->rc_cbinfo.cb_lock);
4832 4836
4833 4837 rfs4_client_rele(cp);
4834 4838
4835 4839 out:
4836 4840 DTRACE_NFSV4_2(op__renew__done, struct compound_state *, cs,
4837 4841 RENEW4res *, resp);
4838 4842 }
4839 4843
4840 4844 /* ARGSUSED */
4841 4845 static void
4842 4846 rfs4_op_restorefh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
4843 4847 struct compound_state *cs)
4844 4848 {
4845 4849 RESTOREFH4res *resp = &resop->nfs_resop4_u.oprestorefh;
4846 4850
4847 4851 DTRACE_NFSV4_1(op__restorefh__start, struct compound_state *, cs);
4848 4852
4849 4853 /* No need to check cs->access - we are not accessing any object */
4850 4854 if ((cs->saved_vp == NULL) || (cs->saved_fh.nfs_fh4_val == NULL)) {
4851 4855 *cs->statusp = resp->status = NFS4ERR_RESTOREFH;
4852 4856 goto out;
4853 4857 }
4854 4858 if (cs->vp != NULL) {
4855 4859 VN_RELE(cs->vp);
4856 4860 }
4857 4861 cs->vp = cs->saved_vp;
4858 4862 cs->saved_vp = NULL;
4859 4863 cs->exi = cs->saved_exi;
4860 4864 nfs_fh4_copy(&cs->saved_fh, &cs->fh);
4861 4865 *cs->statusp = resp->status = NFS4_OK;
4862 4866 cs->deleg = FALSE;
4863 4867
4864 4868 out:
4865 4869 DTRACE_NFSV4_2(op__restorefh__done, struct compound_state *, cs,
4866 4870 RESTOREFH4res *, resp);
4867 4871 }
4868 4872
4869 4873 /* ARGSUSED */
4870 4874 static void
4871 4875 rfs4_op_savefh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4872 4876 struct compound_state *cs)
4873 4877 {
4874 4878 SAVEFH4res *resp = &resop->nfs_resop4_u.opsavefh;
4875 4879
4876 4880 DTRACE_NFSV4_1(op__savefh__start, struct compound_state *, cs);
4877 4881
4878 4882 /* No need to check cs->access - we are not accessing any object */
4879 4883 if (cs->vp == NULL) {
4880 4884 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4881 4885 goto out;
4882 4886 }
4883 4887 if (cs->saved_vp != NULL) {
4884 4888 VN_RELE(cs->saved_vp);
4885 4889 }
4886 4890 cs->saved_vp = cs->vp;
4887 4891 VN_HOLD(cs->saved_vp);
4888 4892 cs->saved_exi = cs->exi;
4889 4893 /*
4890 4894 * since SAVEFH is fairly rare, don't alloc space for its fh
4891 4895 * unless necessary.
4892 4896 */
4893 4897 if (cs->saved_fh.nfs_fh4_val == NULL) {
4894 4898 cs->saved_fh.nfs_fh4_val = kmem_alloc(NFS4_FHSIZE, KM_SLEEP);
4895 4899 }
4896 4900 nfs_fh4_copy(&cs->fh, &cs->saved_fh);
4897 4901 *cs->statusp = resp->status = NFS4_OK;
4898 4902
4899 4903 out:
4900 4904 DTRACE_NFSV4_2(op__savefh__done, struct compound_state *, cs,
4901 4905 SAVEFH4res *, resp);
4902 4906 }
4903 4907
4904 4908 /*
4905 4909 * rfs4_verify_attr is called when nfsv4 Setattr failed, but we wish to
4906 4910 * return the bitmap of attrs that were set successfully. It is also
4907 4911 * called by Verify/Nverify to test the vattr/vfsstat attrs. It should
4908 4912 * always be called only after do_rfs4_set_attrs().
4909 4913 *
4910 4914 * Verify that the attributes are same as the expected ones. sargp->vap
4911 4915 * and sargp->sbp contain the input attributes as translated from fattr4.
4912 4916 *
4913 4917 * This function verifies only the attrs that correspond to a vattr or
4914 4918 * vfsstat struct. That is because of the extra step needed to get the
4915 4919 * corresponding system structs. Other attributes have already been set or
4916 4920 * verified by do_rfs4_set_attrs.
4917 4921 *
 * Arguments:
 *   sargp - supplies the vnode and cred (via sargp->cs); sargp->vap and
 *           sargp->sbp are overwritten here with the object's current
 *           values.
 *   resp  - when non-NULL, the fbit of every attribute that matched is
 *           OR-ed into *resp and the loop keeps going past mismatches and
 *           past a failed VOP_GETATTR/VFS_STATVFS; when NULL, the first
 *           failure or mismatch ends the check.
 *   ntovp - the decoded attributes (na, amap, attrcnt) plus the vfsstat
 *           flag saying whether VFS_STATVFS is needed.
 *
4918 4922 * Return 0 if all attrs match, -1 if some don't, error if error processing.
4919 4923 */
4920 4924 static int
4921 4925 rfs4_verify_attr(struct nfs4_svgetit_arg *sargp,
4922 4926 bitmap4 *resp, struct nfs4_ntov_table *ntovp)
4923 4927 {
4924 4928 int error, ret_error = 0;
4925 4929 int i, k;
4926 4930 uint_t sva_mask = sargp->vap->va_mask;
4927 4931 uint_t vbit;
4928 4932 union nfs4_attr_u *na;
4929 4933 uint8_t *amap;
4930 4934 bool_t getsb = ntovp->vfsstat;
4931 4935
/* Fetch the current vattr values only if some vattr attribute was decoded. */
4932 4936 if (sva_mask != 0) {
4933 4937 /*
4934 4938 * Okay to overwrite sargp->vap because we verify based
4935 4939 * on the incoming values.
4936 4940 */
4937 4941 ret_error = VOP_GETATTR(sargp->cs->vp, sargp->vap, 0,
4938 4942 sargp->cs->cr, NULL);
4939 4943 if (ret_error) {
4940 4944 if (resp == NULL)
4941 4945 return (ret_error);
4942 4946 /*
4943 4947 * Must return bitmap of successful attrs
4944 4948 */
4945 4949 sva_mask = 0; /* to prevent checking vap later */
4946 4950 } else {
4947 4951 /*
4948 4952 * Some file systems clobber va_mask. it is probably
4949 4953 * wrong of them to do so, nonetheless we practice
4950 4954 * defensive coding.
4951 4955 * See bug id 4276830.
4952 4956 */
4953 4957 sargp->vap->va_mask = sva_mask;
4954 4958 }
4955 4959 }
4956 4960
4957 4961 if (getsb) {
4958 4962 /*
4959 4963 * Now get the superblock and loop on the bitmap, as there is
4960 4964 * no simple way of translating from superblock to bitmap4.
4961 4965 */
4962 4966 ret_error = VFS_STATVFS(sargp->cs->vp->v_vfsp, sargp->sbp);
4963 4967 if (ret_error) {
4964 4968 if (resp == NULL)
4965 4969 goto errout;
/* A bitmap was requested: carry on, but skip the superblock attrs. */
4966 4970 getsb = FALSE;
4967 4971 }
4968 4972 }
4969 4973
4970 4974 /*
4971 4975 * Now loop and verify each attribute which getattr returned
4972 4976 * whether it's the same as the input.
4973 4977 */
/* Nothing to compare and no bitmap to fill in: return ret_error as is. */
4974 4978 if (resp == NULL && !getsb && (sva_mask == 0))
4975 4979 goto errout;
4976 4980
4977 4981 na = ntovp->na;
4978 4982 amap = ntovp->amap;
4979 4983 k = 0;
/* na and amap advance in lock step; *amap is the index into nfs4_ntov_map. */
4980 4984 for (i = 0; i < ntovp->attrcnt; i++, na++, amap++) {
4981 4985 k = *amap;
4982 4986 ASSERT(nfs4_ntov_map[k].nval == k);
4983 4987 vbit = nfs4_ntov_map[k].vbit;
4984 4988
4985 4989 /*
4986 4990 * If vattr attribute but VOP_GETATTR failed, or it's
4987 4991 * superblock attribute but VFS_STATVFS failed, skip
4988 4992 */
4989 4993 if (vbit) {
4990 4994 if ((vbit & sva_mask) == 0)
4991 4995 continue;
4992 4996 } else if (!(getsb && nfs4_ntov_map[k].vfsstat)) {
4993 4997 continue;
4994 4998 }
4995 4999 error = (*nfs4_ntov_map[k].sv_getit)(NFS4ATTR_VERIT, sargp, na);
/* Bitmap mode: record matches and keep scanning after a mismatch. */
4996 5000 if (resp != NULL) {
4997 5001 if (error)
4998 5002 ret_error = -1; /* not all match */
4999 5003 else /* update response bitmap */
5000 5004 *resp |= nfs4_ntov_map[k].fbit;
5001 5005 continue;
5002 5006 }
/* Plain verify mode: the first mismatch settles the answer. */
5003 5007 if (error) {
5004 5008 ret_error = -1; /* not all match */
5005 5009 break;
5006 5010 }
5007 5011 }
5008 5012 errout:
5009 5013 return (ret_error);
5010 5014 }
5011 5015
5012 5016 /*
5013 5017 * Decode the attribute to be set/verified. If the attr requires a sys op
5014 5018 * (VOP_GETATTR, VFS_VFSSTAT), and the request is to verify, then don't
5015 5019 * call the sv_getit function for it, because the sys op hasn't yet been done.
5016 5020 * Return 0 for success, error code if failed.
5017 5021 *
5018 5022 * Note: the decoded arg is not freed here but in nfs4_ntov_table_free.
5019 5023 */
5020 5024 static int
5021 5025 decode_fattr4_attr(nfs4_attr_cmd_t cmd, struct nfs4_svgetit_arg *sargp,
5022 5026 int k, XDR *xdrp, bitmap4 *resp_bval, union nfs4_attr_u *nap)
5023 5027 {
5024 5028 int error = 0;
5025 5029 bool_t set_later;
5026 5030
5027 5031 sargp->vap->va_mask |= nfs4_ntov_map[k].vbit;
5028 5032
5029 5033 if ((*nfs4_ntov_map[k].xfunc)(xdrp, nap)) {
5030 5034 set_later = nfs4_ntov_map[k].vbit || nfs4_ntov_map[k].vfsstat;
5031 5035 /*
5032 5036 * don't verify yet if a vattr or sb dependent attr,
5033 5037 * because we don't have their sys values yet.
5034 5038 * Will be done later.
5035 5039 */
5036 5040 if (! (set_later && (cmd == NFS4ATTR_VERIT))) {
5037 5041 /*
5038 5042 * ACLs are a special case, since setting the MODE
5039 5043 * conflicts with setting the ACL. We delay setting
5040 5044 * the ACL until all other attributes have been set.
5041 5045 * The ACL gets set in do_rfs4_op_setattr().
5042 5046 */
5043 5047 if (nfs4_ntov_map[k].fbit != FATTR4_ACL_MASK) {
5044 5048 error = (*nfs4_ntov_map[k].sv_getit)(cmd,
5045 5049 sargp, nap);
5046 5050 if (error) {
5047 5051 xdr_free(nfs4_ntov_map[k].xfunc,
5048 5052 (caddr_t)nap);
5049 5053 }
5050 5054 }
5051 5055 }
5052 5056 } else {
5053 5057 #ifdef DEBUG
5054 5058 cmn_err(CE_NOTE, "decode_fattr4_attr: error "
5055 5059 "decoding attribute %d\n", k);
5056 5060 #endif
5057 5061 error = EINVAL;
5058 5062 }
5059 5063 if (!error && resp_bval && !set_later) {
5060 5064 *resp_bval |= nfs4_ntov_map[k].fbit;
5061 5065 }
5062 5066
5063 5067 return (error);
5064 5068 }
5065 5069
5066 5070 /*
5067 5071 * Set vattr based on incoming fattr4 attrs - used by setattr.
5068 5072 * Set response mask. Ignore any values that are not writable vattr attrs.
5069 5073 */
5070 5074 static nfsstat4
5071 5075 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5072 5076 struct nfs4_svgetit_arg *sargp, struct nfs4_ntov_table *ntovp,
5073 5077 nfs4_attr_cmd_t cmd)
5074 5078 {
5075 5079 int error = 0;
5076 5080 int i;
5077 5081 char *attrs = fattrp->attrlist4;
5078 5082 uint32_t attrslen = fattrp->attrlist4_len;
5079 5083 XDR xdr;
5080 5084 nfsstat4 status = NFS4_OK;
5081 5085 vnode_t *vp = cs->vp;
5082 5086 union nfs4_attr_u *na;
5083 5087 uint8_t *amap;
5084 5088
5085 5089 #ifndef lint
5086 5090 /*
5087 5091 * Make sure that maximum attribute number can be expressed as an
5088 5092 * 8 bit quantity.
5089 5093 */
5090 5094 ASSERT(NFS4_MAXNUM_ATTRS <= (UINT8_MAX + 1));
5091 5095 #endif
5092 5096
5093 5097 if (vp == NULL) {
5094 5098 if (resp)
5095 5099 *resp = 0;
5096 5100 return (NFS4ERR_NOFILEHANDLE);
5097 5101 }
5098 5102 if (cs->access == CS_ACCESS_DENIED) {
5099 5103 if (resp)
5100 5104 *resp = 0;
5101 5105 return (NFS4ERR_ACCESS);
5102 5106 }
5103 5107
5104 5108 sargp->op = cmd;
5105 5109 sargp->cs = cs;
5106 5110 sargp->flag = 0; /* may be set later */
5107 5111 sargp->vap->va_mask = 0;
5108 5112 sargp->rdattr_error = NFS4_OK;
5109 5113 sargp->rdattr_error_req = FALSE;
5110 5114 /* sargp->sbp is set by the caller */
5111 5115
5112 5116 xdrmem_create(&xdr, attrs, attrslen, XDR_DECODE);
5113 5117
5114 5118 na = ntovp->na;
5115 5119 amap = ntovp->amap;
5116 5120
5117 5121 /*
5118 5122 * The following loop iterates on the nfs4_ntov_map checking
5119 5123 * if the fbit is set in the requested bitmap.
5120 5124 * If set then we process the arguments using the
5121 5125 * rfs4_fattr4 conversion functions to populate the setattr
5122 5126 * vattr and va_mask. Any settable attrs that are not using vattr
5123 5127 * will be set in this loop.
5124 5128 */
5125 5129 for (i = 0; i < nfs4_ntov_map_size; i++) {
5126 5130 if (!(fattrp->attrmask & nfs4_ntov_map[i].fbit)) {
5127 5131 continue;
5128 5132 }
5129 5133 /*
5130 5134 * If setattr, must be a writable attr.
5131 5135 * If verify/nverify, must be a readable attr.
5132 5136 */
5133 5137 if ((error = (*nfs4_ntov_map[i].sv_getit)(
5134 5138 NFS4ATTR_SUPPORTED, sargp, NULL)) != 0) {
5135 5139 /*
5136 5140 * Client tries to set/verify an
5137 5141 * unsupported attribute, tries to set
5138 5142 * a read only attr or verify a write
5139 5143 * only one - error!
5140 5144 */
5141 5145 break;
5142 5146 }
5143 5147 /*
5144 5148 * Decode the attribute to set/verify
5145 5149 */
5146 5150 error = decode_fattr4_attr(cmd, sargp, nfs4_ntov_map[i].nval,
5147 5151 &xdr, resp ? resp : NULL, na);
5148 5152 if (error)
5149 5153 break;
5150 5154 *amap++ = (uint8_t)nfs4_ntov_map[i].nval;
5151 5155 na++;
5152 5156 (ntovp->attrcnt)++;
5153 5157 if (nfs4_ntov_map[i].vfsstat)
5154 5158 ntovp->vfsstat = TRUE;
5155 5159 }
5156 5160
5157 5161 if (error != 0)
5158 5162 status = (error == ENOTSUP ? NFS4ERR_ATTRNOTSUPP :
5159 5163 puterrno4(error));
5160 5164 /* xdrmem_destroy(&xdrs); */ /* NO-OP */
5161 5165 return (status);
5162 5166 }
5163 5167
/*
 * Apply the attributes described by fattrp to the current vnode (cs->vp).
 *
 * The attributes are first decoded with do_rfs4_set_attrs(NFS4ATTR_SETIT).
 * A size change on a regular file is done with VOP_SPACE when the caller
 * owns the file; the remaining vattr attributes go through VOP_SETATTR, and
 * an ACL, if requested, is applied last.
 *
 * resp    - out: bitmap of the attributes that were set; always masked with
 *           fattrp->attrmask before returning.
 * fattrp  - the attributes requested by the client.
 * cs      - compound state supplying the vnode and cred.
 * stateid - checked with rfs4_check_stateid() only when the size is set.
 *
 * Returns the NFSv4 status of the operation.
 */
5164 5168 static nfsstat4
5165 5169 do_rfs4_op_setattr(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5166 5170 stateid4 *stateid)
5167 5171 {
5168 5172 int error = 0;
5169 5173 struct nfs4_svgetit_arg sarg;
5170 5174 bool_t trunc;
5171 5175
5172 5176 nfsstat4 status = NFS4_OK;
5173 5177 cred_t *cr = cs->cr;
5174 5178 vnode_t *vp = cs->vp;
5175 5179 struct nfs4_ntov_table ntov;
5176 5180 struct statvfs64 sb;
5177 5181 struct vattr bva;
5178 5182 struct flock64 bf;
5179 5183 int in_crit = 0;
5180 5184 uint_t saved_mask = 0;
5181 5185 caller_context_t ct;
5182 5186
5183 5187 *resp = 0;
5184 5188 sarg.sbp = &sb;
5185 5189 sarg.is_referral = B_FALSE;
5186 5190 nfs4_ntov_table_init(&ntov);
5187 5191 status = do_rfs4_set_attrs(resp, fattrp, cs, &sarg, &ntov,
5188 5192 NFS4ATTR_SETIT);
5189 5193 if (status != NFS4_OK) {
5190 5194 /*
5191 5195 * failed set attrs
5192 5196 */
5193 5197 goto done;
5194 5198 }
5195 5199 if ((sarg.vap->va_mask == 0) &&
5196 5200 (! (fattrp->attrmask & FATTR4_ACL_MASK))) {
5197 5201 /*
5198 5202 * no further work to be done
5199 5203 */
5200 5204 goto done;
5201 5205 }
5202 5206
5203 5207 /*
5204 5208 * If we got a request to set the ACL and the MODE, only
5205 5209 * allow changing VSUID, VSGID, and VSVTX. Attempting
5206 5210 * to change any other bits, along with setting an ACL,
5207 5211 * gives NFS4ERR_INVAL.
5208 5212 */
5209 5213 if ((fattrp->attrmask & FATTR4_ACL_MASK) &&
5210 5214 (fattrp->attrmask & FATTR4_MODE_MASK)) {
5211 5215 vattr_t va;
5212 5216
5213 5217 va.va_mask = AT_MODE;
5214 5218 error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
5215 5219 if (error) {
5216 5220 status = puterrno4(error);
5217 5221 goto done;
5218 5222 }
/* XOR exposes the mode bits that differ; only the three above may. */
5219 5223 if ((sarg.vap->va_mode ^ va.va_mode) &
5220 5224 ~(VSUID | VSGID | VSVTX)) {
5221 5225 status = NFS4ERR_INVAL;
5222 5226 goto done;
5223 5227 }
5224 5228 }
5225 5229
5226 5230 /* Check stateid only if size has been set */
5227 5231 if (sarg.vap->va_mask & AT_SIZE) {
5228 5232 trunc = (sarg.vap->va_size == 0);
5229 5233 status = rfs4_check_stateid(FWRITE, cs->vp, stateid,
5230 5234 trunc, &cs->deleg, sarg.vap->va_mask & AT_SIZE, &ct);
5231 5235 if (status != NFS4_OK)
5232 5236 goto done;
5233 5237 } else {
/* No stateid check was made, so the caller context is filled in by hand. */
5234 5238 ct.cc_sysid = 0;
5235 5239 ct.cc_pid = 0;
5236 5240 ct.cc_caller_id = nfs4_srv_caller_id;
5237 5241 ct.cc_flags = CC_DONTBLOCK;
5238 5242 }
5239 5243
5240 5244 /* XXX start of possible race with delegations */
5241 5245
5242 5246 /*
5243 5247 * We need to specially handle size changes because it is
5244 5248 * possible for the client to create a file with read-only
5245 5249 * modes, but with the file opened for writing. If the client
5246 5250 * then tries to set the file size, e.g. ftruncate(3C),
5247 5251 * fcntl(F_FREESP), the normal access checking done in
5248 5252 * VOP_SETATTR would prevent the client from doing it even though
5249 5253 * it should be allowed to do so. To get around this, we do the
5250 5254 * access checking for ourselves and use VOP_SPACE which doesn't
5251 5255 * do the access checking.
5252 5256 * Also the client should not be allowed to change the file
5253 5257 * size if there is a conflicting non-blocking mandatory lock in
5254 5258 * the region of the change.
5255 5259 */
5256 5260 if (vp->v_type == VREG && (sarg.vap->va_mask & AT_SIZE)) {
5257 5261 u_offset_t offset;
5258 5262 ssize_t length;
5259 5263
5260 5264 /*
5261 5265 * ufs_setattr clears AT_SIZE from vap->va_mask, but
5262 5266 * before returning, sarg.vap->va_mask is used to
5263 5267 * generate the setattr reply bitmap. We also clear
5264 5268 * AT_SIZE below before calling VOP_SPACE. For both
5265 5269 * of these cases, the va_mask needs to be saved here
5266 5270 * and restored after calling VOP_SETATTR.
5267 5271 */
5268 5272 saved_mask = sarg.vap->va_mask;
5269 5273
5270 5274 /*
5271 5275 * Check any possible conflict due to NBMAND locks.
5272 5276 * Get into critical region before VOP_GETATTR, so the
5273 5277 * size attribute is valid when checking conflicts.
5274 5278 */
5275 5279 if (nbl_need_check(vp)) {
5276 5280 nbl_start_crit(vp, RW_READER);
5277 5281 in_crit = 1;
5278 5282 }
5279 5283
5280 5284 bva.va_mask = AT_UID|AT_SIZE;
5281 5285 if (error = VOP_GETATTR(vp, &bva, 0, cr, &ct)) {
5282 5286 status = puterrno4(error);
5283 5287 goto done;
5284 5288 }
5285 5289
5286 5290 if (in_crit) {
/* The region checked lies between the old and the new end of file. */
5287 5291 if (sarg.vap->va_size < bva.va_size) {
5288 5292 offset = sarg.vap->va_size;
5289 5293 length = bva.va_size - sarg.vap->va_size;
5290 5294 } else {
5291 5295 offset = bva.va_size;
5292 5296 length = sarg.vap->va_size - bva.va_size;
5293 5297 }
5294 5298 if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
5295 5299 &ct)) {
5296 5300 status = NFS4ERR_LOCKED;
5297 5301 goto done;
5298 5302 }
5299 5303 }
5300 5304
/* Owner of the file: resize with VOP_SPACE and drop AT_SIZE from the mask. */
5301 5305 if (crgetuid(cr) == bva.va_uid) {
5302 5306 sarg.vap->va_mask &= ~AT_SIZE;
5303 5307 bf.l_type = F_WRLCK;
5304 5308 bf.l_whence = 0;
5305 5309 bf.l_start = (off64_t)sarg.vap->va_size;
5306 5310 bf.l_len = 0;
5307 5311 bf.l_sysid = 0;
5308 5312 bf.l_pid = 0;
5309 5313 error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
5310 5314 (offset_t)sarg.vap->va_size, cr, &ct);
5311 5315 }
5312 5316 }
5313 5317
/* Skipped when VOP_SPACE failed or when no vattr attributes remain to set. */
5314 5318 if (!error && sarg.vap->va_mask != 0)
5315 5319 error = VOP_SETATTR(vp, sarg.vap, sarg.flag, cr, &ct);
5316 5320
5317 5321 /* restore va_mask -- ufs_setattr clears AT_SIZE */
5318 5322 if (saved_mask & AT_SIZE)
5319 5323 sarg.vap->va_mask |= AT_SIZE;
5320 5324
5321 5325 /*
5322 5326 * If an ACL was being set, it has been delayed until now,
5323 5327 * in order to set the mode (via the VOP_SETATTR() above) first.
5324 5328 */
5325 5329 if ((! error) && (fattrp->attrmask & FATTR4_ACL_MASK)) {
5326 5330 int i;
5327 5331
/*
 * Find the slot holding the decoded ACL argument.
 * NOTE(review): the scan is bounded by NFS4_MAXNUM_ATTRS rather than
 * ntov.attrcnt -- confirm that amap entries past attrcnt are initialized.
 */
5328 5332 for (i = 0; i < NFS4_MAXNUM_ATTRS; i++)
5329 5333 if (ntov.amap[i] == FATTR4_ACL)
5330 5334 break;
5331 5335 if (i < NFS4_MAXNUM_ATTRS) {
5332 5336 error = (*nfs4_ntov_map[FATTR4_ACL].sv_getit)(
5333 5337 NFS4ATTR_SETIT, &sarg, &ntov.na[i]);
5334 5338 if (error == 0) {
5335 5339 *resp |= FATTR4_ACL_MASK;
5336 5340 } else if (error == ENOTSUP) {
5337 5341 (void) rfs4_verify_attr(&sarg, resp, &ntov);
5338 5342 status = NFS4ERR_ATTRNOTSUPP;
5339 5343 goto done;
5340 5344 }
/* Any other ACL error falls through to the generic error handling below. */
5341 5345 } else {
5342 5346 NFS4_DEBUG(rfs4_debug,
5343 5347 (CE_NOTE, "do_rfs4_op_setattr: "
5344 5348 "unable to find ACL in fattr4"));
5345 5349 error = EINVAL;
5346 5350 }
5347 5351 }
5348 5352
5349 5353 if (error) {
5350 5354 /* check if a monitor detected a delegation conflict */
5351 5355 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
5352 5356 status = NFS4ERR_DELAY;
5353 5357 else
5354 5358 status = puterrno4(error);
5355 5359
5356 5360 /*
5357 5361 * Set the response bitmap when setattr failed.
5358 5362 * If VOP_SETATTR partially succeeded, test by doing a
5359 5363 * VOP_GETATTR on the object and comparing the data
5360 5364 * to the setattr arguments.
5361 5365 */
5362 5366 (void) rfs4_verify_attr(&sarg, resp, &ntov);
5363 5367 } else {
5364 5368 /*
5365 5369 * Force modified metadata out to stable storage.
5366 5370 */
5367 5371 (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
5368 5372 /*
5369 5373 * Set response bitmap
5370 5374 */
5371 5375 nfs4_vmask_to_nmask_set(sarg.vap->va_mask, resp);
5372 5376 }
5373 5377
5374 5378 /* Return early and already have a NFSv4 error */
5375 5379 done:
5376 5380 /*
5377 5381 * Except for nfs4_vmask_to_nmask_set(), vattr --> fattr
5378 5382 * conversion sets both readable and writeable NFS4 attrs
5379 5383 * for AT_MTIME and AT_ATIME. The line below masks out
5380 5384 * unrequested attrs from the setattr result bitmap. This
5381 5385 * is placed after the done: label to catch the ATTRNOTSUP
5382 5386 * case.
5383 5387 */
5384 5388 *resp &= fattrp->attrmask;
5385 5389
5386 5390 if (in_crit)
5387 5391 nbl_end_crit(vp);
5388 5392
5389 5393 nfs4_ntov_table_free(&ntov, &sarg);
5390 5394
5391 5395 return (status);
5392 5396 }
5393 5397
5394 5398 /* ARGSUSED */
5395 5399 static void
5396 5400 rfs4_op_setattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5397 5401 struct compound_state *cs)
5398 5402 {
5399 5403 SETATTR4args *args = &argop->nfs_argop4_u.opsetattr;
5400 5404 SETATTR4res *resp = &resop->nfs_resop4_u.opsetattr;
5401 5405 bslabel_t *clabel;
5402 5406
5403 5407 DTRACE_NFSV4_2(op__setattr__start, struct compound_state *, cs,
5404 5408 SETATTR4args *, args);
5405 5409
5406 5410 if (cs->vp == NULL) {
5407 5411 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5408 5412 goto out;
5409 5413 }
5410 5414
5411 5415 /*
5412 5416 * If there is an unshared filesystem mounted on this vnode,
5413 5417 * do not allow to setattr on this vnode.
5414 5418 */
5415 5419 if (vn_ismntpt(cs->vp)) {
5416 5420 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5417 5421 goto out;
5418 5422 }
5419 5423
5420 5424 resp->attrsset = 0;
5421 5425
5422 5426 if (rdonly4(req, cs)) {
5423 5427 *cs->statusp = resp->status = NFS4ERR_ROFS;
5424 5428 goto out;
5425 5429 }
5426 5430
5427 5431 /* check label before setting attributes */
5428 5432 if (is_system_labeled()) {
5429 5433 ASSERT(req->rq_label != NULL);
5430 5434 clabel = req->rq_label;
5431 5435 DTRACE_PROBE2(tx__rfs4__log__info__opsetattr__clabel, char *,
5432 5436 "got client label from request(1)",
5433 5437 struct svc_req *, req);
5434 5438 if (!blequal(&l_admin_low->tsl_label, clabel)) {
5435 5439 if (!do_rfs_label_check(clabel, cs->vp,
5436 5440 EQUALITY_CHECK, cs->exi)) {
5437 5441 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5438 5442 goto out;
5439 5443 }
5440 5444 }
5441 5445 }
5442 5446
5443 5447 *cs->statusp = resp->status =
5444 5448 do_rfs4_op_setattr(&resp->attrsset, &args->obj_attributes, cs,
5445 5449 &args->stateid);
5446 5450
5447 5451 out:
5448 5452 DTRACE_NFSV4_2(op__setattr__done, struct compound_state *, cs,
5449 5453 SETATTR4res *, resp);
5450 5454 }
5451 5455
5452 5456 /* ARGSUSED */
5453 5457 static void
5454 5458 rfs4_op_verify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5455 5459 struct compound_state *cs)
5456 5460 {
5457 5461 /*
5458 5462 * verify and nverify are exactly the same, except that nverify
5459 5463 * succeeds when some argument changed, and verify succeeds when
5460 5464 * when none changed.
5461 5465 */
5462 5466
5463 5467 VERIFY4args *args = &argop->nfs_argop4_u.opverify;
5464 5468 VERIFY4res *resp = &resop->nfs_resop4_u.opverify;
5465 5469
5466 5470 int error;
5467 5471 struct nfs4_svgetit_arg sarg;
5468 5472 struct statvfs64 sb;
5469 5473 struct nfs4_ntov_table ntov;
5470 5474
5471 5475 DTRACE_NFSV4_2(op__verify__start, struct compound_state *, cs,
5472 5476 VERIFY4args *, args);
5473 5477
5474 5478 if (cs->vp == NULL) {
5475 5479 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5476 5480 goto out;
5477 5481 }
5478 5482
5479 5483 sarg.sbp = &sb;
5480 5484 sarg.is_referral = B_FALSE;
5481 5485 nfs4_ntov_table_init(&ntov);
5482 5486 resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5483 5487 &sarg, &ntov, NFS4ATTR_VERIT);
5484 5488 if (resp->status != NFS4_OK) {
5485 5489 /*
5486 5490 * do_rfs4_set_attrs will try to verify systemwide attrs,
5487 5491 * so could return -1 for "no match".
5488 5492 */
5489 5493 if (resp->status == -1)
5490 5494 resp->status = NFS4ERR_NOT_SAME;
5491 5495 goto done;
5492 5496 }
5493 5497 error = rfs4_verify_attr(&sarg, NULL, &ntov);
5494 5498 switch (error) {
5495 5499 case 0:
5496 5500 resp->status = NFS4_OK;
5497 5501 break;
5498 5502 case -1:
5499 5503 resp->status = NFS4ERR_NOT_SAME;
5500 5504 break;
5501 5505 default:
5502 5506 resp->status = puterrno4(error);
5503 5507 break;
5504 5508 }
5505 5509 done:
5506 5510 *cs->statusp = resp->status;
5507 5511 nfs4_ntov_table_free(&ntov, &sarg);
5508 5512 out:
5509 5513 DTRACE_NFSV4_2(op__verify__done, struct compound_state *, cs,
5510 5514 VERIFY4res *, resp);
5511 5515 }
5512 5516
5513 5517 /* ARGSUSED */
5514 5518 static void
5515 5519 rfs4_op_nverify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5516 5520 struct compound_state *cs)
5517 5521 {
5518 5522 /*
5519 5523 * verify and nverify are exactly the same, except that nverify
5520 5524 * succeeds when some argument changed, and verify succeeds when
5521 5525 * when none changed.
5522 5526 */
5523 5527
5524 5528 NVERIFY4args *args = &argop->nfs_argop4_u.opnverify;
5525 5529 NVERIFY4res *resp = &resop->nfs_resop4_u.opnverify;
5526 5530
5527 5531 int error;
5528 5532 struct nfs4_svgetit_arg sarg;
5529 5533 struct statvfs64 sb;
5530 5534 struct nfs4_ntov_table ntov;
5531 5535
5532 5536 DTRACE_NFSV4_2(op__nverify__start, struct compound_state *, cs,
5533 5537 NVERIFY4args *, args);
5534 5538
5535 5539 if (cs->vp == NULL) {
5536 5540 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5537 5541 DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5538 5542 NVERIFY4res *, resp);
5539 5543 return;
5540 5544 }
5541 5545 sarg.sbp = &sb;
5542 5546 sarg.is_referral = B_FALSE;
5543 5547 nfs4_ntov_table_init(&ntov);
5544 5548 resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5545 5549 &sarg, &ntov, NFS4ATTR_VERIT);
5546 5550 if (resp->status != NFS4_OK) {
5547 5551 /*
5548 5552 * do_rfs4_set_attrs will try to verify systemwide attrs,
5549 5553 * so could return -1 for "no match".
5550 5554 */
5551 5555 if (resp->status == -1)
5552 5556 resp->status = NFS4_OK;
5553 5557 goto done;
5554 5558 }
5555 5559 error = rfs4_verify_attr(&sarg, NULL, &ntov);
5556 5560 switch (error) {
5557 5561 case 0:
5558 5562 resp->status = NFS4ERR_SAME;
5559 5563 break;
5560 5564 case -1:
5561 5565 resp->status = NFS4_OK;
5562 5566 break;
5563 5567 default:
5564 5568 resp->status = puterrno4(error);
5565 5569 break;
5566 5570 }
5567 5571 done:
5568 5572 *cs->statusp = resp->status;
5569 5573 nfs4_ntov_table_free(&ntov, &sarg);
5570 5574
5571 5575 DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5572 5576 NVERIFY4res *, resp);
5573 5577 }
5574 5578
5575 5579 /*
5576 5580 * XXX - This should live in an NFS header file.
5577 5581 */
5578 5582 #define MAX_IOVECS 12
5579 5583
5580 5584 /* ARGSUSED */
5581 5585 static void
5582 5586 rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5583 5587 struct compound_state *cs)
5584 5588 {
5585 5589 WRITE4args *args = &argop->nfs_argop4_u.opwrite;
5586 5590 WRITE4res *resp = &resop->nfs_resop4_u.opwrite;
5587 5591 int error;
5588 5592 vnode_t *vp;
5589 5593 struct vattr bva;
5590 5594 u_offset_t rlimit;
5591 5595 struct uio uio;
5592 5596 struct iovec iov[MAX_IOVECS];
5593 5597 struct iovec *iovp;
5594 5598 int iovcnt;
5595 5599 int ioflag;
5596 5600 cred_t *savecred, *cr;
5597 5601 bool_t *deleg = &cs->deleg;
5598 5602 nfsstat4 stat;
5599 5603 int in_crit = 0;
5600 5604 caller_context_t ct;
5601 5605 nfs4_srv_t *nsrv4;
5602 5606
5603 5607 DTRACE_NFSV4_2(op__write__start, struct compound_state *, cs,
5604 5608 WRITE4args *, args);
5605 5609
5606 5610 vp = cs->vp;
5607 5611 if (vp == NULL) {
5608 5612 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5609 5613 goto out;
5610 5614 }
5611 5615 if (cs->access == CS_ACCESS_DENIED) {
5612 5616 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5613 5617 goto out;
5614 5618 }
5615 5619
5616 5620 cr = cs->cr;
5617 5621
5618 5622 if ((stat = rfs4_check_stateid(FWRITE, vp, &args->stateid, FALSE,
5619 5623 deleg, TRUE, &ct)) != NFS4_OK) {
5620 5624 *cs->statusp = resp->status = stat;
5621 5625 goto out;
5622 5626 }
5623 5627
5624 5628 /*
5625 5629 * We have to enter the critical region before calling VOP_RWLOCK
5626 5630 * to avoid a deadlock with ufs.
5627 5631 */
5628 5632 if (nbl_need_check(vp)) {
5629 5633 nbl_start_crit(vp, RW_READER);
5630 5634 in_crit = 1;
5631 5635 if (nbl_conflict(vp, NBL_WRITE,
5632 5636 args->offset, args->data_len, 0, &ct)) {
5633 5637 *cs->statusp = resp->status = NFS4ERR_LOCKED;
5634 5638 goto out;
5635 5639 }
5636 5640 }
5637 5641
5638 5642 bva.va_mask = AT_MODE | AT_UID;
5639 5643 error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
5640 5644
5641 5645 /*
5642 5646 * If we can't get the attributes, then we can't do the
5643 5647 * right access checking. So, we'll fail the request.
5644 5648 */
5645 5649 if (error) {
5646 5650 *cs->statusp = resp->status = puterrno4(error);
5647 5651 goto out;
5648 5652 }
5649 5653
5650 5654 if (rdonly4(req, cs)) {
5651 5655 *cs->statusp = resp->status = NFS4ERR_ROFS;
5652 5656 goto out;
5653 5657 }
5654 5658
5655 5659 if (vp->v_type != VREG) {
5656 5660 *cs->statusp = resp->status =
5657 5661 ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
5658 5662 goto out;
5659 5663 }
5660 5664
5661 5665 if (crgetuid(cr) != bva.va_uid &&
5662 5666 (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct))) {
5663 5667 *cs->statusp = resp->status = puterrno4(error);
5664 5668 goto out;
5665 5669 }
5666 5670
5667 5671 if (MANDLOCK(vp, bva.va_mode)) {
5668 5672 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5669 5673 goto out;
5670 5674 }
5671 5675
5672 5676 nsrv4 = nfs4_get_srv();
5673 5677 if (args->data_len == 0) {
5674 5678 *cs->statusp = resp->status = NFS4_OK;
5675 5679 resp->count = 0;
5676 5680 resp->committed = args->stable;
5677 5681 resp->writeverf = nsrv4->write4verf;
5678 5682 goto out;
5679 5683 }
5680 5684
5681 5685 if (args->mblk != NULL) {
5682 5686 mblk_t *m;
5683 5687 uint_t bytes, round_len;
5684 5688
5685 5689 iovcnt = 0;
5686 5690 bytes = 0;
5687 5691 round_len = roundup(args->data_len, BYTES_PER_XDR_UNIT);
5688 5692 for (m = args->mblk;
5689 5693 m != NULL && bytes < round_len;
5690 5694 m = m->b_cont) {
5691 5695 iovcnt++;
5692 5696 bytes += MBLKL(m);
5693 5697 }
5694 5698 #ifdef DEBUG
5695 5699 /* should have ended on an mblk boundary */
5696 5700 if (bytes != round_len) {
5697 5701 printf("bytes=0x%x, round_len=0x%x, req len=0x%x\n",
5698 5702 bytes, round_len, args->data_len);
5699 5703 printf("args=%p, args->mblk=%p, m=%p", (void *)args,
5700 5704 (void *)args->mblk, (void *)m);
5701 5705 ASSERT(bytes == round_len);
5702 5706 }
5703 5707 #endif
5704 5708 if (iovcnt <= MAX_IOVECS) {
5705 5709 iovp = iov;
5706 5710 } else {
5707 5711 iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
5708 5712 }
5709 5713 mblk_to_iov(args->mblk, iovcnt, iovp);
5710 5714 } else if (args->rlist != NULL) {
5711 5715 iovcnt = 1;
5712 5716 iovp = iov;
5713 5717 iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
5714 5718 iovp->iov_len = args->data_len;
5715 5719 } else {
5716 5720 iovcnt = 1;
5717 5721 iovp = iov;
5718 5722 iovp->iov_base = args->data_val;
5719 5723 iovp->iov_len = args->data_len;
5720 5724 }
5721 5725
5722 5726 uio.uio_iov = iovp;
5723 5727 uio.uio_iovcnt = iovcnt;
5724 5728
5725 5729 uio.uio_segflg = UIO_SYSSPACE;
5726 5730 uio.uio_extflg = UIO_COPY_DEFAULT;
5727 5731 uio.uio_loffset = args->offset;
5728 5732 uio.uio_resid = args->data_len;
5729 5733 uio.uio_llimit = curproc->p_fsz_ctl;
5730 5734 rlimit = uio.uio_llimit - args->offset;
5731 5735 if (rlimit < (u_offset_t)uio.uio_resid)
5732 5736 uio.uio_resid = (int)rlimit;
5733 5737
5734 5738 if (args->stable == UNSTABLE4)
5735 5739 ioflag = 0;
5736 5740 else if (args->stable == FILE_SYNC4)
5737 5741 ioflag = FSYNC;
5738 5742 else if (args->stable == DATA_SYNC4)
5739 5743 ioflag = FDSYNC;
5740 5744 else {
5741 5745 if (iovp != iov)
5742 5746 kmem_free(iovp, sizeof (*iovp) * iovcnt);
5743 5747 *cs->statusp = resp->status = NFS4ERR_INVAL;
5744 5748 goto out;
5745 5749 }
5746 5750
5747 5751 /*
5748 5752 * We're changing creds because VM may fault and we need
5749 5753 * the cred of the current thread to be used if quota
5750 5754 * checking is enabled.
5751 5755 */
5752 5756 savecred = curthread->t_cred;
5753 5757 curthread->t_cred = cr;
5754 5758 error = do_io(FWRITE, vp, &uio, ioflag, cr, &ct);
5755 5759 curthread->t_cred = savecred;
5756 5760
5757 5761 if (iovp != iov)
5758 5762 kmem_free(iovp, sizeof (*iovp) * iovcnt);
5759 5763
5760 5764 if (error) {
5761 5765 *cs->statusp = resp->status = puterrno4(error);
5762 5766 goto out;
5763 5767 }
5764 5768
5765 5769 *cs->statusp = resp->status = NFS4_OK;
5766 5770 resp->count = args->data_len - uio.uio_resid;
5767 5771
5768 5772 if (ioflag == 0)
5769 5773 resp->committed = UNSTABLE4;
5770 5774 else
5771 5775 resp->committed = FILE_SYNC4;
5772 5776
5773 5777 resp->writeverf = nsrv4->write4verf;
5774 5778
5775 5779 out:
5776 5780 if (in_crit)
5777 5781 nbl_end_crit(vp);
5778 5782
5779 5783 DTRACE_NFSV4_2(op__write__done, struct compound_state *, cs,
5780 5784 WRITE4res *, resp);
5781 5785 }
5782 5786
5783 5787
/*
 * XXX put in a header file
 *
 * Local prototype for sec_svc_getcred().  rfs4_compound() passes it the
 * RPC request, a freshly obtained cred and the addresses of the compound
 * state's principal and flavor fields, and treats a return value of 0 as
 * a bad credential.
 */
extern int sec_svc_getcred(struct svc_req *, cred_t *, caddr_t *, int *);
5786 5790
/*
 * rfs4_compound: execute one NFSv4 COMPOUND request.
 *
 *	args	decoded COMPOUND arguments (tag, minorversion, op array)
 *	resp	reply to fill in; the tag is always copied from the request,
 *		and resp->array is allocated only once the credential check
 *		has passed
 *	exi	must be NULL on entry (asserted)
 *	req	the RPC request; its rq_label is freed and cleared at the end
 *		of a fully processed compound
 *	cr	must be NULL on entry (asserted); a cred is obtained here with
 *		crget() and released before returning
 *	rv	optional; set to 0 on entry and to 1 only when
 *		sec_svc_getcred() rejects the request, in which case
 *		svcerr_badcred() has already been called
 *
 * Ops are dispatched in order through rfsv4disptab[] until one leaves a
 * status other than NFS4_OK in resp->status (or an out-of-range opcode is
 * seen).  When processing stops before the last op, the results array is
 * shrunk so that it holds exactly the ops that were executed.
 */
void
rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi,
    struct svc_req *req, cred_t *cr, int *rv)
{
	uint_t i;
	struct compound_state cs;
	nfs4_srv_t *nsrv4;
	nfs_export_t *ne = nfs_get_export();

	if (rv != NULL)
		*rv = 0;
	rfs4_init_compound_state(&cs);
	/*
	 * Form a reply tag by copying over the request tag.
	 */
	resp->tag.utf8string_val =
	    kmem_alloc(args->tag.utf8string_len, KM_SLEEP);
	resp->tag.utf8string_len = args->tag.utf8string_len;
	bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
	    resp->tag.utf8string_len);

	cs.statusp = &resp->status;
	cs.req = req;
	/* No results yet; the early returns below leave the array empty. */
	resp->array = NULL;
	resp->array_len = 0;

	/*
	 * XXX for now, minorversion should be zero
	 */
	if (args->minorversion != NFS4_MINORVERSION) {
		DTRACE_NFSV4_2(compound__start, struct compound_state *,
		    &cs, COMPOUND4args *, args);
		resp->status = NFS4ERR_MINOR_VERS_MISMATCH;
		DTRACE_NFSV4_2(compound__done, struct compound_state *,
		    &cs, COMPOUND4res *, resp);
		return;
	}

	/* An empty compound succeeds trivially (no probes are fired). */
	if (args->array_len == 0) {
		resp->status = NFS4_OK;
		return;
	}

	ASSERT(exi == NULL);
	ASSERT(cr == NULL);

	cr = crget();
	ASSERT(cr != NULL);

	/*
	 * A zero return means the credential could not be established:
	 * drop the cred, report a bad credential on the transport and tell
	 * the caller through *rv.
	 */
	if (sec_svc_getcred(req, cr, &cs.principal, &cs.nfsflavor) == 0) {
		DTRACE_NFSV4_2(compound__start, struct compound_state *,
		    &cs, COMPOUND4args *, args);
		crfree(cr);
		DTRACE_NFSV4_2(compound__done, struct compound_state *,
		    &cs, COMPOUND4res *, resp);
		svcerr_badcred(req->rq_xprt);
		if (rv != NULL)
			*rv = 1;
		return;
	}
	/* One zeroed result slot per requested op. */
	resp->array_len = args->array_len;
	resp->array = kmem_zalloc(args->array_len * sizeof (nfs_resop4),
	    KM_SLEEP);

	cs.basecr = cr;
	nsrv4 = nfs4_get_srv();

	DTRACE_NFSV4_2(compound__start, struct compound_state *, &cs,
	    COMPOUND4args *, args);

	/*
	 * For now, NFS4 compound processing must be protected by
	 * exported_lock because it can access more than one exportinfo
	 * per compound and share/unshare can now change multiple
	 * exinfo structs.  The NFS2/3 code only refs 1 exportinfo
	 * per proc (excluding public exinfo), and exi_count design
	 * is sufficient to protect concurrent execution of NFS2/3
	 * ops along with unexport.  This lock will be removed as
	 * part of the NFSv4 phase 2 namespace redesign work.
	 */
	rw_enter(&ne->exported_lock, RW_READER);

	/*
	 * If this is the first compound we've seen, we need to start all
	 * new instances' grace periods.
	 */
	if (nsrv4->seen_first_compound == 0) {
		rfs4_grace_start_new(nsrv4);
		/*
		 * This must be set after rfs4_grace_start_new(), otherwise
		 * another thread could proceed past here before the former
		 * is finished.
		 */
		nsrv4->seen_first_compound = 1;
	}

	for (i = 0; i < args->array_len && cs.cont; i++) {
		nfs_argop4 *argop;
		nfs_resop4 *resop;
		uint_t op;

		argop = &args->array[i];
		resop = &resp->array[i];
		/* The result carries the same opcode as the argument. */
		resop->resop = argop->argop;
		op = (uint_t)resop->resop;

		if (op < rfsv4disp_cnt) {
			/*
			 * Count the individual ops here; NULL and COMPOUND
			 * are counted in common_dispatch()
			 */
			rfsproccnt_v4_ptr[op].value.ui64++;

			NFS4_DEBUG(rfs4_debug > 1,
			    (CE_NOTE, "Executing %s", rfs4_op_string[op]));
			(*rfsv4disptab[op].dis_proc)(argop, resop, req, &cs);
			NFS4_DEBUG(rfs4_debug > 1, (CE_NOTE, "%s returned %d",
			    rfs4_op_string[op], *cs.statusp));
			/* The first failing op ends the compound. */
			if (*cs.statusp != NFS4_OK)
				cs.cont = FALSE;
		} else {
			/*
			 * This is effectively dead code since XDR code
			 * will have already returned BADXDR if op doesn't
			 * decode to legal value.  This only done for a
			 * day when XDR code doesn't verify v4 opcodes.
			 */
			op = OP_ILLEGAL;
			rfsproccnt_v4_ptr[OP_ILLEGAL_IDX].value.ui64++;

			rfs4_op_illegal(argop, resop, req, &cs);
			cs.cont = FALSE;
		}

		/*
		 * If not at last op, and if we are to stop, then
		 * compact the results array.
		 */
		if ((i + 1) < args->array_len && !cs.cont) {
			nfs_resop4 *new_res = kmem_alloc(
			    (i+1) * sizeof (nfs_resop4), KM_SLEEP);
			bcopy(resp->array,
			    new_res, (i+1) * sizeof (nfs_resop4));
			/* The old array was sized for every requested op. */
			kmem_free(resp->array,
			    args->array_len * sizeof (nfs_resop4));

			resp->array_len = i + 1;
			resp->array = new_res;
		}
	}

	rw_exit(&ne->exported_lock);

	/*
	 * clear exportinfo and vnode fields from compound_state before dtrace
	 * probe, to avoid tracing residual values for path and share path.
	 */
	if (cs.vp)
		VN_RELE(cs.vp);
	if (cs.saved_vp)
		VN_RELE(cs.saved_vp);
	cs.exi = cs.saved_exi = NULL;
	cs.vp = cs.saved_vp = NULL;

	DTRACE_NFSV4_2(compound__done, struct compound_state *, &cs,
	    COMPOUND4res *, resp);

	/* Release whatever else the ops left in the compound state. */
	if (cs.saved_fh.nfs_fh4_val)
		kmem_free(cs.saved_fh.nfs_fh4_val, NFS4_FHSIZE);

	if (cs.basecr)
		crfree(cs.basecr);
	if (cs.cr)
		crfree(cs.cr);
	/*
	 * done with this compound request, free the label
	 */

	if (req->rq_label != NULL) {
		kmem_free(req->rq_label, sizeof (bslabel_t));
		req->rq_label = NULL;
	}
}
5970 5974
5971 5975 /*
5972 5976 * XXX because of what appears to be duplicate calls to rfs4_compound_free
5973 5977 * XXX zero out the tag and array values. Need to investigate why the
5974 5978 * XXX calls occur, but at least prevent the panic for now.
5975 5979 */
5976 5980 void
5977 5981 rfs4_compound_free(COMPOUND4res *resp)
5978 5982 {
5979 5983 uint_t i;
5980 5984
5981 5985 if (resp->tag.utf8string_val) {
5982 5986 UTF8STRING_FREE(resp->tag)
5983 5987 }
5984 5988
5985 5989 for (i = 0; i < resp->array_len; i++) {
5986 5990 nfs_resop4 *resop;
5987 5991 uint_t op;
5988 5992
5989 5993 resop = &resp->array[i];
5990 5994 op = (uint_t)resop->resop;
5991 5995 if (op < rfsv4disp_cnt) {
5992 5996 (*rfsv4disptab[op].dis_resfree)(resop);
5993 5997 }
5994 5998 }
5995 5999 if (resp->array != NULL) {
5996 6000 kmem_free(resp->array, resp->array_len * sizeof (nfs_resop4));
5997 6001 }
5998 6002 }
5999 6003
6000 6004 /*
6001 6005 * Process the value of the compound request rpc flags, as a bit-AND
6002 6006 * of the individual per-op flags (idempotent, allowork, publicfh_ok)
6003 6007 */
6004 6008 void
6005 6009 rfs4_compound_flagproc(COMPOUND4args *args, int *flagp)
6006 6010 {
6007 6011 int i;
6008 6012 int flag = RPC_ALL;
6009 6013
6010 6014 for (i = 0; flag && i < args->array_len; i++) {
6011 6015 uint_t op;
6012 6016
6013 6017 op = (uint_t)args->array[i].argop;
6014 6018
6015 6019 if (op < rfsv4disp_cnt)
6016 6020 flag &= rfsv4disptab[op].dis_flags;
6017 6021 else
6018 6022 flag = 0;
6019 6023 }
6020 6024 *flagp = flag;
6021 6025 }
6022 6026
6023 6027 nfsstat4
6024 6028 rfs4_client_sysid(rfs4_client_t *cp, sysid_t *sp)
6025 6029 {
6026 6030 nfsstat4 e;
6027 6031
6028 6032 rfs4_dbe_lock(cp->rc_dbe);
6029 6033
6030 6034 if (cp->rc_sysidt != LM_NOSYSID) {
6031 6035 *sp = cp->rc_sysidt;
6032 6036 e = NFS4_OK;
6033 6037
6034 6038 } else if ((cp->rc_sysidt = lm_alloc_sysidt()) != LM_NOSYSID) {
6035 6039 *sp = cp->rc_sysidt;
6036 6040 e = NFS4_OK;
6037 6041
6038 6042 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
6039 6043 "rfs4_client_sysid: allocated 0x%x\n", *sp));
6040 6044 } else
6041 6045 e = NFS4ERR_DELAY;
6042 6046
6043 6047 rfs4_dbe_unlock(cp->rc_dbe);
6044 6048 return (e);
6045 6049 }
6046 6050
#if defined(DEBUG) && ! defined(lint)
/*
 * Debug aid: log one CE_NOTE line describing a file-lock request, i.e.
 * the fcntl-style operation, the lock type, the start offset, the length
 * (a zero length is shown as all ones) and the pid recorded in flk.
 * str is printed first as a caller-chosen prefix.
 */
static void
lock_print(char *str, int operation, struct flock64 *flk)
{
	char *opname;
	char *typname;
	longlong_t length;

	if (operation == F_GETLK)
		opname = "F_GETLK";
	else if (operation == F_SETLK)
		opname = "F_SETLK";
	else if (operation == F_SETLK_NBMAND)
		opname = "F_SETLK_NBMAND";
	else
		opname = "F_UNKNOWN";

	if (flk->l_type == F_UNLCK)
		typname = "F_UNLCK";
	else if (flk->l_type == F_RDLCK)
		typname = "F_RDLCK";
	else if (flk->l_type == F_WRLCK)
		typname = "F_WRLCK";
	else
		typname = "F_UNKNOWN";

	ASSERT(flk->l_whence == 0);

	length = (flk->l_len != 0) ? (longlong_t)flk->l_len : ~0LL;
	cmn_err(CE_NOTE, "%s: %s, type = %s, off = %llx len = %llx pid = %d",
	    str, opname, typname, (longlong_t)flk->l_start, length,
	    flk->l_pid);
}

#define	LOCK_PRINT(d, s, t, f) if (d) lock_print(s, t, f)
#else
#define	LOCK_PRINT(d, s, t, f)
#endif
6083 6087
/*
 * Credential check hook.  As written it performs no check at all: none of
 * the arguments is examined and the answer is always TRUE.
 */
/*ARGSUSED*/
static bool_t
creds_ok(cred_set_t cr_set, struct svc_req *req, struct compound_state *cs)
{
	return (TRUE);
}
6090 6094
6091 6095 /*
6092 6096 * Look up the pathname using the vp in cs as the directory vnode.
6093 6097 * cs->vp will be the vnode for the file on success
6094 6098 */
6095 6099
6096 6100 static nfsstat4
6097 6101 rfs4_lookup(component4 *component, struct svc_req *req,
6098 6102 struct compound_state *cs)
6099 6103 {
6100 6104 char *nm;
6101 6105 uint32_t len;
6102 6106 nfsstat4 status;
6103 6107 struct sockaddr *ca;
6104 6108 char *name;
6105 6109
6106 6110 if (cs->vp == NULL) {
6107 6111 return (NFS4ERR_NOFILEHANDLE);
6108 6112 }
6109 6113 if (cs->vp->v_type != VDIR) {
6110 6114 return (NFS4ERR_NOTDIR);
6111 6115 }
6112 6116
6113 6117 status = utf8_dir_verify(component);
6114 6118 if (status != NFS4_OK)
6115 6119 return (status);
6116 6120
6117 6121 nm = utf8_to_fn(component, &len, NULL);
6118 6122 if (nm == NULL) {
6119 6123 return (NFS4ERR_INVAL);
6120 6124 }
6121 6125
6122 6126 if (len > MAXNAMELEN) {
6123 6127 kmem_free(nm, len);
6124 6128 return (NFS4ERR_NAMETOOLONG);
6125 6129 }
6126 6130
6127 6131 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6128 6132 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6129 6133 MAXPATHLEN + 1);
6130 6134
6131 6135 if (name == NULL) {
6132 6136 kmem_free(nm, len);
6133 6137 return (NFS4ERR_INVAL);
6134 6138 }
6135 6139
6136 6140 status = do_rfs4_op_lookup(name, req, cs);
6137 6141
6138 6142 if (name != nm)
6139 6143 kmem_free(name, MAXPATHLEN + 1);
6140 6144
6141 6145 kmem_free(nm, len);
6142 6146
6143 6147 return (status);
6144 6148 }
6145 6149
6146 6150 static nfsstat4
6147 6151 rfs4_lookupfile(component4 *component, struct svc_req *req,
6148 6152 struct compound_state *cs, uint32_t access, change_info4 *cinfo)
6149 6153 {
6150 6154 nfsstat4 status;
6151 6155 vnode_t *dvp = cs->vp;
6152 6156 vattr_t bva, ava, fva;
6153 6157 int error;
6154 6158
6155 6159 /* Get "before" change value */
6156 6160 bva.va_mask = AT_CTIME|AT_SEQ;
6157 6161 error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6158 6162 if (error)
6159 6163 return (puterrno4(error));
6160 6164
6161 6165 /* rfs4_lookup may VN_RELE directory */
6162 6166 VN_HOLD(dvp);
6163 6167
6164 6168 status = rfs4_lookup(component, req, cs);
6165 6169 if (status != NFS4_OK) {
6166 6170 VN_RELE(dvp);
6167 6171 return (status);
6168 6172 }
6169 6173
6170 6174 /*
6171 6175 * Get "after" change value, if it fails, simply return the
6172 6176 * before value.
6173 6177 */
6174 6178 ava.va_mask = AT_CTIME|AT_SEQ;
6175 6179 if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6176 6180 ava.va_ctime = bva.va_ctime;
6177 6181 ava.va_seq = 0;
6178 6182 }
6179 6183 VN_RELE(dvp);
6180 6184
6181 6185 /*
6182 6186 * Validate the file is a file
6183 6187 */
6184 6188 fva.va_mask = AT_TYPE|AT_MODE;
6185 6189 error = VOP_GETATTR(cs->vp, &fva, 0, cs->cr, NULL);
6186 6190 if (error)
6187 6191 return (puterrno4(error));
6188 6192
6189 6193 if (fva.va_type != VREG) {
6190 6194 if (fva.va_type == VDIR)
6191 6195 return (NFS4ERR_ISDIR);
6192 6196 if (fva.va_type == VLNK)
6193 6197 return (NFS4ERR_SYMLINK);
6194 6198 return (NFS4ERR_INVAL);
6195 6199 }
6196 6200
6197 6201 NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime);
6198 6202 NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6199 6203
6200 6204 /*
6201 6205 * It is undefined if VOP_LOOKUP will change va_seq, so
6202 6206 * cinfo.atomic = TRUE only if we have
6203 6207 * non-zero va_seq's, and they have not changed.
6204 6208 */
6205 6209 if (bva.va_seq && ava.va_seq && ava.va_seq == bva.va_seq)
6206 6210 cinfo->atomic = TRUE;
6207 6211 else
6208 6212 cinfo->atomic = FALSE;
6209 6213
6210 6214 /* Check for mandatory locking */
6211 6215 cs->mandlock = MANDLOCK(cs->vp, fva.va_mode);
6212 6216 return (check_open_access(access, cs, req));
6213 6217 }
6214 6218
6215 6219 static nfsstat4
6216 6220 create_vnode(vnode_t *dvp, char *nm, vattr_t *vap, createmode4 mode,
6217 6221 cred_t *cr, vnode_t **vpp, bool_t *created)
6218 6222 {
6219 6223 int error;
6220 6224 nfsstat4 status = NFS4_OK;
6221 6225 vattr_t va;
6222 6226
6223 6227 tryagain:
6224 6228
6225 6229 /*
6226 6230 * The file open mode used is VWRITE. If the client needs
6227 6231 * some other semantic, then it should do the access checking
6228 6232 * itself. It would have been nice to have the file open mode
6229 6233 * passed as part of the arguments.
6230 6234 */
6231 6235
6232 6236 *created = TRUE;
6233 6237 error = VOP_CREATE(dvp, nm, vap, EXCL, VWRITE, vpp, cr, 0, NULL, NULL);
6234 6238
6235 6239 if (error) {
6236 6240 *created = FALSE;
6237 6241
6238 6242 /*
6239 6243 * If we got something other than file already exists
6240 6244 * then just return this error. Otherwise, we got
6241 6245 * EEXIST. If we were doing a GUARDED create, then
6242 6246 * just return this error. Otherwise, we need to
6243 6247 * make sure that this wasn't a duplicate of an
6244 6248 * exclusive create request.
6245 6249 *
6246 6250 * The assumption is made that a non-exclusive create
6247 6251 * request will never return EEXIST.
6248 6252 */
6249 6253
6250 6254 if (error != EEXIST || mode == GUARDED4) {
6251 6255 status = puterrno4(error);
6252 6256 return (status);
6253 6257 }
6254 6258 error = VOP_LOOKUP(dvp, nm, vpp, NULL, 0, NULL, cr,
6255 6259 NULL, NULL, NULL);
6256 6260
6257 6261 if (error) {
6258 6262 /*
6259 6263 * We couldn't find the file that we thought that
6260 6264 * we just created. So, we'll just try creating
6261 6265 * it again.
6262 6266 */
6263 6267 if (error == ENOENT)
6264 6268 goto tryagain;
6265 6269
6266 6270 status = puterrno4(error);
6267 6271 return (status);
6268 6272 }
6269 6273
6270 6274 if (mode == UNCHECKED4) {
6271 6275 /* existing object must be regular file */
6272 6276 if ((*vpp)->v_type != VREG) {
6273 6277 if ((*vpp)->v_type == VDIR)
6274 6278 status = NFS4ERR_ISDIR;
6275 6279 else if ((*vpp)->v_type == VLNK)
6276 6280 status = NFS4ERR_SYMLINK;
6277 6281 else
6278 6282 status = NFS4ERR_INVAL;
6279 6283 VN_RELE(*vpp);
6280 6284 return (status);
6281 6285 }
6282 6286
6283 6287 return (NFS4_OK);
6284 6288 }
6285 6289
6286 6290 /* Check for duplicate request */
6287 6291 va.va_mask = AT_MTIME;
6288 6292 error = VOP_GETATTR(*vpp, &va, 0, cr, NULL);
6289 6293 if (!error) {
6290 6294 /* We found the file */
6291 6295 const timestruc_t *mtime = &vap->va_mtime;
6292 6296
6293 6297 if (va.va_mtime.tv_sec != mtime->tv_sec ||
6294 6298 va.va_mtime.tv_nsec != mtime->tv_nsec) {
6295 6299 /* but its not our creation */
6296 6300 VN_RELE(*vpp);
6297 6301 return (NFS4ERR_EXIST);
6298 6302 }
6299 6303 *created = TRUE; /* retrans of create == created */
6300 6304 return (NFS4_OK);
6301 6305 }
6302 6306 VN_RELE(*vpp);
6303 6307 return (NFS4ERR_EXIST);
6304 6308 }
6305 6309
6306 6310 return (NFS4_OK);
6307 6311 }
6308 6312
6309 6313 static nfsstat4
6310 6314 check_open_access(uint32_t access, struct compound_state *cs,
6311 6315 struct svc_req *req)
6312 6316 {
6313 6317 int error;
6314 6318 vnode_t *vp;
6315 6319 bool_t readonly;
6316 6320 cred_t *cr = cs->cr;
6317 6321
6318 6322 /* For now we don't allow mandatory locking as per V2/V3 */
6319 6323 if (cs->access == CS_ACCESS_DENIED || cs->mandlock) {
6320 6324 return (NFS4ERR_ACCESS);
6321 6325 }
6322 6326
6323 6327 vp = cs->vp;
6324 6328 ASSERT(cr != NULL && vp->v_type == VREG);
6325 6329
6326 6330 /*
6327 6331 * If the file system is exported read only and we are trying
6328 6332 * to open for write, then return NFS4ERR_ROFS
6329 6333 */
6330 6334
6331 6335 readonly = rdonly4(req, cs);
6332 6336
6333 6337 if ((access & OPEN4_SHARE_ACCESS_WRITE) && readonly)
6334 6338 return (NFS4ERR_ROFS);
6335 6339
6336 6340 if (access & OPEN4_SHARE_ACCESS_READ) {
6337 6341 if ((VOP_ACCESS(vp, VREAD, 0, cr, NULL) != 0) &&
6338 6342 (VOP_ACCESS(vp, VEXEC, 0, cr, NULL) != 0)) {
6339 6343 return (NFS4ERR_ACCESS);
6340 6344 }
6341 6345 }
6342 6346
6343 6347 if (access & OPEN4_SHARE_ACCESS_WRITE) {
6344 6348 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
6345 6349 if (error)
6346 6350 return (NFS4ERR_ACCESS);
6347 6351 }
6348 6352
6349 6353 return (NFS4_OK);
6350 6354 }
6351 6355
6352 6356 static nfsstat4
6353 6357 rfs4_createfile(OPEN4args *args, struct svc_req *req, struct compound_state *cs,
6354 6358 change_info4 *cinfo, bitmap4 *attrset, clientid4 clientid)
6355 6359 {
6356 6360 struct nfs4_svgetit_arg sarg;
6357 6361 struct nfs4_ntov_table ntov;
6358 6362
6359 6363 bool_t ntov_table_init = FALSE;
6360 6364 struct statvfs64 sb;
6361 6365 nfsstat4 status;
6362 6366 vnode_t *vp;
6363 6367 vattr_t bva, ava, iva, cva, *vap;
6364 6368 vnode_t *dvp;
6365 6369 timespec32_t *mtime;
6366 6370 char *nm = NULL;
6367 6371 uint_t buflen;
6368 6372 bool_t created;
6369 6373 bool_t setsize = FALSE;
6370 6374 len_t reqsize;
6371 6375 int error;
6372 6376 bool_t trunc;
6373 6377 caller_context_t ct;
6374 6378 component4 *component;
6375 6379 bslabel_t *clabel;
6376 6380 struct sockaddr *ca;
6377 6381 char *name = NULL;
6378 6382
6379 6383 sarg.sbp = &sb;
6380 6384 sarg.is_referral = B_FALSE;
6381 6385
6382 6386 dvp = cs->vp;
6383 6387
6384 6388 /* Check if the file system is read only */
6385 6389 if (rdonly4(req, cs))
6386 6390 return (NFS4ERR_ROFS);
6387 6391
6388 6392 /* check the label of including directory */
6389 6393 if (is_system_labeled()) {
6390 6394 ASSERT(req->rq_label != NULL);
6391 6395 clabel = req->rq_label;
6392 6396 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
6393 6397 "got client label from request(1)",
6394 6398 struct svc_req *, req);
6395 6399 if (!blequal(&l_admin_low->tsl_label, clabel)) {
6396 6400 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
6397 6401 cs->exi)) {
6398 6402 return (NFS4ERR_ACCESS);
6399 6403 }
6400 6404 }
6401 6405 }
6402 6406
6403 6407 /*
6404 6408 * Get the last component of path name in nm. cs will reference
6405 6409 * the including directory on success.
6406 6410 */
6407 6411 component = &args->open_claim4_u.file;
6408 6412 status = utf8_dir_verify(component);
6409 6413 if (status != NFS4_OK)
6410 6414 return (status);
6411 6415
6412 6416 nm = utf8_to_fn(component, &buflen, NULL);
6413 6417
6414 6418 if (nm == NULL)
6415 6419 return (NFS4ERR_RESOURCE);
6416 6420
6417 6421 if (buflen > MAXNAMELEN) {
6418 6422 kmem_free(nm, buflen);
6419 6423 return (NFS4ERR_NAMETOOLONG);
6420 6424 }
6421 6425
6422 6426 bva.va_mask = AT_TYPE|AT_CTIME|AT_SEQ;
6423 6427 error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6424 6428 if (error) {
6425 6429 kmem_free(nm, buflen);
6426 6430 return (puterrno4(error));
6427 6431 }
6428 6432
6429 6433 if (bva.va_type != VDIR) {
6430 6434 kmem_free(nm, buflen);
6431 6435 return (NFS4ERR_NOTDIR);
6432 6436 }
6433 6437
6434 6438 NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime)
6435 6439
6436 6440 switch (args->mode) {
6437 6441 case GUARDED4:
6438 6442 /*FALLTHROUGH*/
6439 6443 case UNCHECKED4:
6440 6444 nfs4_ntov_table_init(&ntov);
6441 6445 ntov_table_init = TRUE;
6442 6446
6443 6447 *attrset = 0;
6444 6448 status = do_rfs4_set_attrs(attrset,
6445 6449 &args->createhow4_u.createattrs,
6446 6450 cs, &sarg, &ntov, NFS4ATTR_SETIT);
6447 6451
6448 6452 if (status == NFS4_OK && (sarg.vap->va_mask & AT_TYPE) &&
6449 6453 sarg.vap->va_type != VREG) {
6450 6454 if (sarg.vap->va_type == VDIR)
6451 6455 status = NFS4ERR_ISDIR;
6452 6456 else if (sarg.vap->va_type == VLNK)
6453 6457 status = NFS4ERR_SYMLINK;
6454 6458 else
6455 6459 status = NFS4ERR_INVAL;
6456 6460 }
6457 6461
6458 6462 if (status != NFS4_OK) {
6459 6463 kmem_free(nm, buflen);
6460 6464 nfs4_ntov_table_free(&ntov, &sarg);
6461 6465 *attrset = 0;
6462 6466 return (status);
6463 6467 }
6464 6468
6465 6469 vap = sarg.vap;
6466 6470 vap->va_type = VREG;
6467 6471 vap->va_mask |= AT_TYPE;
6468 6472
6469 6473 if ((vap->va_mask & AT_MODE) == 0) {
6470 6474 vap->va_mask |= AT_MODE;
6471 6475 vap->va_mode = (mode_t)0600;
6472 6476 }
6473 6477
6474 6478 if (vap->va_mask & AT_SIZE) {
6475 6479
6476 6480 /* Disallow create with a non-zero size */
6477 6481
6478 6482 if ((reqsize = sarg.vap->va_size) != 0) {
6479 6483 kmem_free(nm, buflen);
6480 6484 nfs4_ntov_table_free(&ntov, &sarg);
6481 6485 *attrset = 0;
6482 6486 return (NFS4ERR_INVAL);
6483 6487 }
6484 6488 setsize = TRUE;
6485 6489 }
6486 6490 break;
6487 6491
6488 6492 case EXCLUSIVE4:
6489 6493 /* prohibit EXCL create of named attributes */
6490 6494 if (dvp->v_flag & V_XATTRDIR) {
6491 6495 kmem_free(nm, buflen);
6492 6496 *attrset = 0;
6493 6497 return (NFS4ERR_INVAL);
6494 6498 }
6495 6499
6496 6500 cva.va_mask = AT_TYPE | AT_MTIME | AT_MODE;
6497 6501 cva.va_type = VREG;
6498 6502 /*
6499 6503 * Ensure no time overflows. Assumes underlying
6500 6504 * filesystem supports at least 32 bits.
6501 6505 * Truncate nsec to usec resolution to allow valid
6502 6506 * compares even if the underlying filesystem truncates.
6503 6507 */
6504 6508 mtime = (timespec32_t *)&args->createhow4_u.createverf;
6505 6509 cva.va_mtime.tv_sec = mtime->tv_sec % TIME32_MAX;
6506 6510 cva.va_mtime.tv_nsec = (mtime->tv_nsec / 1000) * 1000;
6507 6511 cva.va_mode = (mode_t)0;
6508 6512 vap = &cva;
6509 6513
6510 6514 /*
6511 6515 * For EXCL create, attrset is set to the server attr
6512 6516 * used to cache the client's verifier.
6513 6517 */
6514 6518 *attrset = FATTR4_TIME_MODIFY_MASK;
6515 6519 break;
6516 6520 }
6517 6521
6518 6522 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6519 6523 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6520 6524 MAXPATHLEN + 1);
6521 6525
6522 6526 if (name == NULL) {
6523 6527 kmem_free(nm, buflen);
6524 6528 return (NFS4ERR_SERVERFAULT);
6525 6529 }
6526 6530
6527 6531 status = create_vnode(dvp, name, vap, args->mode,
6528 6532 cs->cr, &vp, &created);
6529 6533 if (nm != name)
6530 6534 kmem_free(name, MAXPATHLEN + 1);
6531 6535 kmem_free(nm, buflen);
6532 6536
6533 6537 if (status != NFS4_OK) {
6534 6538 if (ntov_table_init)
6535 6539 nfs4_ntov_table_free(&ntov, &sarg);
6536 6540 *attrset = 0;
6537 6541 return (status);
6538 6542 }
6539 6543
6540 6544 trunc = (setsize && !created);
6541 6545
6542 6546 if (args->mode != EXCLUSIVE4) {
6543 6547 bitmap4 createmask = args->createhow4_u.createattrs.attrmask;
6544 6548
6545 6549 /*
6546 6550 * True verification that object was created with correct
6547 6551 * attrs is impossible. The attrs could have been changed
6548 6552 * immediately after object creation. If attributes did
6549 6553 * not verify, the only recourse for the server is to
6550 6554 * destroy the object. Maybe if some attrs (like gid)
6551 6555 * are set incorrectly, the object should be destroyed;
6552 6556 * however, seems bad as a default policy. Do we really
6553 6557 * want to destroy an object over one of the times not
6554 6558 * verifying correctly? For these reasons, the server
6555 6559 * currently sets bits in attrset for createattrs
6556 6560 * that were set; however, no verification is done.
6557 6561 *
6558 6562 * vmask_to_nmask accounts for vattr bits set on create
6559 6563 * [do_rfs4_set_attrs() only sets resp bits for
6560 6564 * non-vattr/vfs bits.]
6561 6565 * Mask off any bits we set by default so as not to return
6562 6566 * more attrset bits than were requested in createattrs
6563 6567 */
6564 6568 if (created) {
6565 6569 nfs4_vmask_to_nmask(sarg.vap->va_mask, attrset);
6566 6570 *attrset &= createmask;
6567 6571 } else {
6568 6572 /*
6569 6573 * We did not create the vnode (we tried but it
6570 6574 * already existed). In this case, the only createattr
6571 6575 * that the spec allows the server to set is size,
6572 6576 * and even then, it can only be set if it is 0.
6573 6577 */
6574 6578 *attrset = 0;
6575 6579 if (trunc)
6576 6580 *attrset = FATTR4_SIZE_MASK;
6577 6581 }
6578 6582 }
6579 6583 if (ntov_table_init)
6580 6584 nfs4_ntov_table_free(&ntov, &sarg);
6581 6585
6582 6586 /*
6583 6587 * Get the initial "after" sequence number, if it fails,
6584 6588 * set to zero, time to before.
6585 6589 */
6586 6590 iva.va_mask = AT_CTIME|AT_SEQ;
6587 6591 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL)) {
6588 6592 iva.va_seq = 0;
6589 6593 iva.va_ctime = bva.va_ctime;
6590 6594 }
6591 6595
6592 6596 /*
6593 6597 * create_vnode attempts to create the file exclusive,
6594 6598 * if it already exists the VOP_CREATE will fail and
6595 6599 * may not increase va_seq. It is atomic if
6596 6600 * we haven't changed the directory, but if it has changed
6597 6601 * we don't know what changed it.
6598 6602 */
6599 6603 if (!created) {
6600 6604 if (bva.va_seq && iva.va_seq &&
6601 6605 bva.va_seq == iva.va_seq)
6602 6606 cinfo->atomic = TRUE;
6603 6607 else
6604 6608 cinfo->atomic = FALSE;
6605 6609 NFS4_SET_FATTR4_CHANGE(cinfo->after, iva.va_ctime);
6606 6610 } else {
6607 6611 /*
6608 6612 * The entry was created, we need to sync the
6609 6613 * directory metadata.
6610 6614 */
6611 6615 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
6612 6616
6613 6617 /*
6614 6618 * Get "after" change value, if it fails, simply return the
6615 6619 * before value.
6616 6620 */
6617 6621 ava.va_mask = AT_CTIME|AT_SEQ;
6618 6622 if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6619 6623 ava.va_ctime = bva.va_ctime;
6620 6624 ava.va_seq = 0;
6621 6625 }
6622 6626
6623 6627 NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6624 6628
6625 6629 /*
6626 6630 * The cinfo->atomic = TRUE only if we have
6627 6631 * non-zero va_seq's, and it has incremented by exactly one
6628 6632 * during the create_vnode and it didn't
6629 6633 * change during the VOP_FSYNC.
6630 6634 */
6631 6635 if (bva.va_seq && iva.va_seq && ava.va_seq &&
6632 6636 iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
6633 6637 cinfo->atomic = TRUE;
6634 6638 else
6635 6639 cinfo->atomic = FALSE;
6636 6640 }
6637 6641
6638 6642 /* Check for mandatory locking and that the size gets set. */
6639 6643 cva.va_mask = AT_MODE;
6640 6644 if (setsize)
6641 6645 cva.va_mask |= AT_SIZE;
6642 6646
6643 6647 /* Assume the worst */
6644 6648 cs->mandlock = TRUE;
6645 6649
6646 6650 if (VOP_GETATTR(vp, &cva, 0, cs->cr, NULL) == 0) {
6647 6651 cs->mandlock = MANDLOCK(cs->vp, cva.va_mode);
6648 6652
6649 6653 /*
6650 6654 * Truncate the file if necessary; this would be
6651 6655 * the case for create over an existing file.
6652 6656 */
6653 6657
6654 6658 if (trunc) {
6655 6659 int in_crit = 0;
6656 6660 rfs4_file_t *fp;
6657 6661 nfs4_srv_t *nsrv4;
6658 6662 bool_t create = FALSE;
6659 6663
6660 6664 /*
6661 6665 * We are writing over an existing file.
6662 6666 * Check to see if we need to recall a delegation.
6663 6667 */
6664 6668 nsrv4 = nfs4_get_srv();
6665 6669 rfs4_hold_deleg_policy(nsrv4);
6666 6670 if ((fp = rfs4_findfile(vp, NULL, &create)) != NULL) {
6667 6671 if (rfs4_check_delegated_byfp(FWRITE, fp,
6668 6672 (reqsize == 0), FALSE, FALSE, &clientid)) {
6669 6673 rfs4_file_rele(fp);
6670 6674 rfs4_rele_deleg_policy(nsrv4);
6671 6675 VN_RELE(vp);
6672 6676 *attrset = 0;
6673 6677 return (NFS4ERR_DELAY);
6674 6678 }
6675 6679 rfs4_file_rele(fp);
6676 6680 }
6677 6681 rfs4_rele_deleg_policy(nsrv4);
6678 6682
6679 6683 if (nbl_need_check(vp)) {
6680 6684 in_crit = 1;
6681 6685
6682 6686 ASSERT(reqsize == 0);
6683 6687
6684 6688 nbl_start_crit(vp, RW_READER);
6685 6689 if (nbl_conflict(vp, NBL_WRITE, 0,
6686 6690 cva.va_size, 0, NULL)) {
6687 6691 in_crit = 0;
6688 6692 nbl_end_crit(vp);
6689 6693 VN_RELE(vp);
6690 6694 *attrset = 0;
6691 6695 return (NFS4ERR_ACCESS);
6692 6696 }
6693 6697 }
6694 6698 ct.cc_sysid = 0;
6695 6699 ct.cc_pid = 0;
6696 6700 ct.cc_caller_id = nfs4_srv_caller_id;
6697 6701 ct.cc_flags = CC_DONTBLOCK;
6698 6702
6699 6703 cva.va_mask = AT_SIZE;
6700 6704 cva.va_size = reqsize;
6701 6705 (void) VOP_SETATTR(vp, &cva, 0, cs->cr, &ct);
6702 6706 if (in_crit)
6703 6707 nbl_end_crit(vp);
6704 6708 }
6705 6709 }
6706 6710
6707 6711 error = makefh4(&cs->fh, vp, cs->exi);
6708 6712
6709 6713 /*
6710 6714 * Force modified data and metadata out to stable storage.
6711 6715 */
6712 6716 (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
6713 6717
6714 6718 if (error) {
6715 6719 VN_RELE(vp);
6716 6720 *attrset = 0;
6717 6721 return (puterrno4(error));
6718 6722 }
6719 6723
6720 6724 /* if parent dir is attrdir, set namedattr fh flag */
6721 6725 if (dvp->v_flag & V_XATTRDIR)
6722 6726 set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
6723 6727
6724 6728 if (cs->vp)
6725 6729 VN_RELE(cs->vp);
6726 6730
6727 6731 cs->vp = vp;
6728 6732
6729 6733 /*
6730 6734 * if we did not create the file, we will need to check
6731 6735 * the access bits on the file
6732 6736 */
6733 6737
6734 6738 if (!created) {
6735 6739 if (setsize)
6736 6740 args->share_access |= OPEN4_SHARE_ACCESS_WRITE;
6737 6741 status = check_open_access(args->share_access, cs, req);
6738 6742 if (status != NFS4_OK)
6739 6743 *attrset = 0;
6740 6744 }
6741 6745 return (status);
6742 6746 }
6743 6747
/*
 * rfs4_do_open: common back end shared by the OPEN claim handlers.
 *
 * Steps, in the order implemented below:
 *   - find (or create) the rfs4_file_t for cs->vp and the rfs4_state_t
 *     for the (oo, file) pair;
 *   - fetch the client's sysid;
 *   - register any new share access/deny bits with rfs4_share();
 *   - if rfs4_check_recall() reports a conflict, recall the delegation,
 *     wait NFS4_DELEGATION_CONFLICT_DELAY and check again;
 *   - VOP_OPEN() the vnode on a first open, otherwise vn_open_upgrade();
 *   - fold the new bits into the per-state and per-file counters;
 *   - ask rfs4_grant_delegation() for a delegation and bump the stateid.
 *
 * cs        - compound state; cs->vp / cs->fh name the file, cs->cr is the
 *             credential handed to VOP_OPEN() and rfs4_state_close()
 * req       - not used
 * oo        - open owner performing the open
 * deleg     - delegation request passed through to rfs4_grant_delegation()
 * access    - requested OPEN4_SHARE_ACCESS_* bits
 * deny      - requested OPEN4_SHARE_DENY_* bits
 * resp      - result; resp->status is set on every return path,
 *             resp->stateid (and resp->delegation when one is granted)
 *             on success
 * deleg_cur - non-zero: never call VOP_OPEN(), only do the open upgrade
 *
 * Every failure after the state lookup drops the file and state references
 * and, when screate is still TRUE, closes the state with rfs4_state_close().
 * (screate presumably stays TRUE only if the state was created by this
 * call - confirm against rfs4_findstate_by_owner_file().)
 */
6744 6748 /*ARGSUSED*/
6745 6749 static void
6746 6750 rfs4_do_open(struct compound_state *cs, struct svc_req *req,
6747 6751 rfs4_openowner_t *oo, delegreq_t deleg,
6748 6752 uint32_t access, uint32_t deny,
6749 6753 OPEN4res *resp, int deleg_cur)
6750 6754 {
6751 6755 /* XXX Currently not using req */
6752 6756 rfs4_state_t *sp;
6753 6757 rfs4_file_t *fp;
6754 6758 bool_t screate = TRUE;
6755 6759 bool_t fcreate = TRUE;
6756 6760 uint32_t open_a, share_a;
6757 6761 uint32_t open_d, share_d;
6758 6762 rfs4_deleg_state_t *dsp;
6759 6763 sysid_t sysid;
6760 6764 nfsstat4 status;
6761 6765 caller_context_t ct;
6762 6766 int fflags = 0;
6763 6767 int recall = 0;
6764 6768 int err;
6765 6769 int first_open;
6766 6770
6767 6771 /* get the file struct and hold a lock on it during initial open */
6768 6772 fp = rfs4_findfile_withlock(cs->vp, &cs->fh, &fcreate);
6769 6773 if (fp == NULL) {
6770 6774 resp->status = NFS4ERR_RESOURCE;
6771 6775 DTRACE_PROBE1(nfss__e__do__open1, nfsstat4, resp->status);
6772 6776 return;
6773 6777 }
6774 6778
6775 6779 sp = rfs4_findstate_by_owner_file(oo, fp, &screate);
6776 6780 if (sp == NULL) {
6777 6781 resp->status = NFS4ERR_RESOURCE;
6778 6782 DTRACE_PROBE1(nfss__e__do__open2, nfsstat4, resp->status);
6779 6783 /* No need to keep any reference */
6780 6784 rw_exit(&fp->rf_file_rwlock);
6781 6785 rfs4_file_rele(fp);
6782 6786 return;
6783 6787 }
6784 6788
/*
 * NOTE(review): this is the only rw_exit() of fp->rf_file_rwlock in this
 * function.  Presumably a successful state lookup drops the lock taken by
 * rfs4_findfile_withlock() - confirm, since no later path releases it.
 */
6785 6789 /* try to get the sysid before continuing */
6786 6790 if ((status = rfs4_client_sysid(oo->ro_client, &sysid)) != NFS4_OK) {
6787 6791 resp->status = status;
6788 6792 rfs4_file_rele(fp);
6789 6793 /* Not a fully formed open; "close" it */
6790 6794 if (screate == TRUE)
6791 6795 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6792 6796 rfs4_state_rele(sp);
6793 6797 return;
6794 6798 }
6795 6799
6796 6800 /* Calculate the fflags for this OPEN. */
6797 6801 if (access & OPEN4_SHARE_ACCESS_READ)
6798 6802 fflags |= FREAD;
6799 6803 if (access & OPEN4_SHARE_ACCESS_WRITE)
6800 6804 fflags |= FWRITE;
6801 6805
/* The state's dbe lock is taken first; the file's dbe lock comes later. */
6802 6806 rfs4_dbe_lock(sp->rs_dbe);
6803 6807
6804 6808 /*
6805 6809 * Calculate the new deny and access mode that this open is adding to
6806 6810 * the file for this open owner;
6807 6811 */
6808 6812 open_d = (deny & ~sp->rs_open_deny);
6809 6813 open_a = (access & ~sp->rs_open_access);
6810 6814
6811 6815 /*
6812 6816 * Calculate the new share access and share deny modes that this open
6813 6817 * is adding to the file for this open owner;
6814 6818 */
6815 6819 share_a = (access & ~sp->rs_share_access);
6816 6820 share_d = (deny & ~sp->rs_share_deny);
6817 6821
/* No access bits recorded on the state yet means this is its first open. */
6818 6822 first_open = (sp->rs_open_access & OPEN4_SHARE_ACCESS_BOTH) == 0;
6819 6823
6820 6824 /*
6821 6825 * Check to see the client has already sent an open for this
6822 6826 * open owner on this file with the same share/deny modes.
6823 6827 * If so, we don't need to check for a conflict and we don't
6824 6828 * need to add another shrlock. If not, then we need to
6825 6829 * check for conflicts in deny and access before checking for
6826 6830 * conflicts in delegation. We don't want to recall a
6827 6831 * delegation based on an open that will eventually fail based
6828 6832 * on shares modes.
6829 6833 */
6830 6834
6831 6835 if (share_a || share_d) {
6832 6836 if ((err = rfs4_share(sp, access, deny)) != 0) {
6833 6837 rfs4_dbe_unlock(sp->rs_dbe);
/* rfs4_share()'s return value is handed back unchanged as the status. */
6834 6838 resp->status = err;
6835 6839
6836 6840 rfs4_file_rele(fp);
6837 6841 /* Not a fully formed open; "close" it */
6838 6842 if (screate == TRUE)
6839 6843 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6840 6844 rfs4_state_rele(sp);
6841 6845 return;
6842 6846 }
6843 6847 }
6844 6848
6845 6849 rfs4_dbe_lock(fp->rf_dbe);
6846 6850
6847 6851 /*
6848 6852 * Check to see if this file is delegated and if so, if a
6849 6853 * recall needs to be done.
6850 6854 */
6851 6855 if (rfs4_check_recall(sp, access)) {
/* Both dbe locks are dropped across the recall and the delay. */
6852 6856 rfs4_dbe_unlock(fp->rf_dbe);
6853 6857 rfs4_dbe_unlock(sp->rs_dbe);
6854 6858 rfs4_recall_deleg(fp, FALSE, sp->rs_owner->ro_client);
6855 6859 delay(NFS4_DELEGATION_CONFLICT_DELAY);
6856 6860 rfs4_dbe_lock(sp->rs_dbe);
6857 6861
6858 6862 /* if state closed while lock was dropped */
6859 6863 if (sp->rs_closed) {
/* Undo the share reservation only if this call added one above. */
6860 6864 if (share_a || share_d)
6861 6865 (void) rfs4_unshare(sp);
6862 6866 rfs4_dbe_unlock(sp->rs_dbe);
6863 6867 rfs4_file_rele(fp);
6864 6868 /* Not a fully formed open; "close" it */
6865 6869 if (screate == TRUE)
6866 6870 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6867 6871 rfs4_state_rele(sp);
6868 6872 resp->status = NFS4ERR_OLD_STATEID;
6869 6873 return;
6870 6874 }
6871 6875
6872 6876 rfs4_dbe_lock(fp->rf_dbe);
6873 6877 /* Let's see if the delegation was returned */
6874 6878 if (rfs4_check_recall(sp, access)) {
6875 6879 rfs4_dbe_unlock(fp->rf_dbe);
6876 6880 if (share_a || share_d)
6877 6881 (void) rfs4_unshare(sp);
6878 6882 rfs4_dbe_unlock(sp->rs_dbe);
6879 6883 rfs4_file_rele(fp);
/* The lease is renewed even though the client is told to retry. */
6880 6884 rfs4_update_lease(sp->rs_owner->ro_client);
6881 6885
6882 6886 /* Not a fully formed open; "close" it */
6883 6887 if (screate == TRUE)
6884 6888 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6885 6889 rfs4_state_rele(sp);
6886 6890 resp->status = NFS4ERR_DELAY;
6887 6891 return;
6888 6892 }
6889 6893 }
6890 6894 /*
6891 6895 * the share check passed and any delegation conflict has been
6892 6896 * taken care of, now call vop_open.
6893 6897 * if this is the first open then call vop_open with fflags.
6894 6898 * if not, call vn_open_upgrade with just the upgrade flags.
6895 6899 *
6896 6900 * if the file has been opened already, it will have the current
6897 6901 * access mode in the state struct. if it has no share access, then
6898 6902 * this is a new open.
6899 6903 *
6900 6904 * However, if this is open with CLAIM_DELEGATE_CUR, then don't
6901 6905 * call VOP_OPEN(), just do the open upgrade.
6902 6906 */
6903 6907 if (first_open && !deleg_cur) {
6904 6908 ct.cc_sysid = sysid;
6905 6909 ct.cc_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
6906 6910 ct.cc_caller_id = nfs4_srv_caller_id;
6907 6911 ct.cc_flags = CC_DONTBLOCK;
6908 6912 err = VOP_OPEN(&cs->vp, fflags, cs->cr, &ct);
6909 6913 if (err) {
6910 6914 rfs4_dbe_unlock(fp->rf_dbe);
6911 6915 if (share_a || share_d)
6912 6916 (void) rfs4_unshare(sp);
6913 6917 rfs4_dbe_unlock(sp->rs_dbe);
6914 6918 rfs4_file_rele(fp);
6915 6919
6916 6920 /* Not a fully formed open; "close" it */
6917 6921 if (screate == TRUE)
6918 6922 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6919 6923 rfs4_state_rele(sp);
6920 6924 /* check if a monitor detected a delegation conflict */
6921 6925 if (err == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
6922 6926 resp->status = NFS4ERR_DELAY;
6923 6927 else
/* Any other VOP_OPEN() error is reported as NFS4ERR_SERVERFAULT. */
6924 6928 resp->status = NFS4ERR_SERVERFAULT;
6925 6929 return;
6926 6930 }
6927 6931 } else { /* open upgrade */
6928 6932 /*
6929 6933 * calculate the fflags for the new mode that is being added
6930 6934 * by this upgrade.
6931 6935 */
6932 6936 fflags = 0;
6933 6937 if (open_a & OPEN4_SHARE_ACCESS_READ)
6934 6938 fflags |= FREAD;
6935 6939 if (open_a & OPEN4_SHARE_ACCESS_WRITE)
6936 6940 fflags |= FWRITE;
6937 6941 vn_open_upgrade(cs->vp, fflags);
6938 6942 }
/*
 * Record the open: the state accumulates the requested bits, while the
 * per-file counters are bumped only for bits new to this state (open_a /
 * open_d).
 */
6939 6943 sp->rs_open_access |= access;
6940 6944 sp->rs_open_deny |= deny;
6941 6945
6942 6946 if (open_d & OPEN4_SHARE_DENY_READ)
6943 6947 fp->rf_deny_read++;
6944 6948 if (open_d & OPEN4_SHARE_DENY_WRITE)
6945 6949 fp->rf_deny_write++;
6946 6950 fp->rf_share_deny |= deny;
6947 6951
6948 6952 if (open_a & OPEN4_SHARE_ACCESS_READ)
6949 6953 fp->rf_access_read++;
6950 6954 if (open_a & OPEN4_SHARE_ACCESS_WRITE)
6951 6955 fp->rf_access_write++;
6952 6956 fp->rf_share_access |= access;
6953 6957
6954 6958 /*
6955 6959 * Check for delegation here. if the deleg argument is not
6956 6960 * DELEG_ANY, then this is a reclaim from a client and
6957 6961 * we must honor the delegation requested. If necessary we can
6958 6962 * set the recall flag.
6959 6963 */
6960 6964
6961 6965 dsp = rfs4_grant_delegation(deleg, sp, &recall);
6962 6966
/* Remember in the compound state whether the file is write-delegated. */
6963 6967 cs->deleg = (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE);
6964 6968
6965 6969 next_stateid(&sp->rs_stateid);
6966 6970
6967 6971 resp->stateid = sp->rs_stateid.stateid;
6968 6972
6969 6973 rfs4_dbe_unlock(fp->rf_dbe);
6970 6974 rfs4_dbe_unlock(sp->rs_dbe);
6971 6975
/* The delegation result is filled in after both dbe locks are released. */
6972 6976 if (dsp) {
6973 6977 rfs4_set_deleg_response(dsp, &resp->delegation, NULL, recall);
6974 6978 rfs4_deleg_state_rele(dsp);
6975 6979 }
6976 6980
6977 6981 rfs4_file_rele(fp);
6978 6982 rfs4_state_rele(sp);
6979 6983
6980 6984 resp->status = NFS4_OK;
6981 6985 }
6982 6986
6983 6987 /*ARGSUSED*/
6984 6988 static void
6985 6989 rfs4_do_opennull(struct compound_state *cs, struct svc_req *req,
6986 6990 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6987 6991 {
6988 6992 change_info4 *cinfo = &resp->cinfo;
6989 6993 bitmap4 *attrset = &resp->attrset;
6990 6994
6991 6995 if (args->opentype == OPEN4_NOCREATE)
6992 6996 resp->status = rfs4_lookupfile(&args->open_claim4_u.file,
6993 6997 req, cs, args->share_access, cinfo);
6994 6998 else {
6995 6999 /* inhibit delegation grants during exclusive create */
6996 7000
6997 7001 if (args->mode == EXCLUSIVE4)
6998 7002 rfs4_disable_delegation();
6999 7003
7000 7004 resp->status = rfs4_createfile(args, req, cs, cinfo, attrset,
7001 7005 oo->ro_client->rc_clientid);
7002 7006 }
7003 7007
7004 7008 if (resp->status == NFS4_OK) {
7005 7009
7006 7010 /* cs->vp cs->fh now reference the desired file */
7007 7011
7008 7012 rfs4_do_open(cs, req, oo,
7009 7013 oo->ro_need_confirm ? DELEG_NONE : DELEG_ANY,
7010 7014 args->share_access, args->share_deny, resp, 0);
7011 7015
7012 7016 /*
7013 7017 * If rfs4_createfile set attrset, we must
7014 7018 * clear this attrset before the response is copied.
7015 7019 */
7016 7020 if (resp->status != NFS4_OK && resp->attrset) {
7017 7021 resp->attrset = 0;
7018 7022 }
7019 7023 }
7020 7024 else
7021 7025 *cs->statusp = resp->status;
7022 7026
7023 7027 if (args->mode == EXCLUSIVE4)
7024 7028 rfs4_enable_delegation();
7025 7029 }
7026 7030
7027 7031 /*ARGSUSED*/
7028 7032 static void
7029 7033 rfs4_do_openprev(struct compound_state *cs, struct svc_req *req,
7030 7034 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7031 7035 {
7032 7036 change_info4 *cinfo = &resp->cinfo;
7033 7037 vattr_t va;
7034 7038 vtype_t v_type = cs->vp->v_type;
7035 7039 int error = 0;
7036 7040
7037 7041 /* Verify that we have a regular file */
7038 7042 if (v_type != VREG) {
7039 7043 if (v_type == VDIR)
7040 7044 resp->status = NFS4ERR_ISDIR;
7041 7045 else if (v_type == VLNK)
7042 7046 resp->status = NFS4ERR_SYMLINK;
7043 7047 else
7044 7048 resp->status = NFS4ERR_INVAL;
7045 7049 return;
7046 7050 }
7047 7051
7048 7052 va.va_mask = AT_MODE|AT_UID;
7049 7053 error = VOP_GETATTR(cs->vp, &va, 0, cs->cr, NULL);
7050 7054 if (error) {
7051 7055 resp->status = puterrno4(error);
7052 7056 return;
7053 7057 }
7054 7058
7055 7059 cs->mandlock = MANDLOCK(cs->vp, va.va_mode);
7056 7060
7057 7061 /*
7058 7062 * Check if we have access to the file, Note the the file
7059 7063 * could have originally been open UNCHECKED or GUARDED
7060 7064 * with mode bits that will now fail, but there is nothing
7061 7065 * we can really do about that except in the case that the
7062 7066 * owner of the file is the one requesting the open.
7063 7067 */
7064 7068 if (crgetuid(cs->cr) != va.va_uid) {
7065 7069 resp->status = check_open_access(args->share_access, cs, req);
7066 7070 if (resp->status != NFS4_OK) {
7067 7071 return;
7068 7072 }
7069 7073 }
7070 7074
7071 7075 /*
7072 7076 * cinfo on a CLAIM_PREVIOUS is undefined, initialize to zero
7073 7077 */
7074 7078 cinfo->before = 0;
7075 7079 cinfo->after = 0;
7076 7080 cinfo->atomic = FALSE;
7077 7081
7078 7082 rfs4_do_open(cs, req, oo,
7079 7083 NFS4_DELEG4TYPE2REQTYPE(args->open_claim4_u.delegate_type),
7080 7084 args->share_access, args->share_deny, resp, 0);
7081 7085 }
7082 7086
7083 7087 static void
7084 7088 rfs4_do_opendelcur(struct compound_state *cs, struct svc_req *req,
7085 7089 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7086 7090 {
7087 7091 int error;
7088 7092 nfsstat4 status;
7089 7093 stateid4 stateid =
7090 7094 args->open_claim4_u.delegate_cur_info.delegate_stateid;
7091 7095 rfs4_deleg_state_t *dsp;
7092 7096
7093 7097 /*
7094 7098 * Find the state info from the stateid and confirm that the
7095 7099 * file is delegated. If the state openowner is the same as
7096 7100 * the supplied openowner we're done. If not, get the file
7097 7101 * info from the found state info. Use that file info to
7098 7102 * create the state for this lock owner. Note solaris doen't
7099 7103 * really need the pathname to find the file. We may want to
7100 7104 * lookup the pathname and make sure that the vp exist and
7101 7105 * matches the vp in the file structure. However it is
7102 7106 * possible that the pathname nolonger exists (local process
7103 7107 * unlinks the file), so this may not be that useful.
7104 7108 */
7105 7109
7106 7110 status = rfs4_get_deleg_state(&stateid, &dsp);
7107 7111 if (status != NFS4_OK) {
7108 7112 resp->status = status;
7109 7113 return;
7110 7114 }
7111 7115
7112 7116 ASSERT(dsp->rds_finfo->rf_dinfo.rd_dtype != OPEN_DELEGATE_NONE);
7113 7117
7114 7118 /*
7115 7119 * New lock owner, create state. Since this was probably called
7116 7120 * in response to a CB_RECALL we set deleg to DELEG_NONE
7117 7121 */
7118 7122
7119 7123 ASSERT(cs->vp != NULL);
7120 7124 VN_RELE(cs->vp);
7121 7125 VN_HOLD(dsp->rds_finfo->rf_vp);
7122 7126 cs->vp = dsp->rds_finfo->rf_vp;
7123 7127
7124 7128 if (error = makefh4(&cs->fh, cs->vp, cs->exi)) {
7125 7129 rfs4_deleg_state_rele(dsp);
7126 7130 *cs->statusp = resp->status = puterrno4(error);
7127 7131 return;
7128 7132 }
7129 7133
7130 7134 /* Mark progress for delegation returns */
7131 7135 dsp->rds_finfo->rf_dinfo.rd_time_lastwrite = gethrestime_sec();
7132 7136 rfs4_deleg_state_rele(dsp);
7133 7137 rfs4_do_open(cs, req, oo, DELEG_NONE,
7134 7138 args->share_access, args->share_deny, resp, 1);
7135 7139 }
7136 7140
7137 7141 /*ARGSUSED*/
7138 7142 static void
7139 7143 rfs4_do_opendelprev(struct compound_state *cs, struct svc_req *req,
7140 7144 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7141 7145 {
7142 7146 /*
7143 7147 * Lookup the pathname, it must already exist since this file
7144 7148 * was delegated.
7145 7149 *
7146 7150 * Find the file and state info for this vp and open owner pair.
7147 7151 * check that they are in fact delegated.
7148 7152 * check that the state access and deny modes are the same.
7149 7153 *
7150 7154 * Return the delgation possibly seting the recall flag.
7151 7155 */
7152 7156 rfs4_file_t *fp;
7153 7157 rfs4_state_t *sp;
7154 7158 bool_t create = FALSE;
7155 7159 bool_t dcreate = FALSE;
7156 7160 rfs4_deleg_state_t *dsp;
7157 7161 nfsace4 *ace;
7158 7162
7159 7163 /* Note we ignore oflags */
7160 7164 resp->status = rfs4_lookupfile(&args->open_claim4_u.file_delegate_prev,
7161 7165 req, cs, args->share_access, &resp->cinfo);
7162 7166
7163 7167 if (resp->status != NFS4_OK) {
7164 7168 return;
7165 7169 }
7166 7170
7167 7171 /* get the file struct and hold a lock on it during initial open */
7168 7172 fp = rfs4_findfile_withlock(cs->vp, NULL, &create);
7169 7173 if (fp == NULL) {
7170 7174 resp->status = NFS4ERR_RESOURCE;
7171 7175 DTRACE_PROBE1(nfss__e__do_opendelprev1, nfsstat4, resp->status);
7172 7176 return;
7173 7177 }
7174 7178
7175 7179 sp = rfs4_findstate_by_owner_file(oo, fp, &create);
7176 7180 if (sp == NULL) {
7177 7181 resp->status = NFS4ERR_SERVERFAULT;
7178 7182 DTRACE_PROBE1(nfss__e__do_opendelprev2, nfsstat4, resp->status);
7179 7183 rw_exit(&fp->rf_file_rwlock);
7180 7184 rfs4_file_rele(fp);
7181 7185 return;
7182 7186 }
7183 7187
7184 7188 rfs4_dbe_lock(sp->rs_dbe);
7185 7189 rfs4_dbe_lock(fp->rf_dbe);
7186 7190 if (args->share_access != sp->rs_share_access ||
7187 7191 args->share_deny != sp->rs_share_deny ||
7188 7192 sp->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
7189 7193 NFS4_DEBUG(rfs4_debug,
7190 7194 (CE_NOTE, "rfs4_do_opendelprev: state mixup"));
7191 7195 rfs4_dbe_unlock(fp->rf_dbe);
7192 7196 rfs4_dbe_unlock(sp->rs_dbe);
7193 7197 rfs4_file_rele(fp);
7194 7198 rfs4_state_rele(sp);
7195 7199 resp->status = NFS4ERR_SERVERFAULT;
7196 7200 return;
7197 7201 }
7198 7202 rfs4_dbe_unlock(fp->rf_dbe);
7199 7203 rfs4_dbe_unlock(sp->rs_dbe);
7200 7204
7201 7205 dsp = rfs4_finddeleg(sp, &dcreate);
7202 7206 if (dsp == NULL) {
7203 7207 rfs4_state_rele(sp);
7204 7208 rfs4_file_rele(fp);
7205 7209 resp->status = NFS4ERR_SERVERFAULT;
7206 7210 return;
7207 7211 }
7208 7212
7209 7213 next_stateid(&sp->rs_stateid);
7210 7214
7211 7215 resp->stateid = sp->rs_stateid.stateid;
7212 7216
7213 7217 resp->delegation.delegation_type = dsp->rds_dtype;
7214 7218
7215 7219 if (dsp->rds_dtype == OPEN_DELEGATE_READ) {
7216 7220 open_read_delegation4 *rv =
7217 7221 &resp->delegation.open_delegation4_u.read;
7218 7222
7219 7223 rv->stateid = dsp->rds_delegid.stateid;
7220 7224 rv->recall = FALSE; /* no policy in place to set to TRUE */
7221 7225 ace = &rv->permissions;
7222 7226 } else {
7223 7227 open_write_delegation4 *rv =
7224 7228 &resp->delegation.open_delegation4_u.write;
7225 7229
7226 7230 rv->stateid = dsp->rds_delegid.stateid;
7227 7231 rv->recall = FALSE; /* no policy in place to set to TRUE */
7228 7232 ace = &rv->permissions;
7229 7233 rv->space_limit.limitby = NFS_LIMIT_SIZE;
7230 7234 rv->space_limit.nfs_space_limit4_u.filesize = UINT64_MAX;
7231 7235 }
7232 7236
7233 7237 /* XXX For now */
7234 7238 ace->type = ACE4_ACCESS_ALLOWED_ACE_TYPE;
7235 7239 ace->flag = 0;
7236 7240 ace->access_mask = 0;
7237 7241 ace->who.utf8string_len = 0;
7238 7242 ace->who.utf8string_val = 0;
7239 7243
7240 7244 rfs4_deleg_state_rele(dsp);
7241 7245 rfs4_state_rele(sp);
7242 7246 rfs4_file_rele(fp);
7243 7247 }
7244 7248
/*
 * Outcome of a sequence id check.  NFS4_CHKSEQ_OKAY must remain zero:
 * at least one caller in this file compares the result against 0.
 */
typedef enum {
	NFS4_CHKSEQ_OKAY = 0,	/* request carries the next expected seqid */
	NFS4_CHKSEQ_REPLAY,	/* 1: same seqid and same op as the last one */
	NFS4_CHKSEQ_BAD		/* 2: anything else */
} rfs4_chkseq_t;
7250 7254
7251 7255 /*
7252 7256 * Generic function for sequence number checks.
7253 7257 */
7254 7258 static rfs4_chkseq_t
7255 7259 rfs4_check_seqid(seqid4 seqid, nfs_resop4 *lastop,
7256 7260 seqid4 rqst_seq, nfs_resop4 *resop, bool_t copyres)
7257 7261 {
7258 7262 /* Same sequence ids and matching operations? */
7259 7263 if (seqid == rqst_seq && resop->resop == lastop->resop) {
7260 7264 if (copyres == TRUE) {
7261 7265 rfs4_free_reply(resop);
7262 7266 rfs4_copy_reply(resop, lastop);
7263 7267 }
7264 7268 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
7265 7269 "Replayed SEQID %d\n", seqid));
7266 7270 return (NFS4_CHKSEQ_REPLAY);
7267 7271 }
7268 7272
7269 7273 /* If the incoming sequence is not the next expected then it is bad */
7270 7274 if (rqst_seq != seqid + 1) {
7271 7275 if (rqst_seq == seqid) {
7272 7276 NFS4_DEBUG(rfs4_debug,
7273 7277 (CE_NOTE, "BAD SEQID: Replayed sequence id "
7274 7278 "but last op was %d current op is %d\n",
7275 7279 lastop->resop, resop->resop));
7276 7280 return (NFS4_CHKSEQ_BAD);
7277 7281 }
7278 7282 NFS4_DEBUG(rfs4_debug,
7279 7283 (CE_NOTE, "BAD SEQID: got %u expecting %u\n",
7280 7284 rqst_seq, seqid));
7281 7285 return (NFS4_CHKSEQ_BAD);
7282 7286 }
7283 7287
7284 7288 /* Everything okay -- next expected */
7285 7289 return (NFS4_CHKSEQ_OKAY);
7286 7290 }
7287 7291
7288 7292
7289 7293 static rfs4_chkseq_t
7290 7294 rfs4_check_open_seqid(seqid4 seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7291 7295 {
7292 7296 rfs4_chkseq_t rc;
7293 7297
7294 7298 rfs4_dbe_lock(op->ro_dbe);
7295 7299 rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply, seqid, resop,
7296 7300 TRUE);
7297 7301 rfs4_dbe_unlock(op->ro_dbe);
7298 7302
7299 7303 if (rc == NFS4_CHKSEQ_OKAY)
7300 7304 rfs4_update_lease(op->ro_client);
7301 7305
7302 7306 return (rc);
7303 7307 }
7304 7308
7305 7309 static rfs4_chkseq_t
7306 7310 rfs4_check_olo_seqid(seqid4 olo_seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7307 7311 {
7308 7312 rfs4_chkseq_t rc;
7309 7313
7310 7314 rfs4_dbe_lock(op->ro_dbe);
7311 7315 rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply,
7312 7316 olo_seqid, resop, FALSE);
7313 7317 rfs4_dbe_unlock(op->ro_dbe);
7314 7318
7315 7319 return (rc);
7316 7320 }
7317 7321
7318 7322 static rfs4_chkseq_t
7319 7323 rfs4_check_lock_seqid(seqid4 seqid, rfs4_lo_state_t *lsp, nfs_resop4 *resop)
7320 7324 {
7321 7325 rfs4_chkseq_t rc = NFS4_CHKSEQ_OKAY;
7322 7326
7323 7327 rfs4_dbe_lock(lsp->rls_dbe);
7324 7328 if (!lsp->rls_skip_seqid_check)
7325 7329 rc = rfs4_check_seqid(lsp->rls_seqid, &lsp->rls_reply, seqid,
7326 7330 resop, TRUE);
7327 7331 rfs4_dbe_unlock(lsp->rls_dbe);
7328 7332
7329 7333 return (rc);
7330 7334 }
7331 7335
/*
 * rfs4_op_open: handler for the OPEN operation.
 *
 * Processing order, as implemented below:
 *   1. require a current filehandle (cs->vp);
 *   2. find the client by owner->clientid and reject an expired lease;
 *   3. find or create the open owner and serialize on oo->ro_sw;
 *   4. for an owner that already existed, check the open seqid - a replay
 *      or a bad seqid short-circuits to "out"; an owner still waiting for
 *      OPEN_CONFIRM has its opens freed and the owner lookup is retried;
 *   5. grace-period checks, which depend on the claim type;
 *   6. reject a mount point and invalid share_access/share_deny bits;
 *   7. dispatch on the claim type to one of the rfs4_do_open*() helpers;
 *   8. at "out", use resp->status to decide whether the reply is cached
 *      and the owner's open sequence id advanced.
 *
 * Exit labels:
 *   out    - releases the client reference, then does the seqid handling
 *   finish - copies resp->status to *cs->statusp, leaves oo->ro_sw and
 *            releases the open owner
 *   end    - fires the op__open__done probe only (used before any open
 *            owner is held)
 */
7332 7336 static void
7333 7337 rfs4_op_open(nfs_argop4 *argop, nfs_resop4 *resop,
7334 7338 struct svc_req *req, struct compound_state *cs)
7335 7339 {
7336 7340 OPEN4args *args = &argop->nfs_argop4_u.opopen;
7337 7341 OPEN4res *resp = &resop->nfs_resop4_u.opopen;
7338 7342 open_owner4 *owner = &args->owner;
7339 7343 open_claim_type4 claim = args->claim;
7340 7344 rfs4_client_t *cp;
7341 7345 rfs4_openowner_t *oo;
7342 7346 bool_t create;
7343 7347 bool_t replay = FALSE;
7344 7348 int can_reclaim;
7345 7349
7346 7350 DTRACE_NFSV4_2(op__open__start, struct compound_state *, cs,
7347 7351 OPEN4args *, args);
7348 7352
7349 7353 if (cs->vp == NULL) {
7350 7354 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7351 7355 goto end;
7352 7356 }
7353 7357
7354 7358 /*
7355 7359 * Need to check clientid and lease expiration first based on
7356 7360 * error ordering and incrementing sequence id.
7357 7361 */
7358 7362 cp = rfs4_findclient_by_id(owner->clientid, FALSE);
7359 7363 if (cp == NULL) {
7360 7364 *cs->statusp = resp->status =
7361 7365 rfs4_check_clientid(&owner->clientid, 0);
7362 7366 goto end;
7363 7367 }
7364 7368
7365 7369 if (rfs4_lease_expired(cp)) {
/*
 * NOTE(review): no rfs4_client_rele() on this path; presumably
 * rfs4_client_close() consumes the reference - confirm.
 */
7366 7370 rfs4_client_close(cp);
7367 7371 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7368 7372 goto end;
7369 7373 }
/* Sampled once here; consulted below for CLAIM_PREVIOUS during grace. */
7370 7374 can_reclaim = cp->rc_can_reclaim;
7371 7375
7372 7376 /*
7373 7377 * Find the open_owner for use from this point forward. Take
7374 7378 * care in updating the sequence id based on the type of error
7375 7379 * being returned.
7376 7380 */
7377 7381 retry:
/* create is reset on every pass because the lookup may overwrite it. */
7378 7382 create = TRUE;
7379 7383 oo = rfs4_findopenowner(owner, &create, args->seqid);
7380 7384 if (oo == NULL) {
7381 7385 *cs->statusp = resp->status = NFS4ERR_RESOURCE;
7382 7386 rfs4_client_rele(cp);
7383 7387 goto end;
7384 7388 }
7385 7389
7386 7390 /* Hold off access to the sequence space while the open is done */
7387 7391 rfs4_sw_enter(&oo->ro_sw);
7388 7392
7389 7393 /*
7390 7394 * If the open_owner existed before at the server, then check
7391 7395 * the sequence id.
7392 7396 */
7393 7397 if (!create && !oo->ro_postpone_confirm) {
7394 7398 switch (rfs4_check_open_seqid(args->seqid, oo, resop)) {
7395 7399 case NFS4_CHKSEQ_BAD:
/*
 * A seqid beyond the recorded one on a still-unconfirmed owner: free the
 * owner's opens, drop it and look the owner up again.
 */
7396 7400 if ((args->seqid > oo->ro_open_seqid) &&
7397 7401 oo->ro_need_confirm) {
7398 7402 rfs4_free_opens(oo, TRUE, FALSE);
7399 7403 rfs4_sw_exit(&oo->ro_sw);
7400 7404 rfs4_openowner_rele(oo);
7401 7405 goto retry;
7402 7406 }
/* Only resp->status is set here; *cs->statusp is filled in at "finish". */
7403 7407 resp->status = NFS4ERR_BAD_SEQID;
7404 7408 goto out;
7405 7409 case NFS4_CHKSEQ_REPLAY: /* replay of previous request */
7406 7410 replay = TRUE;
7407 7411 goto out;
7408 7412 default:
7409 7413 break;
7410 7414 }
7411 7415
7412 7416 /*
7413 7417 * Sequence was ok and open owner exists
7414 7418 * check to see if we have yet to see an
7415 7419 * open_confirm.
7416 7420 */
7417 7421 if (oo->ro_need_confirm) {
7418 7422 rfs4_free_opens(oo, TRUE, FALSE);
7419 7423 rfs4_sw_exit(&oo->ro_sw);
7420 7424 rfs4_openowner_rele(oo);
7421 7425 goto retry;
7422 7426 }
7423 7427 }
7424 7428 /* Grace only applies to regular-type OPENs */
7425 7429 if (rfs4_clnt_in_grace(cp) &&
7426 7430 (claim == CLAIM_NULL || claim == CLAIM_DELEGATE_CUR)) {
7427 7431 *cs->statusp = resp->status = NFS4ERR_GRACE;
7428 7432 goto out;
7429 7433 }
7430 7434
7431 7435 /*
7432 7436 * If previous state at the server existed then can_reclaim
7433 7437 * will be set. If not reply NFS4ERR_NO_GRACE to the
7434 7438 * client.
7435 7439 */
7436 7440 if (rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS && !can_reclaim) {
7437 7441 *cs->statusp = resp->status = NFS4ERR_NO_GRACE;
7438 7442 goto out;
7439 7443 }
7440 7444
7441 7445
7442 7446 /*
7443 7447 * Reject the open if the client has missed the grace period
7444 7448 */
7445 7449 if (!rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS) {
7446 7450 *cs->statusp = resp->status = NFS4ERR_NO_GRACE;
7447 7451 goto out;
7448 7452 }
7449 7453
7450 7454 /* Couple of up-front bookkeeping items */
7451 7455 if (oo->ro_need_confirm) {
7452 7456 /*
7453 7457 * If this is a reclaim OPEN then we should not ask
7454 7458 * for a confirmation of the open_owner per the
7455 7459 * protocol specification.
7456 7460 */
7457 7461 if (claim == CLAIM_PREVIOUS)
7458 7462 oo->ro_need_confirm = FALSE;
7459 7463 else
7460 7464 resp->rflags |= OPEN4_RESULT_CONFIRM;
7461 7465 }
7462 7466 resp->rflags |= OPEN4_RESULT_LOCKTYPE_POSIX;
7463 7467
7464 7468 /*
7465 7469 * If there is an unshared filesystem mounted on this vnode,
7466 7470 * do not allow to open/create in this directory.
7467 7471 */
7468 7472 if (vn_ismntpt(cs->vp)) {
7469 7473 *cs->statusp = resp->status = NFS4ERR_ACCESS;
7470 7474 goto out;
7471 7475 }
7472 7476
7473 7477 /*
7474 7478 * access must READ, WRITE, or BOTH. No access is invalid.
7475 7479 * deny can be READ, WRITE, BOTH, or NONE.
7476 7480 * bits not defined for access/deny are invalid.
7477 7481 */
7478 7482 if (! (args->share_access & OPEN4_SHARE_ACCESS_BOTH) ||
7479 7483 (args->share_access & ~OPEN4_SHARE_ACCESS_BOTH) ||
7480 7484 (args->share_deny & ~OPEN4_SHARE_DENY_BOTH)) {
7481 7485 *cs->statusp = resp->status = NFS4ERR_INVAL;
7482 7486 goto out;
7483 7487 }
7484 7488
7485 7489
7486 7490 /*
7487 7491 * make sure attrset is zero before response is built.
7488 7492 */
7489 7493 resp->attrset = 0;
7490 7494
/* Each helper reports its outcome through resp->status. */
7491 7495 switch (claim) {
7492 7496 case CLAIM_NULL:
7493 7497 rfs4_do_opennull(cs, req, args, oo, resp);
7494 7498 break;
7495 7499 case CLAIM_PREVIOUS:
7496 7500 rfs4_do_openprev(cs, req, args, oo, resp);
7497 7501 break;
7498 7502 case CLAIM_DELEGATE_CUR:
7499 7503 rfs4_do_opendelcur(cs, req, args, oo, resp);
7500 7504 break;
7501 7505 case CLAIM_DELEGATE_PREV:
7502 7506 rfs4_do_opendelprev(cs, req, args, oo, resp);
7503 7507 break;
7504 7508 default:
7505 7509 resp->status = NFS4ERR_INVAL;
7506 7510 break;
7507 7511 }
7508 7512
7509 7513 out:
/* cp is not used past this point; the code below goes through oo->ro_client. */
7510 7514 rfs4_client_rele(cp);
7511 7515
7512 7516 /* Catch sequence id handling here to make it a little easier */
7513 7517 switch (resp->status) {
7514 7518 case NFS4ERR_BADXDR:
7515 7519 case NFS4ERR_BAD_SEQID:
7516 7520 case NFS4ERR_BAD_STATEID:
7517 7521 case NFS4ERR_NOFILEHANDLE:
7518 7522 case NFS4ERR_RESOURCE:
7519 7523 case NFS4ERR_STALE_CLIENTID:
7520 7524 case NFS4ERR_STALE_STATEID:
7521 7525 /*
7522 7526 * The protocol states that if any of these errors are
7523 7527 * being returned, the sequence id should not be
7524 7528 * incremented. Any other return requires an
7525 7529 * increment.
7526 7530 */
7527 7531 break;
7528 7532 default:
7529 7533 /* Always update the lease in this case */
7530 7534 rfs4_update_lease(oo->ro_client);
7531 7535
7532 7536 /* Regular response - copy the result */
7533 7537 if (!replay)
7534 7538 rfs4_update_open_resp(oo, resop, &cs->fh);
7535 7539
7536 7540 /*
7537 7541 * REPLAY case: Only if the previous response was OK
7538 7542 * do we copy the filehandle. If not OK, no
7539 7543 * filehandle to copy.
7540 7544 */
7541 7545 if (replay == TRUE &&
7542 7546 resp->status == NFS4_OK &&
7543 7547 oo->ro_reply_fh.nfs_fh4_val) {
7544 7548 /*
7545 7549 * If this is a replay, we must restore the
7546 7550 * current filehandle/vp to that of what was
7547 7551 * returned originally. Try our best to do
7548 7552 * it.
7549 7553 */
7550 7554 nfs_fh4_fmt_t *fh_fmtp =
7551 7555 (nfs_fh4_fmt_t *)oo->ro_reply_fh.nfs_fh4_val;
7552 7556
7553 7557 cs->exi = checkexport4(&fh_fmtp->fh4_fsid,
7554 7558 (fid_t *)&fh_fmtp->fh4_xlen, NULL);
7555 7559
/* Jumping to "finish" skips the sequence-id update further down. */
7556 7560 if (cs->exi == NULL) {
7557 7561 resp->status = NFS4ERR_STALE;
7558 7562 goto finish;
7559 7563 }
7560 7564
7561 7565 VN_RELE(cs->vp);
7562 7566
7563 7567 cs->vp = nfs4_fhtovp(&oo->ro_reply_fh, cs->exi,
7564 7568 &resp->status);
7565 7569
7566 7570 if (cs->vp == NULL)
7567 7571 goto finish;
7568 7572
7569 7573 nfs_fh4_copy(&oo->ro_reply_fh, &cs->fh);
7570 7574 }
7571 7575
7572 7576 /*
7573 7577 * If this was a replay, no need to update the
7574 7578 * sequence id. If the open_owner was not created on
7575 7579 * this pass, then update. The first use of an
7576 7580 * open_owner will not bump the sequence id.
7577 7581 */
7578 7582 if (replay == FALSE && !create)
7579 7583 rfs4_update_open_sequence(oo);
7580 7584 /*
7581 7585 * If the client is receiving an error and the
7582 7586 * open_owner needs to be confirmed, there is no way
7583 7587 * to notify the client of this fact ignoring the fact
7584 7588 * that the server has no method of returning a
7585 7589 * stateid to confirm. Therefore, the server needs to
7586 7590 * mark this open_owner in a way as to avoid the
7587 7591 * sequence id checking the next time the client uses
7588 7592 * this open_owner.
7589 7593 */
7590 7594 if (resp->status != NFS4_OK && oo->ro_need_confirm)
7591 7595 oo->ro_postpone_confirm = TRUE;
7592 7596 /*
7593 7597 * If OK response then clear the postpone flag and
7594 7598 * reset the sequence id to keep in sync with the
7595 7599 * client.
7596 7600 */
7597 7601 if (resp->status == NFS4_OK && oo->ro_postpone_confirm) {
7598 7602 oo->ro_postpone_confirm = FALSE;
7599 7603 oo->ro_open_seqid = args->seqid;
7600 7604 }
7601 7605 break;
7602 7606 }
7603 7607
7604 7608 finish:
/* Publish the final status, then leave the owner's sequence window. */
7605 7609 *cs->statusp = resp->status;
7606 7610
7607 7611 rfs4_sw_exit(&oo->ro_sw);
7608 7612 rfs4_openowner_rele(oo);
7609 7613
7610 7614 end:
7611 7615 DTRACE_NFSV4_2(op__open__done, struct compound_state *, cs,
7612 7616 OPEN4res *, resp);
7613 7617 }
7614 7618
7615 7619 /*ARGSUSED*/
7616 7620 void
7617 7621 rfs4_op_open_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
7618 7622 struct svc_req *req, struct compound_state *cs)
7619 7623 {
7620 7624 OPEN_CONFIRM4args *args = &argop->nfs_argop4_u.opopen_confirm;
7621 7625 OPEN_CONFIRM4res *resp = &resop->nfs_resop4_u.opopen_confirm;
7622 7626 rfs4_state_t *sp;
7623 7627 nfsstat4 status;
7624 7628
7625 7629 DTRACE_NFSV4_2(op__open__confirm__start, struct compound_state *, cs,
7626 7630 OPEN_CONFIRM4args *, args);
7627 7631
7628 7632 if (cs->vp == NULL) {
7629 7633 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7630 7634 goto out;
7631 7635 }
7632 7636
7633 7637 if (cs->vp->v_type != VREG) {
7634 7638 *cs->statusp = resp->status =
7635 7639 cs->vp->v_type == VDIR ? NFS4ERR_ISDIR : NFS4ERR_INVAL;
7636 7640 return;
7637 7641 }
7638 7642
7639 7643 status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7640 7644 if (status != NFS4_OK) {
7641 7645 *cs->statusp = resp->status = status;
7642 7646 goto out;
7643 7647 }
7644 7648
7645 7649 /* Ensure specified filehandle matches */
7646 7650 if (cs->vp != sp->rs_finfo->rf_vp) {
7647 7651 rfs4_state_rele(sp);
7648 7652 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7649 7653 goto out;
7650 7654 }
7651 7655
7652 7656 /* hold off other access to open_owner while we tinker */
7653 7657 rfs4_sw_enter(&sp->rs_owner->ro_sw);
7654 7658
7655 7659 switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7656 7660 case NFS4_CHECK_STATEID_OKAY:
7657 7661 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7658 7662 resop) != 0) {
7659 7663 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7660 7664 break;
7661 7665 }
7662 7666 /*
7663 7667 * If it is the appropriate stateid and determined to
7664 7668 * be "OKAY" then this means that the stateid does not
7665 7669 * need to be confirmed and the client is in error for
7666 7670 * sending an OPEN_CONFIRM.
7667 7671 */
7668 7672 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7669 7673 break;
7670 7674 case NFS4_CHECK_STATEID_OLD:
7671 7675 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7672 7676 break;
7673 7677 case NFS4_CHECK_STATEID_BAD:
7674 7678 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7675 7679 break;
7676 7680 case NFS4_CHECK_STATEID_EXPIRED:
7677 7681 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7678 7682 break;
7679 7683 case NFS4_CHECK_STATEID_CLOSED:
7680 7684 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7681 7685 break;
7682 7686 case NFS4_CHECK_STATEID_REPLAY:
7683 7687 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7684 7688 resop)) {
7685 7689 case NFS4_CHKSEQ_OKAY:
7686 7690 /*
7687 7691 * This is replayed stateid; if seqid matches
7688 7692 * next expected, then client is using wrong seqid.
7689 7693 */
7690 7694 /* fall through */
7691 7695 case NFS4_CHKSEQ_BAD:
7692 7696 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7693 7697 break;
7694 7698 case NFS4_CHKSEQ_REPLAY:
7695 7699 /*
7696 7700 * Note this case is the duplicate case so
7697 7701 * resp->status is already set.
7698 7702 */
7699 7703 *cs->statusp = resp->status;
7700 7704 rfs4_update_lease(sp->rs_owner->ro_client);
7701 7705 break;
7702 7706 }
7703 7707 break;
7704 7708 case NFS4_CHECK_STATEID_UNCONFIRMED:
7705 7709 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7706 7710 resop) != NFS4_CHKSEQ_OKAY) {
7707 7711 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7708 7712 break;
7709 7713 }
7710 7714 *cs->statusp = resp->status = NFS4_OK;
7711 7715
7712 7716 next_stateid(&sp->rs_stateid);
7713 7717 resp->open_stateid = sp->rs_stateid.stateid;
7714 7718 sp->rs_owner->ro_need_confirm = FALSE;
7715 7719 rfs4_update_lease(sp->rs_owner->ro_client);
7716 7720 rfs4_update_open_sequence(sp->rs_owner);
7717 7721 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7718 7722 break;
7719 7723 default:
7720 7724 ASSERT(FALSE);
7721 7725 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7722 7726 break;
7723 7727 }
7724 7728 rfs4_sw_exit(&sp->rs_owner->ro_sw);
7725 7729 rfs4_state_rele(sp);
7726 7730
7727 7731 out:
7728 7732 DTRACE_NFSV4_2(op__open__confirm__done, struct compound_state *, cs,
7729 7733 OPEN_CONFIRM4res *, resp);
7730 7734 }
7731 7735
7732 7736 /*ARGSUSED*/
7733 7737 void
7734 7738 rfs4_op_open_downgrade(nfs_argop4 *argop, nfs_resop4 *resop,
7735 7739 struct svc_req *req, struct compound_state *cs)
7736 7740 {
7737 7741 OPEN_DOWNGRADE4args *args = &argop->nfs_argop4_u.opopen_downgrade;
7738 7742 OPEN_DOWNGRADE4res *resp = &resop->nfs_resop4_u.opopen_downgrade;
7739 7743 uint32_t access = args->share_access;
7740 7744 uint32_t deny = args->share_deny;
7741 7745 nfsstat4 status;
7742 7746 rfs4_state_t *sp;
7743 7747 rfs4_file_t *fp;
7744 7748 int fflags = 0;
7745 7749
7746 7750 DTRACE_NFSV4_2(op__open__downgrade__start, struct compound_state *, cs,
7747 7751 OPEN_DOWNGRADE4args *, args);
7748 7752
7749 7753 if (cs->vp == NULL) {
7750 7754 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7751 7755 goto out;
7752 7756 }
7753 7757
7754 7758 if (cs->vp->v_type != VREG) {
7755 7759 *cs->statusp = resp->status = NFS4ERR_INVAL;
7756 7760 return;
7757 7761 }
7758 7762
7759 7763 status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7760 7764 if (status != NFS4_OK) {
7761 7765 *cs->statusp = resp->status = status;
7762 7766 goto out;
7763 7767 }
7764 7768
7765 7769 /* Ensure specified filehandle matches */
7766 7770 if (cs->vp != sp->rs_finfo->rf_vp) {
7767 7771 rfs4_state_rele(sp);
7768 7772 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7769 7773 goto out;
7770 7774 }
7771 7775
7772 7776 /* hold off other access to open_owner while we tinker */
7773 7777 rfs4_sw_enter(&sp->rs_owner->ro_sw);
7774 7778
7775 7779 switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7776 7780 case NFS4_CHECK_STATEID_OKAY:
7777 7781 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7778 7782 resop) != NFS4_CHKSEQ_OKAY) {
7779 7783 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7780 7784 goto end;
7781 7785 }
7782 7786 break;
7783 7787 case NFS4_CHECK_STATEID_OLD:
7784 7788 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7785 7789 goto end;
7786 7790 case NFS4_CHECK_STATEID_BAD:
7787 7791 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7788 7792 goto end;
7789 7793 case NFS4_CHECK_STATEID_EXPIRED:
7790 7794 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7791 7795 goto end;
7792 7796 case NFS4_CHECK_STATEID_CLOSED:
7793 7797 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7794 7798 goto end;
7795 7799 case NFS4_CHECK_STATEID_UNCONFIRMED:
7796 7800 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7797 7801 goto end;
7798 7802 case NFS4_CHECK_STATEID_REPLAY:
7799 7803 /* Check the sequence id for the open owner */
7800 7804 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7801 7805 resop)) {
7802 7806 case NFS4_CHKSEQ_OKAY:
7803 7807 /*
7804 7808 * This is replayed stateid; if seqid matches
7805 7809 * next expected, then client is using wrong seqid.
7806 7810 */
7807 7811 /* fall through */
7808 7812 case NFS4_CHKSEQ_BAD:
7809 7813 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7810 7814 goto end;
7811 7815 case NFS4_CHKSEQ_REPLAY:
7812 7816 /*
7813 7817 * Note this case is the duplicate case so
7814 7818 * resp->status is already set.
7815 7819 */
7816 7820 *cs->statusp = resp->status;
7817 7821 rfs4_update_lease(sp->rs_owner->ro_client);
7818 7822 goto end;
7819 7823 }
7820 7824 break;
7821 7825 default:
7822 7826 ASSERT(FALSE);
7823 7827 break;
7824 7828 }
7825 7829
7826 7830 rfs4_dbe_lock(sp->rs_dbe);
7827 7831 /*
7828 7832 * Check that the new access modes and deny modes are valid.
7829 7833 * Check that no invalid bits are set.
7830 7834 */
7831 7835 if ((access & ~(OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) ||
7832 7836 (deny & ~(OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE))) {
7833 7837 *cs->statusp = resp->status = NFS4ERR_INVAL;
7834 7838 rfs4_update_open_sequence(sp->rs_owner);
7835 7839 rfs4_dbe_unlock(sp->rs_dbe);
7836 7840 goto end;
7837 7841 }
7838 7842
7839 7843 /*
7840 7844 * The new modes must be a subset of the current modes and
7841 7845 * the access must specify at least one mode. To test that
7842 7846 * the new mode is a subset of the current modes we bitwise
7843 7847 * AND them together and check that the result equals the new
7844 7848 * mode. For example:
7845 7849 * New mode, access == R and current mode, sp->rs_open_access == RW
7846 7850 * access & sp->rs_open_access == R == access, so the new access mode
7847 7851 * is valid. Consider access == RW, sp->rs_open_access = R
7848 7852 * access & sp->rs_open_access == R != access, so the new access mode
7849 7853 * is invalid.
7850 7854 */
7851 7855 if ((access & sp->rs_open_access) != access ||
7852 7856 (deny & sp->rs_open_deny) != deny ||
7853 7857 (access &
7854 7858 (OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) == 0) {
7855 7859 *cs->statusp = resp->status = NFS4ERR_INVAL;
7856 7860 rfs4_update_open_sequence(sp->rs_owner);
7857 7861 rfs4_dbe_unlock(sp->rs_dbe);
7858 7862 goto end;
7859 7863 }
7860 7864
7861 7865 /*
7862 7866 * Release any share locks associated with this stateID.
7863 7867 * Strictly speaking, this violates the spec because the
7864 7868 * spec effectively requires that open downgrade be atomic.
7865 7869 * At present, fs_shrlock does not have this capability.
7866 7870 */
7867 7871 (void) rfs4_unshare(sp);
7868 7872
7869 7873 status = rfs4_share(sp, access, deny);
7870 7874 if (status != NFS4_OK) {
7871 7875 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7872 7876 rfs4_update_open_sequence(sp->rs_owner);
7873 7877 rfs4_dbe_unlock(sp->rs_dbe);
7874 7878 goto end;
7875 7879 }
7876 7880
7877 7881 fp = sp->rs_finfo;
7878 7882 rfs4_dbe_lock(fp->rf_dbe);
7879 7883
7880 7884 /*
7881 7885 * If the current mode has deny read and the new mode
7882 7886 * does not, decrement the number of deny read mode bits
7883 7887 * and if it goes to zero turn off the deny read bit
7884 7888 * on the file.
7885 7889 */
7886 7890 if ((sp->rs_open_deny & OPEN4_SHARE_DENY_READ) &&
7887 7891 (deny & OPEN4_SHARE_DENY_READ) == 0) {
7888 7892 fp->rf_deny_read--;
7889 7893 if (fp->rf_deny_read == 0)
7890 7894 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
7891 7895 }
7892 7896
7893 7897 /*
7894 7898 * If the current mode has deny write and the new mode
7895 7899 * does not, decrement the number of deny write mode bits
7896 7900 * and if it goes to zero turn off the deny write bit
7897 7901 * on the file.
7898 7902 */
7899 7903 if ((sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) &&
7900 7904 (deny & OPEN4_SHARE_DENY_WRITE) == 0) {
7901 7905 fp->rf_deny_write--;
7902 7906 if (fp->rf_deny_write == 0)
7903 7907 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
7904 7908 }
7905 7909
7906 7910 /*
7907 7911 * If the current mode has access read and the new mode
7908 7912 * does not, decrement the number of access read mode bits
7909 7913 * and if it goes to zero turn off the access read bit
7910 7914 * on the file. set fflags to FREAD for the call to
7911 7915 * vn_open_downgrade().
7912 7916 */
7913 7917 if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) &&
7914 7918 (access & OPEN4_SHARE_ACCESS_READ) == 0) {
7915 7919 fp->rf_access_read--;
7916 7920 if (fp->rf_access_read == 0)
7917 7921 fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
7918 7922 fflags |= FREAD;
7919 7923 }
7920 7924
7921 7925 /*
7922 7926 * If the current mode has access write and the new mode
7923 7927 * does not, decrement the number of access write mode bits
7924 7928 * and if it goes to zero turn off the access write bit
7925 7929 * on the file. set fflags to FWRITE for the call to
7926 7930 * vn_open_downgrade().
7927 7931 */
7928 7932 if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) &&
7929 7933 (access & OPEN4_SHARE_ACCESS_WRITE) == 0) {
7930 7934 fp->rf_access_write--;
7931 7935 if (fp->rf_access_write == 0)
7932 7936 fp->rf_share_deny &= ~OPEN4_SHARE_ACCESS_WRITE;
7933 7937 fflags |= FWRITE;
7934 7938 }
7935 7939
7936 7940 /* Check that the file is still accessible */
7937 7941 ASSERT(fp->rf_share_access);
7938 7942
7939 7943 rfs4_dbe_unlock(fp->rf_dbe);
7940 7944
7941 7945 /* now set the new open access and deny modes */
7942 7946 sp->rs_open_access = access;
7943 7947 sp->rs_open_deny = deny;
7944 7948
7945 7949 /*
7946 7950 * we successfully downgraded the share lock, now we need to downgrade
7947 7951 * the open. it is possible that the downgrade was only for a deny
7948 7952 * mode and we have nothing else to do.
7949 7953 */
7950 7954 if ((fflags & (FREAD|FWRITE)) != 0)
7951 7955 vn_open_downgrade(cs->vp, fflags);
7952 7956
7953 7957 /* Update the stateid */
7954 7958 next_stateid(&sp->rs_stateid);
7955 7959 resp->open_stateid = sp->rs_stateid.stateid;
7956 7960
7957 7961 rfs4_dbe_unlock(sp->rs_dbe);
7958 7962
7959 7963 *cs->statusp = resp->status = NFS4_OK;
7960 7964 /* Update the lease */
7961 7965 rfs4_update_lease(sp->rs_owner->ro_client);
7962 7966 /* And the sequence */
7963 7967 rfs4_update_open_sequence(sp->rs_owner);
7964 7968 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7965 7969
7966 7970 end:
7967 7971 rfs4_sw_exit(&sp->rs_owner->ro_sw);
7968 7972 rfs4_state_rele(sp);
7969 7973 out:
7970 7974 DTRACE_NFSV4_2(op__open__downgrade__done, struct compound_state *, cs,
7971 7975 OPEN_DOWNGRADE4res *, resp);
7972 7976 }
7973 7977
/*
 * Locate the first occurrence of the NUL-terminated string s2 (without its
 * terminator) inside the n-byte buffer s1.  The buffer need not be
 * NUL-terminated.  Returns a pointer to the start of the match within s1,
 * or NULL when s2 does not occur in the first n bytes.
 */
static void *
memstr(const void *s1, const char *s2, size_t n)
{
	const size_t needle_len = strlen(s2);
	char *hay = (char *)s1;
	size_t off;

	/* A needle longer than the buffer can never match */
	if (n < needle_len)
		return (NULL);

	/* Try every starting offset that leaves room for the whole needle */
	for (off = 0; off <= n - needle_len; off++) {
		if (bcmp(hay + off, s2, needle_len) == 0)
			return (hay + off);
	}

	return (NULL);
}
7989 7993
7990 7994 /*
7991 7995 * The logic behind this function is detailed in the NFSv4 RFC in the
7992 7996 * SETCLIENTID operation description under IMPLEMENTATION. Refer to
7993 7997 * that section for explicit guidance to server behavior for
7994 7998 * SETCLIENTID.
7995 7999 */
7996 8000 void
7997 8001 rfs4_op_setclientid(nfs_argop4 *argop, nfs_resop4 *resop,
7998 8002 struct svc_req *req, struct compound_state *cs)
7999 8003 {
8000 8004 SETCLIENTID4args *args = &argop->nfs_argop4_u.opsetclientid;
8001 8005 SETCLIENTID4res *res = &resop->nfs_resop4_u.opsetclientid;
8002 8006 rfs4_client_t *cp, *newcp, *cp_confirmed, *cp_unconfirmed;
8003 8007 rfs4_clntip_t *ci;
8004 8008 bool_t create;
8005 8009 char *addr, *netid;
8006 8010 int len;
8007 8011
8008 8012 DTRACE_NFSV4_2(op__setclientid__start, struct compound_state *, cs,
8009 8013 SETCLIENTID4args *, args);
8010 8014 retry:
8011 8015 newcp = cp_confirmed = cp_unconfirmed = NULL;
8012 8016
8013 8017 /*
8014 8018 * Save the caller's IP address
8015 8019 */
8016 8020 args->client.cl_addr =
8017 8021 (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
8018 8022
8019 8023 /*
8020 8024 * Record if it is a Solaris client that cannot handle referrals.
8021 8025 */
8022 8026 if (memstr(args->client.id_val, "Solaris", args->client.id_len) &&
8023 8027 !memstr(args->client.id_val, "+referrals", args->client.id_len)) {
8024 8028 /* Add a "yes, it's downrev" record */
8025 8029 create = TRUE;
8026 8030 ci = rfs4_find_clntip(args->client.cl_addr, &create);
8027 8031 ASSERT(ci != NULL);
8028 8032 rfs4_dbe_rele(ci->ri_dbe);
8029 8033 } else {
8030 8034 /* Remove any previous record */
8031 8035 rfs4_invalidate_clntip(args->client.cl_addr);
8032 8036 }
8033 8037
8034 8038 /*
8035 8039 * In search of an EXISTING client matching the incoming
8036 8040 * request to establish a new client identifier at the server
8037 8041 */
8038 8042 create = TRUE;
8039 8043 cp = rfs4_findclient(&args->client, &create, NULL);
8040 8044
8041 8045 /* Should never happen */
8042 8046 ASSERT(cp != NULL);
8043 8047
8044 8048 if (cp == NULL) {
8045 8049 *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8046 8050 goto out;
8047 8051 }
8048 8052
8049 8053 /*
8050 8054 * Easiest case. Client identifier is newly created and is
8051 8055 * unconfirmed. Also note that for this case, no other
8052 8056 * entries exist for the client identifier. Nothing else to
8053 8057 * check. Just setup the response and respond.
8054 8058 */
8055 8059 if (create) {
8056 8060 *cs->statusp = res->status = NFS4_OK;
8057 8061 res->SETCLIENTID4res_u.resok4.clientid = cp->rc_clientid;
8058 8062 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8059 8063 cp->rc_confirm_verf;
8060 8064 /* Setup callback information; CB_NULL confirmation later */
8061 8065 rfs4_client_setcb(cp, &args->callback, args->callback_ident);
8062 8066
8063 8067 rfs4_client_rele(cp);
8064 8068 goto out;
8065 8069 }
8066 8070
8067 8071 /*
8068 8072 * An existing, confirmed client may exist but it may not have
8069 8073 * been active for at least one lease period. If so, then
8070 8074 * "close" the client and create a new client identifier
8071 8075 */
8072 8076 if (rfs4_lease_expired(cp)) {
8073 8077 rfs4_client_close(cp);
8074 8078 goto retry;
8075 8079 }
8076 8080
8077 8081 if (cp->rc_need_confirm == TRUE)
8078 8082 cp_unconfirmed = cp;
8079 8083 else
8080 8084 cp_confirmed = cp;
8081 8085
8082 8086 cp = NULL;
8083 8087
8084 8088 /*
8085 8089 * We have a confirmed client, now check for an
8086 8090 * unconfimred entry
8087 8091 */
8088 8092 if (cp_confirmed) {
8089 8093 /* If creds don't match then client identifier is inuse */
8090 8094 if (!creds_ok(cp_confirmed->rc_cr_set, req, cs)) {
8091 8095 rfs4_cbinfo_t *cbp;
8092 8096 /*
8093 8097 * Some one else has established this client
8094 8098 * id. Try and say * who they are. We will use
8095 8099 * the call back address supplied by * the
8096 8100 * first client.
8097 8101 */
8098 8102 *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8099 8103
8100 8104 addr = netid = NULL;
8101 8105
8102 8106 cbp = &cp_confirmed->rc_cbinfo;
8103 8107 if (cbp->cb_callback.cb_location.r_addr &&
8104 8108 cbp->cb_callback.cb_location.r_netid) {
8105 8109 cb_client4 *cbcp = &cbp->cb_callback;
8106 8110
8107 8111 len = strlen(cbcp->cb_location.r_addr)+1;
8108 8112 addr = kmem_alloc(len, KM_SLEEP);
8109 8113 bcopy(cbcp->cb_location.r_addr, addr, len);
8110 8114 len = strlen(cbcp->cb_location.r_netid)+1;
8111 8115 netid = kmem_alloc(len, KM_SLEEP);
8112 8116 bcopy(cbcp->cb_location.r_netid, netid, len);
8113 8117 }
8114 8118
8115 8119 res->SETCLIENTID4res_u.client_using.r_addr = addr;
8116 8120 res->SETCLIENTID4res_u.client_using.r_netid = netid;
8117 8121
8118 8122 rfs4_client_rele(cp_confirmed);
8119 8123 }
8120 8124
8121 8125 /*
8122 8126 * Confirmed, creds match, and verifier matches; must
8123 8127 * be an update of the callback info
8124 8128 */
8125 8129 if (cp_confirmed->rc_nfs_client.verifier ==
8126 8130 args->client.verifier) {
8127 8131 /* Setup callback information */
8128 8132 rfs4_client_setcb(cp_confirmed, &args->callback,
8129 8133 args->callback_ident);
8130 8134
8131 8135 /* everything okay -- move ahead */
8132 8136 *cs->statusp = res->status = NFS4_OK;
8133 8137 res->SETCLIENTID4res_u.resok4.clientid =
8134 8138 cp_confirmed->rc_clientid;
8135 8139
8136 8140 /* update the confirm_verifier and return it */
8137 8141 rfs4_client_scv_next(cp_confirmed);
8138 8142 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8139 8143 cp_confirmed->rc_confirm_verf;
8140 8144
8141 8145 rfs4_client_rele(cp_confirmed);
8142 8146 goto out;
8143 8147 }
8144 8148
8145 8149 /*
8146 8150 * Creds match but the verifier doesn't. Must search
8147 8151 * for an unconfirmed client that would be replaced by
8148 8152 * this request.
8149 8153 */
8150 8154 create = FALSE;
8151 8155 cp_unconfirmed = rfs4_findclient(&args->client, &create,
8152 8156 cp_confirmed);
8153 8157 }
8154 8158
8155 8159 /*
8156 8160 * At this point, we have taken care of the brand new client
8157 8161 * struct, INUSE case, update of an existing, and confirmed
8158 8162 * client struct.
8159 8163 */
8160 8164
8161 8165 /*
8162 8166 * check to see if things have changed while we originally
8163 8167 * picked up the client struct. If they have, then return and
8164 8168 * retry the processing of this SETCLIENTID request.
8165 8169 */
8166 8170 if (cp_unconfirmed) {
8167 8171 rfs4_dbe_lock(cp_unconfirmed->rc_dbe);
8168 8172 if (!cp_unconfirmed->rc_need_confirm) {
8169 8173 rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8170 8174 rfs4_client_rele(cp_unconfirmed);
8171 8175 if (cp_confirmed)
8172 8176 rfs4_client_rele(cp_confirmed);
8173 8177 goto retry;
8174 8178 }
8175 8179 /* do away with the old unconfirmed one */
8176 8180 rfs4_dbe_invalidate(cp_unconfirmed->rc_dbe);
8177 8181 rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8178 8182 rfs4_client_rele(cp_unconfirmed);
8179 8183 cp_unconfirmed = NULL;
8180 8184 }
8181 8185
8182 8186 /*
8183 8187 * This search will temporarily hide the confirmed client
8184 8188 * struct while a new client struct is created as the
8185 8189 * unconfirmed one.
8186 8190 */
8187 8191 create = TRUE;
8188 8192 newcp = rfs4_findclient(&args->client, &create, cp_confirmed);
8189 8193
8190 8194 ASSERT(newcp != NULL);
8191 8195
8192 8196 if (newcp == NULL) {
8193 8197 *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8194 8198 rfs4_client_rele(cp_confirmed);
8195 8199 goto out;
8196 8200 }
8197 8201
8198 8202 /*
8199 8203 * If one was not created, then a similar request must be in
8200 8204 * process so release and start over with this one
8201 8205 */
8202 8206 if (create != TRUE) {
8203 8207 rfs4_client_rele(newcp);
8204 8208 if (cp_confirmed)
8205 8209 rfs4_client_rele(cp_confirmed);
8206 8210 goto retry;
8207 8211 }
8208 8212
8209 8213 *cs->statusp = res->status = NFS4_OK;
8210 8214 res->SETCLIENTID4res_u.resok4.clientid = newcp->rc_clientid;
8211 8215 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8212 8216 newcp->rc_confirm_verf;
8213 8217 /* Setup callback information; CB_NULL confirmation later */
8214 8218 rfs4_client_setcb(newcp, &args->callback, args->callback_ident);
8215 8219
8216 8220 newcp->rc_cp_confirmed = cp_confirmed;
8217 8221
8218 8222 rfs4_client_rele(newcp);
8219 8223
8220 8224 out:
8221 8225 DTRACE_NFSV4_2(op__setclientid__done, struct compound_state *, cs,
8222 8226 SETCLIENTID4res *, res);
8223 8227 }
8224 8228
8225 8229 /*ARGSUSED*/
8226 8230 void
8227 8231 rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
8228 8232 struct svc_req *req, struct compound_state *cs)
8229 8233 {
8230 8234 SETCLIENTID_CONFIRM4args *args =
8231 8235 &argop->nfs_argop4_u.opsetclientid_confirm;
8232 8236 SETCLIENTID_CONFIRM4res *res =
8233 8237 &resop->nfs_resop4_u.opsetclientid_confirm;
8234 8238 rfs4_client_t *cp, *cptoclose = NULL;
8235 8239 nfs4_srv_t *nsrv4;
8236 8240
8237 8241 DTRACE_NFSV4_2(op__setclientid__confirm__start,
8238 8242 struct compound_state *, cs,
8239 8243 SETCLIENTID_CONFIRM4args *, args);
8240 8244
8241 8245 nsrv4 = nfs4_get_srv();
8242 8246 *cs->statusp = res->status = NFS4_OK;
8243 8247
8244 8248 cp = rfs4_findclient_by_id(args->clientid, TRUE);
8245 8249
8246 8250 if (cp == NULL) {
8247 8251 *cs->statusp = res->status =
8248 8252 rfs4_check_clientid(&args->clientid, 1);
8249 8253 goto out;
8250 8254 }
8251 8255
8252 8256 if (!creds_ok(cp, req, cs)) {
8253 8257 *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8254 8258 rfs4_client_rele(cp);
8255 8259 goto out;
8256 8260 }
8257 8261
8258 8262 /* If the verifier doesn't match, the record doesn't match */
8259 8263 if (cp->rc_confirm_verf != args->setclientid_confirm) {
8260 8264 *cs->statusp = res->status = NFS4ERR_STALE_CLIENTID;
8261 8265 rfs4_client_rele(cp);
8262 8266 goto out;
8263 8267 }
8264 8268
8265 8269 rfs4_dbe_lock(cp->rc_dbe);
8266 8270 cp->rc_need_confirm = FALSE;
8267 8271 if (cp->rc_cp_confirmed) {
8268 8272 cptoclose = cp->rc_cp_confirmed;
8269 8273 cptoclose->rc_ss_remove = 1;
8270 8274 cp->rc_cp_confirmed = NULL;
8271 8275 }
8272 8276
8273 8277 /*
8274 8278 * Update the client's associated server instance, if it's changed
8275 8279 * since the client was created.
8276 8280 */
8277 8281 if (rfs4_servinst(cp) != nsrv4->nfs4_cur_servinst)
8278 8282 rfs4_servinst_assign(nsrv4, cp, nsrv4->nfs4_cur_servinst);
8279 8283
8280 8284 /*
8281 8285 * Record clientid in stable storage.
8282 8286 * Must be done after server instance has been assigned.
8283 8287 */
8284 8288 rfs4_ss_clid(nsrv4, cp);
8285 8289
8286 8290 rfs4_dbe_unlock(cp->rc_dbe);
8287 8291
8288 8292 if (cptoclose)
8289 8293 /* don't need to rele, client_close does it */
8290 8294 rfs4_client_close(cptoclose);
8291 8295
8292 8296 /* If needed, initiate CB_NULL call for callback path */
8293 8297 rfs4_deleg_cb_check(cp);
8294 8298 rfs4_update_lease(cp);
8295 8299
8296 8300 /*
8297 8301 * Check to see if client can perform reclaims
8298 8302 */
8299 8303 rfs4_ss_chkclid(nsrv4, cp);
8300 8304
8301 8305 rfs4_client_rele(cp);
8302 8306
8303 8307 out:
8304 8308 DTRACE_NFSV4_2(op__setclientid__confirm__done,
8305 8309 struct compound_state *, cs,
8306 8310 SETCLIENTID_CONFIRM4 *, res);
8307 8311 }
8308 8312
8309 8313
8310 8314 /*ARGSUSED*/
8311 8315 void
8312 8316 rfs4_op_close(nfs_argop4 *argop, nfs_resop4 *resop,
8313 8317 struct svc_req *req, struct compound_state *cs)
8314 8318 {
8315 8319 CLOSE4args *args = &argop->nfs_argop4_u.opclose;
8316 8320 CLOSE4res *resp = &resop->nfs_resop4_u.opclose;
8317 8321 rfs4_state_t *sp;
8318 8322 nfsstat4 status;
8319 8323
8320 8324 DTRACE_NFSV4_2(op__close__start, struct compound_state *, cs,
8321 8325 CLOSE4args *, args);
8322 8326
8323 8327 if (cs->vp == NULL) {
8324 8328 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8325 8329 goto out;
8326 8330 }
8327 8331
8328 8332 status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_INVALID);
8329 8333 if (status != NFS4_OK) {
8330 8334 *cs->statusp = resp->status = status;
8331 8335 goto out;
8332 8336 }
8333 8337
8334 8338 /* Ensure specified filehandle matches */
8335 8339 if (cs->vp != sp->rs_finfo->rf_vp) {
8336 8340 rfs4_state_rele(sp);
8337 8341 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8338 8342 goto out;
8339 8343 }
8340 8344
8341 8345 /* hold off other access to open_owner while we tinker */
8342 8346 rfs4_sw_enter(&sp->rs_owner->ro_sw);
8343 8347
8344 8348 switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
8345 8349 case NFS4_CHECK_STATEID_OKAY:
8346 8350 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8347 8351 resop) != NFS4_CHKSEQ_OKAY) {
8348 8352 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8349 8353 goto end;
8350 8354 }
8351 8355 break;
8352 8356 case NFS4_CHECK_STATEID_OLD:
8353 8357 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8354 8358 goto end;
8355 8359 case NFS4_CHECK_STATEID_BAD:
8356 8360 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8357 8361 goto end;
8358 8362 case NFS4_CHECK_STATEID_EXPIRED:
8359 8363 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8360 8364 goto end;
8361 8365 case NFS4_CHECK_STATEID_CLOSED:
8362 8366 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8363 8367 goto end;
8364 8368 case NFS4_CHECK_STATEID_UNCONFIRMED:
8365 8369 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8366 8370 goto end;
8367 8371 case NFS4_CHECK_STATEID_REPLAY:
8368 8372 /* Check the sequence id for the open owner */
8369 8373 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8370 8374 resop)) {
8371 8375 case NFS4_CHKSEQ_OKAY:
8372 8376 /*
8373 8377 * This is replayed stateid; if seqid matches
8374 8378 * next expected, then client is using wrong seqid.
8375 8379 */
8376 8380 /* FALL THROUGH */
8377 8381 case NFS4_CHKSEQ_BAD:
8378 8382 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8379 8383 goto end;
8380 8384 case NFS4_CHKSEQ_REPLAY:
8381 8385 /*
8382 8386 * Note this case is the duplicate case so
8383 8387 * resp->status is already set.
8384 8388 */
8385 8389 *cs->statusp = resp->status;
8386 8390 rfs4_update_lease(sp->rs_owner->ro_client);
8387 8391 goto end;
8388 8392 }
8389 8393 break;
8390 8394 default:
8391 8395 ASSERT(FALSE);
8392 8396 break;
8393 8397 }
8394 8398
8395 8399 rfs4_dbe_lock(sp->rs_dbe);
8396 8400
8397 8401 /* Update the stateid. */
8398 8402 next_stateid(&sp->rs_stateid);
8399 8403 resp->open_stateid = sp->rs_stateid.stateid;
8400 8404
8401 8405 rfs4_dbe_unlock(sp->rs_dbe);
8402 8406
8403 8407 rfs4_update_lease(sp->rs_owner->ro_client);
8404 8408 rfs4_update_open_sequence(sp->rs_owner);
8405 8409 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8406 8410
8407 8411 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
8408 8412
8409 8413 *cs->statusp = resp->status = status;
8410 8414
8411 8415 end:
8412 8416 rfs4_sw_exit(&sp->rs_owner->ro_sw);
8413 8417 rfs4_state_rele(sp);
8414 8418 out:
8415 8419 DTRACE_NFSV4_2(op__close__done, struct compound_state *, cs,
8416 8420 CLOSE4res *, resp);
8417 8421 }
8418 8422
8419 8423 /*
8420 8424 * Manage the counts on the file struct and close all file locks
8421 8425 */
8422 8426 /*ARGSUSED*/
8423 8427 void
8424 8428 rfs4_release_share_lock_state(rfs4_state_t *sp, cred_t *cr,
8425 8429 bool_t close_of_client)
8426 8430 {
8427 8431 rfs4_file_t *fp = sp->rs_finfo;
8428 8432 rfs4_lo_state_t *lsp;
8429 8433 int fflags = 0;
8430 8434
8431 8435 /*
8432 8436 * If this call is part of the larger closing down of client
8433 8437 * state then it is just easier to release all locks
8434 8438 * associated with this client instead of going through each
8435 8439 * individual file and cleaning locks there.
8436 8440 */
8437 8441 if (close_of_client) {
8438 8442 if (sp->rs_owner->ro_client->rc_unlksys_completed == FALSE &&
8439 8443 !list_is_empty(&sp->rs_lostatelist) &&
8440 8444 sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID) {
8441 8445 /* Is the PxFS kernel module loaded? */
8442 8446 if (lm_remove_file_locks != NULL) {
8443 8447 int new_sysid;
8444 8448
8445 8449 /* Encode the cluster nodeid in new sysid */
8446 8450 new_sysid = sp->rs_owner->ro_client->rc_sysidt;
8447 8451 lm_set_nlmid_flk(&new_sysid);
8448 8452
8449 8453 /*
8450 8454 * This PxFS routine removes file locks for a
8451 8455 * client over all nodes of a cluster.
8452 8456 */
8453 8457 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
8454 8458 "lm_remove_file_locks(sysid=0x%x)\n",
8455 8459 new_sysid));
8456 8460 (*lm_remove_file_locks)(new_sysid);
8457 8461 } else {
8458 8462 struct flock64 flk;
8459 8463
8460 8464 /* Release all locks for this client */
8461 8465 flk.l_type = F_UNLKSYS;
8462 8466 flk.l_whence = 0;
8463 8467 flk.l_start = 0;
8464 8468 flk.l_len = 0;
8465 8469 flk.l_sysid =
8466 8470 sp->rs_owner->ro_client->rc_sysidt;
8467 8471 flk.l_pid = 0;
8468 8472 (void) VOP_FRLOCK(sp->rs_finfo->rf_vp, F_SETLK,
8469 8473 &flk, F_REMOTELOCK | FREAD | FWRITE,
8470 8474 (u_offset_t)0, NULL, CRED(), NULL);
8471 8475 }
8472 8476
8473 8477 sp->rs_owner->ro_client->rc_unlksys_completed = TRUE;
8474 8478 }
8475 8479 }
8476 8480
8477 8481 /*
8478 8482 * Release all locks on this file by this lock owner or at
8479 8483 * least mark the locks as having been released
8480 8484 */
8481 8485 for (lsp = list_head(&sp->rs_lostatelist); lsp != NULL;
8482 8486 lsp = list_next(&sp->rs_lostatelist, lsp)) {
8483 8487 lsp->rls_locks_cleaned = TRUE;
8484 8488
8485 8489 /* Was this already taken care of above? */
8486 8490 if (!close_of_client &&
8487 8491 sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8488 8492 (void) cleanlocks(sp->rs_finfo->rf_vp,
8489 8493 lsp->rls_locker->rl_pid,
8490 8494 lsp->rls_locker->rl_client->rc_sysidt);
8491 8495 }
8492 8496
8493 8497 /*
8494 8498 * Release any shrlocks associated with this open state ID.
8495 8499 * This must be done before the rfs4_state gets marked closed.
8496 8500 */
8497 8501 if (sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8498 8502 (void) rfs4_unshare(sp);
8499 8503
8500 8504 if (sp->rs_open_access) {
8501 8505 rfs4_dbe_lock(fp->rf_dbe);
8502 8506
8503 8507 /*
8504 8508 * Decrement the count for each access and deny bit that this
8505 8509 * state has contributed to the file.
8506 8510 * If the file counts go to zero
8507 8511 * clear the appropriate bit in the appropriate mask.
8508 8512 */
8509 8513 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) {
8510 8514 fp->rf_access_read--;
8511 8515 fflags |= FREAD;
8512 8516 if (fp->rf_access_read == 0)
8513 8517 fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
8514 8518 }
8515 8519 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) {
8516 8520 fp->rf_access_write--;
8517 8521 fflags |= FWRITE;
8518 8522 if (fp->rf_access_write == 0)
8519 8523 fp->rf_share_access &=
8520 8524 ~OPEN4_SHARE_ACCESS_WRITE;
8521 8525 }
8522 8526 if (sp->rs_open_deny & OPEN4_SHARE_DENY_READ) {
8523 8527 fp->rf_deny_read--;
8524 8528 if (fp->rf_deny_read == 0)
8525 8529 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
8526 8530 }
8527 8531 if (sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) {
8528 8532 fp->rf_deny_write--;
8529 8533 if (fp->rf_deny_write == 0)
8530 8534 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
8531 8535 }
8532 8536
8533 8537 (void) VOP_CLOSE(fp->rf_vp, fflags, 1, (offset_t)0, cr, NULL);
8534 8538
8535 8539 rfs4_dbe_unlock(fp->rf_dbe);
8536 8540
8537 8541 sp->rs_open_access = 0;
8538 8542 sp->rs_open_deny = 0;
8539 8543 }
8540 8544 }
8541 8545
8542 8546 /*
8543 8547 * lock_denied: Fill in a LOCK4deneid structure given an flock64 structure.
8544 8548 */
8545 8549 static nfsstat4
8546 8550 lock_denied(LOCK4denied *dp, struct flock64 *flk)
8547 8551 {
8548 8552 rfs4_lockowner_t *lo;
8549 8553 rfs4_client_t *cp;
8550 8554 uint32_t len;
8551 8555
8552 8556 lo = rfs4_findlockowner_by_pid(flk->l_pid);
8553 8557 if (lo != NULL) {
8554 8558 cp = lo->rl_client;
8555 8559 if (rfs4_lease_expired(cp)) {
8556 8560 rfs4_lockowner_rele(lo);
8557 8561 rfs4_dbe_hold(cp->rc_dbe);
8558 8562 rfs4_client_close(cp);
8559 8563 return (NFS4ERR_EXPIRED);
8560 8564 }
8561 8565 dp->owner.clientid = lo->rl_owner.clientid;
8562 8566 len = lo->rl_owner.owner_len;
8563 8567 dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8564 8568 bcopy(lo->rl_owner.owner_val, dp->owner.owner_val, len);
8565 8569 dp->owner.owner_len = len;
8566 8570 rfs4_lockowner_rele(lo);
8567 8571 goto finish;
8568 8572 }
8569 8573
8570 8574 /*
8571 8575 * Its not a NFS4 lock. We take advantage that the upper 32 bits
8572 8576 * of the client id contain the boot time for a NFS4 lock. So we
8573 8577 * fabricate and identity by setting clientid to the sysid, and
8574 8578 * the lock owner to the pid.
8575 8579 */
8576 8580 dp->owner.clientid = flk->l_sysid;
8577 8581 len = sizeof (pid_t);
8578 8582 dp->owner.owner_len = len;
8579 8583 dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8580 8584 bcopy(&flk->l_pid, dp->owner.owner_val, len);
8581 8585 finish:
8582 8586 dp->offset = flk->l_start;
8583 8587 dp->length = flk->l_len;
8584 8588
8585 8589 if (flk->l_type == F_RDLCK)
8586 8590 dp->locktype = READ_LT;
8587 8591 else if (flk->l_type == F_WRLCK)
8588 8592 dp->locktype = WRITE_LT;
8589 8593 else
8590 8594 return (NFS4ERR_INVAL); /* no mapping from POSIX ltype to v4 */
8591 8595
8592 8596 return (NFS4_OK);
8593 8597 }
8594 8598
8595 8599 /*
8596 8600 * The NFSv4.0 LOCK operation does not support the blocking lock (at the
8597 8601 * NFSv4.0 protocol level) so the client needs to resend the LOCK request in a
8598 8602 * case the lock is denied by the NFSv4.0 server. NFSv4.0 clients are prepared
8599 8603 * for that (obviously); they are sending the LOCK requests with some delays
8600 8604 * between the attempts. See nfs4frlock() and nfs4_block_and_wait() for the
8601 8605 * locking and delay implementation at the client side.
8602 8606 *
8603 8607 * To make the life of the clients easier, the NFSv4.0 server tries to do some
8604 8608 * fast retries on its own (the for loop below) in a hope the lock will be
8605 8609 * available soon. And if not, the client won't need to resend the LOCK
8606 8610 * requests so fast to check the lock availability. This basically saves some
8607 8611 * network traffic and tries to make sure the client gets the lock ASAP.
8608 8612 */
8609 8613 static int
8610 8614 setlock(vnode_t *vp, struct flock64 *flock, int flag, cred_t *cred)
8611 8615 {
8612 8616 int error;
8613 8617 struct flock64 flk;
8614 8618 int i;
8615 8619 clock_t delaytime;
8616 8620 int cmd;
8617 8621 int spin_cnt = 0;
8618 8622
8619 8623 cmd = nbl_need_check(vp) ? F_SETLK_NBMAND : F_SETLK;
8620 8624 retry:
8621 8625 delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
8622 8626
8623 8627 for (i = 0; i < rfs4_maxlock_tries; i++) {
8624 8628 LOCK_PRINT(rfs4_debug, "setlock", cmd, flock);
8625 8629 error = VOP_FRLOCK(vp, cmd,
8626 8630 flock, flag, (u_offset_t)0, NULL, cred, NULL);
8627 8631
8628 8632 if (error != EAGAIN && error != EACCES)
8629 8633 break;
8630 8634
8631 8635 if (i < rfs4_maxlock_tries - 1) {
8632 8636 delay(delaytime);
8633 8637 delaytime *= 2;
8634 8638 }
8635 8639 }
8636 8640
8637 8641 if (error == EAGAIN || error == EACCES) {
8638 8642 /* Get the owner of the lock */
8639 8643 flk = *flock;
8640 8644 LOCK_PRINT(rfs4_debug, "setlock", F_GETLK, &flk);
8641 8645 if (VOP_FRLOCK(vp, F_GETLK, &flk, flag, 0, NULL, cred,
8642 8646 NULL) == 0) {
8643 8647 /*
8644 8648 * There's a race inherent in the current VOP_FRLOCK
8645 8649 * design where:
8646 8650 * a: "other guy" takes a lock that conflicts with a
8647 8651 * lock we want
8648 8652 * b: we attempt to take our lock (non-blocking) and
8649 8653 * the attempt fails.
8650 8654 * c: "other guy" releases the conflicting lock
8651 8655 * d: we ask what lock conflicts with the lock we want,
8652 8656 * getting F_UNLCK (no lock blocks us)
8653 8657 *
8654 8658 * If we retry the non-blocking lock attempt in this
8655 8659 * case (restart at step 'b') there's some possibility
8656 8660 * that many such attempts might fail. However a test
8657 8661 * designed to actually provoke this race shows that
8658 8662 * the vast majority of cases require no retry, and
8659 8663 * only a few took as many as three retries. Here's
8660 8664 * the test outcome:
8661 8665 *
8662 8666 * number of retries how many times we needed
8663 8667 * that many retries
8664 8668 * 0 79461
8665 8669 * 1 862
8666 8670 * 2 49
8667 8671 * 3 5
8668 8672 *
8669 8673 * Given those empirical results, we arbitrarily limit
8670 8674 * the retry count to ten.
8671 8675 *
8672 8676 * If we actually make to ten retries and give up,
8673 8677 * nothing catastrophic happens, but we're unable to
8674 8678 * return the information about the conflicting lock to
8675 8679 * the NFS client. That's an acceptable trade off vs.
8676 8680 * letting this retry loop run forever.
8677 8681 */
8678 8682 if (flk.l_type == F_UNLCK) {
8679 8683 if (spin_cnt++ < 10) {
8680 8684 /* No longer locked, retry */
8681 8685 goto retry;
8682 8686 }
8683 8687 } else {
8684 8688 *flock = flk;
8685 8689 LOCK_PRINT(rfs4_debug, "setlock(blocking lock)",
8686 8690 F_GETLK, &flk);
8687 8691 }
8688 8692 }
8689 8693 }
8690 8694
8691 8695 return (error);
8692 8696 }
8693 8697
8694 8698 /*ARGSUSED*/
8695 8699 static nfsstat4
8696 8700 rfs4_do_lock(rfs4_lo_state_t *lsp, nfs_lock_type4 locktype,
8697 8701 offset4 offset, length4 length, cred_t *cred, nfs_resop4 *resop)
8698 8702 {
8699 8703 nfsstat4 status;
8700 8704 rfs4_lockowner_t *lo = lsp->rls_locker;
8701 8705 rfs4_state_t *sp = lsp->rls_state;
8702 8706 struct flock64 flock;
8703 8707 int16_t ltype;
8704 8708 int flag;
8705 8709 int error;
8706 8710 sysid_t sysid;
8707 8711 LOCK4res *lres;
8708 8712 vnode_t *vp;
8709 8713
8710 8714 if (rfs4_lease_expired(lo->rl_client)) {
8711 8715 return (NFS4ERR_EXPIRED);
8712 8716 }
8713 8717
8714 8718 if ((status = rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
8715 8719 return (status);
8716 8720
8717 8721 /* Check for zero length. To lock to end of file use all ones for V4 */
8718 8722 if (length == 0)
8719 8723 return (NFS4ERR_INVAL);
8720 8724 else if (length == (length4)(~0))
8721 8725 length = 0; /* Posix to end of file */
8722 8726
8723 8727 retry:
8724 8728 rfs4_dbe_lock(sp->rs_dbe);
8725 8729 if (sp->rs_closed == TRUE) {
8726 8730 rfs4_dbe_unlock(sp->rs_dbe);
8727 8731 return (NFS4ERR_OLD_STATEID);
8728 8732 }
8729 8733
8730 8734 if (resop->resop != OP_LOCKU) {
8731 8735 switch (locktype) {
8732 8736 case READ_LT:
8733 8737 case READW_LT:
8734 8738 if ((sp->rs_share_access
8735 8739 & OPEN4_SHARE_ACCESS_READ) == 0) {
8736 8740 rfs4_dbe_unlock(sp->rs_dbe);
8737 8741
8738 8742 return (NFS4ERR_OPENMODE);
8739 8743 }
8740 8744 ltype = F_RDLCK;
8741 8745 break;
8742 8746 case WRITE_LT:
8743 8747 case WRITEW_LT:
8744 8748 if ((sp->rs_share_access
8745 8749 & OPEN4_SHARE_ACCESS_WRITE) == 0) {
8746 8750 rfs4_dbe_unlock(sp->rs_dbe);
8747 8751
8748 8752 return (NFS4ERR_OPENMODE);
8749 8753 }
8750 8754 ltype = F_WRLCK;
8751 8755 break;
8752 8756 }
8753 8757 } else
8754 8758 ltype = F_UNLCK;
8755 8759
8756 8760 flock.l_type = ltype;
8757 8761 flock.l_whence = 0; /* SEEK_SET */
8758 8762 flock.l_start = offset;
8759 8763 flock.l_len = length;
8760 8764 flock.l_sysid = sysid;
8761 8765 flock.l_pid = lsp->rls_locker->rl_pid;
8762 8766
8763 8767 /* Note that length4 is uint64_t but l_len and l_start are off64_t */
8764 8768 if (flock.l_len < 0 || flock.l_start < 0) {
8765 8769 rfs4_dbe_unlock(sp->rs_dbe);
8766 8770 return (NFS4ERR_INVAL);
8767 8771 }
8768 8772
8769 8773 /*
8770 8774 * N.B. FREAD has the same value as OPEN4_SHARE_ACCESS_READ and
8771 8775 * FWRITE has the same value as OPEN4_SHARE_ACCESS_WRITE.
8772 8776 */
8773 8777 flag = (int)sp->rs_share_access | F_REMOTELOCK;
8774 8778
8775 8779 vp = sp->rs_finfo->rf_vp;
8776 8780 VN_HOLD(vp);
8777 8781
8778 8782 /*
8779 8783 * We need to unlock sp before we call the underlying filesystem to
8780 8784 * acquire the file lock.
8781 8785 */
8782 8786 rfs4_dbe_unlock(sp->rs_dbe);
8783 8787
8784 8788 error = setlock(vp, &flock, flag, cred);
8785 8789
8786 8790 /*
8787 8791 * Make sure the file is still open. In a case the file was closed in
8788 8792 * the meantime, clean the lock we acquired using the setlock() call
8789 8793 * above, and return the appropriate error.
8790 8794 */
8791 8795 rfs4_dbe_lock(sp->rs_dbe);
8792 8796 if (sp->rs_closed == TRUE) {
8793 8797 cleanlocks(vp, lsp->rls_locker->rl_pid, sysid);
8794 8798 rfs4_dbe_unlock(sp->rs_dbe);
8795 8799
8796 8800 VN_RELE(vp);
8797 8801
8798 8802 return (NFS4ERR_OLD_STATEID);
8799 8803 }
8800 8804 rfs4_dbe_unlock(sp->rs_dbe);
8801 8805
8802 8806 VN_RELE(vp);
8803 8807
8804 8808 if (error == 0) {
8805 8809 rfs4_dbe_lock(lsp->rls_dbe);
8806 8810 next_stateid(&lsp->rls_lockid);
8807 8811 rfs4_dbe_unlock(lsp->rls_dbe);
8808 8812 }
8809 8813
8810 8814 /*
8811 8815 * N.B. We map error values to nfsv4 errors. This is differrent
8812 8816 * than puterrno4 routine.
8813 8817 */
8814 8818 switch (error) {
8815 8819 case 0:
8816 8820 status = NFS4_OK;
8817 8821 break;
8818 8822 case EAGAIN:
8819 8823 case EACCES: /* Old value */
8820 8824 /* Can only get here if op is OP_LOCK */
8821 8825 ASSERT(resop->resop == OP_LOCK);
8822 8826 lres = &resop->nfs_resop4_u.oplock;
8823 8827 status = NFS4ERR_DENIED;
8824 8828 if (lock_denied(&lres->LOCK4res_u.denied, &flock)
8825 8829 == NFS4ERR_EXPIRED)
8826 8830 goto retry;
8827 8831 break;
8828 8832 case ENOLCK:
8829 8833 status = NFS4ERR_DELAY;
8830 8834 break;
8831 8835 case EOVERFLOW:
8832 8836 status = NFS4ERR_INVAL;
8833 8837 break;
8834 8838 case EINVAL:
8835 8839 status = NFS4ERR_NOTSUPP;
8836 8840 break;
8837 8841 default:
8838 8842 status = NFS4ERR_SERVERFAULT;
8839 8843 break;
8840 8844 }
8841 8845
8842 8846 return (status);
8843 8847 }
8844 8848
/*
 * rfs4_op_lock: handler for the NFSv4 LOCK operation.
 *
 * The arguments are taken from argop->nfs_argop4_u.oplock and the result is
 * written to resop->nfs_resop4_u.oplock; the status is stored both in the
 * result and in *cs->statusp.  cs->vp must be set (NFS4ERR_NOFILEHANDLE
 * otherwise).
 *
 * Two cases are handled, selected by args->locker.new_lock_owner:
 *  - new lock owner: the open stateid is used to look up the open state
 *    (sp), access to the open owner is serialized via ro_sw, the open
 *    stateid/seqid are validated, and the lockowner and its lock state
 *    (lsp) are looked up or created.  A duplicate request is answered from
 *    the reply saved in lsp->rls_reply.
 *  - existing lock owner: the lock stateid is used to look up lsp and the
 *    lock stateid/seqid are validated.
 *
 * After that the object type and grace-period rules are checked and the
 * actual locking is delegated to rfs4_do_lock().
 *
 * Labels: "out" records the final status and saves the response for replay
 * detection; "end" only drops the serialization locks and references.
 */
8845 8849 /*ARGSUSED*/
8846 8850 void
8847 8851 rfs4_op_lock(nfs_argop4 *argop, nfs_resop4 *resop,
8848 8852 struct svc_req *req, struct compound_state *cs)
8849 8853 {
8850 8854 LOCK4args *args = &argop->nfs_argop4_u.oplock;
8851 8855 LOCK4res *resp = &resop->nfs_resop4_u.oplock;
8852 8856 nfsstat4 status;
8853 8857 stateid4 *stateid;
8854 8858 rfs4_lockowner_t *lo;
8855 8859 rfs4_client_t *cp;
8856 8860 rfs4_state_t *sp = NULL;
8857 8861 rfs4_lo_state_t *lsp = NULL;
8858 8862 bool_t ls_sw_held = FALSE;
8859 8863 bool_t create = TRUE;
8860 8864 bool_t lcreate = TRUE;
8861 8865 bool_t dup_lock = FALSE;
8862 8866 int rc;
8863 8867
8864 8868 DTRACE_NFSV4_2(op__lock__start, struct compound_state *, cs,
8865 8869 LOCK4args *, args);
8866 8870
8867 8871 if (cs->vp == NULL) {
8868 8872 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8869 8873 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8870 8874 cs, LOCK4res *, resp);
8871 8875 return;
8872 8876 }
8873 8877
8874 8878 if (args->locker.new_lock_owner) {
8875 8879 /* Create a new lockowner for this instance */
8876 8880 open_to_lock_owner4 *olo = &args->locker.locker4_u.open_owner;
8877 8881
8878 8882 NFS4_DEBUG(rfs4_debug, (CE_NOTE, "Creating new lock owner"));
8879 8883
8880 8884 stateid = &olo->open_stateid;
8881 8885 status = rfs4_get_state(stateid, &sp, RFS4_DBS_VALID);
8882 8886 if (status != NFS4_OK) {
8883 8887 NFS4_DEBUG(rfs4_debug,
8884 8888 (CE_NOTE, "Get state failed in lock %d", status));
8885 8889 *cs->statusp = resp->status = status;
8886 8890 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8887 8891 cs, LOCK4res *, resp);
8888 8892 return;
8889 8893 }
8890 8894
8891 8895 /* Ensure specified filehandle matches */
8892 8896 if (cs->vp != sp->rs_finfo->rf_vp) {
8893 8897 rfs4_state_rele(sp);
8894 8898 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8895 8899 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8896 8900 cs, LOCK4res *, resp);
8897 8901 return;
8898 8902 }
8899 8903
8900 8904 /* hold off other access to open_owner while we tinker */
8901 8905 rfs4_sw_enter(&sp->rs_owner->ro_sw);
8902 8906
/*
 * From here on every exit goes through "end" (or "out"), which exits
 * ro_sw and releases sp.
 */
8903 8907 switch (rc = rfs4_check_stateid_seqid(sp, stateid)) {
8904 8908 case NFS4_CHECK_STATEID_OLD:
8905 8909 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8906 8910 goto end;
8907 8911 case NFS4_CHECK_STATEID_BAD:
8908 8912 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8909 8913 goto end;
8910 8914 case NFS4_CHECK_STATEID_EXPIRED:
8911 8915 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8912 8916 goto end;
8913 8917 case NFS4_CHECK_STATEID_UNCONFIRMED:
8914 8918 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8915 8919 goto end;
8916 8920 case NFS4_CHECK_STATEID_CLOSED:
8917 8921 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8918 8922 goto end;
8919 8923 case NFS4_CHECK_STATEID_OKAY:
8920 8924 case NFS4_CHECK_STATEID_REPLAY:
8921 8925 switch (rfs4_check_olo_seqid(olo->open_seqid,
8922 8926 sp->rs_owner, resop)) {
8923 8927 case NFS4_CHKSEQ_OKAY:
8924 8928 if (rc == NFS4_CHECK_STATEID_OKAY)
8925 8929 break;
8926 8930 /*
8927 8931 * This is replayed stateid; if seqid
8928 8932 * matches next expected, then client
8929 8933 * is using wrong seqid.
8930 8934 */
8931 8935 /* FALLTHROUGH */
8932 8936 case NFS4_CHKSEQ_BAD:
8933 8937 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8934 8938 goto end;
8935 8939 case NFS4_CHKSEQ_REPLAY:
8936 8940 /* This is a duplicate LOCK request */
8937 8941 dup_lock = TRUE;
8938 8942
8939 8943 /*
8940 8944 * For a duplicate we do not want to
8941 8945 * create a new lockowner as it should
8942 8946 * already exist.
8943 8947 * Turn off the lockowner create flag.
8944 8948 */
8945 8949 lcreate = FALSE;
8946 8950 }
8947 8951 break;
8948 8952 }
8949 8953
8950 8954 lo = rfs4_findlockowner(&olo->lock_owner, &lcreate);
8951 8955 if (lo == NULL) {
8952 8956 NFS4_DEBUG(rfs4_debug,
8953 8957 (CE_NOTE, "rfs4_op_lock: no lock owner"));
8954 8958 *cs->statusp = resp->status = NFS4ERR_RESOURCE;
8955 8959 goto end;
8956 8960 }
8957 8961
8958 8962 lsp = rfs4_findlo_state_by_owner(lo, sp, &create);
8959 8963 if (lsp == NULL) {
8960 8964 rfs4_update_lease(sp->rs_owner->ro_client);
8961 8965 /*
8962 8966 * Only update the open_seqid if this is not
8963 8967 * a duplicate request
8964 8968 */
8965 8969 if (dup_lock == FALSE) {
8966 8970 rfs4_update_open_sequence(sp->rs_owner);
8967 8971 }
8968 8972
8969 8973 NFS4_DEBUG(rfs4_debug,
8970 8974 (CE_NOTE, "rfs4_op_lock: no state"));
8971 8975 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
8972 8976 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8973 8977 rfs4_lockowner_rele(lo);
8974 8978 goto end;
8975 8979 }
8976 8980
8977 8981 /*
8978 8982 * This is the new_lock_owner branch and the client is
8979 8983 * supposed to be associating a new lock_owner with
8980 8984 * the open file at this point. If we find that a
8981 8985 * lock_owner/state association already exists and a
8982 8986 * successful LOCK request was returned to the client,
8983 8987 * an error is returned to the client since this is
8984 8988 * not appropriate. The client should be using the
8985 8989 * existing lock_owner branch.
8986 8990 */
8987 8991 if (dup_lock == FALSE && create == FALSE) {
8988 8992 if (lsp->rls_lock_completed == TRUE) {
8989 8993 *cs->statusp =
8990 8994 resp->status = NFS4ERR_BAD_SEQID;
8991 8995 rfs4_lockowner_rele(lo);
8992 8996 goto end;
8993 8997 }
8994 8998 }
8995 8999
8996 9000 rfs4_update_lease(sp->rs_owner->ro_client);
8997 9001
8998 9002 /*
8999 9003 * Only update the open_seqid if this is not
9000 9004 * a duplicate request
9001 9005 */
9002 9006 if (dup_lock == FALSE) {
9003 9007 rfs4_update_open_sequence(sp->rs_owner);
9004 9008 }
9005 9009
9006 9010 /*
9007 9011 * If this is a duplicate lock request, just copy the
9008 9012 * previously saved reply and return.
9009 9013 */
9010 9014 if (dup_lock == TRUE) {
9011 9015 /* verify that lock_seqid's match */
9012 9016 if (lsp->rls_seqid != olo->lock_seqid) {
9013 9017 NFS4_DEBUG(rfs4_debug,
9014 9018 (CE_NOTE, "rfs4_op_lock: Dup-Lock seqid bad"
9015 9019 "lsp->seqid=%d old->seqid=%d",
9016 9020 lsp->rls_seqid, olo->lock_seqid));
9017 9021 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9018 9022 } else {
9019 9023 rfs4_copy_reply(resop, &lsp->rls_reply);
9020 9024 /*
9021 9025 * Make sure to copy the just
9022 9026 * retrieved reply status into the
9023 9027 * overall compound status
9024 9028 */
9025 9029 *cs->statusp = resp->status;
9026 9030 }
9027 9031 rfs4_lockowner_rele(lo);
9028 9032 goto end;
9029 9033 }
9030 9034
9031 9035 rfs4_dbe_lock(lsp->rls_dbe);
9032 9036
9033 9037 /* Make sure to update the lock sequence id */
9034 9038 lsp->rls_seqid = olo->lock_seqid;
9035 9039
9036 9040 NFS4_DEBUG(rfs4_debug,
9037 9041 (CE_NOTE, "Lock seqid established as %d", lsp->rls_seqid));
9038 9042
9039 9043 /*
9040 9044 * This is used to signify the newly created lockowner
9041 9045 * stateid and its sequence number. The checks for
9042 9046 * sequence number and increment don't occur on the
9043 9047 * very first lock request for a lockowner.
9044 9048 */
9045 9049 lsp->rls_skip_seqid_check = TRUE;
9046 9050
9047 9051 /* hold off other access to lsp while we tinker */
9048 9052 rfs4_sw_enter(&lsp->rls_sw);
9049 9053 ls_sw_held = TRUE;
9050 9054
9051 9055 rfs4_dbe_unlock(lsp->rls_dbe);
9052 9056
9053 9057 rfs4_lockowner_rele(lo);
9054 9058 } else {
9055 9059 stateid = &args->locker.locker4_u.lock_owner.lock_stateid;
9056 9060 /* get lsp and hold the lock on the underlying file struct */
9057 9061 if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE))
9058 9062 != NFS4_OK) {
9059 9063 *cs->statusp = resp->status = status;
9060 9064 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9061 9065 cs, LOCK4res *, resp);
9062 9066 return;
9063 9067 }
9064 9068 create = FALSE; /* We didn't create lsp */
9065 9069
9066 9070 /* Ensure specified filehandle matches */
9067 9071 if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
9068 9072 rfs4_lo_state_rele(lsp, TRUE);
9069 9073 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9070 9074 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9071 9075 cs, LOCK4res *, resp);
9072 9076 return;
9073 9077 }
9074 9078
9075 9079 /* hold off other access to lsp while we tinker */
9076 9080 rfs4_sw_enter(&lsp->rls_sw);
9077 9081 ls_sw_held = TRUE;
9078 9082
9079 9083 switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
9080 9084 /*
9081 9085 * The stateid looks like it was okay (expected to be
9082 9086 * the next one)
9083 9087 */
9084 9088 case NFS4_CHECK_STATEID_OKAY:
9085 9089 /*
9086 9090 * The sequence id is now checked. Determine
9087 9091 * if this is a replay or if it is in the
9088 9092 * expected (next) sequence. In the case of a
9089 9093 * replay, there are two replay conditions
9090 9094 * that may occur. The first is the normal
9091 9095 * condition where a LOCK is done with a
9092 9096 * NFS4_OK response and the stateid is
9093 9097 * updated. That case is handled below when
9094 9098 * the stateid is identified as a REPLAY. The
9095 9099 * second is the case where an error is
9096 9100 * returned, like NFS4ERR_DENIED, and the
9097 9101 * sequence number is updated but the stateid
9098 9102 * is not updated. This second case is dealt
9099 9103 * with here. So it may seem odd that the
9100 9104 * stateid is okay but the sequence id is a
9101 9105 * replay but it is okay.
9102 9106 */
9103 9107 switch (rfs4_check_lock_seqid(
9104 9108 args->locker.locker4_u.lock_owner.lock_seqid,
9105 9109 lsp, resop)) {
9106 9110 case NFS4_CHKSEQ_REPLAY:
9107 9111 if (resp->status != NFS4_OK) {
9108 9112 /*
9109 9113 * Here is our replay and need
9110 9114 * to verify that the last
9111 9115 * response was an error.
9112 9116 */
9113 9117 *cs->statusp = resp->status;
9114 9118 goto end;
9115 9119 }
9116 9120 /*
9117 9121 * This is done since the sequence id
9118 9122 * looked like a replay but it didn't
9119 9123 * pass our check so a BAD_SEQID is
9120 9124 * returned as a result.
9121 9125 */
9122 9126 /*FALLTHROUGH*/
9123 9127 case NFS4_CHKSEQ_BAD:
9124 9128 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9125 9129 goto end;
9126 9130 case NFS4_CHKSEQ_OKAY:
9127 9131 /* Everything looks okay move ahead */
9128 9132 break;
9129 9133 }
9130 9134 break;
9131 9135 case NFS4_CHECK_STATEID_OLD:
9132 9136 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9133 9137 goto end;
9134 9138 case NFS4_CHECK_STATEID_BAD:
9135 9139 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9136 9140 goto end;
9137 9141 case NFS4_CHECK_STATEID_EXPIRED:
9138 9142 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9139 9143 goto end;
9140 9144 case NFS4_CHECK_STATEID_CLOSED:
9141 9145 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9142 9146 goto end;
9143 9147 case NFS4_CHECK_STATEID_REPLAY:
9144 9148 switch (rfs4_check_lock_seqid(
9145 9149 args->locker.locker4_u.lock_owner.lock_seqid,
9146 9150 lsp, resop)) {
9147 9151 case NFS4_CHKSEQ_OKAY:
9148 9152 /*
9149 9153 * This is a replayed stateid; if
9150 9154 * seqid matches the next expected,
9151 9155 * then client is using wrong seqid.
9152 9156 */
/* FALLTHROUGH */
9153 9157 case NFS4_CHKSEQ_BAD:
9154 9158 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9155 9159 goto end;
9156 9160 case NFS4_CHKSEQ_REPLAY:
9157 9161 rfs4_update_lease(lsp->rls_locker->rl_client);
9158 9162 *cs->statusp = status = resp->status;
9159 9163 goto end;
9160 9164 }
9161 9165 break;
9162 9166 default:
9163 9167 ASSERT(FALSE);
9164 9168 break;
9165 9169 }
9166 9170
9167 9171 rfs4_update_lock_sequence(lsp);
9168 9172 rfs4_update_lease(lsp->rls_locker->rl_client);
9169 9173 }
9170 9174
9171 9175 /*
9172 9176 * NFS4 only allows locking on regular files, so
9173 9177 * verify type of object.
9174 9178 */
9175 9179 if (cs->vp->v_type != VREG) {
9176 9180 if (cs->vp->v_type == VDIR)
9177 9181 status = NFS4ERR_ISDIR;
9178 9182 else
9179 9183 status = NFS4ERR_INVAL;
9180 9184 goto out;
9181 9185 }
9182 9186
9183 9187 cp = lsp->rls_state->rs_owner->ro_client;
9184 9188
/*
 * Grace period rules: while the client is in grace only reclaims are
 * accepted, and only if the client is allowed to reclaim; outside of
 * grace a reclaim is refused.
 */
9185 9189 if (rfs4_clnt_in_grace(cp) && !args->reclaim) {
9186 9190 status = NFS4ERR_GRACE;
9187 9191 goto out;
9188 9192 }
9189 9193
9190 9194 if (rfs4_clnt_in_grace(cp) && args->reclaim && !cp->rc_can_reclaim) {
9191 9195 status = NFS4ERR_NO_GRACE;
9192 9196 goto out;
9193 9197 }
9194 9198
9195 9199 if (!rfs4_clnt_in_grace(cp) && args->reclaim) {
9196 9200 status = NFS4ERR_NO_GRACE;
9197 9201 goto out;
9198 9202 }
9199 9203
9200 9204 if (lsp->rls_state->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE)
9201 9205 cs->deleg = TRUE;
9202 9206
9203 9207 status = rfs4_do_lock(lsp, args->locktype,
9204 9208 args->offset, args->length, cs->cr, resop);
9205 9209
9206 9210 out:
/* The first-request exemption from the seqid check ends here. */
9207 9211 lsp->rls_skip_seqid_check = FALSE;
9208 9212
9209 9213 *cs->statusp = resp->status = status;
9210 9214
9211 9215 if (status == NFS4_OK) {
9212 9216 resp->LOCK4res_u.lock_stateid = lsp->rls_lockid.stateid;
9213 9217 lsp->rls_lock_completed = TRUE;
9214 9218 }
9215 9219 /*
9216 9220 * Only update the "OPEN" response here if this was a new
9217 9221 * lock_owner
9218 9222 */
9219 9223 if (sp)
9220 9224 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
9221 9225
9222 9226 rfs4_update_lock_resp(lsp, resop);
9223 9227
9224 9228 end:
9225 9229 if (lsp) {
9226 9230 if (ls_sw_held)
9227 9231 rfs4_sw_exit(&lsp->rls_sw);
9228 9232 /*
9229 9233 * If an sp obtained, then the lsp does not represent
9230 9234 * a lock on the file struct.
9231 9235 */
9232 9236 if (sp != NULL)
9233 9237 rfs4_lo_state_rele(lsp, FALSE);
9234 9238 else
9235 9239 rfs4_lo_state_rele(lsp, TRUE);
9236 9240 }
9237 9241 if (sp) {
9238 9242 rfs4_sw_exit(&sp->rs_owner->ro_sw);
9239 9243 rfs4_state_rele(sp);
9240 9244 }
9241 9245
9242 9246 DTRACE_NFSV4_2(op__lock__done, struct compound_state *, cs,
9243 9247 LOCK4res *, resp);
9244 9248 }
9245 9249
9246 9250 /* free function for LOCK/LOCKT */
9247 9251 static void
9248 9252 lock_denied_free(nfs_resop4 *resop)
9249 9253 {
9250 9254 LOCK4denied *dp = NULL;
9251 9255
9252 9256 switch (resop->resop) {
9253 9257 case OP_LOCK:
9254 9258 if (resop->nfs_resop4_u.oplock.status == NFS4ERR_DENIED)
9255 9259 dp = &resop->nfs_resop4_u.oplock.LOCK4res_u.denied;
9256 9260 break;
9257 9261 case OP_LOCKT:
9258 9262 if (resop->nfs_resop4_u.oplockt.status == NFS4ERR_DENIED)
9259 9263 dp = &resop->nfs_resop4_u.oplockt.denied;
9260 9264 break;
9261 9265 default:
9262 9266 break;
9263 9267 }
9264 9268
9265 9269 if (dp)
9266 9270 kmem_free(dp->owner.owner_val, dp->owner.owner_len);
9267 9271 }
9268 9272
/*
 * rfs4_op_locku: handler for the NFSv4 LOCKU (unlock) operation.
 *
 * The arguments are taken from argop->nfs_argop4_u.oplocku and the result
 * is written to resop->nfs_resop4_u.oplocku; the status is stored both in
 * the result and in *cs->statusp.  cs->vp must be set (NFS4ERR_NOFILEHANDLE
 * otherwise) and must match the file of the lock state named by the lock
 * stateid.
 *
 * Once the lock stateid and seqid have been validated, the object type and
 * grace period are checked and the unlock itself is delegated to
 * rfs4_do_lock(), which treats an OP_LOCKU result op as an unlock request.
 *
 * Labels: "out" records the final status and saves the response via
 * rfs4_update_lock_resp(); "end" only exits rls_sw and releases lsp.
 */
9269 9273 /*ARGSUSED*/
9270 9274 void
9271 9275 rfs4_op_locku(nfs_argop4 *argop, nfs_resop4 *resop,
9272 9276 struct svc_req *req, struct compound_state *cs)
9273 9277 {
9274 9278 LOCKU4args *args = &argop->nfs_argop4_u.oplocku;
9275 9279 LOCKU4res *resp = &resop->nfs_resop4_u.oplocku;
9276 9280 nfsstat4 status;
9277 9281 stateid4 *stateid = &args->lock_stateid;
9278 9282 rfs4_lo_state_t *lsp;
9279 9283
9280 9284 DTRACE_NFSV4_2(op__locku__start, struct compound_state *, cs,
9281 9285 LOCKU4args *, args);
9282 9286
9283 9287 if (cs->vp == NULL) {
9284 9288 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9285 9289 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9286 9290 LOCKU4res *, resp);
9287 9291 return;
9288 9292 }
9289 9293
/* Look up the lock state named by the lock stateid. */
9290 9294 if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE)) != NFS4_OK) {
9291 9295 *cs->statusp = resp->status = status;
9292 9296 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9293 9297 LOCKU4res *, resp);
9294 9298 return;
9295 9299 }
9296 9300
9297 9301 /* Ensure specified filehandle matches */
9298 9302 if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
9299 9303 rfs4_lo_state_rele(lsp, TRUE);
9300 9304 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9301 9305 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9302 9306 LOCKU4res *, resp);
9303 9307 return;
9304 9308 }
9305 9309
9306 9310 /* hold off other access to lsp while we tinker */
9307 9311 rfs4_sw_enter(&lsp->rls_sw);
9308 9312
9309 9313 switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
9310 9314 case NFS4_CHECK_STATEID_OKAY:
9311 9315 if (rfs4_check_lock_seqid(args->seqid, lsp, resop)
9312 9316 != NFS4_CHKSEQ_OKAY) {
9313 9317 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9314 9318 goto end;
9315 9319 }
9316 9320 break;
9317 9321 case NFS4_CHECK_STATEID_OLD:
9318 9322 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9319 9323 goto end;
9320 9324 case NFS4_CHECK_STATEID_BAD:
9321 9325 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9322 9326 goto end;
9323 9327 case NFS4_CHECK_STATEID_EXPIRED:
9324 9328 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9325 9329 goto end;
9326 9330 case NFS4_CHECK_STATEID_CLOSED:
9327 9331 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9328 9332 goto end;
9329 9333 case NFS4_CHECK_STATEID_REPLAY:
9330 9334 switch (rfs4_check_lock_seqid(args->seqid, lsp, resop)) {
9331 9335 case NFS4_CHKSEQ_OKAY:
9332 9336 /*
9333 9337 * This is a replayed stateid; if
9334 9338 * seqid matches the next expected,
9335 9339 * then client is using wrong seqid.
9336 9340 */
/* FALLTHROUGH */
9337 9341 case NFS4_CHKSEQ_BAD:
9338 9342 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9339 9343 goto end;
9340 9344 case NFS4_CHKSEQ_REPLAY:
/* Replay: return the status already present in resp. */
9341 9345 rfs4_update_lease(lsp->rls_locker->rl_client);
9342 9346 *cs->statusp = status = resp->status;
9343 9347 goto end;
9344 9348 }
9345 9349 break;
9346 9350 default:
9347 9351 ASSERT(FALSE);
9348 9352 break;
9349 9353 }
9350 9354
9351 9355 rfs4_update_lock_sequence(lsp);
9352 9356 rfs4_update_lease(lsp->rls_locker->rl_client);
9353 9357
9354 9358 /*
9355 9359 * NFS4 only allows locking on regular files, so
9356 9360 * verify type of object.
9357 9361 */
9358 9362 if (cs->vp->v_type != VREG) {
9359 9363 if (cs->vp->v_type == VDIR)
9360 9364 status = NFS4ERR_ISDIR;
9361 9365 else
9362 9366 status = NFS4ERR_INVAL;
9363 9367 goto out;
9364 9368 }
9365 9369
9366 9370 if (rfs4_clnt_in_grace(lsp->rls_state->rs_owner->ro_client)) {
9367 9371 status = NFS4ERR_GRACE;
9368 9372 goto out;
9369 9373 }
9370 9374
9371 9375 status = rfs4_do_lock(lsp, args->locktype,
9372 9376 args->offset, args->length, cs->cr, resop);
9373 9377
9374 9378 out:
9375 9379 *cs->statusp = resp->status = status;
9376 9380
9377 9381 if (status == NFS4_OK)
9378 9382 resp->lock_stateid = lsp->rls_lockid.stateid;
9379 9383
9380 9384 rfs4_update_lock_resp(lsp, resop);
9381 9385
9382 9386 end:
9383 9387 rfs4_sw_exit(&lsp->rls_sw);
9384 9388 rfs4_lo_state_rele(lsp, TRUE);
9385 9389
9386 9390 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9387 9391 LOCKU4res *, resp);
9388 9392 }
9389 9393
9390 9394 /*
9391 9395 * LOCKT is a best effort routine, the client can not be guaranteed that
9392 9396 * the status return is still in effect by the time the reply is received.
9393 9397 * They are numerous race conditions in this routine, but we are not required
9394 9398 * and can not be accurate.
9395 9399 */
9396 9400 /*ARGSUSED*/
9397 9401 void
9398 9402 rfs4_op_lockt(nfs_argop4 *argop, nfs_resop4 *resop,
9399 9403 struct svc_req *req, struct compound_state *cs)
9400 9404 {
9401 9405 LOCKT4args *args = &argop->nfs_argop4_u.oplockt;
9402 9406 LOCKT4res *resp = &resop->nfs_resop4_u.oplockt;
9403 9407 rfs4_lockowner_t *lo;
9404 9408 rfs4_client_t *cp;
9405 9409 bool_t create = FALSE;
9406 9410 struct flock64 flk;
9407 9411 int error;
9408 9412 int flag = FREAD | FWRITE;
9409 9413 int ltype;
9410 9414 length4 posix_length;
9411 9415 sysid_t sysid;
9412 9416 pid_t pid;
9413 9417
9414 9418 DTRACE_NFSV4_2(op__lockt__start, struct compound_state *, cs,
9415 9419 LOCKT4args *, args);
9416 9420
9417 9421 if (cs->vp == NULL) {
9418 9422 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9419 9423 goto out;
9420 9424 }
9421 9425
9422 9426 /*
9423 9427 * NFS4 only allows locking on regular files, so
9424 9428 * verify type of object.
9425 9429 */
9426 9430 if (cs->vp->v_type != VREG) {
9427 9431 if (cs->vp->v_type == VDIR)
9428 9432 *cs->statusp = resp->status = NFS4ERR_ISDIR;
9429 9433 else
9430 9434 *cs->statusp = resp->status = NFS4ERR_INVAL;
9431 9435 goto out;
9432 9436 }
9433 9437
9434 9438 /*
9435 9439 * Check out the clientid to ensure the server knows about it
9436 9440 * so that we correctly inform the client of a server reboot.
9437 9441 */
9438 9442 if ((cp = rfs4_findclient_by_id(args->owner.clientid, FALSE))
9439 9443 == NULL) {
9440 9444 *cs->statusp = resp->status =
9441 9445 rfs4_check_clientid(&args->owner.clientid, 0);
9442 9446 goto out;
9443 9447 }
9444 9448 if (rfs4_lease_expired(cp)) {
9445 9449 rfs4_client_close(cp);
9446 9450 /*
9447 9451 * Protocol doesn't allow returning NFS4ERR_STALE as
9448 9452 * other operations do on this check so STALE_CLIENTID
9449 9453 * is returned instead
9450 9454 */
9451 9455 *cs->statusp = resp->status = NFS4ERR_STALE_CLIENTID;
9452 9456 goto out;
9453 9457 }
9454 9458
9455 9459 if (rfs4_clnt_in_grace(cp) && !(cp->rc_can_reclaim)) {
9456 9460 *cs->statusp = resp->status = NFS4ERR_GRACE;
9457 9461 rfs4_client_rele(cp);
9458 9462 goto out;
9459 9463 }
9460 9464 rfs4_client_rele(cp);
9461 9465
9462 9466 resp->status = NFS4_OK;
9463 9467
9464 9468 switch (args->locktype) {
9465 9469 case READ_LT:
9466 9470 case READW_LT:
9467 9471 ltype = F_RDLCK;
9468 9472 break;
9469 9473 case WRITE_LT:
9470 9474 case WRITEW_LT:
9471 9475 ltype = F_WRLCK;
9472 9476 break;
9473 9477 }
9474 9478
9475 9479 posix_length = args->length;
9476 9480 /* Check for zero length. To lock to end of file use all ones for V4 */
9477 9481 if (posix_length == 0) {
9478 9482 *cs->statusp = resp->status = NFS4ERR_INVAL;
9479 9483 goto out;
9480 9484 } else if (posix_length == (length4)(~0)) {
9481 9485 posix_length = 0; /* Posix to end of file */
9482 9486 }
9483 9487
9484 9488 /* Find or create a lockowner */
9485 9489 lo = rfs4_findlockowner(&args->owner, &create);
9486 9490
9487 9491 if (lo) {
9488 9492 pid = lo->rl_pid;
9489 9493 if ((resp->status =
9490 9494 rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
9491 9495 goto err;
9492 9496 } else {
9493 9497 pid = 0;
9494 9498 sysid = lockt_sysid;
9495 9499 }
9496 9500 retry:
9497 9501 flk.l_type = ltype;
9498 9502 flk.l_whence = 0; /* SEEK_SET */
9499 9503 flk.l_start = args->offset;
9500 9504 flk.l_len = posix_length;
9501 9505 flk.l_sysid = sysid;
9502 9506 flk.l_pid = pid;
9503 9507 flag |= F_REMOTELOCK;
9504 9508
9505 9509 LOCK_PRINT(rfs4_debug, "rfs4_op_lockt", F_GETLK, &flk);
9506 9510
9507 9511 /* Note that length4 is uint64_t but l_len and l_start are off64_t */
9508 9512 if (flk.l_len < 0 || flk.l_start < 0) {
9509 9513 resp->status = NFS4ERR_INVAL;
9510 9514 goto err;
9511 9515 }
9512 9516 error = VOP_FRLOCK(cs->vp, F_GETLK, &flk, flag, (u_offset_t)0,
9513 9517 NULL, cs->cr, NULL);
9514 9518
9515 9519 /*
9516 9520 * N.B. We map error values to nfsv4 errors. This is differrent
9517 9521 * than puterrno4 routine.
9518 9522 */
9519 9523 switch (error) {
9520 9524 case 0:
9521 9525 if (flk.l_type == F_UNLCK)
9522 9526 resp->status = NFS4_OK;
9523 9527 else {
9524 9528 if (lock_denied(&resp->denied, &flk) == NFS4ERR_EXPIRED)
9525 9529 goto retry;
9526 9530 resp->status = NFS4ERR_DENIED;
9527 9531 }
9528 9532 break;
9529 9533 case EOVERFLOW:
9530 9534 resp->status = NFS4ERR_INVAL;
9531 9535 break;
9532 9536 case EINVAL:
9533 9537 resp->status = NFS4ERR_NOTSUPP;
9534 9538 break;
9535 9539 default:
9536 9540 cmn_err(CE_WARN, "rfs4_op_lockt: unexpected errno (%d)",
9537 9541 error);
9538 9542 resp->status = NFS4ERR_SERVERFAULT;
9539 9543 break;
9540 9544 }
9541 9545
9542 9546 err:
9543 9547 if (lo)
9544 9548 rfs4_lockowner_rele(lo);
9545 9549 *cs->statusp = resp->status;
9546 9550 out:
9547 9551 DTRACE_NFSV4_2(op__lockt__done, struct compound_state *, cs,
9548 9552 LOCKT4res *, resp);
9549 9553 }
9550 9554
9551 9555 int
9552 9556 rfs4_share(rfs4_state_t *sp, uint32_t access, uint32_t deny)
9553 9557 {
9554 9558 int err;
9555 9559 int cmd;
9556 9560 vnode_t *vp;
9557 9561 struct shrlock shr;
9558 9562 struct shr_locowner shr_loco;
9559 9563 int fflags = 0;
9560 9564
9561 9565 ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9562 9566 ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9563 9567
9564 9568 if (sp->rs_closed)
9565 9569 return (NFS4ERR_OLD_STATEID);
9566 9570
9567 9571 vp = sp->rs_finfo->rf_vp;
9568 9572 ASSERT(vp);
9569 9573
9570 9574 shr.s_access = shr.s_deny = 0;
9571 9575
9572 9576 if (access & OPEN4_SHARE_ACCESS_READ) {
9573 9577 fflags |= FREAD;
9574 9578 shr.s_access |= F_RDACC;
9575 9579 }
9576 9580 if (access & OPEN4_SHARE_ACCESS_WRITE) {
9577 9581 fflags |= FWRITE;
9578 9582 shr.s_access |= F_WRACC;
9579 9583 }
9580 9584 ASSERT(shr.s_access);
9581 9585
9582 9586 if (deny & OPEN4_SHARE_DENY_READ)
9583 9587 shr.s_deny |= F_RDDNY;
9584 9588 if (deny & OPEN4_SHARE_DENY_WRITE)
9585 9589 shr.s_deny |= F_WRDNY;
9586 9590
9587 9591 shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9588 9592 shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9589 9593 shr_loco.sl_pid = shr.s_pid;
9590 9594 shr_loco.sl_id = shr.s_sysid;
9591 9595 shr.s_owner = (caddr_t)&shr_loco;
9592 9596 shr.s_own_len = sizeof (shr_loco);
9593 9597
9594 9598 cmd = nbl_need_check(vp) ? F_SHARE_NBMAND : F_SHARE;
9595 9599
9596 9600 err = VOP_SHRLOCK(vp, cmd, &shr, fflags, CRED(), NULL);
9597 9601 if (err != 0) {
9598 9602 if (err == EAGAIN)
9599 9603 err = NFS4ERR_SHARE_DENIED;
9600 9604 else
9601 9605 err = puterrno4(err);
9602 9606 return (err);
9603 9607 }
9604 9608
9605 9609 sp->rs_share_access |= access;
9606 9610 sp->rs_share_deny |= deny;
9607 9611
9608 9612 return (0);
9609 9613 }
9610 9614
9611 9615 int
9612 9616 rfs4_unshare(rfs4_state_t *sp)
9613 9617 {
9614 9618 int err;
9615 9619 struct shrlock shr;
9616 9620 struct shr_locowner shr_loco;
9617 9621
9618 9622 ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9619 9623
9620 9624 if (sp->rs_closed || sp->rs_share_access == 0)
9621 9625 return (0);
9622 9626
9623 9627 ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9624 9628 ASSERT(sp->rs_finfo->rf_vp);
9625 9629
9626 9630 shr.s_access = shr.s_deny = 0;
9627 9631 shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9628 9632 shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9629 9633 shr_loco.sl_pid = shr.s_pid;
9630 9634 shr_loco.sl_id = shr.s_sysid;
9631 9635 shr.s_owner = (caddr_t)&shr_loco;
9632 9636 shr.s_own_len = sizeof (shr_loco);
9633 9637
9634 9638 err = VOP_SHRLOCK(sp->rs_finfo->rf_vp, F_UNSHARE, &shr, 0, CRED(),
9635 9639 NULL);
9636 9640 if (err != 0) {
9637 9641 err = puterrno4(err);
9638 9642 return (err);
9639 9643 }
9640 9644
9641 9645 sp->rs_share_access = 0;
9642 9646 sp->rs_share_deny = 0;
9643 9647
9644 9648 return (0);
9645 9649
9646 9650 }
9647 9651
9648 9652 static int
9649 9653 rdma_setup_read_data4(READ4args *args, READ4res *rok)
9650 9654 {
9651 9655 struct clist *wcl;
9652 9656 count4 count = rok->data_len;
9653 9657 int wlist_len;
9654 9658
9655 9659 wcl = args->wlist;
9656 9660 if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
9657 9661 return (FALSE);
9658 9662 }
9659 9663 wcl = args->wlist;
9660 9664 rok->wlist_len = wlist_len;
9661 9665 rok->wlist = wcl;
9662 9666 return (TRUE);
9663 9667 }
9664 9668
/*
 * Tunable to disable server referrals.  When set to a non-zero value,
 * vn_is_nfs_reparse() returns B_FALSE without examining the vnode.
 */
int rfs4_no_referrals = 0;
9667 9671
9668 9672 /*
9669 9673 * Find an NFS record in reparse point data.
9670 9674 * Returns 0 for success and <0 or an errno value on failure.
9671 9675 */
9672 9676 int
9673 9677 vn_find_nfs_record(vnode_t *vp, nvlist_t **nvlp, char **svcp, char **datap)
9674 9678 {
9675 9679 int err;
9676 9680 char *stype, *val;
9677 9681 nvlist_t *nvl;
9678 9682 nvpair_t *curr;
9679 9683
9680 9684 if ((nvl = reparse_init()) == NULL)
9681 9685 return (-1);
9682 9686
9683 9687 if ((err = reparse_vnode_parse(vp, nvl)) != 0) {
9684 9688 reparse_free(nvl);
9685 9689 return (err);
9686 9690 }
9687 9691
9688 9692 curr = NULL;
9689 9693 while ((curr = nvlist_next_nvpair(nvl, curr)) != NULL) {
9690 9694 if ((stype = nvpair_name(curr)) == NULL) {
9691 9695 reparse_free(nvl);
9692 9696 return (-2);
9693 9697 }
9694 9698 if (strncasecmp(stype, "NFS", 3) == 0)
9695 9699 break;
9696 9700 }
9697 9701
9698 9702 if ((curr == NULL) ||
9699 9703 (nvpair_value_string(curr, &val))) {
9700 9704 reparse_free(nvl);
9701 9705 return (-3);
9702 9706 }
9703 9707 *nvlp = nvl;
9704 9708 *svcp = stype;
9705 9709 *datap = val;
9706 9710 return (0);
9707 9711 }
9708 9712
9709 9713 int
9710 9714 vn_is_nfs_reparse(vnode_t *vp, cred_t *cr)
9711 9715 {
9712 9716 nvlist_t *nvl;
9713 9717 char *s, *d;
9714 9718
9715 9719 if (rfs4_no_referrals != 0)
9716 9720 return (B_FALSE);
9717 9721
9718 9722 if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9719 9723 return (B_FALSE);
9720 9724
9721 9725 if (vn_find_nfs_record(vp, &nvl, &s, &d) != 0)
9722 9726 return (B_FALSE);
9723 9727
9724 9728 reparse_free(nvl);
9725 9729
9726 9730 return (B_TRUE);
9727 9731 }
9728 9732
9729 9733 /*
9730 9734 * There is a user-level copy of this routine in ref_subr.c.
9731 9735 * Changes should be kept in sync.
9732 9736 */
9733 9737 static int
9734 9738 nfs4_create_components(char *path, component4 *comp4)
9735 9739 {
9736 9740 int slen, plen, ncomp;
9737 9741 char *ori_path, *nxtc, buf[MAXNAMELEN];
9738 9742
9739 9743 if (path == NULL)
9740 9744 return (0);
9741 9745
9742 9746 plen = strlen(path) + 1; /* include the terminator */
9743 9747 ori_path = path;
9744 9748 ncomp = 0;
9745 9749
9746 9750 /* count number of components in the path */
9747 9751 for (nxtc = path; nxtc < ori_path + plen; nxtc++) {
9748 9752 if (*nxtc == '/' || *nxtc == '\0' || *nxtc == '\n') {
9749 9753 if ((slen = nxtc - path) == 0) {
9750 9754 path = nxtc + 1;
9751 9755 continue;
9752 9756 }
9753 9757
9754 9758 if (comp4 != NULL) {
9755 9759 bcopy(path, buf, slen);
9756 9760 buf[slen] = '\0';
9757 9761 (void) str_to_utf8(buf, &comp4[ncomp]);
9758 9762 }
9759 9763
9760 9764 ncomp++; /* 1 valid component */
9761 9765 path = nxtc + 1;
9762 9766 }
9763 9767 if (*nxtc == '\0' || *nxtc == '\n')
9764 9768 break;
9765 9769 }
9766 9770
9767 9771 return (ncomp);
9768 9772 }
9769 9773
9770 9774 /*
9771 9775 * There is a user-level copy of this routine in ref_subr.c.
9772 9776 * Changes should be kept in sync.
9773 9777 */
9774 9778 static int
9775 9779 make_pathname4(char *path, pathname4 *pathname)
9776 9780 {
9777 9781 int ncomp;
9778 9782 component4 *comp4;
9779 9783
9780 9784 if (pathname == NULL)
9781 9785 return (0);
9782 9786
9783 9787 if (path == NULL) {
9784 9788 pathname->pathname4_val = NULL;
9785 9789 pathname->pathname4_len = 0;
9786 9790 return (0);
9787 9791 }
9788 9792
9789 9793 /* count number of components to alloc buffer */
9790 9794 if ((ncomp = nfs4_create_components(path, NULL)) == 0) {
9791 9795 pathname->pathname4_val = NULL;
9792 9796 pathname->pathname4_len = 0;
9793 9797 return (0);
9794 9798 }
9795 9799 comp4 = kmem_zalloc(ncomp * sizeof (component4), KM_SLEEP);
9796 9800
9797 9801 /* copy components into allocated buffer */
9798 9802 ncomp = nfs4_create_components(path, comp4);
9799 9803
9800 9804 pathname->pathname4_val = comp4;
9801 9805 pathname->pathname4_len = ncomp;
9802 9806
9803 9807 return (ncomp);
9804 9808 }
9805 9809
9806 9810 #define xdr_fs_locations4 xdr_fattr4_fs_locations
9807 9811
9808 9812 fs_locations4 *
9809 9813 fetch_referral(vnode_t *vp, cred_t *cr)
9810 9814 {
9811 9815 nvlist_t *nvl;
9812 9816 char *stype, *sdata;
9813 9817 fs_locations4 *result;
9814 9818 char buf[1024];
9815 9819 size_t bufsize;
9816 9820 XDR xdr;
9817 9821 int err;
9818 9822
9819 9823 /*
9820 9824 * Check attrs to ensure it's a reparse point
9821 9825 */
9822 9826 if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9823 9827 return (NULL);
9824 9828
9825 9829 /*
9826 9830 * Look for an NFS record and get the type and data
9827 9831 */
9828 9832 if (vn_find_nfs_record(vp, &nvl, &stype, &sdata) != 0)
9829 9833 return (NULL);
9830 9834
9831 9835 /*
9832 9836 * With the type and data, upcall to get the referral
9833 9837 */
9834 9838 bufsize = sizeof (buf);
9835 9839 bzero(buf, sizeof (buf));
9836 9840 err = reparse_kderef((const char *)stype, (const char *)sdata,
9837 9841 buf, &bufsize);
9838 9842 reparse_free(nvl);
9839 9843
9840 9844 DTRACE_PROBE4(nfs4serv__func__referral__upcall,
9841 9845 char *, stype, char *, sdata, char *, buf, int, err);
9842 9846 if (err) {
9843 9847 cmn_err(CE_NOTE,
9844 9848 "reparsed daemon not running: unable to get referral (%d)",
9845 9849 err);
9846 9850 return (NULL);
9847 9851 }
9848 9852
9849 9853 /*
9850 9854 * We get an XDR'ed record back from the kderef call
9851 9855 */
9852 9856 xdrmem_create(&xdr, buf, bufsize, XDR_DECODE);
9853 9857 result = kmem_alloc(sizeof (fs_locations4), KM_SLEEP);
9854 9858 err = xdr_fs_locations4(&xdr, result);
9855 9859 XDR_DESTROY(&xdr);
9856 9860 if (err != TRUE) {
9857 9861 DTRACE_PROBE1(nfs4serv__func__referral__upcall__xdrfail,
9858 9862 int, err);
9859 9863 return (NULL);
9860 9864 }
9861 9865
9862 9866 /*
9863 9867 * Look at path to recover fs_root, ignoring the leading '/'
9864 9868 */
9865 9869 (void) make_pathname4(vp->v_path, &result->fs_root);
9866 9870
9867 9871 return (result);
9868 9872 }
9869 9873
9870 9874 char *
9871 9875 build_symlink(vnode_t *vp, cred_t *cr, size_t *strsz)
9872 9876 {
9873 9877 fs_locations4 *fsl;
9874 9878 fs_location4 *fs;
9875 9879 char *server, *path, *symbuf;
9876 9880 static char *prefix = "/net/";
9877 9881 int i, size, npaths;
9878 9882 uint_t len;
9879 9883
9880 9884 /* Get the referral */
9881 9885 if ((fsl = fetch_referral(vp, cr)) == NULL)
9882 9886 return (NULL);
9883 9887
9884 9888 /* Deal with only the first location and first server */
9885 9889 fs = &fsl->locations_val[0];
9886 9890 server = utf8_to_str(&fs->server_val[0], &len, NULL);
9887 9891 if (server == NULL) {
9888 9892 rfs4_free_fs_locations4(fsl);
9889 9893 kmem_free(fsl, sizeof (fs_locations4));
9890 9894 return (NULL);
9891 9895 }
9892 9896
9893 9897 /* Figure out size for "/net/" + host + /path/path/path + NULL */
9894 9898 size = strlen(prefix) + len;
9895 9899 for (i = 0; i < fs->rootpath.pathname4_len; i++)
9896 9900 size += fs->rootpath.pathname4_val[i].utf8string_len + 1;
9897 9901
9898 9902 /* Allocate the symlink buffer and fill it */
9899 9903 symbuf = kmem_zalloc(size, KM_SLEEP);
9900 9904 (void) strcat(symbuf, prefix);
9901 9905 (void) strcat(symbuf, server);
9902 9906 kmem_free(server, len);
9903 9907
9904 9908 npaths = 0;
9905 9909 for (i = 0; i < fs->rootpath.pathname4_len; i++) {
9906 9910 path = utf8_to_str(&fs->rootpath.pathname4_val[i], &len, NULL);
9907 9911 if (path == NULL)
9908 9912 continue;
9909 9913 (void) strcat(symbuf, "/");
9910 9914 (void) strcat(symbuf, path);
9911 9915 npaths++;
9912 9916 kmem_free(path, len);
9913 9917 }
9914 9918
9915 9919 rfs4_free_fs_locations4(fsl);
9916 9920 kmem_free(fsl, sizeof (fs_locations4));
9917 9921
9918 9922 if (strsz != NULL)
9919 9923 *strsz = size;
9920 9924 return (symbuf);
9921 9925 }
9922 9926
9923 9927 /*
9924 9928 * Check to see if we have a downrev Solaris client, so that we
9925 9929 * can send it a symlink instead of a referral.
9926 9930 */
9927 9931 int
9928 9932 client_is_downrev(struct svc_req *req)
9929 9933 {
9930 9934 struct sockaddr *ca;
9931 9935 rfs4_clntip_t *ci;
9932 9936 bool_t create = FALSE;
9933 9937 int is_downrev;
9934 9938
9935 9939 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
9936 9940 ASSERT(ca);
9937 9941 ci = rfs4_find_clntip(ca, &create);
9938 9942 if (ci == NULL)
9939 9943 return (0);
9940 9944 is_downrev = ci->ri_no_referrals;
9941 9945 rfs4_dbe_rele(ci->ri_dbe);
9942 9946 return (is_downrev);
9943 9947 }
9944 9948
/*
 * Do the main work of handling HA-NFSv4 Resource Group failover on
 * Sun Cluster.
 * We need to detect whether any RG admin paths have been added or removed,
 * and adjust resources accordingly.
 * Currently we're using a very inefficient algorithm, ~ 2 * O(n**2). In
 * order to scale, the list and array of paths need to be held in more
 * suitable data structures.
 *
 * Inputs are the "currently-serving" circular list nsrv4->dss_pathlist and
 * the rfs4_dss_newpaths[] / rfs4_dss_numnewpaths pair.  Removed paths are
 * unlinked and freed; if any paths were added, a new server instance is
 * created for them, their stable-storage state is read in, and all grace
 * periods are reset.
 */
static void
hanfsv4_failover(nfs4_srv_t *nsrv4)
{
	int i, start_grace, numadded_paths = 0;
	char **added_paths = NULL;
	rfs4_dss_path_t *dss_path;

	/*
	 * Note: currently, dss_pathlist cannot be NULL, since
	 * it will always include an entry for NFS4_DSS_VAR_DIR. If we
	 * make the latter dynamically specified too, the following will
	 * need to be adjusted.
	 */

	/*
	 * First, look for removed paths: RGs that have been failed-over
	 * away from this node.
	 * Walk the "currently-serving" dss_pathlist and, for each
	 * path, check if it is on the "passed-in" rfs4_dss_newpaths array
	 * from nfsd. If not, that RG path has been removed.
	 *
	 * Note that nfsd has sorted rfs4_dss_newpaths for us, and removed
	 * any duplicates.
	 */
	dss_path = nsrv4->dss_pathlist;
	do {
		int found = 0;
		char *path = dss_path->path;

		/*
		 * used only for non-HA so may not be removed
		 *
		 * "continue" in a do/while jumps to the loop condition, so
		 * the cursor has to be advanced by hand first.
		 */
		if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
			dss_path = dss_path->next;
			continue;
		}

		for (i = 0; i < rfs4_dss_numnewpaths; i++) {
			int cmpret;
			char *newpath = rfs4_dss_newpaths[i];

			/*
			 * Since nfsd has sorted rfs4_dss_newpaths for us,
			 * once the return from strcmp is negative we know
			 * we've passed the point where "path" should be,
			 * and can stop searching: "path" has been removed.
			 */
			cmpret = strcmp(path, newpath);
			if (cmpret < 0)
				break;
			if (cmpret == 0) {
				found = 1;
				break;
			}
		}

		if (found == 0) {
			unsigned index = dss_path->index;
			rfs4_servinst_t *sip = dss_path->sip;
			/* saved now: dss_path is freed before we advance */
			rfs4_dss_path_t *path_next = dss_path->next;

			/*
			 * This path has been removed.
			 * We must clear out the servinst reference to
			 * it, since it's now owned by another
			 * node: we should not attempt to touch it.
			 */
			ASSERT(dss_path == sip->dss_paths[index]);
			sip->dss_paths[index] = NULL;

			/*
			 * remove from "currently-serving" list, and destroy
			 *
			 * NOTE(review): nsrv4->dss_pathlist is not updated
			 * here, so this looks like it relies on the list
			 * head never being a removed entry (presumably the
			 * NFS4_DSS_VAR_DIR entry) -- confirm.
			 */
			remque(dss_path);
			/* allow for NUL */
			kmem_free(dss_path->path, strlen(dss_path->path) + 1);
			kmem_free(dss_path, sizeof (rfs4_dss_path_t));

			dss_path = path_next;
		} else {
			/* path was found; not removed */
			dss_path = dss_path->next;
		}
	} while (dss_path != nsrv4->dss_pathlist);

	/*
	 * Now, look for added paths: RGs that have been failed-over
	 * to this node.
	 * Walk the "passed-in" rfs4_dss_newpaths array from nfsd and,
	 * for each path, check if it is on the "currently-serving"
	 * dss_pathlist. If not, that RG path has been added.
	 *
	 * Note: we don't do duplicate detection here; nfsd does that for us.
	 *
	 * Note: numadded_paths <= rfs4_dss_numnewpaths, which gives us
	 * an upper bound for the size needed for added_paths[numadded_paths].
	 */

	/* probably more space than we need, but guaranteed to be enough */
	if (rfs4_dss_numnewpaths > 0) {
		size_t sz = rfs4_dss_numnewpaths * sizeof (char *);
		added_paths = kmem_zalloc(sz, KM_SLEEP);
	}

	/* walk the "passed-in" rfs4_dss_newpaths array from nfsd */
	for (i = 0; i < rfs4_dss_numnewpaths; i++) {
		int found = 0;
		char *newpath = rfs4_dss_newpaths[i];

		dss_path = nsrv4->dss_pathlist;
		do {
			char *path = dss_path->path;

			/* used only for non-HA */
			if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
				dss_path = dss_path->next;
				continue;
			}

			/*
			 * NOTE(review): this is a prefix match on the
			 * length of "path", whereas the removal pass above
			 * uses an exact strcmp() -- confirm the asymmetry
			 * is intended.
			 */
			if (strncmp(path, newpath, strlen(path)) == 0) {
				found = 1;
				break;
			}

			dss_path = dss_path->next;
		} while (dss_path != nsrv4->dss_pathlist);

		if (found == 0) {
			/* stores the rfs4_dss_newpaths pointer, not a copy */
			added_paths[numadded_paths] = newpath;
			numadded_paths++;
		}
	}

	/* did we find any added paths? */
	if (numadded_paths > 0) {

		/* create a new server instance, and start its grace period */
		start_grace = 1;
		/* CSTYLED */
		rfs4_servinst_create(nsrv4, start_grace, numadded_paths, added_paths);

		/* read in the stable storage state from these paths */
		rfs4_dss_readstate(nsrv4, numadded_paths, added_paths);

		/*
		 * Multiple failovers during a grace period will cause
		 * clients of the same resource group to be partitioned
		 * into different server instances, with different
		 * grace periods.  Since clients of the same resource
		 * group must be subject to the same grace period,
		 * we need to reset all currently active grace periods.
		 */
		rfs4_grace_reset_all(nsrv4);
	}

	/* only the pointer array is freed here, not the path strings */
	if (rfs4_dss_numnewpaths > 0)
		kmem_free(added_paths, rfs4_dss_numnewpaths * sizeof (char *));
}
|
↓ open down ↓ |
7356 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX