Print this page
nfssrv: nfsstat reports zeroed data in zone
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/fs/nfs/nfs4_srv.c
+++ new/usr/src/uts/common/fs/nfs/nfs4_srv.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24 24 */
25 25
26 26 /*
27 27 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
28 28 * All Rights Reserved
29 29 */
30 30
31 31 /*
32 32 * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
33 33 * Copyright 2019 Nexenta Systems, Inc.
34 34 * Copyright 2019 Nexenta by DDN, Inc.
35 35 */
36 36
37 37 #include <sys/param.h>
38 38 #include <sys/types.h>
39 39 #include <sys/systm.h>
40 40 #include <sys/cred.h>
41 41 #include <sys/buf.h>
42 42 #include <sys/vfs.h>
43 43 #include <sys/vfs_opreg.h>
44 44 #include <sys/vnode.h>
45 45 #include <sys/uio.h>
46 46 #include <sys/errno.h>
47 47 #include <sys/sysmacros.h>
48 48 #include <sys/statvfs.h>
49 49 #include <sys/kmem.h>
50 50 #include <sys/dirent.h>
51 51 #include <sys/cmn_err.h>
52 52 #include <sys/debug.h>
53 53 #include <sys/systeminfo.h>
54 54 #include <sys/flock.h>
55 55 #include <sys/pathname.h>
56 56 #include <sys/nbmlock.h>
57 57 #include <sys/share.h>
58 58 #include <sys/atomic.h>
59 59 #include <sys/policy.h>
60 60 #include <sys/fem.h>
61 61 #include <sys/sdt.h>
62 62 #include <sys/ddi.h>
63 63 #include <sys/zone.h>
64 64
65 65 #include <fs/fs_reparse.h>
66 66
67 67 #include <rpc/types.h>
68 68 #include <rpc/auth.h>
69 69 #include <rpc/rpcsec_gss.h>
70 70 #include <rpc/svc.h>
71 71
72 72 #include <nfs/nfs.h>
73 73 #include <nfs/nfssys.h>
74 74 #include <nfs/export.h>
75 75 #include <nfs/nfs_cmd.h>
76 76 #include <nfs/lm.h>
77 77 #include <nfs/nfs4.h>
78 78 #include <nfs/nfs4_drc.h>
79 79
80 80 #include <sys/strsubr.h>
81 81 #include <sys/strsun.h>
82 82
83 83 #include <inet/common.h>
84 84 #include <inet/ip.h>
85 85 #include <inet/ip6.h>
86 86
87 87 #include <sys/tsol/label.h>
88 88 #include <sys/tsol/tndb.h>
89 89
/*
 * Start of Tunables.
 * rfs4_maxlock_tries and rfs4_lock_delay are initialized from the
 * RFS4_MAXLOCK_TRIES and RFS4_LOCK_DELAY defaults below; rdma_svc_ops and
 * nfs_loaned_buffers are defined elsewhere.
 */
90 90 #define RFS4_MAXLOCK_TRIES 4 /* Try to get the lock this many times */
91 91 static int rfs4_maxlock_tries = RFS4_MAXLOCK_TRIES;
92 92 #define RFS4_LOCK_DELAY 10 /* Milliseconds */
93 93 static clock_t rfs4_lock_delay = RFS4_LOCK_DELAY;
94 94 extern struct svc_ops rdma_svc_ops;
95 95 extern int nfs_loaned_buffers;
96 96 /* End of Tunables */
97 97
98 98 static int rdma_setup_read_data4(READ4args *, READ4res *);
99 99
100 100 /*
101 101 * Used to bump the stateid4.seqid value and show changes in the stateid.
 * The macro pre-increments (sp)->bits.chgseq and evaluates to the new
 * value; sp is evaluated exactly once.
102 102 */
103 103 #define next_stateid(sp) (++(sp)->bits.chgseq)
104 104
105 105 /*
106 106 * RFS4_MINLEN_ENTRY4: XDR-encoded size of smallest possible dirent.
107 107 * This is used to return NFS4ERR_TOOSMALL when clients specify
108 108 * maxcount that isn't large enough to hold the smallest possible
109 109 * XDR encoded dirent.
110 110 *
111 111 * sizeof cookie (8 bytes) +
112 112 * sizeof name_len (4 bytes) +
113 113 * sizeof smallest (padded) name (4 bytes) +
114 114 * sizeof bitmap4_len (12 bytes) + NOTE: we always encode len=2 bm4
115 115 * sizeof attrlist4_len (4 bytes) +
116 116 * sizeof next boolean (4 bytes)
 * = 36 bytes
117 117 *
118 118 * RFS4_MINLEN_RDDIR4: XDR-encoded size of READDIR op reply containing
119 119 * the smallest possible entry4 (assumes no attrs requested).
120 120 * sizeof nfsstat4 (4 bytes) +
121 121 * sizeof verifier4 (8 bytes) +
122 122 * sizeof entry4list bool (4 bytes) +
123 123 * sizeof entry4 (36 bytes) +
124 124 * sizeof eof bool (4 bytes)
125 125 *
126 126 * RFS4_MINLEN_RDDIR_BUF: minimum length of buffer server will provide to
127 127 * VOP_READDIR. Its value is the size of the maximum possible dirent
128 128 * for Solaris. The DIRENT64_RECLEN macro returns the size of dirent
129 129 * required for a given name length. MAXNAMELEN is the maximum
130 130 * filename length allowed in Solaris. The first two DIRENT64_RECLEN()
131 131 * macros are to allow for . and .. entries -- just a minor tweak to try
132 132 * and guarantee that buffer we give to VOP_READDIR will be large enough
133 133 * to hold ., .., and the largest possible Solaris dirent64.
134 134 */
135 135 #define RFS4_MINLEN_ENTRY4 36
136 136 #define RFS4_MINLEN_RDDIR4 (4 + NFS4_VERIFIER_SIZE + 4 + RFS4_MINLEN_ENTRY4 + 4)
137 137 #define RFS4_MINLEN_RDDIR_BUF \
138 138 (DIRENT64_RECLEN(1) + DIRENT64_RECLEN(2) + DIRENT64_RECLEN(MAXNAMELEN))
139 139
140 140 /*
141 141 * It would be better to pad to 4 bytes since that's what XDR would do,
142 142 * but the dirents UFS gives us are already padded to 8, so just take
143 143 * what we're given. Dircount is only a hint anyway. Currently the
144 144 * Solaris kernel is ASCII only, so there's no point in calling the
145 145 * UTF8 functions.
146 146 *
147 147 * dirent64: name padded to provide 8 byte struct alignment
148 148 * d_ino(8) + d_off(8) + d_reclen(2) + d_name(namelen + null(1) + pad)
149 149 *
150 150 * cookie: uint64_t + utf8namelen: uint_t + utf8name padded to 8 bytes
151 151 *
 * DIRENT64_TO_DIRCOUNT(dp) evaluates to three XDR units (the cookie and
 * the name length) plus DIRENT64_NAMELEN() of the entry's d_reclen.
152 152 */
153 153 #define DIRENT64_TO_DIRCOUNT(dp) \
154 154 (3 * BYTES_PER_XDR_UNIT + DIRENT64_NAMELEN((dp)->d_reclen))
155 155
156 156
/* Allocated by lm_alloc_sysidt() in rfs4_srvrinit(), freed in rfs4_srvrfini() */
157 157 static sysid_t lockt_sysid; /* dummy sysid for all LOCKT calls */
158 158
/* Set from fs_new_caller_id() in rfs4_srvrinit() */
159 159 u_longlong_t nfs4_srv_caller_id;
/* Key filled in by vsd_create() in rfs4_srvrinit() */
160 160 uint_t nfs4_srv_vkey = 0;
161 161
162 162 void rfs4_init_compound_state(struct compound_state *);
163 163
164 164 static void nullfree(caddr_t);
165 165 static void rfs4_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
166 166 struct compound_state *);
167 167 static void rfs4_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
168 168 struct compound_state *);
169 169 static void rfs4_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
170 170 struct compound_state *);
171 171 static void rfs4_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
172 172 struct compound_state *);
173 173 static void rfs4_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
174 174 struct compound_state *);
175 175 static void rfs4_op_create_free(nfs_resop4 *resop);
176 176 static void rfs4_op_delegreturn(nfs_argop4 *, nfs_resop4 *,
177 177 struct svc_req *, struct compound_state *);
178 178 static void rfs4_op_delegpurge(nfs_argop4 *, nfs_resop4 *,
179 179 struct svc_req *, struct compound_state *);
180 180 static void rfs4_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
181 181 struct compound_state *);
182 182 static void rfs4_op_getattr_free(nfs_resop4 *);
183 183 static void rfs4_op_getfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
184 184 struct compound_state *);
185 185 static void rfs4_op_getfh_free(nfs_resop4 *);
186 186 static void rfs4_op_illegal(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
187 187 struct compound_state *);
188 188 static void rfs4_op_link(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
189 189 struct compound_state *);
190 190 static void rfs4_op_lock(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
191 191 struct compound_state *);
192 192 static void lock_denied_free(nfs_resop4 *);
193 193 static void rfs4_op_locku(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
194 194 struct compound_state *);
195 195 static void rfs4_op_lockt(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
196 196 struct compound_state *);
197 197 static void rfs4_op_lookup(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
198 198 struct compound_state *);
199 199 static void rfs4_op_lookupp(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
200 200 struct compound_state *);
201 201 static void rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop,
202 202 struct svc_req *req, struct compound_state *cs);
203 203 static void rfs4_op_nverify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
204 204 struct compound_state *);
205 205 static void rfs4_op_open(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
206 206 struct compound_state *);
207 207 static void rfs4_op_open_confirm(nfs_argop4 *, nfs_resop4 *,
208 208 struct svc_req *, struct compound_state *);
209 209 static void rfs4_op_open_downgrade(nfs_argop4 *, nfs_resop4 *,
210 210 struct svc_req *, struct compound_state *);
211 211 static void rfs4_op_putfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
212 212 struct compound_state *);
213 213 static void rfs4_op_putpubfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
214 214 struct compound_state *);
215 215 static void rfs4_op_putrootfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
216 216 struct compound_state *);
217 217 static void rfs4_op_read(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
218 218 struct compound_state *);
219 219 static void rfs4_op_read_free(nfs_resop4 *);
220 220 static void rfs4_op_readdir_free(nfs_resop4 *resop);
221 221 static void rfs4_op_readlink(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
222 222 struct compound_state *);
223 223 static void rfs4_op_readlink_free(nfs_resop4 *);
224 224 static void rfs4_op_release_lockowner(nfs_argop4 *, nfs_resop4 *,
225 225 struct svc_req *, struct compound_state *);
226 226 static void rfs4_op_remove(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
227 227 struct compound_state *);
228 228 static void rfs4_op_rename(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
229 229 struct compound_state *);
230 230 static void rfs4_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
231 231 struct compound_state *);
232 232 static void rfs4_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
233 233 struct compound_state *);
234 234 static void rfs4_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
235 235 struct compound_state *);
236 236 static void rfs4_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
237 237 struct compound_state *);
238 238 static void rfs4_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
239 239 struct compound_state *);
240 240 static void rfs4_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
241 241 struct compound_state *);
242 242 static void rfs4_op_setclientid(nfs_argop4 *, nfs_resop4 *,
243 243 struct svc_req *, struct compound_state *);
244 244 static void rfs4_op_setclientid_confirm(nfs_argop4 *, nfs_resop4 *,
245 245 struct svc_req *req, struct compound_state *);
246 246 static void rfs4_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
247 247 struct compound_state *);
248 248 static void rfs4_op_secinfo_free(nfs_resop4 *);
249 249
250 250 static nfsstat4 check_open_access(uint32_t, struct compound_state *,
251 251 struct svc_req *);
252 252 nfsstat4 rfs4_client_sysid(rfs4_client_t *, sysid_t *);
253 253 void rfs4_ss_clid(nfs4_srv_t *, rfs4_client_t *);
254 254
255 255
256 256 /*
257 257 * translation table for attrs
 *
 * NOTE(review): the field notes below are inferred from the field names
 * only; confirm against nfs4_ntov_table_init() and its users.
258 258 */
259 259 struct nfs4_ntov_table {
260 260 union nfs4_attr_u *na; /* presumably the attribute value array */
261 261 uint8_t amap[NFS4_MAXNUM_ATTRS]; /* one slot per possible attribute */
262 262 int attrcnt; /* presumably number of entries in use */
263 263 bool_t vfsstat; /* presumably: vfs statistics needed - TODO confirm */
264 264 };
265 265
266 266 static void nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp);
267 267 static void nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
268 268 struct nfs4_svgetit_arg *sargp);
269 269
270 270 static nfsstat4 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp,
271 271 struct compound_state *cs, struct nfs4_svgetit_arg *sargp,
272 272 struct nfs4_ntov_table *ntovp, nfs4_attr_cmd_t cmd);
273 273
274 274 static void hanfsv4_failover(nfs4_srv_t *);
275 275
/*
 * File event monitor handles built by fem_create() in rfs4_srvrinit()
 * from nfs4_rd_deleg_tmpl and nfs4_wr_deleg_tmpl respectively, and
 * released by fem_free() in rfs4_srvrfini().
 */
276 276 fem_t *deleg_rdops;
277 277 fem_t *deleg_wrops;
278 278
279 279 /*
280 280 * NFS4 op dispatch table
 *
 * One struct rfsv4disp describes a single operation: the handler to run,
 * the routine that releases whatever the handler allocated for its
 * result (nullfree when there is nothing to release), and a flag word.
281 281 */
282 282
283 283 struct rfsv4disp {
284 284 void (*dis_proc)(); /* proc to call */
285 285 void (*dis_resfree)(); /* frees space allocated by proc */
286 286 int dis_flags; /* RPC_IDEMPOTENT, etc... */
287 287 };
288 288
/*
 * Dispatch table indexed by NFSv4 operation number. Slots 0-2 have no
 * operation assigned and are routed to rfs4_op_illegal.
 */
289 289 static struct rfsv4disp rfsv4disptab[] = {
290 290 /*
291 291 * NFS VERSION 4
292 292 */
293 293
294 294 /* RFS_NULL = 0 */
295 295 {rfs4_op_illegal, nullfree, 0},
296 296
297 297 /* UNUSED = 1 */
298 298 {rfs4_op_illegal, nullfree, 0},
299 299
300 300 /* UNUSED = 2 */
301 301 {rfs4_op_illegal, nullfree, 0},
302 302
303 303 /* OP_ACCESS = 3 */
304 304 {rfs4_op_access, nullfree, RPC_IDEMPOTENT},
305 305
306 306 /* OP_CLOSE = 4 */
307 307 {rfs4_op_close, nullfree, 0},
308 308
309 309 /* OP_COMMIT = 5 */
310 310 {rfs4_op_commit, nullfree, RPC_IDEMPOTENT},
311 311
312 312 /* OP_CREATE = 6 */
313 313 {rfs4_op_create, nullfree, 0},
314 314
315 315 /* OP_DELEGPURGE = 7 */
316 316 {rfs4_op_delegpurge, nullfree, 0},
317 317
318 318 /* OP_DELEGRETURN = 8 */
319 319 {rfs4_op_delegreturn, nullfree, 0},
320 320
321 321 /* OP_GETATTR = 9 */
322 322 {rfs4_op_getattr, rfs4_op_getattr_free, RPC_IDEMPOTENT},
323 323
324 324 /* OP_GETFH = 10 */
325 325 {rfs4_op_getfh, rfs4_op_getfh_free, RPC_ALL},
326 326
327 327 /* OP_LINK = 11 */
328 328 {rfs4_op_link, nullfree, 0},
329 329
330 330 /* OP_LOCK = 12 */
331 331 {rfs4_op_lock, lock_denied_free, 0},
332 332
333 333 /* OP_LOCKT = 13 */
334 334 {rfs4_op_lockt, lock_denied_free, 0},
335 335
336 336 /* OP_LOCKU = 14 */
337 337 {rfs4_op_locku, nullfree, 0},
338 338
339 339 /* OP_LOOKUP = 15 */
340 340 {rfs4_op_lookup, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},
341 341
342 342 /* OP_LOOKUPP = 16 */
343 343 {rfs4_op_lookupp, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},
344 344
345 345 /* OP_NVERIFY = 17 */
346 346 {rfs4_op_nverify, nullfree, RPC_IDEMPOTENT},
347 347
348 348 /* OP_OPEN = 18 */
349 349 {rfs4_op_open, rfs4_free_reply, 0},
350 350
351 351 /* OP_OPENATTR = 19 */
352 352 {rfs4_op_openattr, nullfree, 0},
353 353
354 354 /* OP_OPEN_CONFIRM = 20 */
355 355 {rfs4_op_open_confirm, nullfree, 0},
356 356
357 357 /* OP_OPEN_DOWNGRADE = 21 */
358 358 {rfs4_op_open_downgrade, nullfree, 0},
359 359
360 360 /* OP_PUTFH = 22 */
361 361 {rfs4_op_putfh, nullfree, RPC_ALL},
362 362
363 363 /* OP_PUTPUBFH = 23 */
364 364 {rfs4_op_putpubfh, nullfree, RPC_ALL},
365 365
366 366 /* OP_PUTROOTFH = 24 */
367 367 {rfs4_op_putrootfh, nullfree, RPC_ALL},
368 368
369 369 /* OP_READ = 25 */
370 370 {rfs4_op_read, rfs4_op_read_free, RPC_IDEMPOTENT},
371 371
372 372 /* OP_READDIR = 26 */
373 373 {rfs4_op_readdir, rfs4_op_readdir_free, RPC_IDEMPOTENT},
374 374
375 375 /* OP_READLINK = 27 */
376 376 {rfs4_op_readlink, rfs4_op_readlink_free, RPC_IDEMPOTENT},
377 377
378 378 /* OP_REMOVE = 28 */
379 379 {rfs4_op_remove, nullfree, 0},
380 380
381 381 /* OP_RENAME = 29 */
382 382 {rfs4_op_rename, nullfree, 0},
383 383
384 384 /* OP_RENEW = 30 */
385 385 {rfs4_op_renew, nullfree, 0},
386 386
387 387 /* OP_RESTOREFH = 31 */
388 388 {rfs4_op_restorefh, nullfree, RPC_ALL},
389 389
390 390 /* OP_SAVEFH = 32 */
391 391 {rfs4_op_savefh, nullfree, RPC_ALL},
392 392
393 393 /* OP_SECINFO = 33 */
394 394 {rfs4_op_secinfo, rfs4_op_secinfo_free, 0},
395 395
396 396 /* OP_SETATTR = 34 */
397 397 {rfs4_op_setattr, nullfree, 0},
398 398
399 399 /* OP_SETCLIENTID = 35 */
400 400 {rfs4_op_setclientid, nullfree, 0},
401 401
402 402 /* OP_SETCLIENTID_CONFIRM = 36 */
403 403 {rfs4_op_setclientid_confirm, nullfree, 0},
404 404
405 405 /* OP_VERIFY = 37 */
406 406 {rfs4_op_verify, nullfree, RPC_IDEMPOTENT},
407 407
408 408 /* OP_WRITE = 38 */
409 409 {rfs4_op_write, nullfree, 0},
410 410
411 411 /* OP_RELEASE_LOCKOWNER = 39 */
412 412 {rfs4_op_release_lockowner, nullfree, 0},
413 413 };
414 414
/* Number of entries in rfsv4disptab */
415 415 static uint_t rfsv4disp_cnt = sizeof (rfsv4disptab) / sizeof (rfsv4disptab[0]);
416 416
/*
 * One past the last valid rfsv4disptab index; under DEBUG this is the
 * position of the trailing "rfs4_op_illegal" entry in rfs4_op_string[].
 */
417 417 #define OP_ILLEGAL_IDX (rfsv4disp_cnt)
418 418
419 419 #ifdef DEBUG
420 420
/* Debug-only knobs; their consumers are outside this part of the file. */
421 421 int rfs4_fillone_debug = 0;
422 422 int rfs4_no_stub_access = 1;
423 423 int rfs4_rddir_debug = 0;
424 424
/*
 * Printable names, in the same order as rfsv4disptab, followed by one
 * extra "rfs4_op_illegal" entry.
 * NOTE(review): "rfs4_op_setclient_confirm" does not match the handler
 * name rfs4_op_setclientid_confirm; left as is because the string is
 * runtime output.
 */
425 425 static char *rfs4_op_string[] = {
426 426 "rfs4_op_null",
427 427 "rfs4_op_1 unused",
428 428 "rfs4_op_2 unused",
429 429 "rfs4_op_access",
430 430 "rfs4_op_close",
431 431 "rfs4_op_commit",
432 432 "rfs4_op_create",
433 433 "rfs4_op_delegpurge",
434 434 "rfs4_op_delegreturn",
435 435 "rfs4_op_getattr",
436 436 "rfs4_op_getfh",
437 437 "rfs4_op_link",
438 438 "rfs4_op_lock",
439 439 "rfs4_op_lockt",
440 440 "rfs4_op_locku",
441 441 "rfs4_op_lookup",
442 442 "rfs4_op_lookupp",
443 443 "rfs4_op_nverify",
444 444 "rfs4_op_open",
445 445 "rfs4_op_openattr",
446 446 "rfs4_op_open_confirm",
447 447 "rfs4_op_open_downgrade",
448 448 "rfs4_op_putfh",
449 449 "rfs4_op_putpubfh",
450 450 "rfs4_op_putrootfh",
451 451 "rfs4_op_read",
452 452 "rfs4_op_readdir",
453 453 "rfs4_op_readlink",
454 454 "rfs4_op_remove",
455 455 "rfs4_op_rename",
456 456 "rfs4_op_renew",
457 457 "rfs4_op_restorefh",
458 458 "rfs4_op_savefh",
459 459 "rfs4_op_secinfo",
460 460 "rfs4_op_setattr",
461 461 "rfs4_op_setclientid",
462 462 "rfs4_op_setclient_confirm",
463 463 "rfs4_op_verify",
464 464 "rfs4_op_write",
465 465 "rfs4_op_release_lockowner",
466 466 "rfs4_op_illegal"
467 467 };
468 468 #endif
469 469
470 470 void rfs4_ss_chkclid(nfs4_srv_t *, rfs4_client_t *);
471 471
472 472 extern size_t strlcpy(char *dst, const char *src, size_t dstsize);
473 473
474 474 extern void rfs4_free_fs_locations4(fs_locations4 *);
475 475
/*
 * Step to the dirent64 that follows dp in a buffer by advancing
 * dp->d_reclen bytes. Any earlier definition of nextdp is replaced.
 */
476 476 #ifdef nextdp
477 477 #undef nextdp
478 478 #endif
479 479 #define nextdp(dp) ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
480 480
/*
 * Operation templates passed to fem_create() in rfs4_srvrinit() to build
 * deleg_rdops and deleg_wrops. Each entry pairs a VOPNAME_* operation
 * with the deleg_rd_* / deleg_wr_* routine installed for it, and each
 * list ends with a NULL, NULL entry. Only the write-delegation template
 * hooks VOPNAME_READ.
 */
481 481 static const fs_operation_def_t nfs4_rd_deleg_tmpl[] = {
482 482 VOPNAME_OPEN, { .femop_open = deleg_rd_open },
483 483 VOPNAME_WRITE, { .femop_write = deleg_rd_write },
484 484 VOPNAME_SETATTR, { .femop_setattr = deleg_rd_setattr },
485 485 VOPNAME_RWLOCK, { .femop_rwlock = deleg_rd_rwlock },
486 486 VOPNAME_SPACE, { .femop_space = deleg_rd_space },
487 487 VOPNAME_SETSECATTR, { .femop_setsecattr = deleg_rd_setsecattr },
488 488 VOPNAME_VNEVENT, { .femop_vnevent = deleg_rd_vnevent },
489 489 NULL, NULL
490 490 };
491 491 static const fs_operation_def_t nfs4_wr_deleg_tmpl[] = {
492 492 VOPNAME_OPEN, { .femop_open = deleg_wr_open },
493 493 VOPNAME_READ, { .femop_read = deleg_wr_read },
494 494 VOPNAME_WRITE, { .femop_write = deleg_wr_write },
495 495 VOPNAME_SETATTR, { .femop_setattr = deleg_wr_setattr },
496 496 VOPNAME_RWLOCK, { .femop_rwlock = deleg_wr_rwlock },
497 497 VOPNAME_SPACE, { .femop_space = deleg_wr_space },
498 498 VOPNAME_SETSECATTR, { .femop_setsecattr = deleg_wr_setsecattr },
499 499 VOPNAME_VNEVENT, { .femop_vnevent = deleg_wr_vnevent },
500 500 NULL, NULL
501 501 };
502 502
503 503 nfs4_srv_t *
504 504 nfs4_get_srv(void)
505 505 {
506 506 nfs_globals_t *ng = zone_getspecific(nfssrv_zone_key, curzone);
507 507 nfs4_srv_t *srv = ng->nfs4_srv;
508 508 ASSERT(srv != NULL);
509 509 return (srv);
510 510 }
511 511
512 512 void
513 513 rfs4_srv_zone_init(nfs_globals_t *ng)
514 514 {
515 515 nfs4_srv_t *nsrv4;
516 516 timespec32_t verf;
517 517
518 518 nsrv4 = kmem_zalloc(sizeof (*nsrv4), KM_SLEEP);
519 519
520 520 /*
521 521 * The following algorithm attempts to find a unique verifier
522 522 * to be used as the write verifier returned from the server
523 523 * to the client. It is important that this verifier change
524 524 * whenever the server reboots. Of secondary importance, it
525 525 * is important for the verifier to be unique between two
526 526 * different servers.
527 527 *
528 528 * Thus, an attempt is made to use the system hostid and the
529 529 * current time in seconds when the nfssrv kernel module is
530 530 * loaded. It is assumed that an NFS server will not be able
531 531 * to boot and then to reboot in less than a second. If the
532 532 * hostid has not been set, then the current high resolution
533 533 * time is used. This will ensure different verifiers each
534 534 * time the server reboots and minimize the chances that two
535 535 * different servers will have the same verifier.
536 536 * XXX - this is broken on LP64 kernels.
537 537 */
538 538 verf.tv_sec = (time_t)zone_get_hostid(NULL);
539 539 if (verf.tv_sec != 0) {
540 540 verf.tv_nsec = gethrestime_sec();
541 541 } else {
542 542 timespec_t tverf;
543 543
544 544 gethrestime(&tverf);
545 545 verf.tv_sec = (time_t)tverf.tv_sec;
546 546 verf.tv_nsec = tverf.tv_nsec;
547 547 }
548 548 nsrv4->write4verf = *(uint64_t *)&verf;
549 549
550 550 /* Used to manage create/destroy of server state */
551 551 nsrv4->nfs4_server_state = NULL;
552 552 nsrv4->nfs4_cur_servinst = NULL;
553 553 nsrv4->nfs4_deleg_policy = SRV_NEVER_DELEGATE;
554 554 mutex_init(&nsrv4->deleg_lock, NULL, MUTEX_DEFAULT, NULL);
555 555 mutex_init(&nsrv4->state_lock, NULL, MUTEX_DEFAULT, NULL);
556 556 mutex_init(&nsrv4->servinst_lock, NULL, MUTEX_DEFAULT, NULL);
557 557 rw_init(&nsrv4->deleg_policy_lock, NULL, RW_DEFAULT, NULL);
558 558
559 559 ng->nfs4_srv = nsrv4;
560 560 }
561 561
562 562 void
563 563 rfs4_srv_zone_fini(nfs_globals_t *ng)
564 564 {
565 565 nfs4_srv_t *nsrv4 = ng->nfs4_srv;
566 566
567 567 ng->nfs4_srv = NULL;
568 568
569 569 mutex_destroy(&nsrv4->deleg_lock);
570 570 mutex_destroy(&nsrv4->state_lock);
571 571 mutex_destroy(&nsrv4->servinst_lock);
572 572 rw_destroy(&nsrv4->deleg_policy_lock);
573 573
574 574 kmem_free(nsrv4, sizeof (*nsrv4));
575 575 }
576 576
577 577 void
578 578 rfs4_srvrinit(void)
579 579 {
580 580 extern void rfs4_attr_init();
581 581
582 582 rfs4_attr_init();
583 583
584 584 if (fem_create("deleg_rdops", nfs4_rd_deleg_tmpl, &deleg_rdops) != 0) {
585 585 rfs4_disable_delegation();
586 586 } else if (fem_create("deleg_wrops", nfs4_wr_deleg_tmpl,
587 587 &deleg_wrops) != 0) {
588 588 rfs4_disable_delegation();
589 589 fem_free(deleg_rdops);
590 590 }
591 591
592 592 nfs4_srv_caller_id = fs_new_caller_id();
593 593 lockt_sysid = lm_alloc_sysidt();
594 594 vsd_create(&nfs4_srv_vkey, NULL);
595 595 rfs4_state_g_init();
596 596 }
597 597
598 598 void
599 599 rfs4_srvrfini(void)
600 600 {
601 601 if (lockt_sysid != LM_NOSYSID) {
602 602 lm_free_sysidt(lockt_sysid);
603 603 lockt_sysid = LM_NOSYSID;
604 604 }
605 605
606 606 rfs4_state_g_fini();
607 607
608 608 fem_free(deleg_rdops);
609 609 fem_free(deleg_wrops);
610 610 }
611 611
612 612 void
613 613 rfs4_do_server_start(int server_upordown,
614 614 int srv_delegation, int cluster_booted)
615 615 {
616 616 nfs4_srv_t *nsrv4 = nfs4_get_srv();
617 617
618 618 /* Is this a warm start? */
619 619 if (server_upordown == NFS_SERVER_QUIESCED) {
620 620 cmn_err(CE_NOTE, "nfs4_srv: "
621 621 "server was previously quiesced; "
622 622 "existing NFSv4 state will be re-used");
623 623
624 624 /*
625 625 * HA-NFSv4: this is also the signal
626 626 * that a Resource Group failover has
627 627 * occurred.
628 628 */
629 629 if (cluster_booted)
630 630 hanfsv4_failover(nsrv4);
631 631 } else {
632 632 /* Cold start */
633 633 nsrv4->rfs4_start_time = 0;
634 634 rfs4_state_zone_init(nsrv4);
635 635 nsrv4->nfs4_drc = rfs4_init_drc(nfs4_drc_max,
636 636 nfs4_drc_hash);
637 637
638 638 /*
639 639 * The nfsd service was started with the -s option
640 640 * we need to pull in any state from the paths indicated.
641 641 */
642 642 if (curzone == global_zone && rfs4_dss_numnewpaths > 0) {
643 643 /* read in the stable storage state from these paths */
644 644 rfs4_dss_readstate(nsrv4, rfs4_dss_numnewpaths,
645 645 rfs4_dss_newpaths);
646 646 }
647 647 }
648 648
649 649 /* Check if delegation is to be enabled */
650 650 if (srv_delegation != FALSE)
651 651 rfs4_set_deleg_policy(nsrv4, SRV_NORMAL_DELEGATE);
652 652 }
653 653
654 654 void
655 655 rfs4_init_compound_state(struct compound_state *cs)
656 656 {
657 657 bzero(cs, sizeof (*cs));
658 658 cs->cont = TRUE;
659 659 cs->access = CS_ACCESS_DENIED;
660 660 cs->deleg = FALSE;
661 661 cs->mandlock = FALSE;
662 662 cs->fh.nfs_fh4_val = cs->fhbuf;
663 663 }
664 664
665 665 void
666 666 rfs4_grace_start(rfs4_servinst_t *sip)
667 667 {
668 668 rw_enter(&sip->rwlock, RW_WRITER);
669 669 sip->start_time = (time_t)TICK_TO_SEC(ddi_get_lbolt());
670 670 sip->grace_period = rfs4_grace_period;
671 671 rw_exit(&sip->rwlock);
672 672 }
673 673
674 674 /*
675 675 * returns true if the instance's grace period has never been started
676 676 */
677 677 int
678 678 rfs4_servinst_grace_new(rfs4_servinst_t *sip)
679 679 {
680 680 time_t start_time;
681 681
682 682 rw_enter(&sip->rwlock, RW_READER);
683 683 start_time = sip->start_time;
684 684 rw_exit(&sip->rwlock);
685 685
686 686 return (start_time == 0);
687 687 }
688 688
689 689 /*
690 690 * Indicates if server instance is within the
691 691 * grace period.
692 692 */
693 693 int
694 694 rfs4_servinst_in_grace(rfs4_servinst_t *sip)
695 695 {
696 696 time_t grace_expiry;
697 697
698 698 rw_enter(&sip->rwlock, RW_READER);
699 699 grace_expiry = sip->start_time + sip->grace_period;
700 700 rw_exit(&sip->rwlock);
701 701
702 702 return (((time_t)TICK_TO_SEC(ddi_get_lbolt())) < grace_expiry);
703 703 }
704 704
705 705 int
706 706 rfs4_clnt_in_grace(rfs4_client_t *cp)
707 707 {
708 708 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
709 709
710 710 return (rfs4_servinst_in_grace(cp->rc_server_instance));
711 711 }
712 712
713 713 /*
714 714 * reset all currently active grace periods
715 715 */
716 716 void
717 717 rfs4_grace_reset_all(nfs4_srv_t *nsrv4)
718 718 {
719 719 rfs4_servinst_t *sip;
720 720
721 721 mutex_enter(&nsrv4->servinst_lock);
722 722 for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev)
723 723 if (rfs4_servinst_in_grace(sip))
724 724 rfs4_grace_start(sip);
725 725 mutex_exit(&nsrv4->servinst_lock);
726 726 }
727 727
728 728 /*
729 729 * start any new instances' grace periods
730 730 */
731 731 void
732 732 rfs4_grace_start_new(nfs4_srv_t *nsrv4)
733 733 {
734 734 rfs4_servinst_t *sip;
735 735
736 736 mutex_enter(&nsrv4->servinst_lock);
737 737 for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev)
738 738 if (rfs4_servinst_grace_new(sip))
739 739 rfs4_grace_start(sip);
740 740 mutex_exit(&nsrv4->servinst_lock);
741 741 }
742 742
743 743 static rfs4_dss_path_t *
744 744 rfs4_dss_newpath(nfs4_srv_t *nsrv4, rfs4_servinst_t *sip,
745 745 char *path, unsigned index)
746 746 {
747 747 size_t len;
748 748 rfs4_dss_path_t *dss_path;
749 749
750 750 dss_path = kmem_alloc(sizeof (rfs4_dss_path_t), KM_SLEEP);
751 751
752 752 /*
753 753 * Take a copy of the string, since the original may be overwritten.
754 754 * Sadly, no strdup() in the kernel.
755 755 */
756 756 /* allow for NUL */
757 757 len = strlen(path) + 1;
758 758 dss_path->path = kmem_alloc(len, KM_SLEEP);
759 759 (void) strlcpy(dss_path->path, path, len);
760 760
761 761 /* associate with servinst */
762 762 dss_path->sip = sip;
763 763 dss_path->index = index;
764 764
765 765 /*
766 766 * Add to list of served paths.
767 767 * No locking required, as we're only ever called at startup.
768 768 */
769 769 if (nsrv4->dss_pathlist == NULL) {
770 770 /* this is the first dss_path_t */
771 771
772 772 /* needed for insque/remque */
773 773 dss_path->next = dss_path->prev = dss_path;
774 774
775 775 nsrv4->dss_pathlist = dss_path;
776 776 } else {
777 777 insque(dss_path, nsrv4->dss_pathlist);
778 778 }
779 779
780 780 return (dss_path);
781 781 }
782 782
783 783 /*
784 784 * Create a new server instance, and make it the currently active instance.
785 785 * Note that starting the grace period too early will reduce the clients'
786 786 * recovery window.
787 787 */
788 788 void
789 789 rfs4_servinst_create(nfs4_srv_t *nsrv4, int start_grace,
790 790 int dss_npaths, char **dss_paths)
791 791 {
792 792 unsigned i;
793 793 rfs4_servinst_t *sip;
794 794 rfs4_oldstate_t *oldstate;
795 795
796 796 sip = kmem_alloc(sizeof (rfs4_servinst_t), KM_SLEEP);
797 797 rw_init(&sip->rwlock, NULL, RW_DEFAULT, NULL);
798 798
799 799 sip->start_time = (time_t)0;
800 800 sip->grace_period = (time_t)0;
801 801 sip->next = NULL;
802 802 sip->prev = NULL;
803 803
804 804 rw_init(&sip->oldstate_lock, NULL, RW_DEFAULT, NULL);
805 805 /*
806 806 * This initial dummy entry is required to setup for insque/remque.
807 807 * It must be skipped over whenever the list is traversed.
808 808 */
809 809 oldstate = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP);
810 810 /* insque/remque require initial list entry to be self-terminated */
811 811 oldstate->next = oldstate;
812 812 oldstate->prev = oldstate;
813 813 sip->oldstate = oldstate;
814 814
815 815
816 816 sip->dss_npaths = dss_npaths;
817 817 sip->dss_paths = kmem_alloc(dss_npaths *
818 818 sizeof (rfs4_dss_path_t *), KM_SLEEP);
819 819
820 820 for (i = 0; i < dss_npaths; i++) {
821 821 sip->dss_paths[i] =
822 822 rfs4_dss_newpath(nsrv4, sip, dss_paths[i], i);
823 823 }
824 824
825 825 mutex_enter(&nsrv4->servinst_lock);
826 826 if (nsrv4->nfs4_cur_servinst != NULL) {
827 827 /* add to linked list */
828 828 sip->prev = nsrv4->nfs4_cur_servinst;
829 829 nsrv4->nfs4_cur_servinst->next = sip;
830 830 }
831 831 if (start_grace)
832 832 rfs4_grace_start(sip);
833 833 /* make the new instance "current" */
834 834 nsrv4->nfs4_cur_servinst = sip;
835 835
836 836 mutex_exit(&nsrv4->servinst_lock);
837 837 }
838 838
839 839 /*
840 840 * In future, we might add a rfs4_servinst_destroy(sip) but, for now, destroy
841 841 * all instances directly.
842 842 */
843 843 void
844 844 rfs4_servinst_destroy_all(nfs4_srv_t *nsrv4)
845 845 {
846 846 rfs4_servinst_t *sip, *prev, *current;
847 847 #ifdef DEBUG
848 848 int n = 0;
849 849 #endif
850 850
851 851 mutex_enter(&nsrv4->servinst_lock);
852 852 ASSERT(nsrv4->nfs4_cur_servinst != NULL);
853 853 current = nsrv4->nfs4_cur_servinst;
854 854 nsrv4->nfs4_cur_servinst = NULL;
855 855 for (sip = current; sip != NULL; sip = prev) {
856 856 prev = sip->prev;
857 857 rw_destroy(&sip->rwlock);
858 858 if (sip->oldstate)
859 859 kmem_free(sip->oldstate, sizeof (rfs4_oldstate_t));
860 860 if (sip->dss_paths) {
861 861 int i = sip->dss_npaths;
862 862
863 863 while (i > 0) {
864 864 i--;
865 865 if (sip->dss_paths[i] != NULL) {
866 866 char *path = sip->dss_paths[i]->path;
867 867
868 868 if (path != NULL) {
869 869 kmem_free(path,
870 870 strlen(path) + 1);
871 871 }
872 872 kmem_free(sip->dss_paths[i],
873 873 sizeof (rfs4_dss_path_t));
874 874 }
875 875 }
876 876 kmem_free(sip->dss_paths,
877 877 sip->dss_npaths * sizeof (rfs4_dss_path_t *));
878 878 }
879 879 kmem_free(sip, sizeof (rfs4_servinst_t));
880 880 #ifdef DEBUG
881 881 n++;
882 882 #endif
883 883 }
884 884 mutex_exit(&nsrv4->servinst_lock);
885 885 }
886 886
887 887 /*
888 888 * Assign the current server instance to a client_t.
889 889 * Should be called with cp->rc_dbe held.
890 890 */
891 891 void
892 892 rfs4_servinst_assign(nfs4_srv_t *nsrv4, rfs4_client_t *cp,
893 893 rfs4_servinst_t *sip)
894 894 {
895 895 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
896 896
897 897 /*
898 898 * The lock ensures that if the current instance is in the process
899 899 * of changing, we will see the new one.
900 900 */
901 901 mutex_enter(&nsrv4->servinst_lock);
902 902 cp->rc_server_instance = sip;
903 903 mutex_exit(&nsrv4->servinst_lock);
904 904 }
905 905
906 906 rfs4_servinst_t *
907 907 rfs4_servinst(rfs4_client_t *cp)
908 908 {
909 909 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
910 910
911 911 return (cp->rc_server_instance);
912 912 }
913 913
/*
 * No-op result-free routine: the dis_resfree entry in rfsv4disptab for
 * operations whose results need no freeing.
 */
914 914 /* ARGSUSED */
915 915 static void
916 916 nullfree(caddr_t resop)
917 917 {
918 918 }
919 919
920 920 /*
921 921 * This is a fall-through for invalid or not implemented (yet) ops
922 922 */
923 923 /* ARGSUSED */
924 924 static void
925 925 rfs4_op_inval(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
926 926 struct compound_state *cs)
927 927 {
928 928 *cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_INVAL;
929 929 }
930 930
931 931 /*
932 932 * Check if the security flavor, nfsnum, is in the flavor_list.
933 933 */
934 934 bool_t
935 935 in_flavor_list(int nfsnum, int *flavor_list, int count)
936 936 {
937 937 int i;
938 938
939 939 for (i = 0; i < count; i++) {
940 940 if (nfsnum == flavor_list[i])
941 941 return (TRUE);
942 942 }
943 943 return (FALSE);
944 944 }
945 945
946 946 /*
947 947 * Used by rfs4_op_secinfo to get the security information from the
948 948 * export structure associated with the component.
949 949 */
950 950 /* ARGSUSED */
951 951 static nfsstat4
952 952 do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
953 953 {
954 954 int error, different_export = 0;
955 955 vnode_t *dvp, *vp;
956 956 struct exportinfo *exi;
957 957 fid_t fid;
958 958 uint_t count, i;
959 959 secinfo4 *resok_val;
960 960 struct secinfo *secp;
961 961 seconfig_t *si;
962 962 bool_t did_traverse = FALSE;
963 963 int dotdot, walk;
964 964 nfs_export_t *ne = nfs_get_export();
965 965
966 966 dvp = cs->vp;
967 967 exi = cs->exi;
968 968 ASSERT(exi != NULL);
969 969 dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
970 970
971 971 /*
972 972 * If dotdotting, then need to check whether it's above the
973 973 * root of a filesystem, or above an export point.
974 974 */
975 975 if (dotdot) {
976 976 vnode_t *zone_rootvp = ne->exi_root->exi_vp;
977 977
978 978 ASSERT3U(exi->exi_zoneid, ==, ne->exi_root->exi_zoneid);
979 979 /*
980 980 * If dotdotting at the root of a filesystem, then
981 981 * need to traverse back to the mounted-on filesystem
982 982 * and do the dotdot lookup there.
983 983 */
984 984 if ((dvp->v_flag & VROOT) || VN_CMP(dvp, zone_rootvp)) {
985 985
986 986 /*
987 987 * If at the system root, then can
988 988 * go up no further.
989 989 */
990 990 if (VN_CMP(dvp, zone_rootvp))
991 991 return (puterrno4(ENOENT));
992 992
993 993 /*
994 994 * Traverse back to the mounted-on filesystem
995 995 */
996 996 dvp = untraverse(dvp, zone_rootvp);
997 997
998 998 /*
999 999 * Set the different_export flag so we remember
1000 1000 * to pick up a new exportinfo entry for
1001 1001 * this new filesystem.
1002 1002 */
1003 1003 different_export = 1;
1004 1004 } else {
1005 1005
1006 1006 /*
1007 1007 * If dotdotting above an export point then set
1008 1008 * the different_export to get new export info.
1009 1009 */
1010 1010 different_export = nfs_exported(exi, dvp);
1011 1011 }
1012 1012 }
1013 1013
1014 1014 /*
1015 1015 * Get the vnode for the component "nm".
1016 1016 */
1017 1017 error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cs->cr,
1018 1018 NULL, NULL, NULL);
1019 1019 if (error)
1020 1020 return (puterrno4(error));
1021 1021
1022 1022 /*
1023 1023 * If the vnode is in a pseudo filesystem, or if the security flavor
1024 1024 * used in the request is valid but not an explicitly shared flavor,
1025 1025 * or the access bit indicates that this is a limited access,
1026 1026 * check whether this vnode is visible.
1027 1027 */
1028 1028 if (!different_export &&
1029 1029 (PSEUDO(exi) || !is_exported_sec(cs->nfsflavor, exi) ||
1030 1030 cs->access & CS_ACCESS_LIMITED)) {
1031 1031 if (! nfs_visible(exi, vp, &different_export)) {
1032 1032 VN_RELE(vp);
1033 1033 return (puterrno4(ENOENT));
1034 1034 }
1035 1035 }
1036 1036
1037 1037 /*
1038 1038 * If it's a mountpoint, then traverse it.
1039 1039 */
1040 1040 if (vn_ismntpt(vp)) {
1041 1041 if ((error = traverse(&vp)) != 0) {
1042 1042 VN_RELE(vp);
1043 1043 return (puterrno4(error));
1044 1044 }
1045 1045 /* remember that we had to traverse mountpoint */
1046 1046 did_traverse = TRUE;
1047 1047 different_export = 1;
1048 1048 } else if (vp->v_vfsp != dvp->v_vfsp) {
1049 1049 /*
1050 1050 * If vp isn't a mountpoint and the vfs ptrs aren't the same,
1051 1051 * then vp is probably an LOFS object. We don't need the
1052 1052 * realvp, we just need to know that we might have crossed
1053 1053 * a server fs boundary and need to call checkexport4.
1054 1054 * (LOFS lookup hides server fs mountpoints, and actually calls
1055 1055 * traverse)
1056 1056 */
1057 1057 different_export = 1;
1058 1058 }
1059 1059
1060 1060 /*
1061 1061 * Get the export information for it.
1062 1062 */
1063 1063 if (different_export) {
1064 1064
1065 1065 bzero(&fid, sizeof (fid));
1066 1066 fid.fid_len = MAXFIDSZ;
1067 1067 error = vop_fid_pseudo(vp, &fid);
1068 1068 if (error) {
1069 1069 VN_RELE(vp);
1070 1070 return (puterrno4(error));
1071 1071 }
1072 1072
1073 1073 /* We'll need to reassign "exi". */
1074 1074 if (dotdot)
1075 1075 exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
1076 1076 else
1077 1077 exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
1078 1078
1079 1079 if (exi == NULL) {
1080 1080 if (did_traverse == TRUE) {
1081 1081 /*
1082 1082 * If this vnode is a mounted-on vnode,
1083 1083 * but the mounted-on file system is not
1084 1084 * exported, send back the secinfo for
1085 1085 * the exported node that the mounted-on
1086 1086 * vnode lives in.
1087 1087 */
1088 1088 exi = cs->exi;
1089 1089 } else {
1090 1090 VN_RELE(vp);
1091 1091 return (puterrno4(EACCES));
1092 1092 }
1093 1093 }
1094 1094 }
1095 1095 ASSERT(exi != NULL);
1096 1096
1097 1097
1098 1098 /*
1099 1099 * Create the secinfo result based on the security information
1100 1100 * from the exportinfo structure (exi).
1101 1101 *
1102 1102 * Return all flavors for a pseudo node.
1103 1103 * For a real export node, return the flavor that the client
1104 1104 * has access with.
1105 1105 */
1106 1106 ASSERT(RW_LOCK_HELD(&ne->exported_lock));
1107 1107 if (PSEUDO(exi)) {
1108 1108 count = exi->exi_export.ex_seccnt; /* total sec count */
1109 1109 resok_val = kmem_alloc(count * sizeof (secinfo4), KM_SLEEP);
1110 1110 secp = exi->exi_export.ex_secinfo;
1111 1111
1112 1112 for (i = 0; i < count; i++) {
1113 1113 si = &secp[i].s_secinfo;
1114 1114 resok_val[i].flavor = si->sc_rpcnum;
1115 1115 if (resok_val[i].flavor == RPCSEC_GSS) {
1116 1116 rpcsec_gss_info *info;
1117 1117
1118 1118 info = &resok_val[i].flavor_info;
1119 1119 info->qop = si->sc_qop;
1120 1120 info->service = (rpc_gss_svc_t)si->sc_service;
1121 1121
1122 1122 /* get oid opaque data */
1123 1123 info->oid.sec_oid4_len =
1124 1124 si->sc_gss_mech_type->length;
1125 1125 info->oid.sec_oid4_val = kmem_alloc(
1126 1126 si->sc_gss_mech_type->length, KM_SLEEP);
1127 1127 bcopy(
1128 1128 si->sc_gss_mech_type->elements,
1129 1129 info->oid.sec_oid4_val,
1130 1130 info->oid.sec_oid4_len);
1131 1131 }
1132 1132 }
1133 1133 resp->SECINFO4resok_len = count;
1134 1134 resp->SECINFO4resok_val = resok_val;
1135 1135 } else {
1136 1136 int ret_cnt = 0, k = 0;
1137 1137 int *flavor_list;
1138 1138
1139 1139 count = exi->exi_export.ex_seccnt; /* total sec count */
1140 1140 secp = exi->exi_export.ex_secinfo;
1141 1141
1142 1142 flavor_list = kmem_alloc(count * sizeof (int), KM_SLEEP);
1143 1143 /* find out which flavors to return */
1144 1144 for (i = 0; i < count; i ++) {
1145 1145 int access, flavor, perm;
1146 1146
1147 1147 flavor = secp[i].s_secinfo.sc_nfsnum;
1148 1148 perm = secp[i].s_flags;
1149 1149
1150 1150 access = nfsauth4_secinfo_access(exi, cs->req,
1151 1151 flavor, perm, cs->basecr);
1152 1152
1153 1153 if (! (access & NFSAUTH_DENIED) &&
1154 1154 ! (access & NFSAUTH_WRONGSEC)) {
1155 1155 flavor_list[ret_cnt] = flavor;
1156 1156 ret_cnt++;
1157 1157 }
1158 1158 }
1159 1159
1160 1160 /* Create the returning SECINFO value */
1161 1161 resok_val = kmem_alloc(ret_cnt * sizeof (secinfo4), KM_SLEEP);
1162 1162
1163 1163 for (i = 0; i < count; i++) {
1164 1164 /*
1165 1165 * If the flavor is in the flavor list,
1166 1166 * fill in resok_val.
1167 1167 */
1168 1168 si = &secp[i].s_secinfo;
1169 1169 if (in_flavor_list(si->sc_nfsnum,
1170 1170 flavor_list, ret_cnt)) {
1171 1171 resok_val[k].flavor = si->sc_rpcnum;
1172 1172 if (resok_val[k].flavor == RPCSEC_GSS) {
1173 1173 rpcsec_gss_info *info;
1174 1174
1175 1175 info = &resok_val[k].flavor_info;
1176 1176 info->qop = si->sc_qop;
1177 1177 info->service = (rpc_gss_svc_t)
1178 1178 si->sc_service;
1179 1179
1180 1180 /* get oid opaque data */
1181 1181 info->oid.sec_oid4_len =
1182 1182 si->sc_gss_mech_type->length;
1183 1183 info->oid.sec_oid4_val = kmem_alloc(
1184 1184 si->sc_gss_mech_type->length,
1185 1185 KM_SLEEP);
1186 1186 bcopy(si->sc_gss_mech_type->elements,
1187 1187 info->oid.sec_oid4_val,
1188 1188 info->oid.sec_oid4_len);
1189 1189 }
1190 1190 k++;
1191 1191 }
1192 1192 if (k >= ret_cnt)
1193 1193 break;
1194 1194 }
1195 1195 resp->SECINFO4resok_len = ret_cnt;
1196 1196 resp->SECINFO4resok_val = resok_val;
1197 1197 kmem_free(flavor_list, count * sizeof (int));
1198 1198 }
1199 1199
1200 1200 VN_RELE(vp);
1201 1201 return (NFS4_OK);
1202 1202 }
1203 1203
1204 1204 /*
1205 1205 * SECINFO (Operation 33): Obtain required security information on
1206 1206 * the component name in the format of (security-mechanism-oid, qop, service)
1207 1207 * triplets.
1208 1208 */
1209 1209 /* ARGSUSED */
1210 1210 static void
1211 1211 rfs4_op_secinfo(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1212 1212 struct compound_state *cs)
1213 1213 {
1214 1214 SECINFO4args *args = &argop->nfs_argop4_u.opsecinfo;
1215 1215 SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1216 1216 utf8string *utfnm = &args->name;
1217 1217 uint_t len;
1218 1218 char *nm;
1219 1219 struct sockaddr *ca;
1220 1220 char *name = NULL;
1221 1221 nfsstat4 status = NFS4_OK;
1222 1222
1223 1223 DTRACE_NFSV4_2(op__secinfo__start, struct compound_state *, cs,
1224 1224 SECINFO4args *, args);
1225 1225
1226 1226 /*
1227 1227 * Current file handle (cfh) should have been set before getting
1228 1228 * into this function. If not, return error.
1229 1229 */
1230 1230 if (cs->vp == NULL) {
1231 1231 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1232 1232 goto out;
1233 1233 }
1234 1234
1235 1235 if (cs->vp->v_type != VDIR) {
1236 1236 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1237 1237 goto out;
1238 1238 }
1239 1239
1240 1240 /*
1241 1241 * Verify the component name. If failed, error out, but
1242 1242 * do not error out if the component name is a "..".
1243 1243 * SECINFO will return its parents secinfo data for SECINFO "..".
1244 1244 */
1245 1245 status = utf8_dir_verify(utfnm);
1246 1246 if (status != NFS4_OK) {
1247 1247 if (utfnm->utf8string_len != 2 ||
1248 1248 utfnm->utf8string_val[0] != '.' ||
1249 1249 utfnm->utf8string_val[1] != '.') {
1250 1250 *cs->statusp = resp->status = status;
1251 1251 goto out;
1252 1252 }
1253 1253 }
1254 1254
1255 1255 nm = utf8_to_str(utfnm, &len, NULL);
1256 1256 if (nm == NULL) {
1257 1257 *cs->statusp = resp->status = NFS4ERR_INVAL;
1258 1258 goto out;
1259 1259 }
1260 1260
1261 1261 if (len > MAXNAMELEN) {
1262 1262 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1263 1263 kmem_free(nm, len);
1264 1264 goto out;
1265 1265 }
1266 1266
1267 1267 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1268 1268 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1269 1269 MAXPATHLEN + 1);
1270 1270
1271 1271 if (name == NULL) {
1272 1272 *cs->statusp = resp->status = NFS4ERR_INVAL;
1273 1273 kmem_free(nm, len);
1274 1274 goto out;
1275 1275 }
1276 1276
1277 1277
1278 1278 *cs->statusp = resp->status = do_rfs4_op_secinfo(cs, name, resp);
1279 1279
1280 1280 if (name != nm)
1281 1281 kmem_free(name, MAXPATHLEN + 1);
1282 1282 kmem_free(nm, len);
1283 1283
1284 1284 out:
1285 1285 DTRACE_NFSV4_2(op__secinfo__done, struct compound_state *, cs,
1286 1286 SECINFO4res *, resp);
1287 1287 }
1288 1288
1289 1289 /*
1290 1290 * Free SECINFO result.
1291 1291 */
1292 1292 /* ARGSUSED */
1293 1293 static void
1294 1294 rfs4_op_secinfo_free(nfs_resop4 *resop)
1295 1295 {
1296 1296 SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1297 1297 int count, i;
1298 1298 secinfo4 *resok_val;
1299 1299
1300 1300 /* If this is not an Ok result, nothing to free. */
1301 1301 if (resp->status != NFS4_OK) {
1302 1302 return;
1303 1303 }
1304 1304
1305 1305 count = resp->SECINFO4resok_len;
1306 1306 resok_val = resp->SECINFO4resok_val;
1307 1307
1308 1308 for (i = 0; i < count; i++) {
1309 1309 if (resok_val[i].flavor == RPCSEC_GSS) {
1310 1310 rpcsec_gss_info *info;
1311 1311
1312 1312 info = &resok_val[i].flavor_info;
1313 1313 kmem_free(info->oid.sec_oid4_val,
1314 1314 info->oid.sec_oid4_len);
1315 1315 }
1316 1316 }
1317 1317 kmem_free(resok_val, count * sizeof (secinfo4));
1318 1318 resp->SECINFO4resok_len = 0;
1319 1319 resp->SECINFO4resok_val = NULL;
1320 1320 }
1321 1321
1322 1322 /* ARGSUSED */
1323 1323 static void
1324 1324 rfs4_op_access(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1325 1325 struct compound_state *cs)
1326 1326 {
1327 1327 ACCESS4args *args = &argop->nfs_argop4_u.opaccess;
1328 1328 ACCESS4res *resp = &resop->nfs_resop4_u.opaccess;
1329 1329 int error;
1330 1330 vnode_t *vp;
1331 1331 struct vattr va;
1332 1332 int checkwriteperm;
1333 1333 cred_t *cr = cs->cr;
1334 1334 bslabel_t *clabel, *slabel;
1335 1335 ts_label_t *tslabel;
1336 1336 boolean_t admin_low_client;
1337 1337
1338 1338 DTRACE_NFSV4_2(op__access__start, struct compound_state *, cs,
1339 1339 ACCESS4args *, args);
1340 1340
1341 1341 #if 0 /* XXX allow access even if !cs->access. Eventually only pseudo fs */
1342 1342 if (cs->access == CS_ACCESS_DENIED) {
1343 1343 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1344 1344 goto out;
1345 1345 }
1346 1346 #endif
1347 1347 if (cs->vp == NULL) {
1348 1348 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1349 1349 goto out;
1350 1350 }
1351 1351
1352 1352 ASSERT(cr != NULL);
1353 1353
1354 1354 vp = cs->vp;
1355 1355
1356 1356 /*
1357 1357 * If the file system is exported read only, it is not appropriate
1358 1358 * to check write permissions for regular files and directories.
1359 1359 * Special files are interpreted by the client, so the underlying
1360 1360 * permissions are sent back to the client for interpretation.
1361 1361 */
1362 1362 if (rdonly4(req, cs) &&
1363 1363 (vp->v_type == VREG || vp->v_type == VDIR))
1364 1364 checkwriteperm = 0;
1365 1365 else
1366 1366 checkwriteperm = 1;
1367 1367
1368 1368 /*
1369 1369 * XXX
1370 1370 * We need the mode so that we can correctly determine access
1371 1371 * permissions relative to a mandatory lock file. Access to
1372 1372 * mandatory lock files is denied on the server, so it might
1373 1373 * as well be reflected to the server during the open.
1374 1374 */
1375 1375 va.va_mask = AT_MODE;
1376 1376 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1377 1377 if (error) {
1378 1378 *cs->statusp = resp->status = puterrno4(error);
1379 1379 goto out;
1380 1380 }
1381 1381 resp->access = 0;
1382 1382 resp->supported = 0;
1383 1383
1384 1384 if (is_system_labeled()) {
1385 1385 ASSERT(req->rq_label != NULL);
1386 1386 clabel = req->rq_label;
1387 1387 DTRACE_PROBE2(tx__rfs4__log__info__opaccess__clabel, char *,
1388 1388 "got client label from request(1)",
1389 1389 struct svc_req *, req);
1390 1390 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1391 1391 if ((tslabel = nfs_getflabel(vp, cs->exi)) == NULL) {
1392 1392 *cs->statusp = resp->status = puterrno4(EACCES);
1393 1393 goto out;
1394 1394 }
1395 1395 slabel = label2bslabel(tslabel);
1396 1396 DTRACE_PROBE3(tx__rfs4__log__info__opaccess__slabel,
1397 1397 char *, "got server label(1) for vp(2)",
1398 1398 bslabel_t *, slabel, vnode_t *, vp);
1399 1399
1400 1400 admin_low_client = B_FALSE;
1401 1401 } else
1402 1402 admin_low_client = B_TRUE;
1403 1403 }
1404 1404
1405 1405 if (args->access & ACCESS4_READ) {
1406 1406 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
1407 1407 if (!error && !MANDLOCK(vp, va.va_mode) &&
1408 1408 (!is_system_labeled() || admin_low_client ||
1409 1409 bldominates(clabel, slabel)))
1410 1410 resp->access |= ACCESS4_READ;
1411 1411 resp->supported |= ACCESS4_READ;
1412 1412 }
1413 1413 if ((args->access & ACCESS4_LOOKUP) && vp->v_type == VDIR) {
1414 1414 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1415 1415 if (!error && (!is_system_labeled() || admin_low_client ||
1416 1416 bldominates(clabel, slabel)))
1417 1417 resp->access |= ACCESS4_LOOKUP;
1418 1418 resp->supported |= ACCESS4_LOOKUP;
1419 1419 }
1420 1420 if (checkwriteperm &&
1421 1421 (args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND))) {
1422 1422 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1423 1423 if (!error && !MANDLOCK(vp, va.va_mode) &&
1424 1424 (!is_system_labeled() || admin_low_client ||
1425 1425 blequal(clabel, slabel)))
1426 1426 resp->access |=
1427 1427 (args->access & (ACCESS4_MODIFY | ACCESS4_EXTEND));
1428 1428 resp->supported |=
1429 1429 resp->access & (ACCESS4_MODIFY | ACCESS4_EXTEND);
1430 1430 }
1431 1431
1432 1432 if (checkwriteperm &&
1433 1433 (args->access & ACCESS4_DELETE) && vp->v_type == VDIR) {
1434 1434 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1435 1435 if (!error && (!is_system_labeled() || admin_low_client ||
1436 1436 blequal(clabel, slabel)))
1437 1437 resp->access |= ACCESS4_DELETE;
1438 1438 resp->supported |= ACCESS4_DELETE;
1439 1439 }
1440 1440 if (args->access & ACCESS4_EXECUTE && vp->v_type != VDIR) {
1441 1441 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1442 1442 if (!error && !MANDLOCK(vp, va.va_mode) &&
1443 1443 (!is_system_labeled() || admin_low_client ||
1444 1444 bldominates(clabel, slabel)))
1445 1445 resp->access |= ACCESS4_EXECUTE;
1446 1446 resp->supported |= ACCESS4_EXECUTE;
1447 1447 }
1448 1448
1449 1449 if (is_system_labeled() && !admin_low_client)
1450 1450 label_rele(tslabel);
1451 1451
1452 1452 *cs->statusp = resp->status = NFS4_OK;
1453 1453 out:
1454 1454 DTRACE_NFSV4_2(op__access__done, struct compound_state *, cs,
1455 1455 ACCESS4res *, resp);
1456 1456 }
1457 1457
1458 1458 /* ARGSUSED */
1459 1459 static void
1460 1460 rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1461 1461 struct compound_state *cs)
1462 1462 {
1463 1463 COMMIT4args *args = &argop->nfs_argop4_u.opcommit;
1464 1464 COMMIT4res *resp = &resop->nfs_resop4_u.opcommit;
1465 1465 int error;
1466 1466 vnode_t *vp = cs->vp;
1467 1467 cred_t *cr = cs->cr;
1468 1468 vattr_t va;
1469 1469 nfs4_srv_t *nsrv4;
1470 1470
1471 1471 DTRACE_NFSV4_2(op__commit__start, struct compound_state *, cs,
1472 1472 COMMIT4args *, args);
1473 1473
1474 1474 if (vp == NULL) {
1475 1475 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1476 1476 goto out;
1477 1477 }
1478 1478 if (cs->access == CS_ACCESS_DENIED) {
1479 1479 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1480 1480 goto out;
1481 1481 }
1482 1482
1483 1483 if (args->offset + args->count < args->offset) {
1484 1484 *cs->statusp = resp->status = NFS4ERR_INVAL;
1485 1485 goto out;
1486 1486 }
1487 1487
1488 1488 va.va_mask = AT_UID;
1489 1489 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1490 1490
1491 1491 /*
1492 1492 * If we can't get the attributes, then we can't do the
1493 1493 * right access checking. So, we'll fail the request.
1494 1494 */
1495 1495 if (error) {
1496 1496 *cs->statusp = resp->status = puterrno4(error);
1497 1497 goto out;
1498 1498 }
1499 1499 if (rdonly4(req, cs)) {
1500 1500 *cs->statusp = resp->status = NFS4ERR_ROFS;
1501 1501 goto out;
1502 1502 }
1503 1503
1504 1504 if (vp->v_type != VREG) {
1505 1505 if (vp->v_type == VDIR)
1506 1506 resp->status = NFS4ERR_ISDIR;
1507 1507 else
1508 1508 resp->status = NFS4ERR_INVAL;
1509 1509 *cs->statusp = resp->status;
1510 1510 goto out;
1511 1511 }
1512 1512
1513 1513 if (crgetuid(cr) != va.va_uid &&
1514 1514 (error = VOP_ACCESS(vp, VWRITE, 0, cs->cr, NULL))) {
1515 1515 *cs->statusp = resp->status = puterrno4(error);
1516 1516 goto out;
1517 1517 }
1518 1518
1519 1519 error = VOP_FSYNC(vp, FSYNC, cr, NULL);
1520 1520
1521 1521 if (error) {
1522 1522 *cs->statusp = resp->status = puterrno4(error);
1523 1523 goto out;
1524 1524 }
1525 1525
1526 1526 nsrv4 = nfs4_get_srv();
1527 1527 *cs->statusp = resp->status = NFS4_OK;
1528 1528 resp->writeverf = nsrv4->write4verf;
1529 1529 out:
1530 1530 DTRACE_NFSV4_2(op__commit__done, struct compound_state *, cs,
1531 1531 COMMIT4res *, resp);
1532 1532 }
1533 1533
1534 1534 /*
1535 1535 * rfs4_op_mknod is called from rfs4_op_create after all initial verification
1536 1536 * was completed. It does the nfsv4 create for special files.
1537 1537 */
1538 1538 /* ARGSUSED */
1539 1539 static vnode_t *
1540 1540 do_rfs4_op_mknod(CREATE4args *args, CREATE4res *resp, struct svc_req *req,
1541 1541 struct compound_state *cs, vattr_t *vap, char *nm)
1542 1542 {
1543 1543 int error;
1544 1544 cred_t *cr = cs->cr;
1545 1545 vnode_t *dvp = cs->vp;
1546 1546 vnode_t *vp = NULL;
1547 1547 int mode;
1548 1548 enum vcexcl excl;
1549 1549
1550 1550 switch (args->type) {
1551 1551 case NF4CHR:
1552 1552 case NF4BLK:
1553 1553 if (secpolicy_sys_devices(cr) != 0) {
1554 1554 *cs->statusp = resp->status = NFS4ERR_PERM;
1555 1555 return (NULL);
1556 1556 }
1557 1557 if (args->type == NF4CHR)
1558 1558 vap->va_type = VCHR;
1559 1559 else
1560 1560 vap->va_type = VBLK;
1561 1561 vap->va_rdev = makedevice(args->ftype4_u.devdata.specdata1,
1562 1562 args->ftype4_u.devdata.specdata2);
1563 1563 vap->va_mask |= AT_RDEV;
1564 1564 break;
1565 1565 case NF4SOCK:
1566 1566 vap->va_type = VSOCK;
1567 1567 break;
1568 1568 case NF4FIFO:
1569 1569 vap->va_type = VFIFO;
1570 1570 break;
1571 1571 default:
1572 1572 *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1573 1573 return (NULL);
1574 1574 }
1575 1575
1576 1576 /*
1577 1577 * Must specify the mode.
1578 1578 */
1579 1579 if (!(vap->va_mask & AT_MODE)) {
1580 1580 *cs->statusp = resp->status = NFS4ERR_INVAL;
1581 1581 return (NULL);
1582 1582 }
1583 1583
1584 1584 excl = EXCL;
1585 1585
1586 1586 mode = 0;
1587 1587
1588 1588 error = VOP_CREATE(dvp, nm, vap, excl, mode, &vp, cr, 0, NULL, NULL);
1589 1589 if (error) {
1590 1590 *cs->statusp = resp->status = puterrno4(error);
1591 1591 return (NULL);
1592 1592 }
1593 1593 return (vp);
1594 1594 }
1595 1595
1596 1596 /*
1597 1597 * nfsv4 create is used to create non-regular files. For regular files,
1598 1598 * use nfsv4 open.
1599 1599 */
1600 1600 /* ARGSUSED */
1601 1601 static void
1602 1602 rfs4_op_create(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1603 1603 struct compound_state *cs)
1604 1604 {
1605 1605 CREATE4args *args = &argop->nfs_argop4_u.opcreate;
1606 1606 CREATE4res *resp = &resop->nfs_resop4_u.opcreate;
1607 1607 int error;
1608 1608 struct vattr bva, iva, iva2, ava, *vap;
1609 1609 cred_t *cr = cs->cr;
1610 1610 vnode_t *dvp = cs->vp;
1611 1611 vnode_t *vp = NULL;
1612 1612 vnode_t *realvp;
1613 1613 char *nm, *lnm;
1614 1614 uint_t len, llen;
1615 1615 int syncval = 0;
1616 1616 struct nfs4_svgetit_arg sarg;
1617 1617 struct nfs4_ntov_table ntov;
1618 1618 struct statvfs64 sb;
1619 1619 nfsstat4 status;
1620 1620 struct sockaddr *ca;
1621 1621 char *name = NULL;
1622 1622 char *lname = NULL;
1623 1623
1624 1624 DTRACE_NFSV4_2(op__create__start, struct compound_state *, cs,
1625 1625 CREATE4args *, args);
1626 1626
1627 1627 resp->attrset = 0;
1628 1628
1629 1629 if (dvp == NULL) {
1630 1630 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1631 1631 goto out;
1632 1632 }
1633 1633
1634 1634 /*
1635 1635 * If there is an unshared filesystem mounted on this vnode,
1636 1636 * do not allow to create an object in this directory.
1637 1637 */
1638 1638 if (vn_ismntpt(dvp)) {
1639 1639 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1640 1640 goto out;
1641 1641 }
1642 1642
1643 1643 /* Verify that type is correct */
1644 1644 switch (args->type) {
1645 1645 case NF4LNK:
1646 1646 case NF4BLK:
1647 1647 case NF4CHR:
1648 1648 case NF4SOCK:
1649 1649 case NF4FIFO:
1650 1650 case NF4DIR:
1651 1651 break;
1652 1652 default:
1653 1653 *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1654 1654 goto out;
1655 1655 };
1656 1656
1657 1657 if (cs->access == CS_ACCESS_DENIED) {
1658 1658 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1659 1659 goto out;
1660 1660 }
1661 1661 if (dvp->v_type != VDIR) {
1662 1662 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1663 1663 goto out;
1664 1664 }
1665 1665 status = utf8_dir_verify(&args->objname);
1666 1666 if (status != NFS4_OK) {
1667 1667 *cs->statusp = resp->status = status;
1668 1668 goto out;
1669 1669 }
1670 1670
1671 1671 if (rdonly4(req, cs)) {
1672 1672 *cs->statusp = resp->status = NFS4ERR_ROFS;
1673 1673 goto out;
1674 1674 }
1675 1675
1676 1676 /*
1677 1677 * Name of newly created object
1678 1678 */
1679 1679 nm = utf8_to_fn(&args->objname, &len, NULL);
1680 1680 if (nm == NULL) {
1681 1681 *cs->statusp = resp->status = NFS4ERR_INVAL;
1682 1682 goto out;
1683 1683 }
1684 1684
1685 1685 if (len > MAXNAMELEN) {
1686 1686 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1687 1687 kmem_free(nm, len);
1688 1688 goto out;
1689 1689 }
1690 1690
1691 1691 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1692 1692 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1693 1693 MAXPATHLEN + 1);
1694 1694
1695 1695 if (name == NULL) {
1696 1696 *cs->statusp = resp->status = NFS4ERR_INVAL;
1697 1697 kmem_free(nm, len);
1698 1698 goto out;
1699 1699 }
1700 1700
1701 1701 resp->attrset = 0;
1702 1702
1703 1703 sarg.sbp = &sb;
1704 1704 sarg.is_referral = B_FALSE;
1705 1705 nfs4_ntov_table_init(&ntov);
1706 1706
1707 1707 status = do_rfs4_set_attrs(&resp->attrset,
1708 1708 &args->createattrs, cs, &sarg, &ntov, NFS4ATTR_SETIT);
1709 1709
1710 1710 if (sarg.vap->va_mask == 0 && status == NFS4_OK)
1711 1711 status = NFS4ERR_INVAL;
1712 1712
1713 1713 if (status != NFS4_OK) {
1714 1714 *cs->statusp = resp->status = status;
1715 1715 if (name != nm)
1716 1716 kmem_free(name, MAXPATHLEN + 1);
1717 1717 kmem_free(nm, len);
1718 1718 nfs4_ntov_table_free(&ntov, &sarg);
1719 1719 resp->attrset = 0;
1720 1720 goto out;
1721 1721 }
1722 1722
1723 1723 /* Get "before" change value */
1724 1724 bva.va_mask = AT_CTIME|AT_SEQ|AT_MODE;
1725 1725 error = VOP_GETATTR(dvp, &bva, 0, cr, NULL);
1726 1726 if (error) {
1727 1727 *cs->statusp = resp->status = puterrno4(error);
1728 1728 if (name != nm)
1729 1729 kmem_free(name, MAXPATHLEN + 1);
1730 1730 kmem_free(nm, len);
1731 1731 nfs4_ntov_table_free(&ntov, &sarg);
1732 1732 resp->attrset = 0;
1733 1733 goto out;
1734 1734 }
1735 1735 NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bva.va_ctime)
1736 1736
1737 1737 vap = sarg.vap;
1738 1738
1739 1739 /*
1740 1740 * Set the default initial values for attributes when the parent
1741 1741 * directory does not have the VSUID/VSGID bit set and they have
1742 1742 * not been specified in createattrs.
1743 1743 */
1744 1744 if (!(bva.va_mode & VSUID) && (vap->va_mask & AT_UID) == 0) {
1745 1745 vap->va_uid = crgetuid(cr);
1746 1746 vap->va_mask |= AT_UID;
1747 1747 }
1748 1748 if (!(bva.va_mode & VSGID) && (vap->va_mask & AT_GID) == 0) {
1749 1749 vap->va_gid = crgetgid(cr);
1750 1750 vap->va_mask |= AT_GID;
1751 1751 }
1752 1752
1753 1753 vap->va_mask |= AT_TYPE;
1754 1754 switch (args->type) {
1755 1755 case NF4DIR:
1756 1756 vap->va_type = VDIR;
1757 1757 if ((vap->va_mask & AT_MODE) == 0) {
1758 1758 vap->va_mode = 0700; /* default: owner rwx only */
1759 1759 vap->va_mask |= AT_MODE;
1760 1760 }
1761 1761 error = VOP_MKDIR(dvp, name, vap, &vp, cr, NULL, 0, NULL);
1762 1762 if (error)
1763 1763 break;
1764 1764
1765 1765 /*
1766 1766 * Get the initial "after" sequence number, if it fails,
1767 1767 * set to zero
1768 1768 */
1769 1769 iva.va_mask = AT_SEQ;
1770 1770 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1771 1771 iva.va_seq = 0;
1772 1772 break;
1773 1773 case NF4LNK:
1774 1774 vap->va_type = VLNK;
1775 1775 if ((vap->va_mask & AT_MODE) == 0) {
1776 1776 vap->va_mode = 0700; /* default: owner rwx only */
1777 1777 vap->va_mask |= AT_MODE;
1778 1778 }
1779 1779
1780 1780 /*
1781 1781 * symlink names must be treated as data
1782 1782 */
1783 1783 lnm = utf8_to_str((utf8string *)&args->ftype4_u.linkdata,
1784 1784 &llen, NULL);
1785 1785
1786 1786 if (lnm == NULL) {
1787 1787 *cs->statusp = resp->status = NFS4ERR_INVAL;
1788 1788 if (name != nm)
1789 1789 kmem_free(name, MAXPATHLEN + 1);
1790 1790 kmem_free(nm, len);
1791 1791 nfs4_ntov_table_free(&ntov, &sarg);
1792 1792 resp->attrset = 0;
1793 1793 goto out;
1794 1794 }
1795 1795
1796 1796 if (llen > MAXPATHLEN) {
1797 1797 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1798 1798 if (name != nm)
1799 1799 kmem_free(name, MAXPATHLEN + 1);
1800 1800 kmem_free(nm, len);
1801 1801 kmem_free(lnm, llen);
1802 1802 nfs4_ntov_table_free(&ntov, &sarg);
1803 1803 resp->attrset = 0;
1804 1804 goto out;
1805 1805 }
1806 1806
1807 1807 lname = nfscmd_convname(ca, cs->exi, lnm,
1808 1808 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
1809 1809
1810 1810 if (lname == NULL) {
1811 1811 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
1812 1812 if (name != nm)
1813 1813 kmem_free(name, MAXPATHLEN + 1);
1814 1814 kmem_free(nm, len);
1815 1815 kmem_free(lnm, llen);
1816 1816 nfs4_ntov_table_free(&ntov, &sarg);
1817 1817 resp->attrset = 0;
1818 1818 goto out;
1819 1819 }
1820 1820
1821 1821 error = VOP_SYMLINK(dvp, name, vap, lname, cr, NULL, 0);
1822 1822 if (lname != lnm)
1823 1823 kmem_free(lname, MAXPATHLEN + 1);
1824 1824 kmem_free(lnm, llen);
1825 1825 if (error)
1826 1826 break;
1827 1827
1828 1828 /*
1829 1829 * Get the initial "after" sequence number, if it fails,
1830 1830 * set to zero
1831 1831 */
1832 1832 iva.va_mask = AT_SEQ;
1833 1833 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1834 1834 iva.va_seq = 0;
1835 1835
1836 1836 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
1837 1837 NULL, NULL, NULL);
1838 1838 if (error)
1839 1839 break;
1840 1840
1841 1841 /*
1842 1842 * va_seq is not safe over VOP calls, check it again
1843 1843 * if it has changed zero out iva to force atomic = FALSE.
1844 1844 */
1845 1845 iva2.va_mask = AT_SEQ;
1846 1846 if (VOP_GETATTR(dvp, &iva2, 0, cs->cr, NULL) ||
1847 1847 iva2.va_seq != iva.va_seq)
1848 1848 iva.va_seq = 0;
1849 1849 break;
1850 1850 default:
1851 1851 /*
1852 1852 * probably a special file.
1853 1853 */
1854 1854 if ((vap->va_mask & AT_MODE) == 0) {
1855 1855 vap->va_mode = 0600; /* default: owner rw only */
1856 1856 vap->va_mask |= AT_MODE;
1857 1857 }
1858 1858 syncval = FNODSYNC;
1859 1859 /*
1860 1860 * We know this will only generate one VOP call
1861 1861 */
1862 1862 vp = do_rfs4_op_mknod(args, resp, req, cs, vap, name);
1863 1863
1864 1864 if (vp == NULL) {
1865 1865 if (name != nm)
1866 1866 kmem_free(name, MAXPATHLEN + 1);
1867 1867 kmem_free(nm, len);
1868 1868 nfs4_ntov_table_free(&ntov, &sarg);
1869 1869 resp->attrset = 0;
1870 1870 goto out;
1871 1871 }
1872 1872
1873 1873 /*
1874 1874 * Get the initial "after" sequence number, if it fails,
1875 1875 * set to zero
1876 1876 */
1877 1877 iva.va_mask = AT_SEQ;
1878 1878 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1879 1879 iva.va_seq = 0;
1880 1880
1881 1881 break;
1882 1882 }
1883 1883 if (name != nm)
1884 1884 kmem_free(name, MAXPATHLEN + 1);
1885 1885 kmem_free(nm, len);
1886 1886
1887 1887 if (error) {
1888 1888 *cs->statusp = resp->status = puterrno4(error);
1889 1889 }
1890 1890
1891 1891 /*
1892 1892 * Force modified data and metadata out to stable storage.
1893 1893 */
1894 1894 (void) VOP_FSYNC(dvp, 0, cr, NULL);
1895 1895
1896 1896 if (resp->status != NFS4_OK) {
1897 1897 if (vp != NULL)
1898 1898 VN_RELE(vp);
1899 1899 nfs4_ntov_table_free(&ntov, &sarg);
1900 1900 resp->attrset = 0;
1901 1901 goto out;
1902 1902 }
1903 1903
1904 1904 /*
1905 1905 * Finish setup of cinfo response, "before" value already set.
1906 1906 * Get "after" change value, if it fails, simply return the
1907 1907 * before value.
1908 1908 */
1909 1909 ava.va_mask = AT_CTIME|AT_SEQ;
1910 1910 if (VOP_GETATTR(dvp, &ava, 0, cr, NULL)) {
1911 1911 ava.va_ctime = bva.va_ctime;
1912 1912 ava.va_seq = 0;
1913 1913 }
1914 1914 NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, ava.va_ctime);
1915 1915
1916 1916 /*
1917 1917 * True verification that object was created with correct
1918 1918 * attrs is impossible. The attrs could have been changed
1919 1919 * immediately after object creation. If attributes did
1920 1920 * not verify, the only recourse for the server is to
1921 1921 * destroy the object. Maybe if some attrs (like gid)
1922 1922 * are set incorrectly, the object should be destroyed;
1923 1923 * however, seems bad as a default policy. Do we really
1924 1924 * want to destroy an object over one of the times not
1925 1925 * verifying correctly? For these reasons, the server
1926 1926 * currently sets bits in attrset for createattrs
1927 1927 * that were set; however, no verification is done.
1928 1928 *
1929 1929 * vmask_to_nmask accounts for vattr bits set on create
1930 1930 * [do_rfs4_set_attrs() only sets resp bits for
1931 1931 * non-vattr/vfs bits.]
1932 1932 * Mask off any bits set by default so as not to return
1933 1933 * more attrset bits than were requested in createattrs
1934 1934 */
1935 1935 nfs4_vmask_to_nmask(sarg.vap->va_mask, &resp->attrset);
1936 1936 resp->attrset &= args->createattrs.attrmask;
1937 1937 nfs4_ntov_table_free(&ntov, &sarg);
1938 1938
1939 1939 error = makefh4(&cs->fh, vp, cs->exi);
1940 1940 if (error) {
1941 1941 *cs->statusp = resp->status = puterrno4(error);
1942 1942 }
1943 1943
1944 1944 /*
1945 1945 * The cinfo.atomic = TRUE only if we got no errors, we have
1946 1946 * non-zero va_seq's, and it has incremented by exactly one
1947 1947 * during the creation and it didn't change during the VOP_LOOKUP
1948 1948 * or VOP_FSYNC.
1949 1949 */
1950 1950 if (!error && bva.va_seq && iva.va_seq && ava.va_seq &&
1951 1951 iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
1952 1952 resp->cinfo.atomic = TRUE;
1953 1953 else
1954 1954 resp->cinfo.atomic = FALSE;
1955 1955
1956 1956 /*
1957 1957 * Force modified metadata out to stable storage.
1958 1958 *
1959 1959 * if a underlying vp exists, pass it to VOP_FSYNC
1960 1960 */
1961 1961 if (VOP_REALVP(vp, &realvp, NULL) == 0)
1962 1962 (void) VOP_FSYNC(realvp, syncval, cr, NULL);
1963 1963 else
1964 1964 (void) VOP_FSYNC(vp, syncval, cr, NULL);
1965 1965
1966 1966 if (resp->status != NFS4_OK) {
1967 1967 VN_RELE(vp);
1968 1968 goto out;
1969 1969 }
1970 1970 if (cs->vp)
1971 1971 VN_RELE(cs->vp);
1972 1972
1973 1973 cs->vp = vp;
1974 1974 *cs->statusp = resp->status = NFS4_OK;
1975 1975 out:
1976 1976 DTRACE_NFSV4_2(op__create__done, struct compound_state *, cs,
1977 1977 CREATE4res *, resp);
1978 1978 }
1979 1979
1980 1980 /*ARGSUSED*/
1981 1981 static void
1982 1982 rfs4_op_delegpurge(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1983 1983 struct compound_state *cs)
1984 1984 {
1985 1985 DTRACE_NFSV4_2(op__delegpurge__start, struct compound_state *, cs,
1986 1986 DELEGPURGE4args *, &argop->nfs_argop4_u.opdelegpurge);
1987 1987
1988 1988 rfs4_op_inval(argop, resop, req, cs);
1989 1989
1990 1990 DTRACE_NFSV4_2(op__delegpurge__done, struct compound_state *, cs,
1991 1991 DELEGPURGE4res *, &resop->nfs_resop4_u.opdelegpurge);
1992 1992 }
1993 1993
1994 1994 /*ARGSUSED*/
1995 1995 static void
1996 1996 rfs4_op_delegreturn(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1997 1997 struct compound_state *cs)
1998 1998 {
1999 1999 DELEGRETURN4args *args = &argop->nfs_argop4_u.opdelegreturn;
2000 2000 DELEGRETURN4res *resp = &resop->nfs_resop4_u.opdelegreturn;
2001 2001 rfs4_deleg_state_t *dsp;
2002 2002 nfsstat4 status;
2003 2003
2004 2004 DTRACE_NFSV4_2(op__delegreturn__start, struct compound_state *, cs,
2005 2005 DELEGRETURN4args *, args);
2006 2006
2007 2007 status = rfs4_get_deleg_state(&args->deleg_stateid, &dsp);
2008 2008 resp->status = *cs->statusp = status;
2009 2009 if (status != NFS4_OK)
2010 2010 goto out;
2011 2011
2012 2012 /* Ensure specified filehandle matches */
2013 2013 if (cs->vp != dsp->rds_finfo->rf_vp) {
2014 2014 resp->status = *cs->statusp = NFS4ERR_BAD_STATEID;
2015 2015 } else
2016 2016 rfs4_return_deleg(dsp, FALSE);
2017 2017
2018 2018 rfs4_update_lease(dsp->rds_client);
2019 2019
2020 2020 rfs4_deleg_state_rele(dsp);
2021 2021 out:
2022 2022 DTRACE_NFSV4_2(op__delegreturn__done, struct compound_state *, cs,
2023 2023 DELEGRETURN4res *, resp);
2024 2024 }
2025 2025
2026 2026 /*
2027 2027 * Check to see if a given "flavor" is an explicitly shared flavor.
2028 2028 * The assumption of this routine is the "flavor" is already a valid
2029 2029 * flavor in the secinfo list of "exi".
2030 2030 *
2031 2031 * e.g.
2032 2032 * # share -o sec=flavor1 /export
2033 2033 * # share -o sec=flavor2 /export/home
2034 2034 *
2035 2035 * flavor2 is not an explicitly shared flavor for /export,
2036 2036 * however it is in the secinfo list for /export thru the
2037 2037 * server namespace setup.
2038 2038 */
2039 2039 int
2040 2040 is_exported_sec(int flavor, struct exportinfo *exi)
2041 2041 {
2042 2042 int i;
2043 2043 struct secinfo *sp;
2044 2044
2045 2045 sp = exi->exi_export.ex_secinfo;
2046 2046 for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
2047 2047 if (flavor == sp[i].s_secinfo.sc_nfsnum ||
2048 2048 sp[i].s_secinfo.sc_nfsnum == AUTH_NONE) {
2049 2049 return (SEC_REF_EXPORTED(&sp[i]));
2050 2050 }
2051 2051 }
2052 2052
2053 2053 /* Should not reach this point based on the assumption */
2054 2054 return (0);
2055 2055 }
2056 2056
2057 2057 /*
2058 2058 * Check if the security flavor used in the request matches what is
2059 2059 * required at the export point or at the root pseudo node (exi_root).
2060 2060 *
2061 2061 * returns 1 if there's a match or if exported with AUTH_NONE; 0 otherwise.
2062 2062 *
2063 2063 */
2064 2064 static int
2065 2065 secinfo_match_or_authnone(struct compound_state *cs)
2066 2066 {
2067 2067 int i;
2068 2068 struct secinfo *sp;
2069 2069
2070 2070 /*
2071 2071 * Check cs->nfsflavor (from the request) against
2072 2072 * the current export data in cs->exi.
2073 2073 */
2074 2074 sp = cs->exi->exi_export.ex_secinfo;
2075 2075 for (i = 0; i < cs->exi->exi_export.ex_seccnt; i++) {
2076 2076 if (cs->nfsflavor == sp[i].s_secinfo.sc_nfsnum ||
2077 2077 sp[i].s_secinfo.sc_nfsnum == AUTH_NONE)
2078 2078 return (1);
2079 2079 }
2080 2080
2081 2081 return (0);
2082 2082 }
2083 2083
2084 2084 /*
2085 2085 * Check the access authority for the client and return the correct error.
2086 2086 */
2087 2087 nfsstat4
2088 2088 call_checkauth4(struct compound_state *cs, struct svc_req *req)
2089 2089 {
2090 2090 int authres;
2091 2091
2092 2092 /*
2093 2093 * First, check if the security flavor used in the request
2094 2094 * are among the flavors set in the server namespace.
2095 2095 */
2096 2096 if (!secinfo_match_or_authnone(cs)) {
2097 2097 *cs->statusp = NFS4ERR_WRONGSEC;
2098 2098 return (*cs->statusp);
2099 2099 }
2100 2100
2101 2101 authres = checkauth4(cs, req);
2102 2102
2103 2103 if (authres > 0) {
2104 2104 *cs->statusp = NFS4_OK;
2105 2105 if (! (cs->access & CS_ACCESS_LIMITED))
2106 2106 cs->access = CS_ACCESS_OK;
2107 2107 } else if (authres == 0) {
2108 2108 *cs->statusp = NFS4ERR_ACCESS;
2109 2109 } else if (authres == -2) {
2110 2110 *cs->statusp = NFS4ERR_WRONGSEC;
2111 2111 } else {
2112 2112 *cs->statusp = NFS4ERR_DELAY;
2113 2113 }
2114 2114 return (*cs->statusp);
2115 2115 }
2116 2116
2117 2117 /*
2118 2118 * bitmap4_to_attrmask is called by getattr and readdir.
2119 2119 * It sets up the vattr mask and determines whether vfsstat call is needed
2120 2120 * based on the input bitmap.
2121 2121 * Returns nfsv4 status.
2122 2122 */
2123 2123 static nfsstat4
2124 2124 bitmap4_to_attrmask(bitmap4 breq, struct nfs4_svgetit_arg *sargp)
2125 2125 {
2126 2126 int i;
2127 2127 uint_t va_mask;
2128 2128 struct statvfs64 *sbp = sargp->sbp;
2129 2129
2130 2130 sargp->sbp = NULL;
2131 2131 sargp->flag = 0;
2132 2132 sargp->rdattr_error = NFS4_OK;
2133 2133 sargp->mntdfid_set = FALSE;
2134 2134 if (sargp->cs->vp)
2135 2135 sargp->xattr = get_fh4_flag(&sargp->cs->fh,
2136 2136 FH4_ATTRDIR | FH4_NAMEDATTR);
2137 2137 else
2138 2138 sargp->xattr = 0;
2139 2139
2140 2140 /*
2141 2141 * Set rdattr_error_req to true if return error per
2142 2142 * failed entry rather than fail the readdir.
2143 2143 */
2144 2144 if (breq & FATTR4_RDATTR_ERROR_MASK)
2145 2145 sargp->rdattr_error_req = 1;
2146 2146 else
2147 2147 sargp->rdattr_error_req = 0;
2148 2148
2149 2149 /*
2150 2150 * generate the va_mask
2151 2151 * Handle the easy cases first
2152 2152 */
2153 2153 switch (breq) {
2154 2154 case NFS4_NTOV_ATTR_MASK:
2155 2155 sargp->vap->va_mask = NFS4_NTOV_ATTR_AT_MASK;
2156 2156 return (NFS4_OK);
2157 2157
2158 2158 case NFS4_FS_ATTR_MASK:
2159 2159 sargp->vap->va_mask = NFS4_FS_ATTR_AT_MASK;
2160 2160 sargp->sbp = sbp;
2161 2161 return (NFS4_OK);
2162 2162
2163 2163 case NFS4_NTOV_ATTR_CACHE_MASK:
2164 2164 sargp->vap->va_mask = NFS4_NTOV_ATTR_CACHE_AT_MASK;
2165 2165 return (NFS4_OK);
2166 2166
2167 2167 case FATTR4_LEASE_TIME_MASK:
2168 2168 sargp->vap->va_mask = 0;
2169 2169 return (NFS4_OK);
2170 2170
2171 2171 default:
2172 2172 va_mask = 0;
2173 2173 for (i = 0; i < nfs4_ntov_map_size; i++) {
2174 2174 if ((breq & nfs4_ntov_map[i].fbit) &&
2175 2175 nfs4_ntov_map[i].vbit)
2176 2176 va_mask |= nfs4_ntov_map[i].vbit;
2177 2177 }
2178 2178
2179 2179 /*
2180 2180 * Check is vfsstat is needed
2181 2181 */
2182 2182 if (breq & NFS4_FS_ATTR_MASK)
2183 2183 sargp->sbp = sbp;
2184 2184
2185 2185 sargp->vap->va_mask = va_mask;
2186 2186 return (NFS4_OK);
2187 2187 }
2188 2188 /* NOTREACHED */
2189 2189 }
2190 2190
2191 2191 /*
2192 2192 * bitmap4_get_sysattrs is called by getattr and readdir.
2193 2193 * It calls both VOP_GETATTR and VFS_STATVFS calls to get the attrs.
2194 2194 * Returns nfsv4 status.
2195 2195 */
2196 2196 static nfsstat4
2197 2197 bitmap4_get_sysattrs(struct nfs4_svgetit_arg *sargp)
2198 2198 {
2199 2199 int error;
2200 2200 struct compound_state *cs = sargp->cs;
2201 2201 vnode_t *vp = cs->vp;
2202 2202
2203 2203 if (sargp->sbp != NULL) {
2204 2204 if (error = VFS_STATVFS(vp->v_vfsp, sargp->sbp)) {
2205 2205 sargp->sbp = NULL; /* to identify error */
2206 2206 return (puterrno4(error));
2207 2207 }
2208 2208 }
2209 2209
2210 2210 return (rfs4_vop_getattr(vp, sargp->vap, 0, cs->cr));
2211 2211 }
2212 2212
2213 2213 static void
2214 2214 nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp)
2215 2215 {
2216 2216 ntovp->na = kmem_zalloc(sizeof (union nfs4_attr_u) * nfs4_ntov_map_size,
2217 2217 KM_SLEEP);
2218 2218 ntovp->attrcnt = 0;
2219 2219 ntovp->vfsstat = FALSE;
2220 2220 }
2221 2221
2222 2222 static void
2223 2223 nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
2224 2224 struct nfs4_svgetit_arg *sargp)
2225 2225 {
2226 2226 int i;
2227 2227 union nfs4_attr_u *na;
2228 2228 uint8_t *amap;
2229 2229
2230 2230 /*
2231 2231 * XXX Should do the same checks for whether the bit is set
2232 2232 */
2233 2233 for (i = 0, na = ntovp->na, amap = ntovp->amap;
2234 2234 i < ntovp->attrcnt; i++, na++, amap++) {
2235 2235 (void) (*nfs4_ntov_map[*amap].sv_getit)(
2236 2236 NFS4ATTR_FREEIT, sargp, na);
2237 2237 }
2238 2238 if ((sargp->op == NFS4ATTR_SETIT) || (sargp->op == NFS4ATTR_VERIT)) {
2239 2239 /*
2240 2240 * xdr_free for getattr will be done later
2241 2241 */
2242 2242 for (i = 0, na = ntovp->na, amap = ntovp->amap;
2243 2243 i < ntovp->attrcnt; i++, na++, amap++) {
2244 2244 xdr_free(nfs4_ntov_map[*amap].xfunc, (caddr_t)na);
2245 2245 }
2246 2246 }
2247 2247 kmem_free(ntovp->na, sizeof (union nfs4_attr_u) * nfs4_ntov_map_size);
2248 2248 }
2249 2249
2250 2250 /*
2251 2251 * do_rfs4_op_getattr gets the system attrs and converts into fattr4.
2252 2252 */
2253 2253 static nfsstat4
2254 2254 do_rfs4_op_getattr(bitmap4 breq, fattr4 *fattrp,
2255 2255 struct nfs4_svgetit_arg *sargp)
2256 2256 {
2257 2257 int error = 0;
2258 2258 int i, k;
2259 2259 struct nfs4_ntov_table ntov;
2260 2260 XDR xdr;
2261 2261 ulong_t xdr_size;
2262 2262 char *xdr_attrs;
2263 2263 nfsstat4 status = NFS4_OK;
2264 2264 nfsstat4 prev_rdattr_error = sargp->rdattr_error;
2265 2265 union nfs4_attr_u *na;
2266 2266 uint8_t *amap;
2267 2267
2268 2268 sargp->op = NFS4ATTR_GETIT;
2269 2269 sargp->flag = 0;
2270 2270
2271 2271 fattrp->attrmask = 0;
2272 2272 /* if no bits requested, then return empty fattr4 */
2273 2273 if (breq == 0) {
2274 2274 fattrp->attrlist4_len = 0;
2275 2275 fattrp->attrlist4 = NULL;
2276 2276 return (NFS4_OK);
2277 2277 }
2278 2278
2279 2279 /*
2280 2280 * return NFS4ERR_INVAL when client requests write-only attrs
2281 2281 */
2282 2282 if (breq & (FATTR4_TIME_ACCESS_SET_MASK | FATTR4_TIME_MODIFY_SET_MASK))
2283 2283 return (NFS4ERR_INVAL);
2284 2284
2285 2285 nfs4_ntov_table_init(&ntov);
2286 2286 na = ntov.na;
2287 2287 amap = ntov.amap;
2288 2288
2289 2289 /*
2290 2290 * Now loop to get or verify the attrs
2291 2291 */
2292 2292 for (i = 0; i < nfs4_ntov_map_size; i++) {
2293 2293 if (breq & nfs4_ntov_map[i].fbit) {
2294 2294 if ((*nfs4_ntov_map[i].sv_getit)(
2295 2295 NFS4ATTR_SUPPORTED, sargp, NULL) == 0) {
2296 2296
2297 2297 error = (*nfs4_ntov_map[i].sv_getit)(
2298 2298 NFS4ATTR_GETIT, sargp, na);
2299 2299
2300 2300 /*
2301 2301 * Possible error values:
2302 2302 * >0 if sv_getit failed to
2303 2303 * get the attr; 0 if succeeded;
2304 2304 * <0 if rdattr_error and the
2305 2305 * attribute cannot be returned.
2306 2306 */
2307 2307 if (error && !(sargp->rdattr_error_req))
2308 2308 goto done;
2309 2309 /*
2310 2310 * If error then just for entry
2311 2311 */
2312 2312 if (error == 0) {
2313 2313 fattrp->attrmask |=
2314 2314 nfs4_ntov_map[i].fbit;
2315 2315 *amap++ =
2316 2316 (uint8_t)nfs4_ntov_map[i].nval;
2317 2317 na++;
2318 2318 (ntov.attrcnt)++;
2319 2319 } else if ((error > 0) &&
2320 2320 (sargp->rdattr_error == NFS4_OK)) {
2321 2321 sargp->rdattr_error = puterrno4(error);
2322 2322 }
2323 2323 error = 0;
2324 2324 }
2325 2325 }
2326 2326 }
2327 2327
2328 2328 /*
2329 2329 * If rdattr_error was set after the return value for it was assigned,
2330 2330 * update it.
2331 2331 */
2332 2332 if (prev_rdattr_error != sargp->rdattr_error) {
2333 2333 na = ntov.na;
2334 2334 amap = ntov.amap;
2335 2335 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2336 2336 k = *amap;
2337 2337 if (k < FATTR4_RDATTR_ERROR) {
2338 2338 continue;
2339 2339 }
2340 2340 if ((k == FATTR4_RDATTR_ERROR) &&
2341 2341 ((*nfs4_ntov_map[k].sv_getit)(
2342 2342 NFS4ATTR_SUPPORTED, sargp, NULL) == 0)) {
2343 2343
2344 2344 (void) (*nfs4_ntov_map[k].sv_getit)(
2345 2345 NFS4ATTR_GETIT, sargp, na);
2346 2346 }
2347 2347 break;
2348 2348 }
2349 2349 }
2350 2350
2351 2351 xdr_size = 0;
2352 2352 na = ntov.na;
2353 2353 amap = ntov.amap;
2354 2354 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2355 2355 xdr_size += xdr_sizeof(nfs4_ntov_map[*amap].xfunc, na);
2356 2356 }
2357 2357
2358 2358 fattrp->attrlist4_len = xdr_size;
2359 2359 if (xdr_size) {
2360 2360 /* freed by rfs4_op_getattr_free() */
2361 2361 fattrp->attrlist4 = xdr_attrs = kmem_zalloc(xdr_size, KM_SLEEP);
2362 2362
2363 2363 xdrmem_create(&xdr, xdr_attrs, xdr_size, XDR_ENCODE);
2364 2364
2365 2365 na = ntov.na;
2366 2366 amap = ntov.amap;
2367 2367 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2368 2368 if (!(*nfs4_ntov_map[*amap].xfunc)(&xdr, na)) {
2369 2369 DTRACE_PROBE1(nfss__e__getattr4_encfail,
2370 2370 int, *amap);
2371 2371 status = NFS4ERR_SERVERFAULT;
2372 2372 break;
2373 2373 }
2374 2374 }
2375 2375 /* xdrmem_destroy(&xdrs); */ /* NO-OP */
2376 2376 } else {
2377 2377 fattrp->attrlist4 = NULL;
2378 2378 }
2379 2379 done:
2380 2380
2381 2381 nfs4_ntov_table_free(&ntov, sargp);
2382 2382
2383 2383 if (error != 0)
2384 2384 status = puterrno4(error);
2385 2385
2386 2386 return (status);
2387 2387 }
2388 2388
2389 2389 /* ARGSUSED */
2390 2390 static void
2391 2391 rfs4_op_getattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2392 2392 struct compound_state *cs)
2393 2393 {
2394 2394 GETATTR4args *args = &argop->nfs_argop4_u.opgetattr;
2395 2395 GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2396 2396 struct nfs4_svgetit_arg sarg;
2397 2397 struct statvfs64 sb;
2398 2398 nfsstat4 status;
2399 2399
2400 2400 DTRACE_NFSV4_2(op__getattr__start, struct compound_state *, cs,
2401 2401 GETATTR4args *, args);
2402 2402
2403 2403 if (cs->vp == NULL) {
2404 2404 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2405 2405 goto out;
2406 2406 }
2407 2407
2408 2408 if (cs->access == CS_ACCESS_DENIED) {
2409 2409 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2410 2410 goto out;
2411 2411 }
2412 2412
2413 2413 sarg.sbp = &sb;
2414 2414 sarg.cs = cs;
2415 2415 sarg.is_referral = B_FALSE;
2416 2416
2417 2417 status = bitmap4_to_attrmask(args->attr_request, &sarg);
2418 2418 if (status == NFS4_OK) {
2419 2419
2420 2420 status = bitmap4_get_sysattrs(&sarg);
2421 2421 if (status == NFS4_OK) {
2422 2422
2423 2423 /* Is this a referral? */
2424 2424 if (vn_is_nfs_reparse(cs->vp, cs->cr)) {
2425 2425 /* Older V4 Solaris client sees a link */
2426 2426 if (client_is_downrev(req))
2427 2427 sarg.vap->va_type = VLNK;
2428 2428 else
2429 2429 sarg.is_referral = B_TRUE;
2430 2430 }
2431 2431
2432 2432 status = do_rfs4_op_getattr(args->attr_request,
2433 2433 &resp->obj_attributes, &sarg);
2434 2434 }
2435 2435 }
2436 2436 *cs->statusp = resp->status = status;
2437 2437 out:
2438 2438 DTRACE_NFSV4_2(op__getattr__done, struct compound_state *, cs,
2439 2439 GETATTR4res *, resp);
2440 2440 }
2441 2441
2442 2442 static void
2443 2443 rfs4_op_getattr_free(nfs_resop4 *resop)
2444 2444 {
2445 2445 GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2446 2446
2447 2447 nfs4_fattr4_free(&resp->obj_attributes);
2448 2448 }
2449 2449
2450 2450 /* ARGSUSED */
2451 2451 static void
2452 2452 rfs4_op_getfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2453 2453 struct compound_state *cs)
2454 2454 {
2455 2455 GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2456 2456
2457 2457 DTRACE_NFSV4_1(op__getfh__start, struct compound_state *, cs);
2458 2458
2459 2459 if (cs->vp == NULL) {
2460 2460 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2461 2461 goto out;
2462 2462 }
2463 2463 if (cs->access == CS_ACCESS_DENIED) {
2464 2464 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2465 2465 goto out;
2466 2466 }
2467 2467
2468 2468 /* check for reparse point at the share point */
2469 2469 if (cs->exi->exi_moved || vn_is_nfs_reparse(cs->exi->exi_vp, cs->cr)) {
2470 2470 /* it's all bad */
2471 2471 cs->exi->exi_moved = 1;
2472 2472 *cs->statusp = resp->status = NFS4ERR_MOVED;
2473 2473 DTRACE_PROBE2(nfs4serv__func__referral__shared__moved,
2474 2474 vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2475 2475 return;
2476 2476 }
2477 2477
2478 2478 /* check for reparse point at vp */
2479 2479 if (vn_is_nfs_reparse(cs->vp, cs->cr) && !client_is_downrev(req)) {
2480 2480 /* it's not all bad */
2481 2481 *cs->statusp = resp->status = NFS4ERR_MOVED;
2482 2482 DTRACE_PROBE2(nfs4serv__func__referral__moved,
2483 2483 vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2484 2484 return;
2485 2485 }
2486 2486
2487 2487 resp->object.nfs_fh4_val =
2488 2488 kmem_alloc(cs->fh.nfs_fh4_len, KM_SLEEP);
2489 2489 nfs_fh4_copy(&cs->fh, &resp->object);
2490 2490 *cs->statusp = resp->status = NFS4_OK;
2491 2491 out:
2492 2492 DTRACE_NFSV4_2(op__getfh__done, struct compound_state *, cs,
2493 2493 GETFH4res *, resp);
2494 2494 }
2495 2495
2496 2496 static void
2497 2497 rfs4_op_getfh_free(nfs_resop4 *resop)
2498 2498 {
2499 2499 GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2500 2500
2501 2501 if (resp->status == NFS4_OK &&
2502 2502 resp->object.nfs_fh4_val != NULL) {
2503 2503 kmem_free(resp->object.nfs_fh4_val, resp->object.nfs_fh4_len);
2504 2504 resp->object.nfs_fh4_val = NULL;
2505 2505 resp->object.nfs_fh4_len = 0;
2506 2506 }
2507 2507 }
2508 2508
2509 2509 /*
2510 2510 * illegal: args: void
2511 2511 * res : status (NFS4ERR_OP_ILLEGAL)
2512 2512 */
2513 2513 /* ARGSUSED */
2514 2514 static void
2515 2515 rfs4_op_illegal(nfs_argop4 *argop, nfs_resop4 *resop,
2516 2516 struct svc_req *req, struct compound_state *cs)
2517 2517 {
2518 2518 ILLEGAL4res *resp = &resop->nfs_resop4_u.opillegal;
2519 2519
2520 2520 resop->resop = OP_ILLEGAL;
2521 2521 *cs->statusp = resp->status = NFS4ERR_OP_ILLEGAL;
2522 2522 }
2523 2523
2524 2524 /*
2525 2525 * link: args: SAVED_FH: file, CURRENT_FH: target directory
2526 2526 * res: status. If success - CURRENT_FH unchanged, return change_info
2527 2527 */
2528 2528 /* ARGSUSED */
2529 2529 static void
2530 2530 rfs4_op_link(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2531 2531 struct compound_state *cs)
2532 2532 {
2533 2533 LINK4args *args = &argop->nfs_argop4_u.oplink;
2534 2534 LINK4res *resp = &resop->nfs_resop4_u.oplink;
2535 2535 int error;
2536 2536 vnode_t *vp;
2537 2537 vnode_t *dvp;
2538 2538 struct vattr bdva, idva, adva;
2539 2539 char *nm;
2540 2540 uint_t len;
2541 2541 struct sockaddr *ca;
2542 2542 char *name = NULL;
2543 2543 nfsstat4 status;
2544 2544
2545 2545 DTRACE_NFSV4_2(op__link__start, struct compound_state *, cs,
2546 2546 LINK4args *, args);
2547 2547
2548 2548 /* SAVED_FH: source object */
2549 2549 vp = cs->saved_vp;
2550 2550 if (vp == NULL) {
2551 2551 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2552 2552 goto out;
2553 2553 }
2554 2554
2555 2555 /* CURRENT_FH: target directory */
2556 2556 dvp = cs->vp;
2557 2557 if (dvp == NULL) {
2558 2558 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2559 2559 goto out;
2560 2560 }
2561 2561
2562 2562 /*
2563 2563 * If there is a non-shared filesystem mounted on this vnode,
2564 2564 * do not allow to link any file in this directory.
2565 2565 */
2566 2566 if (vn_ismntpt(dvp)) {
2567 2567 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2568 2568 goto out;
2569 2569 }
2570 2570
2571 2571 if (cs->access == CS_ACCESS_DENIED) {
2572 2572 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2573 2573 goto out;
2574 2574 }
2575 2575
2576 2576 /* Check source object's type validity */
2577 2577 if (vp->v_type == VDIR) {
2578 2578 *cs->statusp = resp->status = NFS4ERR_ISDIR;
2579 2579 goto out;
2580 2580 }
2581 2581
2582 2582 /* Check target directory's type */
2583 2583 if (dvp->v_type != VDIR) {
2584 2584 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2585 2585 goto out;
2586 2586 }
2587 2587
2588 2588 if (cs->saved_exi != cs->exi) {
2589 2589 *cs->statusp = resp->status = NFS4ERR_XDEV;
2590 2590 goto out;
2591 2591 }
2592 2592
2593 2593 status = utf8_dir_verify(&args->newname);
2594 2594 if (status != NFS4_OK) {
2595 2595 *cs->statusp = resp->status = status;
2596 2596 goto out;
2597 2597 }
2598 2598
2599 2599 nm = utf8_to_fn(&args->newname, &len, NULL);
2600 2600 if (nm == NULL) {
2601 2601 *cs->statusp = resp->status = NFS4ERR_INVAL;
2602 2602 goto out;
2603 2603 }
2604 2604
2605 2605 if (len > MAXNAMELEN) {
2606 2606 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2607 2607 kmem_free(nm, len);
2608 2608 goto out;
2609 2609 }
2610 2610
2611 2611 if (rdonly4(req, cs)) {
2612 2612 *cs->statusp = resp->status = NFS4ERR_ROFS;
2613 2613 kmem_free(nm, len);
2614 2614 goto out;
2615 2615 }
2616 2616
2617 2617 /* Get "before" change value */
2618 2618 bdva.va_mask = AT_CTIME|AT_SEQ;
2619 2619 error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
2620 2620 if (error) {
2621 2621 *cs->statusp = resp->status = puterrno4(error);
2622 2622 kmem_free(nm, len);
2623 2623 goto out;
2624 2624 }
2625 2625
2626 2626 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2627 2627 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
2628 2628 MAXPATHLEN + 1);
2629 2629
2630 2630 if (name == NULL) {
2631 2631 *cs->statusp = resp->status = NFS4ERR_INVAL;
2632 2632 kmem_free(nm, len);
2633 2633 goto out;
2634 2634 }
2635 2635
2636 2636 NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
2637 2637
2638 2638 error = VOP_LINK(dvp, vp, name, cs->cr, NULL, 0);
2639 2639
2640 2640 if (nm != name)
2641 2641 kmem_free(name, MAXPATHLEN + 1);
2642 2642 kmem_free(nm, len);
2643 2643
2644 2644 /*
2645 2645 * Get the initial "after" sequence number, if it fails, set to zero
2646 2646 */
2647 2647 idva.va_mask = AT_SEQ;
2648 2648 if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
2649 2649 idva.va_seq = 0;
2650 2650
2651 2651 /*
2652 2652 * Force modified data and metadata out to stable storage.
2653 2653 */
2654 2654 (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
2655 2655 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
2656 2656
2657 2657 if (error) {
2658 2658 *cs->statusp = resp->status = puterrno4(error);
2659 2659 goto out;
2660 2660 }
2661 2661
2662 2662 /*
2663 2663 * Get "after" change value, if it fails, simply return the
2664 2664 * before value.
2665 2665 */
2666 2666 adva.va_mask = AT_CTIME|AT_SEQ;
2667 2667 if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
2668 2668 adva.va_ctime = bdva.va_ctime;
2669 2669 adva.va_seq = 0;
2670 2670 }
2671 2671
2672 2672 NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
2673 2673
2674 2674 /*
2675 2675 * The cinfo.atomic = TRUE only if we have
2676 2676 * non-zero va_seq's, and it has incremented by exactly one
2677 2677 * during the VOP_LINK and it didn't change during the VOP_FSYNC.
2678 2678 */
2679 2679 if (bdva.va_seq && idva.va_seq && adva.va_seq &&
2680 2680 idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
2681 2681 resp->cinfo.atomic = TRUE;
2682 2682 else
2683 2683 resp->cinfo.atomic = FALSE;
2684 2684
2685 2685 *cs->statusp = resp->status = NFS4_OK;
2686 2686 out:
2687 2687 DTRACE_NFSV4_2(op__link__done, struct compound_state *, cs,
2688 2688 LINK4res *, resp);
2689 2689 }
2690 2690
2691 2691 /*
2692 2692 * Used by rfs4_op_lookup and rfs4_op_lookupp to do the actual work.
2693 2693 */
2694 2694
2695 2695 /* ARGSUSED */
2696 2696 static nfsstat4
2697 2697 do_rfs4_op_lookup(char *nm, struct svc_req *req, struct compound_state *cs)
2698 2698 {
2699 2699 int error;
2700 2700 int different_export = 0;
2701 2701 vnode_t *vp, *pre_tvp = NULL, *oldvp = NULL;
2702 2702 struct exportinfo *exi = NULL, *pre_exi = NULL;
2703 2703 nfsstat4 stat;
2704 2704 fid_t fid;
2705 2705 int attrdir, dotdot, walk;
2706 2706 bool_t is_newvp = FALSE;
2707 2707
2708 2708 if (cs->vp->v_flag & V_XATTRDIR) {
2709 2709 attrdir = 1;
2710 2710 ASSERT(get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2711 2711 } else {
2712 2712 attrdir = 0;
2713 2713 ASSERT(! get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2714 2714 }
2715 2715
2716 2716 dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
2717 2717
2718 2718 /*
2719 2719 * If dotdotting, then need to check whether it's
2720 2720 * above the root of a filesystem, or above an
2721 2721 * export point.
2722 2722 */
2723 2723 if (dotdot) {
2724 2724 vnode_t *zone_rootvp;
2725 2725
2726 2726 ASSERT(cs->exi != NULL);
2727 2727 zone_rootvp = cs->exi->exi_ne->exi_root->exi_vp;
2728 2728 /*
2729 2729 * If dotdotting at the root of a filesystem, then
2730 2730 * need to traverse back to the mounted-on filesystem
2731 2731 * and do the dotdot lookup there.
2732 2732 */
2733 2733 if ((cs->vp->v_flag & VROOT) || VN_CMP(cs->vp, zone_rootvp)) {
2734 2734
2735 2735 /*
2736 2736 * If at the system root, then can
2737 2737 * go up no further.
2738 2738 */
2739 2739 if (VN_CMP(cs->vp, zone_rootvp))
2740 2740 return (puterrno4(ENOENT));
2741 2741
2742 2742 /*
2743 2743 * Traverse back to the mounted-on filesystem
2744 2744 */
2745 2745 cs->vp = untraverse(cs->vp, zone_rootvp);
2746 2746
2747 2747 /*
2748 2748 * Set the different_export flag so we remember
2749 2749 * to pick up a new exportinfo entry for
2750 2750 * this new filesystem.
2751 2751 */
2752 2752 different_export = 1;
2753 2753 } else {
2754 2754
2755 2755 /*
2756 2756 * If dotdotting above an export point then set
2757 2757 * the different_export to get new export info.
2758 2758 */
2759 2759 different_export = nfs_exported(cs->exi, cs->vp);
2760 2760 }
2761 2761 }
2762 2762
2763 2763 error = VOP_LOOKUP(cs->vp, nm, &vp, NULL, 0, NULL, cs->cr,
2764 2764 NULL, NULL, NULL);
2765 2765 if (error)
2766 2766 return (puterrno4(error));
2767 2767
2768 2768 /*
2769 2769 * If the vnode is in a pseudo filesystem, check whether it is visible.
2770 2770 *
2771 2771 * XXX if the vnode is a symlink and it is not visible in
2772 2772 * a pseudo filesystem, return ENOENT (not following symlink).
2773 2773 * V4 client can not mount such symlink. This is a regression
2774 2774 * from V2/V3.
2775 2775 *
2776 2776 * In the same exported filesystem, if the security flavor used
2777 2777 * is not an explicitly shared flavor, limit the view to the visible
2778 2778 * list entries only. This is not a WRONGSEC case because it's already
2779 2779 * checked via PUTROOTFH/PUTPUBFH or PUTFH.
2780 2780 */
2781 2781 if (!different_export &&
2782 2782 (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
2783 2783 cs->access & CS_ACCESS_LIMITED)) {
2784 2784 if (! nfs_visible(cs->exi, vp, &different_export)) {
2785 2785 VN_RELE(vp);
2786 2786 return (puterrno4(ENOENT));
2787 2787 }
2788 2788 }
2789 2789
2790 2790 /*
2791 2791 * If it's a mountpoint, then traverse it.
2792 2792 */
2793 2793 if (vn_ismntpt(vp)) {
2794 2794 pre_exi = cs->exi; /* save pre-traversed exportinfo */
2795 2795 pre_tvp = vp; /* save pre-traversed vnode */
2796 2796
2797 2797 /*
2798 2798 * hold pre_tvp to counteract rele by traverse. We will
2799 2799 * need pre_tvp below if checkexport4 fails
2800 2800 */
2801 2801 VN_HOLD(pre_tvp);
2802 2802 if ((error = traverse(&vp)) != 0) {
2803 2803 VN_RELE(vp);
2804 2804 VN_RELE(pre_tvp);
2805 2805 return (puterrno4(error));
2806 2806 }
2807 2807 different_export = 1;
2808 2808 } else if (vp->v_vfsp != cs->vp->v_vfsp) {
2809 2809 /*
2810 2810 * The vfsp comparison is to handle the case where
2811 2811 * a LOFS mount is shared. lo_lookup traverses mount points,
2812 2812 * and NFS is unaware of local fs transistions because
2813 2813 * v_vfsmountedhere isn't set. For this special LOFS case,
2814 2814 * the dir and the obj returned by lookup will have different
2815 2815 * vfs ptrs.
2816 2816 */
2817 2817 different_export = 1;
2818 2818 }
2819 2819
2820 2820 if (different_export) {
2821 2821
2822 2822 bzero(&fid, sizeof (fid));
2823 2823 fid.fid_len = MAXFIDSZ;
2824 2824 error = vop_fid_pseudo(vp, &fid);
2825 2825 if (error) {
2826 2826 VN_RELE(vp);
2827 2827 if (pre_tvp)
2828 2828 VN_RELE(pre_tvp);
2829 2829 return (puterrno4(error));
2830 2830 }
2831 2831
2832 2832 if (dotdot)
2833 2833 exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
2834 2834 else
2835 2835 exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
2836 2836
2837 2837 if (exi == NULL) {
2838 2838 if (pre_tvp) {
2839 2839 /*
2840 2840 * If this vnode is a mounted-on vnode,
2841 2841 * but the mounted-on file system is not
2842 2842 * exported, send back the filehandle for
2843 2843 * the mounted-on vnode, not the root of
2844 2844 * the mounted-on file system.
2845 2845 */
2846 2846 VN_RELE(vp);
2847 2847 vp = pre_tvp;
2848 2848 exi = pre_exi;
2849 2849 } else {
2850 2850 VN_RELE(vp);
2851 2851 return (puterrno4(EACCES));
2852 2852 }
2853 2853 } else if (pre_tvp) {
2854 2854 /* we're done with pre_tvp now. release extra hold */
2855 2855 VN_RELE(pre_tvp);
2856 2856 }
2857 2857
2858 2858 cs->exi = exi;
2859 2859
2860 2860 /*
2861 2861 * Now we do a checkauth4. The reason is that
2862 2862 * this client/user may not have access to the new
2863 2863 * exported file system, and if they do,
2864 2864 * the client/user may be mapped to a different uid.
2865 2865 *
2866 2866 * We start with a new cr, because the checkauth4 done
2867 2867 * in the PUT*FH operation over wrote the cred's uid,
2868 2868 * gid, etc, and we want the real thing before calling
2869 2869 * checkauth4()
2870 2870 */
2871 2871 crfree(cs->cr);
2872 2872 cs->cr = crdup(cs->basecr);
2873 2873
2874 2874 oldvp = cs->vp;
2875 2875 cs->vp = vp;
2876 2876 is_newvp = TRUE;
2877 2877
2878 2878 stat = call_checkauth4(cs, req);
2879 2879 if (stat != NFS4_OK) {
2880 2880 VN_RELE(cs->vp);
2881 2881 cs->vp = oldvp;
2882 2882 return (stat);
2883 2883 }
2884 2884 }
2885 2885
2886 2886 /*
2887 2887 * After various NFS checks, do a label check on the path
2888 2888 * component. The label on this path should either be the
2889 2889 * global zone's label or a zone's label. We are only
2890 2890 * interested in the zone's label because exported files
2891 2891 * in the global zone are accessible (though read-only) to
2892 2892 * clients. The exportability/visibility check is already
2893 2893 * done before reaching this code.
2894 2894 */
2895 2895 if (is_system_labeled()) {
2896 2896 bslabel_t *clabel;
2897 2897
2898 2898 ASSERT(req->rq_label != NULL);
2899 2899 clabel = req->rq_label;
2900 2900 DTRACE_PROBE2(tx__rfs4__log__info__oplookup__clabel, char *,
2901 2901 "got client label from request(1)", struct svc_req *, req);
2902 2902
2903 2903 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2904 2904 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
2905 2905 cs->exi)) {
2906 2906 error = EACCES;
2907 2907 goto err_out;
2908 2908 }
2909 2909 } else {
2910 2910 /*
2911 2911 * We grant access to admin_low label clients
2912 2912 * only if the client is trusted, i.e. also
2913 2913 * running Solaris Trusted Extension.
2914 2914 */
2915 2915 struct sockaddr *ca;
2916 2916 int addr_type;
2917 2917 void *ipaddr;
2918 2918 tsol_tpc_t *tp;
2919 2919
2920 2920 ca = (struct sockaddr *)svc_getrpccaller(
2921 2921 req->rq_xprt)->buf;
2922 2922 if (ca->sa_family == AF_INET) {
2923 2923 addr_type = IPV4_VERSION;
2924 2924 ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
2925 2925 } else if (ca->sa_family == AF_INET6) {
2926 2926 addr_type = IPV6_VERSION;
2927 2927 ipaddr = &((struct sockaddr_in6 *)
2928 2928 ca)->sin6_addr;
2929 2929 }
2930 2930 tp = find_tpc(ipaddr, addr_type, B_FALSE);
2931 2931 if (tp == NULL || tp->tpc_tp.tp_doi !=
2932 2932 l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
2933 2933 SUN_CIPSO) {
2934 2934 if (tp != NULL)
2935 2935 TPC_RELE(tp);
2936 2936 error = EACCES;
2937 2937 goto err_out;
2938 2938 }
2939 2939 TPC_RELE(tp);
2940 2940 }
2941 2941 }
2942 2942
2943 2943 error = makefh4(&cs->fh, vp, cs->exi);
2944 2944
2945 2945 err_out:
2946 2946 if (error) {
2947 2947 if (is_newvp) {
2948 2948 VN_RELE(cs->vp);
2949 2949 cs->vp = oldvp;
2950 2950 } else
2951 2951 VN_RELE(vp);
2952 2952 return (puterrno4(error));
2953 2953 }
2954 2954
2955 2955 if (!is_newvp) {
2956 2956 if (cs->vp)
2957 2957 VN_RELE(cs->vp);
2958 2958 cs->vp = vp;
2959 2959 } else if (oldvp)
2960 2960 VN_RELE(oldvp);
2961 2961
2962 2962 /*
2963 2963 * if did lookup on attrdir and didn't lookup .., set named
2964 2964 * attr fh flag
2965 2965 */
2966 2966 if (attrdir && ! dotdot)
2967 2967 set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
2968 2968
2969 2969 /* Assume false for now, open proc will set this */
2970 2970 cs->mandlock = FALSE;
2971 2971
2972 2972 return (NFS4_OK);
2973 2973 }
2974 2974
2975 2975 /* ARGSUSED */
2976 2976 static void
2977 2977 rfs4_op_lookup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2978 2978 struct compound_state *cs)
2979 2979 {
2980 2980 LOOKUP4args *args = &argop->nfs_argop4_u.oplookup;
2981 2981 LOOKUP4res *resp = &resop->nfs_resop4_u.oplookup;
2982 2982 char *nm;
2983 2983 uint_t len;
2984 2984 struct sockaddr *ca;
2985 2985 char *name = NULL;
2986 2986 nfsstat4 status;
2987 2987
2988 2988 DTRACE_NFSV4_2(op__lookup__start, struct compound_state *, cs,
2989 2989 LOOKUP4args *, args);
2990 2990
2991 2991 if (cs->vp == NULL) {
2992 2992 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2993 2993 goto out;
2994 2994 }
2995 2995
2996 2996 if (cs->vp->v_type == VLNK) {
2997 2997 *cs->statusp = resp->status = NFS4ERR_SYMLINK;
2998 2998 goto out;
2999 2999 }
3000 3000
3001 3001 if (cs->vp->v_type != VDIR) {
3002 3002 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
3003 3003 goto out;
3004 3004 }
3005 3005
3006 3006 status = utf8_dir_verify(&args->objname);
3007 3007 if (status != NFS4_OK) {
3008 3008 *cs->statusp = resp->status = status;
3009 3009 goto out;
3010 3010 }
3011 3011
3012 3012 nm = utf8_to_str(&args->objname, &len, NULL);
3013 3013 if (nm == NULL) {
3014 3014 *cs->statusp = resp->status = NFS4ERR_INVAL;
3015 3015 goto out;
3016 3016 }
3017 3017
3018 3018 if (len > MAXNAMELEN) {
3019 3019 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
3020 3020 kmem_free(nm, len);
3021 3021 goto out;
3022 3022 }
3023 3023
3024 3024 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3025 3025 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
3026 3026 MAXPATHLEN + 1);
3027 3027
3028 3028 if (name == NULL) {
3029 3029 *cs->statusp = resp->status = NFS4ERR_INVAL;
3030 3030 kmem_free(nm, len);
3031 3031 goto out;
3032 3032 }
3033 3033
3034 3034 *cs->statusp = resp->status = do_rfs4_op_lookup(name, req, cs);
3035 3035
3036 3036 if (name != nm)
3037 3037 kmem_free(name, MAXPATHLEN + 1);
3038 3038 kmem_free(nm, len);
3039 3039
3040 3040 out:
3041 3041 DTRACE_NFSV4_2(op__lookup__done, struct compound_state *, cs,
3042 3042 LOOKUP4res *, resp);
3043 3043 }
3044 3044
3045 3045 /* ARGSUSED */
3046 3046 static void
3047 3047 rfs4_op_lookupp(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3048 3048 struct compound_state *cs)
3049 3049 {
3050 3050 LOOKUPP4res *resp = &resop->nfs_resop4_u.oplookupp;
3051 3051
3052 3052 DTRACE_NFSV4_1(op__lookupp__start, struct compound_state *, cs);
3053 3053
3054 3054 if (cs->vp == NULL) {
3055 3055 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3056 3056 goto out;
3057 3057 }
3058 3058
3059 3059 if (cs->vp->v_type != VDIR) {
3060 3060 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
3061 3061 goto out;
3062 3062 }
3063 3063
3064 3064 *cs->statusp = resp->status = do_rfs4_op_lookup("..", req, cs);
3065 3065
3066 3066 /*
3067 3067 * From NFSV4 Specification, LOOKUPP should not check for
3068 3068 * NFS4ERR_WRONGSEC. Retrun NFS4_OK instead.
3069 3069 */
3070 3070 if (resp->status == NFS4ERR_WRONGSEC) {
3071 3071 *cs->statusp = resp->status = NFS4_OK;
3072 3072 }
3073 3073
3074 3074 out:
3075 3075 DTRACE_NFSV4_2(op__lookupp__done, struct compound_state *, cs,
3076 3076 LOOKUPP4res *, resp);
3077 3077 }
3078 3078
3079 3079
3080 3080 /*ARGSUSED2*/
3081 3081 static void
3082 3082 rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3083 3083 struct compound_state *cs)
3084 3084 {
3085 3085 OPENATTR4args *args = &argop->nfs_argop4_u.opopenattr;
3086 3086 OPENATTR4res *resp = &resop->nfs_resop4_u.opopenattr;
3087 3087 vnode_t *avp = NULL;
3088 3088 int lookup_flags = LOOKUP_XATTR, error;
3089 3089 int exp_ro = 0;
3090 3090
3091 3091 DTRACE_NFSV4_2(op__openattr__start, struct compound_state *, cs,
3092 3092 OPENATTR4args *, args);
3093 3093
3094 3094 if (cs->vp == NULL) {
3095 3095 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3096 3096 goto out;
3097 3097 }
3098 3098
3099 3099 if ((cs->vp->v_vfsp->vfs_flag & VFS_XATTR) == 0 &&
3100 3100 !vfs_has_feature(cs->vp->v_vfsp, VFSFT_SYSATTR_VIEWS)) {
3101 3101 *cs->statusp = resp->status = puterrno4(ENOTSUP);
3102 3102 goto out;
3103 3103 }
3104 3104
3105 3105 /*
3106 3106 * If file system supports passing ACE mask to VOP_ACCESS then
3107 3107 * check for ACE_READ_NAMED_ATTRS, otherwise do legacy checks
3108 3108 */
3109 3109
3110 3110 if (vfs_has_feature(cs->vp->v_vfsp, VFSFT_ACEMASKONACCESS))
3111 3111 error = VOP_ACCESS(cs->vp, ACE_READ_NAMED_ATTRS,
3112 3112 V_ACE_MASK, cs->cr, NULL);
3113 3113 else
3114 3114 error = ((VOP_ACCESS(cs->vp, VREAD, 0, cs->cr, NULL) != 0) &&
3115 3115 (VOP_ACCESS(cs->vp, VWRITE, 0, cs->cr, NULL) != 0) &&
3116 3116 (VOP_ACCESS(cs->vp, VEXEC, 0, cs->cr, NULL) != 0));
3117 3117
3118 3118 if (error) {
3119 3119 *cs->statusp = resp->status = puterrno4(EACCES);
3120 3120 goto out;
3121 3121 }
3122 3122
3123 3123 /*
3124 3124 * The CREATE_XATTR_DIR VOP flag cannot be specified if
3125 3125 * the file system is exported read-only -- regardless of
3126 3126 * createdir flag. Otherwise the attrdir would be created
3127 3127 * (assuming server fs isn't mounted readonly locally). If
3128 3128 * VOP_LOOKUP returns ENOENT in this case, the error will
3129 3129 * be translated into EROFS. ENOSYS is mapped to ENOTSUP
3130 3130 * because specfs has no VOP_LOOKUP op, so the macro would
3131 3131 * return ENOSYS. EINVAL is returned by all (current)
3132 3132 * Solaris file system implementations when any of their
3133 3133 * restrictions are violated (xattr(dir) can't have xattrdir).
3134 3134 * Returning NOTSUPP is more appropriate in this case
3135 3135 * because the object will never be able to have an attrdir.
3136 3136 */
3137 3137 if (args->createdir && ! (exp_ro = rdonly4(req, cs)))
3138 3138 lookup_flags |= CREATE_XATTR_DIR;
3139 3139
3140 3140 error = VOP_LOOKUP(cs->vp, "", &avp, NULL, lookup_flags, NULL, cs->cr,
3141 3141 NULL, NULL, NULL);
3142 3142
3143 3143 if (error) {
3144 3144 if (error == ENOENT && args->createdir && exp_ro)
3145 3145 *cs->statusp = resp->status = puterrno4(EROFS);
3146 3146 else if (error == EINVAL || error == ENOSYS)
3147 3147 *cs->statusp = resp->status = puterrno4(ENOTSUP);
3148 3148 else
3149 3149 *cs->statusp = resp->status = puterrno4(error);
3150 3150 goto out;
3151 3151 }
3152 3152
3153 3153 ASSERT(avp->v_flag & V_XATTRDIR);
3154 3154
3155 3155 error = makefh4(&cs->fh, avp, cs->exi);
3156 3156
3157 3157 if (error) {
3158 3158 VN_RELE(avp);
3159 3159 *cs->statusp = resp->status = puterrno4(error);
3160 3160 goto out;
3161 3161 }
3162 3162
3163 3163 VN_RELE(cs->vp);
3164 3164 cs->vp = avp;
3165 3165
3166 3166 /*
3167 3167 * There is no requirement for an attrdir fh flag
3168 3168 * because the attrdir has a vnode flag to distinguish
3169 3169 * it from regular (non-xattr) directories. The
3170 3170 * FH4_ATTRDIR flag is set for future sanity checks.
3171 3171 */
3172 3172 set_fh4_flag(&cs->fh, FH4_ATTRDIR);
3173 3173 *cs->statusp = resp->status = NFS4_OK;
3174 3174
3175 3175 out:
3176 3176 DTRACE_NFSV4_2(op__openattr__done, struct compound_state *, cs,
3177 3177 OPENATTR4res *, resp);
3178 3178 }
3179 3179
3180 3180 static int
3181 3181 do_io(int direction, vnode_t *vp, struct uio *uio, int ioflag, cred_t *cred,
3182 3182 caller_context_t *ct)
3183 3183 {
3184 3184 int error;
3185 3185 int i;
3186 3186 clock_t delaytime;
3187 3187
3188 3188 delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
3189 3189
3190 3190 /*
3191 3191 * Don't block on mandatory locks. If this routine returns
3192 3192 * EAGAIN, the caller should return NFS4ERR_LOCKED.
3193 3193 */
3194 3194 uio->uio_fmode = FNONBLOCK;
3195 3195
3196 3196 for (i = 0; i < rfs4_maxlock_tries; i++) {
3197 3197
3198 3198
3199 3199 if (direction == FREAD) {
3200 3200 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, ct);
3201 3201 error = VOP_READ(vp, uio, ioflag, cred, ct);
3202 3202 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, ct);
3203 3203 } else {
3204 3204 (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, ct);
3205 3205 error = VOP_WRITE(vp, uio, ioflag, cred, ct);
3206 3206 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, ct);
3207 3207 }
3208 3208
3209 3209 if (error != EAGAIN)
3210 3210 break;
3211 3211
3212 3212 if (i < rfs4_maxlock_tries - 1) {
3213 3213 delay(delaytime);
3214 3214 delaytime *= 2;
3215 3215 }
3216 3216 }
3217 3217
3218 3218 return (error);
3219 3219 }
3220 3220
/*
 * READ: read up to args->count bytes at args->offset from the current
 * filehandle (cs->vp).
 *
 * Checks performed, in order:
 *  - a current vnode must exist (NFS4ERR_NOFILEHANDLE) and cs->access must
 *    not be CS_ACCESS_DENIED (NFS4ERR_ACCESS);
 *  - the stateid must pass rfs4_check_stateid();
 *  - when nbl_need_check() says so, the nbl critical region is entered and
 *    a conflicting range yields NFS4ERR_LOCKED;
 *  - when the client supplied a write chunk list (args->wlist), count must
 *    not exceed clist_len() of that list (NFS4ERR_INVAL);
 *  - the vnode must be a regular file (NFS4ERR_ISDIR for a directory,
 *    NFS4ERR_INVAL otherwise);
 *  - unless the caller's uid equals the file's owner, VOP_ACCESS must
 *    grant VREAD or VEXEC;
 *  - MANDLOCK() files are refused with NFS4ERR_ACCESS.
 *
 * A read at or beyond the file size returns NFS4_OK with eof set and no
 * data; a zero-length read returns NFS4_OK with eof clear and no data.
 * Otherwise count is clamped to rfs4_tsize(req) and the data is read into
 * loaned (zero-copy) buffers, an RDMA write chunk, or an mblk chain from
 * rfs_read_alloc().
 *
 * The outcome is stored in both resp->status and *cs->statusp.
 */
3221 3221 /* ARGSUSED */
3222 3222 static void
3223 3223 rfs4_op_read(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3224 3224 struct compound_state *cs)
3225 3225 {
3226 3226 READ4args *args = &argop->nfs_argop4_u.opread;
3227 3227 READ4res *resp = &resop->nfs_resop4_u.opread;
3228 3228 int error;
3229 3229 int verror;
3230 3230 vnode_t *vp;
3231 3231 struct vattr va;
3232 3232 struct iovec iov, *iovp = NULL;
3233 3233 int iovcnt;
3234 3234 struct uio uio;
3235 3235 u_offset_t offset;
3236 3236 bool_t *deleg = &cs->deleg;
3237 3237 nfsstat4 stat;
3238 3238 int in_crit = 0;
3239 3239 mblk_t *mp = NULL;
/* alloc_err is never assigned after this initialization in this function. */
3240 3240 int alloc_err = 0;
3241 3241 int rdma_used = 0;
3242 3242 int loaned_buffers;
3243 3243 caller_context_t ct;
3244 3244 struct uio *uiop;
3245 3245
3246 3246 DTRACE_NFSV4_2(op__read__start, struct compound_state *, cs,
3247 3247 READ4args, args);
3248 3248
3249 3249 vp = cs->vp;
3250 3250 if (vp == NULL) {
3251 3251 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3252 3252 goto out;
3253 3253 }
3254 3254 if (cs->access == CS_ACCESS_DENIED) {
3255 3255 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3256 3256 goto out;
3257 3257 }
3258 3258
3259 3259 if ((stat = rfs4_check_stateid(FREAD, vp, &args->stateid, FALSE,
3260 3260 deleg, TRUE, &ct)) != NFS4_OK) {
3261 3261 *cs->statusp = resp->status = stat;
3262 3262 goto out;
3263 3263 }
3264 3264
3265 3265 /*
3266 3266 * Enter the critical region before calling VOP_RWLOCK
3267 3267 * to avoid a deadlock with write requests.
3268 3268 */
3269 3269 if (nbl_need_check(vp)) {
3270 3270 nbl_start_crit(vp, RW_READER);
3271 3271 in_crit = 1;
3272 3272 if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
3273 3273 &ct)) {
3274 3274 *cs->statusp = resp->status = NFS4ERR_LOCKED;
3275 3275 goto out;
3276 3276 }
3277 3277 }
3278 3278
/* A client-supplied write chunk list selects the RDMA reply path. */
3279 3279 if (args->wlist) {
3280 3280 if (args->count > clist_len(args->wlist)) {
3281 3281 *cs->statusp = resp->status = NFS4ERR_INVAL;
3282 3282 goto out;
3283 3283 }
3284 3284 rdma_used = 1;
3285 3285 }
3286 3286
3287 3287 /* use loaned buffers only when enabled and the reply is not RDMA */
3288 3288 loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;
3289 3289
3290 3290 va.va_mask = AT_MODE|AT_SIZE|AT_UID;
3291 3291 verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3292 3292
3293 3293 /*
3294 3294 * If we can't get the attributes, then we can't do the
3295 3295 * right access checking. So, we'll fail the request.
3296 3296 */
3297 3297 if (verror) {
3298 3298 *cs->statusp = resp->status = puterrno4(verror);
3299 3299 goto out;
3300 3300 }
3301 3301
3302 3302 if (vp->v_type != VREG) {
3303 3303 *cs->statusp = resp->status =
3304 3304 ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
3305 3305 goto out;
3306 3306 }
3307 3307
/*
 * The file's owner skips the access check.  Anyone else needs either
 * VREAD or VEXEC; the error reported is the one from the VEXEC check.
 */
3308 3308 if (crgetuid(cs->cr) != va.va_uid &&
3309 3309 (error = VOP_ACCESS(vp, VREAD, 0, cs->cr, &ct)) &&
3310 3310 (error = VOP_ACCESS(vp, VEXEC, 0, cs->cr, &ct))) {
3311 3311 *cs->statusp = resp->status = puterrno4(error);
3312 3312 goto out;
3313 3313 }
3314 3314
3315 3315 if (MANDLOCK(vp, va.va_mode)) { /* XXX - V4 supports mand locking */
3316 3316 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3317 3317 goto out;
3318 3318 }
3319 3319
/* Read starting at or past end of file: success, eof, no data. */
3320 3320 offset = args->offset;
3321 3321 if (offset >= va.va_size) {
3322 3322 *cs->statusp = resp->status = NFS4_OK;
3323 3323 resp->eof = TRUE;
3324 3324 resp->data_len = 0;
3325 3325 resp->data_val = NULL;
3326 3326 resp->mblk = NULL;
3327 3327 /* RDMA */
3328 3328 resp->wlist = args->wlist;
3329 3329 resp->wlist_len = resp->data_len;
3330 3330 *cs->statusp = resp->status = NFS4_OK;
3331 3331 if (resp->wlist)
3332 3332 clist_zero_len(resp->wlist);
3333 3333 goto out;
3334 3334 }
3335 3335
/* Zero-length read: success, not eof, no data. */
3336 3336 if (args->count == 0) {
3337 3337 *cs->statusp = resp->status = NFS4_OK;
3338 3338 resp->eof = FALSE;
3339 3339 resp->data_len = 0;
3340 3340 resp->data_val = NULL;
3341 3341 resp->mblk = NULL;
3342 3342 /* RDMA */
3343 3343 resp->wlist = args->wlist;
3344 3344 resp->wlist_len = resp->data_len;
3345 3345 if (resp->wlist)
3346 3346 clist_zero_len(resp->wlist);
3347 3347 goto out;
3348 3348 }
3349 3349
3350 3350 /*
3351 3351 * Do not allocate memory more than maximum allowed
3352 3352 * transfer size
3353 3353 */
3354 3354 if (args->count > rfs4_tsize(req))
3355 3355 args->count = rfs4_tsize(req);
3356 3356
3357 3357 if (loaned_buffers) {
3358 3358 uiop = (uio_t *)rfs_setup_xuio(vp);
3359 3359 ASSERT(uiop != NULL);
3360 3360 uiop->uio_segflg = UIO_SYSSPACE;
3361 3361 uiop->uio_loffset = args->offset;
3362 3362 uiop->uio_resid = args->count;
3363 3363
3364 3364 /* Jump to do the read if successful */
3365 3365 if (!VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cs->cr, &ct)) {
3366 3366 /*
3367 3367 * Need to hold the vnode until after VOP_RETZCBUF()
3368 3368 * is called.
3369 3369 */
3370 3370 VN_HOLD(vp);
3371 3371 goto doio_read;
3372 3372 }
3373 3373
3374 3374 DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
3375 3375 uiop->uio_loffset, int, uiop->uio_resid);
3376 3376
3377 3377 uiop->uio_extflg = 0;
3378 3378
3379 3379 /* failure to setup for zero copy; fall back to a copying read */
3380 3380 rfs_free_xuio((void *)uiop);
3381 3381 loaned_buffers = 0;
3382 3382 }
3383 3383
3384 3384 /*
3385 3385 * If returning data via RDMA Write, then grab the chunk list. If we
3386 3386 * aren't returning READ data w/RDMA_WRITE, then grab a mblk.
3387 3387 */
3388 3388 if (rdma_used) {
3389 3389 mp = NULL;
3390 3390 (void) rdma_get_wchunk(req, &iov, args->wlist);
3391 3391 uio.uio_iov = &iov;
3392 3392 uio.uio_iovcnt = 1;
3393 3393 } else {
3394 3394 /*
3395 3395 * mp will contain the data to be sent out in the read reply.
3396 3396 * It will be freed after the reply has been sent.
3397 3397 */
3398 3398 mp = rfs_read_alloc(args->count, &iovp, &iovcnt);
3399 3399 ASSERT(mp != NULL);
3400 3400 ASSERT(alloc_err == 0);
3401 3401 uio.uio_iov = iovp;
3402 3402 uio.uio_iovcnt = iovcnt;
3403 3403 }
3404 3404
3405 3405 uio.uio_segflg = UIO_SYSSPACE;
3406 3406 uio.uio_extflg = UIO_COPY_CACHED;
3407 3407 uio.uio_loffset = args->offset;
3408 3408 uio.uio_resid = args->count;
3409 3409 uiop = &uio;
3410 3410
3411 3411 doio_read:
3412 3412 error = do_io(FREAD, vp, uiop, 0, cs->cr, &ct);
3413 3413
/* Re-fetch the size after the read; it is used below to decide eof. */
3414 3414 va.va_mask = AT_SIZE;
3415 3415 verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3416 3416
/*
 * NOTE(review): when do_io() fails on the loaned-buffer path, nothing
 * visible in this function undoes the VN_HOLD() above or releases uiop --
 * confirm whether that cleanup happens elsewhere.
 */
3417 3417 if (error) {
3418 3418 if (mp)
3419 3419 freemsg(mp);
3420 3420 *cs->statusp = resp->status = puterrno4(error);
3421 3421 goto out;
3422 3422 }
3423 3423
3424 3424 /* make mblk using zc buffers */
3425 3425 if (loaned_buffers) {
3426 3426 mp = uio_to_mblk(uiop);
3427 3427 ASSERT(mp != NULL);
3428 3428 }
3429 3429
3430 3430 *cs->statusp = resp->status = NFS4_OK;
3431 3431
/* Bytes actually read = bytes requested minus what is left in the uio. */
3432 3432 ASSERT(uiop->uio_resid >= 0);
3433 3433 resp->data_len = args->count - uiop->uio_resid;
3434 3434 if (mp) {
3435 3435 resp->data_val = (char *)mp->b_datap->db_base;
3436 3436 rfs_rndup_mblks(mp, resp->data_len, loaned_buffers);
3437 3437 } else {
3438 3438 resp->data_val = (caddr_t)iov.iov_base;
3439 3439 }
3440 3440
3441 3441 resp->mblk = mp;
3442 3442
/* eof only when the size is known and the read ended exactly at it. */
3443 3443 if (!verror && offset + resp->data_len == va.va_size)
3444 3444 resp->eof = TRUE;
3445 3445 else
3446 3446 resp->eof = FALSE;
3447 3447
3448 3448 if (rdma_used) {
3449 3449 if (!rdma_setup_read_data4(args, resp)) {
3450 3450 *cs->statusp = resp->status = NFS4ERR_INVAL;
3451 3451 }
3452 3452 } else {
3453 3453 resp->wlist = NULL;
3454 3454 }
3455 3455
/* Common exit: leave the nbl critical region and free the iovec array. */
3456 3456 out:
3457 3457 if (in_crit)
3458 3458 nbl_end_crit(vp);
3459 3459
3460 3460 if (iovp != NULL)
3461 3461 kmem_free(iovp, iovcnt * sizeof (struct iovec));
3462 3462
3463 3463 DTRACE_NFSV4_2(op__read__done, struct compound_state *, cs,
3464 3464 READ4res *, resp);
3465 3465 }
3466 3466
3467 3467 static void
3468 3468 rfs4_op_read_free(nfs_resop4 *resop)
3469 3469 {
3470 3470 READ4res *resp = &resop->nfs_resop4_u.opread;
3471 3471
3472 3472 if (resp->status == NFS4_OK && resp->mblk != NULL) {
3473 3473 freemsg(resp->mblk);
3474 3474 resp->mblk = NULL;
3475 3475 resp->data_val = NULL;
3476 3476 resp->data_len = 0;
3477 3477 }
3478 3478 }
3479 3479
3480 3480 static void
3481 3481 rfs4_op_readdir_free(nfs_resop4 * resop)
3482 3482 {
3483 3483 READDIR4res *resp = &resop->nfs_resop4_u.opreaddir;
3484 3484
3485 3485 if (resp->status == NFS4_OK && resp->mblk != NULL) {
3486 3486 freeb(resp->mblk);
3487 3487 resp->mblk = NULL;
3488 3488 resp->data_len = 0;
3489 3489 }
3490 3490 }
3491 3491
3492 3492
3493 3493 /* ARGSUSED */
3494 3494 static void
3495 3495 rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3496 3496 struct compound_state *cs)
3497 3497 {
3498 3498 PUTPUBFH4res *resp = &resop->nfs_resop4_u.opputpubfh;
3499 3499 int error;
3500 3500 vnode_t *vp;
3501 3501 struct exportinfo *exi, *sav_exi;
3502 3502 nfs_fh4_fmt_t *fh_fmtp;
3503 3503 nfs_export_t *ne = nfs_get_export();
3504 3504
3505 3505 DTRACE_NFSV4_1(op__putpubfh__start, struct compound_state *, cs);
3506 3506
3507 3507 if (cs->vp) {
3508 3508 VN_RELE(cs->vp);
3509 3509 cs->vp = NULL;
3510 3510 }
3511 3511
3512 3512 if (cs->cr)
3513 3513 crfree(cs->cr);
3514 3514
3515 3515 cs->cr = crdup(cs->basecr);
3516 3516
3517 3517 vp = ne->exi_public->exi_vp;
3518 3518 if (vp == NULL) {
3519 3519 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3520 3520 goto out;
3521 3521 }
3522 3522
3523 3523 error = makefh4(&cs->fh, vp, ne->exi_public);
3524 3524 if (error != 0) {
3525 3525 *cs->statusp = resp->status = puterrno4(error);
3526 3526 goto out;
3527 3527 }
3528 3528 sav_exi = cs->exi;
3529 3529 if (ne->exi_public == ne->exi_root) {
3530 3530 /*
3531 3531 * No filesystem is actually shared public, so we default
3532 3532 * to exi_root. In this case, we must check whether root
3533 3533 * is exported.
3534 3534 */
3535 3535 fh_fmtp = (nfs_fh4_fmt_t *)cs->fh.nfs_fh4_val;
3536 3536
3537 3537 /*
3538 3538 * if root filesystem is exported, the exportinfo struct that we
3539 3539 * should use is what checkexport4 returns, because root_exi is
3540 3540 * actually a mostly empty struct.
3541 3541 */
3542 3542 exi = checkexport4(&fh_fmtp->fh4_fsid,
3543 3543 (fid_t *)&fh_fmtp->fh4_xlen, NULL);
3544 3544 cs->exi = ((exi != NULL) ? exi : ne->exi_public);
3545 3545 } else {
3546 3546 /*
3547 3547 * it's a properly shared filesystem
3548 3548 */
3549 3549 cs->exi = ne->exi_public;
3550 3550 }
3551 3551
3552 3552 if (is_system_labeled()) {
3553 3553 bslabel_t *clabel;
3554 3554
3555 3555 ASSERT(req->rq_label != NULL);
3556 3556 clabel = req->rq_label;
3557 3557 DTRACE_PROBE2(tx__rfs4__log__info__opputpubfh__clabel, char *,
3558 3558 "got client label from request(1)",
3559 3559 struct svc_req *, req);
3560 3560 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3561 3561 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3562 3562 cs->exi)) {
3563 3563 *cs->statusp = resp->status =
3564 3564 NFS4ERR_SERVERFAULT;
3565 3565 goto out;
3566 3566 }
3567 3567 }
3568 3568 }
3569 3569
3570 3570 VN_HOLD(vp);
3571 3571 cs->vp = vp;
3572 3572
3573 3573 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3574 3574 VN_RELE(cs->vp);
3575 3575 cs->vp = NULL;
3576 3576 cs->exi = sav_exi;
3577 3577 goto out;
3578 3578 }
3579 3579
3580 3580 *cs->statusp = resp->status = NFS4_OK;
3581 3581 out:
3582 3582 DTRACE_NFSV4_2(op__putpubfh__done, struct compound_state *, cs,
3583 3583 PUTPUBFH4res *, resp);
3584 3584 }
3585 3585
3586 3586 /*
3587 3587 * XXX - issue with put*fh operations. Suppose /export/home is exported.
3588 3588 * Suppose an NFS client goes to mount /export/home/joe. If /export, home,
3589 3589 * or joe have restrictive search permissions, then we shouldn't let
3590 3590 * the client get a file handle. This is easy to enforce. However, we
3591 3591 * don't know what security flavor should be used until we resolve the
3592 3592 * path name. Another complication is uid mapping. If root is
3593 3593 * the user, then it will be mapped to the anonymous user by default,
3594 3594 * but we won't know that till we've resolved the path name. And we won't
3595 3595 * know what the anonymous user is.
3596 3596 * Luckily, SECINFO is specified to take a full filename.
3597 3597 * So what we will have to in rfs4_op_lookup is check that flavor of
3598 3598 * the target object matches that of the request, and if root was the
3599 3599 * caller, check for the root= and anon= options, and if necessary,
3600 3600 * repeat the lookup using the right cred_t. But that's not done yet.
3601 3601 */
3602 3602 /* ARGSUSED */
3603 3603 static void
3604 3604 rfs4_op_putfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3605 3605 struct compound_state *cs)
3606 3606 {
3607 3607 PUTFH4args *args = &argop->nfs_argop4_u.opputfh;
3608 3608 PUTFH4res *resp = &resop->nfs_resop4_u.opputfh;
3609 3609 nfs_fh4_fmt_t *fh_fmtp;
3610 3610
3611 3611 DTRACE_NFSV4_2(op__putfh__start, struct compound_state *, cs,
3612 3612 PUTFH4args *, args);
3613 3613
3614 3614 if (cs->vp) {
3615 3615 VN_RELE(cs->vp);
3616 3616 cs->vp = NULL;
3617 3617 }
3618 3618
3619 3619 if (cs->cr) {
3620 3620 crfree(cs->cr);
3621 3621 cs->cr = NULL;
3622 3622 }
3623 3623
3624 3624
3625 3625 if (args->object.nfs_fh4_len < NFS_FH4_LEN) {
3626 3626 *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
3627 3627 goto out;
3628 3628 }
3629 3629
3630 3630 fh_fmtp = (nfs_fh4_fmt_t *)args->object.nfs_fh4_val;
3631 3631 cs->exi = checkexport4(&fh_fmtp->fh4_fsid, (fid_t *)&fh_fmtp->fh4_xlen,
3632 3632 NULL);
3633 3633
3634 3634 if (cs->exi == NULL) {
3635 3635 *cs->statusp = resp->status = NFS4ERR_STALE;
3636 3636 goto out;
3637 3637 }
3638 3638
3639 3639 cs->cr = crdup(cs->basecr);
3640 3640
3641 3641 ASSERT(cs->cr != NULL);
3642 3642
3643 3643 if (! (cs->vp = nfs4_fhtovp(&args->object, cs->exi, &resp->status))) {
3644 3644 *cs->statusp = resp->status;
3645 3645 goto out;
3646 3646 }
3647 3647
3648 3648 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3649 3649 VN_RELE(cs->vp);
3650 3650 cs->vp = NULL;
3651 3651 goto out;
3652 3652 }
3653 3653
3654 3654 nfs_fh4_copy(&args->object, &cs->fh);
3655 3655 *cs->statusp = resp->status = NFS4_OK;
3656 3656 cs->deleg = FALSE;
3657 3657
3658 3658 out:
3659 3659 DTRACE_NFSV4_2(op__putfh__done, struct compound_state *, cs,
3660 3660 PUTFH4res *, resp);
3661 3661 }
3662 3662
3663 3663 /* ARGSUSED */
3664 3664 static void
3665 3665 rfs4_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3666 3666 struct compound_state *cs)
3667 3667 {
3668 3668 PUTROOTFH4res *resp = &resop->nfs_resop4_u.opputrootfh;
3669 3669 int error;
3670 3670 fid_t fid;
3671 3671 struct exportinfo *exi, *sav_exi;
3672 3672
3673 3673 DTRACE_NFSV4_1(op__putrootfh__start, struct compound_state *, cs);
3674 3674
3675 3675 if (cs->vp) {
3676 3676 VN_RELE(cs->vp);
3677 3677 cs->vp = NULL;
3678 3678 }
3679 3679
3680 3680 if (cs->cr)
3681 3681 crfree(cs->cr);
3682 3682
3683 3683 cs->cr = crdup(cs->basecr);
3684 3684
3685 3685 /*
3686 3686 * Using rootdir, the system root vnode,
3687 3687 * get its fid.
3688 3688 */
3689 3689 bzero(&fid, sizeof (fid));
3690 3690 fid.fid_len = MAXFIDSZ;
3691 3691 error = vop_fid_pseudo(ZONE_ROOTVP(), &fid);
3692 3692 if (error != 0) {
3693 3693 *cs->statusp = resp->status = puterrno4(error);
3694 3694 goto out;
3695 3695 }
3696 3696
3697 3697 /*
3698 3698 * Then use the root fsid & fid it to find out if it's exported
3699 3699 *
3700 3700 * If the server root isn't exported directly, then
3701 3701 * it should at least be a pseudo export based on
3702 3702 * one or more exports further down in the server's
3703 3703 * file tree.
3704 3704 */
3705 3705 exi = checkexport4(&ZONE_ROOTVP()->v_vfsp->vfs_fsid, &fid, NULL);
3706 3706 if (exi == NULL || exi->exi_export.ex_flags & EX_PUBLIC) {
3707 3707 NFS4_DEBUG(rfs4_debug,
3708 3708 (CE_WARN, "rfs4_op_putrootfh: export check failure"));
3709 3709 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3710 3710 goto out;
3711 3711 }
3712 3712
3713 3713 /*
3714 3714 * Now make a filehandle based on the root
3715 3715 * export and root vnode.
3716 3716 */
3717 3717 error = makefh4(&cs->fh, ZONE_ROOTVP(), exi);
3718 3718 if (error != 0) {
3719 3719 *cs->statusp = resp->status = puterrno4(error);
3720 3720 goto out;
3721 3721 }
3722 3722
3723 3723 sav_exi = cs->exi;
3724 3724 cs->exi = exi;
3725 3725
3726 3726 VN_HOLD(ZONE_ROOTVP());
3727 3727 cs->vp = ZONE_ROOTVP();
3728 3728
3729 3729 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3730 3730 VN_RELE(cs->vp);
3731 3731 cs->vp = NULL;
3732 3732 cs->exi = sav_exi;
3733 3733 goto out;
3734 3734 }
3735 3735
3736 3736 *cs->statusp = resp->status = NFS4_OK;
3737 3737 cs->deleg = FALSE;
3738 3738 out:
3739 3739 DTRACE_NFSV4_2(op__putrootfh__done, struct compound_state *, cs,
3740 3740 PUTROOTFH4res *, resp);
3741 3741 }
3742 3742
3743 3743 /*
3744 3744 * readlink: args: CURRENT_FH.
3745 3745 * res: status. If success - CURRENT_FH unchanged, return linktext.
 *
 * The current vnode must exist (NFS4ERR_NOFILEHANDLE) and cs->access must
 * not be CS_ACCESS_DENIED (NFS4ERR_ACCESS).  When vn_is_nfs_reparse() and
 * client_is_downrev() both hold, the vnode is treated as a referral and
 * the link text comes from build_symlink().  Otherwise the vnode must be a
 * symbolic link (NFS4ERR_ISDIR for a directory, NFS4ERR_INVAL for anything
 * else) and the text is read with VOP_READLINK().  MANDLOCK() vnodes are
 * refused with NFS4ERR_ACCESS.
 *
 * The text is passed through nfscmd_convname() (outbound direction) and
 * stored in resp->link as a utf8string.  The outcome is stored in both
 * resp->status and *cs->statusp.
3746 3746 */
3747 3747
3748 3748 /* ARGSUSED */
3749 3749 static void
3750 3750 rfs4_op_readlink(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3751 3751 struct compound_state *cs)
3752 3752 {
3753 3753 READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3754 3754 int error;
3755 3755 vnode_t *vp;
3756 3756 struct iovec iov;
3757 3757 struct vattr va;
3758 3758 struct uio uio;
3759 3759 char *data;
3760 3760 struct sockaddr *ca;
3761 3761 char *name = NULL;
3762 3762 int is_referral;
3763 3763
3764 3764 DTRACE_NFSV4_1(op__readlink__start, struct compound_state *, cs);
3765 3765
3766 3766 /* CURRENT_FH: the object whose link text is read */
3767 3767 vp = cs->vp;
3768 3768 if (vp == NULL) {
3769 3769 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3770 3770 goto out;
3771 3771 }
3772 3772
3773 3773 if (cs->access == CS_ACCESS_DENIED) {
3774 3774 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3775 3775 goto out;
3776 3776 }
3777 3777
3778 3778 /* Is it a referral? */
3779 3779 if (vn_is_nfs_reparse(vp, cs->cr) && client_is_downrev(req)) {
3780 3780
3781 3781 is_referral = 1;
3782 3782
3783 3783 } else {
3784 3784
3785 3785 is_referral = 0;
3786 3786
/* Not a referral: only a symbolic link may be read. */
3787 3787 if (vp->v_type == VDIR) {
3788 3788 *cs->statusp = resp->status = NFS4ERR_ISDIR;
3789 3789 goto out;
3790 3790 }
3791 3791
3792 3792 if (vp->v_type != VLNK) {
3793 3793 *cs->statusp = resp->status = NFS4ERR_INVAL;
3794 3794 goto out;
3795 3795 }
3796 3796
3797 3797 }
3798 3798
3799 3799 va.va_mask = AT_MODE;
3800 3800 error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
3801 3801 if (error) {
3802 3802 *cs->statusp = resp->status = puterrno4(error);
3803 3803 goto out;
3804 3804 }
3805 3805
|
↓ open down ↓ |
3805 lines elided |
↑ open up ↑ |
3806 3806 if (MANDLOCK(vp, va.va_mode)) {
3807 3807 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3808 3808 goto out;
3809 3809 }
3810 3810
/* Scratch buffer for the link text: MAXPATHLEN bytes plus a terminator. */
3811 3811 data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
3812 3812
3813 3813 if (is_referral) {
3814 3814 char *s;
3815 3815 size_t strsz;
3816 + kstat_named_t *stat =
3817 + cs->exi->exi_ne->ne_globals->svstat[NFS_V4];
3816 3818
3817 3819 /* Get an artificial symlink based on a referral */
3818 3820 s = build_symlink(vp, cs->cr, &strsz);
/* The referral-link counter is bumped before s is checked for NULL. */
3819 - global_svstat_ptr[4][NFS_REFERLINKS].value.ui64++;
3821 + stat[NFS_REFERLINKS].value.ui64++;
3820 3822 DTRACE_PROBE2(nfs4serv__func__referral__reflink,
3821 3823 vnode_t *, vp, char *, s);
3822 3824 if (s == NULL)
3823 3825 error = EINVAL;
3824 3826 else {
3825 3827 error = 0;
3826 3828 (void) strlcpy(data, s, MAXPATHLEN + 1);
3827 3829 kmem_free(s, strsz);
3828 3830 }
3829 3831
3830 3832 } else {
3831 3833
/* Read at most MAXPATHLEN bytes so one byte stays free for the NUL. */
3832 3834 iov.iov_base = data;
3833 3835 iov.iov_len = MAXPATHLEN;
3834 3836 uio.uio_iov = &iov;
3835 3837 uio.uio_iovcnt = 1;
3836 3838 uio.uio_segflg = UIO_SYSSPACE;
3837 3839 uio.uio_extflg = UIO_COPY_CACHED;
3838 3840 uio.uio_loffset = 0;
3839 3841 uio.uio_resid = MAXPATHLEN;
3840 3842
3841 3843 error = VOP_READLINK(vp, &uio, cs->cr, NULL);
3842 3844
/* Terminate the text just after the bytes that were read. */
3843 3845 if (!error)
3844 3846 *(data + MAXPATHLEN - uio.uio_resid) = '\0';
3845 3847 }
3846 3848
3847 3849 if (error) {
3848 3850 kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3849 3851 *cs->statusp = resp->status = puterrno4(error);
3850 3852 goto out;
3851 3853 }
3852 3854
3853 3855 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3854 3856 name = nfscmd_convname(ca, cs->exi, data, NFSCMD_CONV_OUTBOUND,
3855 3857 MAXPATHLEN + 1);
3856 3858
3857 3859 if (name == NULL) {
3858 3860 /*
3859 3861 * Even though the conversion failed, we return
3860 3862 * something. We just don't translate it.
3861 3863 */
3862 3864 name = data;
3863 3865 }
3864 3866
3865 3867 /*
3866 3868 * treat link name as data
3867 3869 */
3868 3870 (void) str_to_utf8(name, (utf8string *)&resp->link);
3869 3871
/* Free the converted name only when it is a buffer distinct from data. */
3870 3872 if (name != data)
3871 3873 kmem_free(name, MAXPATHLEN + 1);
3872 3874 kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3873 3875 *cs->statusp = resp->status = NFS4_OK;
3874 3876
3875 3877 out:
3876 3878 DTRACE_NFSV4_2(op__readlink__done, struct compound_state *, cs,
3877 3879 READLINK4res *, resp);
3878 3880 }
3879 3881
3880 3882 static void
3881 3883 rfs4_op_readlink_free(nfs_resop4 *resop)
3882 3884 {
3883 3885 READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3884 3886 utf8string *symlink = (utf8string *)&resp->link;
3885 3887
3886 3888 if (symlink->utf8string_val) {
3887 3889 UTF8STRING_FREE(*symlink)
3888 3890 }
3889 3891 }
3890 3892
3891 3893 /*
3892 3894 * release_lockowner:
3893 3895 * Release any state associated with the supplied
3894 3896 * lockowner. Note if any lo_state is holding locks we will not
3895 3897 * rele that lo_state and thus the lockowner will not be destroyed.
3896 3898 * A client using lock after the lock owner stateid has been released
3897 3899 * will suffer the consequence of NFS4ERR_BAD_STATEID and would have
3898 3900 * to reissue the lock with new_lock_owner set to TRUE.
3899 3901 * args: lock_owner
3900 3902 * res: status
3901 3903 */
3902 3904 /* ARGSUSED */
3903 3905 static void
3904 3906 rfs4_op_release_lockowner(nfs_argop4 *argop, nfs_resop4 *resop,
3905 3907 struct svc_req *req, struct compound_state *cs)
3906 3908 {
3907 3909 RELEASE_LOCKOWNER4args *ap = &argop->nfs_argop4_u.oprelease_lockowner;
3908 3910 RELEASE_LOCKOWNER4res *resp = &resop->nfs_resop4_u.oprelease_lockowner;
3909 3911 rfs4_lockowner_t *lo;
3910 3912 rfs4_openowner_t *oo;
3911 3913 rfs4_state_t *sp;
3912 3914 rfs4_lo_state_t *lsp;
3913 3915 rfs4_client_t *cp;
3914 3916 bool_t create = FALSE;
3915 3917 locklist_t *llist;
3916 3918 sysid_t sysid;
3917 3919
3918 3920 DTRACE_NFSV4_2(op__release__lockowner__start, struct compound_state *,
3919 3921 cs, RELEASE_LOCKOWNER4args *, ap);
3920 3922
3921 3923 /* Make sure there is a clientid around for this request */
3922 3924 cp = rfs4_findclient_by_id(ap->lock_owner.clientid, FALSE);
3923 3925
3924 3926 if (cp == NULL) {
3925 3927 *cs->statusp = resp->status =
3926 3928 rfs4_check_clientid(&ap->lock_owner.clientid, 0);
3927 3929 goto out;
3928 3930 }
3929 3931 rfs4_client_rele(cp);
3930 3932
3931 3933 lo = rfs4_findlockowner(&ap->lock_owner, &create);
3932 3934 if (lo == NULL) {
3933 3935 *cs->statusp = resp->status = NFS4_OK;
3934 3936 goto out;
3935 3937 }
3936 3938 ASSERT(lo->rl_client != NULL);
3937 3939
3938 3940 /*
3939 3941 * Check for EXPIRED client. If so will reap state with in a lease
3940 3942 * period or on next set_clientid_confirm step
3941 3943 */
3942 3944 if (rfs4_lease_expired(lo->rl_client)) {
3943 3945 rfs4_lockowner_rele(lo);
3944 3946 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
3945 3947 goto out;
3946 3948 }
3947 3949
3948 3950 /*
3949 3951 * If no sysid has been assigned, then no locks exist; just return.
3950 3952 */
3951 3953 rfs4_dbe_lock(lo->rl_client->rc_dbe);
3952 3954 if (lo->rl_client->rc_sysidt == LM_NOSYSID) {
3953 3955 rfs4_lockowner_rele(lo);
3954 3956 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3955 3957 goto out;
3956 3958 }
3957 3959
3958 3960 sysid = lo->rl_client->rc_sysidt;
3959 3961 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3960 3962
3961 3963 /*
3962 3964 * Mark the lockowner invalid.
3963 3965 */
3964 3966 rfs4_dbe_hide(lo->rl_dbe);
3965 3967
3966 3968 /*
3967 3969 * sysid-pid pair should now not be used since the lockowner is
3968 3970 * invalid. If the client were to instantiate the lockowner again
3969 3971 * it would be assigned a new pid. Thus we can get the list of
3970 3972 * current locks.
3971 3973 */
3972 3974
3973 3975 llist = flk_get_active_locks(sysid, lo->rl_pid);
3974 3976 /* If we are still holding locks fail */
3975 3977 if (llist != NULL) {
3976 3978
3977 3979 *cs->statusp = resp->status = NFS4ERR_LOCKS_HELD;
3978 3980
3979 3981 flk_free_locklist(llist);
3980 3982 /*
3981 3983 * We need to unhide the lockowner so the client can
3982 3984 * try it again. The bad thing here is if the client
3983 3985 * has a logic error that took it here in the first place
3984 3986 * they probably have lost accounting of the locks that it
3985 3987 * is holding. So we may have dangling state until the
3986 3988 * open owner state is reaped via close. One scenario
3987 3989 * that could possibly occur is that the client has
3988 3990 * sent the unlock request(s) in separate threads
3989 3991 * and has not waited for the replies before sending the
3990 3992 * RELEASE_LOCKOWNER request. Presumably, it would expect
3991 3993 * and deal appropriately with NFS4ERR_LOCKS_HELD, by
3992 3994 * reissuing the request.
3993 3995 */
3994 3996 rfs4_dbe_unhide(lo->rl_dbe);
3995 3997 rfs4_lockowner_rele(lo);
3996 3998 goto out;
3997 3999 }
3998 4000
3999 4001 /*
4000 4002 * For the corresponding client we need to check each open
4001 4003 * owner for any opens that have lockowner state associated
4002 4004 * with this lockowner.
4003 4005 */
4004 4006
4005 4007 rfs4_dbe_lock(lo->rl_client->rc_dbe);
4006 4008 for (oo = list_head(&lo->rl_client->rc_openownerlist); oo != NULL;
4007 4009 oo = list_next(&lo->rl_client->rc_openownerlist, oo)) {
4008 4010
4009 4011 rfs4_dbe_lock(oo->ro_dbe);
4010 4012 for (sp = list_head(&oo->ro_statelist); sp != NULL;
4011 4013 sp = list_next(&oo->ro_statelist, sp)) {
4012 4014
4013 4015 rfs4_dbe_lock(sp->rs_dbe);
4014 4016 for (lsp = list_head(&sp->rs_lostatelist);
4015 4017 lsp != NULL;
4016 4018 lsp = list_next(&sp->rs_lostatelist, lsp)) {
4017 4019 if (lsp->rls_locker == lo) {
4018 4020 rfs4_dbe_lock(lsp->rls_dbe);
4019 4021 rfs4_dbe_invalidate(lsp->rls_dbe);
4020 4022 rfs4_dbe_unlock(lsp->rls_dbe);
4021 4023 }
4022 4024 }
4023 4025 rfs4_dbe_unlock(sp->rs_dbe);
4024 4026 }
4025 4027 rfs4_dbe_unlock(oo->ro_dbe);
4026 4028 }
4027 4029 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
4028 4030
4029 4031 rfs4_lockowner_rele(lo);
4030 4032
4031 4033 *cs->statusp = resp->status = NFS4_OK;
4032 4034
4033 4035 out:
4034 4036 DTRACE_NFSV4_2(op__release__lockowner__done, struct compound_state *,
4035 4037 cs, RELEASE_LOCKOWNER4res *, resp);
4036 4038 }
4037 4039
4038 4040 /*
4039 4041 * short utility function to lookup a file and recall the delegation
4040 4042 */
4041 4043 static rfs4_file_t *
4042 4044 rfs4_lookup_and_findfile(vnode_t *dvp, char *nm, vnode_t **vpp,
4043 4045 int *lkup_error, cred_t *cr)
4044 4046 {
4045 4047 vnode_t *vp;
4046 4048 rfs4_file_t *fp = NULL;
4047 4049 bool_t fcreate = FALSE;
4048 4050 int error;
4049 4051
4050 4052 if (vpp)
4051 4053 *vpp = NULL;
4052 4054
4053 4055 if ((error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cr, NULL, NULL,
4054 4056 NULL)) == 0) {
4055 4057 if (vp->v_type == VREG)
4056 4058 fp = rfs4_findfile(vp, NULL, &fcreate);
4057 4059 if (vpp)
4058 4060 *vpp = vp;
4059 4061 else
4060 4062 VN_RELE(vp);
4061 4063 }
4062 4064
4063 4065 if (lkup_error)
4064 4066 *lkup_error = error;
4065 4067
4066 4068 return (fp);
4067 4069 }
4068 4070
4069 4071 /*
4070 4072 * remove: args: CURRENT_FH: directory; name.
4071 4073 * res: status. If success - CURRENT_FH unchanged, return change_info
4072 4074 * for directory.
4073 4075 */
4074 4076 /* ARGSUSED */
4075 4077 static void
4076 4078 rfs4_op_remove(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4077 4079 struct compound_state *cs)
4078 4080 {
4079 4081 REMOVE4args *args = &argop->nfs_argop4_u.opremove;
4080 4082 REMOVE4res *resp = &resop->nfs_resop4_u.opremove;
4081 4083 int error;
4082 4084 vnode_t *dvp, *vp;
4083 4085 struct vattr bdva, idva, adva;
4084 4086 char *nm;
4085 4087 uint_t len;
4086 4088 rfs4_file_t *fp;
4087 4089 int in_crit = 0;
4088 4090 bslabel_t *clabel;
4089 4091 struct sockaddr *ca;
4090 4092 char *name = NULL;
4091 4093 nfsstat4 status;
4092 4094
4093 4095 DTRACE_NFSV4_2(op__remove__start, struct compound_state *, cs,
4094 4096 REMOVE4args *, args);
4095 4097
4096 4098 /* CURRENT_FH: directory */
4097 4099 dvp = cs->vp;
4098 4100 if (dvp == NULL) {
4099 4101 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4100 4102 goto out;
4101 4103 }
4102 4104
4103 4105 if (cs->access == CS_ACCESS_DENIED) {
4104 4106 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4105 4107 goto out;
4106 4108 }
4107 4109
4108 4110 /*
4109 4111 * If there is an unshared filesystem mounted on this vnode,
4110 4112 * Do not allow to remove anything in this directory.
4111 4113 */
4112 4114 if (vn_ismntpt(dvp)) {
4113 4115 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4114 4116 goto out;
4115 4117 }
4116 4118
4117 4119 if (dvp->v_type != VDIR) {
4118 4120 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4119 4121 goto out;
4120 4122 }
4121 4123
4122 4124 status = utf8_dir_verify(&args->target);
4123 4125 if (status != NFS4_OK) {
4124 4126 *cs->statusp = resp->status = status;
4125 4127 goto out;
4126 4128 }
4127 4129
4128 4130 /*
4129 4131 * Lookup the file so that we can check if it's a directory
4130 4132 */
4131 4133 nm = utf8_to_fn(&args->target, &len, NULL);
4132 4134 if (nm == NULL) {
4133 4135 *cs->statusp = resp->status = NFS4ERR_INVAL;
4134 4136 goto out;
4135 4137 }
4136 4138
4137 4139 if (len > MAXNAMELEN) {
4138 4140 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4139 4141 kmem_free(nm, len);
4140 4142 goto out;
4141 4143 }
4142 4144
4143 4145 if (rdonly4(req, cs)) {
4144 4146 *cs->statusp = resp->status = NFS4ERR_ROFS;
4145 4147 kmem_free(nm, len);
4146 4148 goto out;
4147 4149 }
4148 4150
4149 4151 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4150 4152 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
4151 4153 MAXPATHLEN + 1);
4152 4154
4153 4155 if (name == NULL) {
4154 4156 *cs->statusp = resp->status = NFS4ERR_INVAL;
4155 4157 kmem_free(nm, len);
4156 4158 goto out;
4157 4159 }
4158 4160
4159 4161 /*
4160 4162 * Lookup the file to determine type and while we are see if
4161 4163 * there is a file struct around and check for delegation.
4162 4164 * We don't need to acquire va_seq before this lookup, if
4163 4165 * it causes an update, cinfo.before will not match, which will
4164 4166 * trigger a cache flush even if atomic is TRUE.
4165 4167 */
4166 4168 if (fp = rfs4_lookup_and_findfile(dvp, name, &vp, &error, cs->cr)) {
4167 4169 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4168 4170 NULL)) {
4169 4171 VN_RELE(vp);
4170 4172 rfs4_file_rele(fp);
4171 4173 *cs->statusp = resp->status = NFS4ERR_DELAY;
4172 4174 if (nm != name)
4173 4175 kmem_free(name, MAXPATHLEN + 1);
4174 4176 kmem_free(nm, len);
4175 4177 goto out;
4176 4178 }
4177 4179 }
4178 4180
4179 4181 /* Didn't find anything to remove */
4180 4182 if (vp == NULL) {
4181 4183 *cs->statusp = resp->status = error;
4182 4184 if (nm != name)
4183 4185 kmem_free(name, MAXPATHLEN + 1);
4184 4186 kmem_free(nm, len);
4185 4187 goto out;
4186 4188 }
4187 4189
4188 4190 if (nbl_need_check(vp)) {
4189 4191 nbl_start_crit(vp, RW_READER);
4190 4192 in_crit = 1;
4191 4193 if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0, NULL)) {
4192 4194 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4193 4195 if (nm != name)
4194 4196 kmem_free(name, MAXPATHLEN + 1);
4195 4197 kmem_free(nm, len);
4196 4198 nbl_end_crit(vp);
4197 4199 VN_RELE(vp);
4198 4200 if (fp) {
4199 4201 rfs4_clear_dont_grant(fp);
4200 4202 rfs4_file_rele(fp);
4201 4203 }
4202 4204 goto out;
4203 4205 }
4204 4206 }
4205 4207
4206 4208 /* check label before allowing removal */
4207 4209 if (is_system_labeled()) {
4208 4210 ASSERT(req->rq_label != NULL);
4209 4211 clabel = req->rq_label;
4210 4212 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
4211 4213 "got client label from request(1)",
4212 4214 struct svc_req *, req);
4213 4215 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4214 4216 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4215 4217 cs->exi)) {
4216 4218 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4217 4219 if (name != nm)
4218 4220 kmem_free(name, MAXPATHLEN + 1);
4219 4221 kmem_free(nm, len);
4220 4222 if (in_crit)
4221 4223 nbl_end_crit(vp);
4222 4224 VN_RELE(vp);
4223 4225 if (fp) {
4224 4226 rfs4_clear_dont_grant(fp);
4225 4227 rfs4_file_rele(fp);
4226 4228 }
4227 4229 goto out;
4228 4230 }
4229 4231 }
4230 4232 }
4231 4233
4232 4234 /* Get dir "before" change value */
4233 4235 bdva.va_mask = AT_CTIME|AT_SEQ;
4234 4236 error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
4235 4237 if (error) {
4236 4238 *cs->statusp = resp->status = puterrno4(error);
4237 4239 if (nm != name)
4238 4240 kmem_free(name, MAXPATHLEN + 1);
4239 4241 kmem_free(nm, len);
4240 4242 if (in_crit)
4241 4243 nbl_end_crit(vp);
4242 4244 VN_RELE(vp);
4243 4245 if (fp) {
4244 4246 rfs4_clear_dont_grant(fp);
4245 4247 rfs4_file_rele(fp);
4246 4248 }
4247 4249 goto out;
4248 4250 }
4249 4251 NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
4250 4252
4251 4253 /* Actually do the REMOVE operation */
4252 4254 if (vp->v_type == VDIR) {
4253 4255 /*
4254 4256 * Can't remove a directory that has a mounted-on filesystem.
4255 4257 */
4256 4258 if (vn_ismntpt(vp)) {
4257 4259 error = EACCES;
4258 4260 } else {
4259 4261 /*
4260 4262 * System V defines rmdir to return EEXIST,
4261 4263 * not ENOTEMPTY, if the directory is not
4262 4264 * empty. A System V NFS server needs to map
4263 4265 * NFS4ERR_EXIST to NFS4ERR_NOTEMPTY to
4264 4266 * transmit over the wire.
4265 4267 */
4266 4268 if ((error = VOP_RMDIR(dvp, name, ZONE_ROOTVP(), cs->cr,
4267 4269 NULL, 0)) == EEXIST)
4268 4270 error = ENOTEMPTY;
4269 4271 }
4270 4272 } else {
4271 4273 if ((error = VOP_REMOVE(dvp, name, cs->cr, NULL, 0)) == 0 &&
4272 4274 fp != NULL) {
4273 4275 struct vattr va;
4274 4276 vnode_t *tvp;
4275 4277
4276 4278 rfs4_dbe_lock(fp->rf_dbe);
4277 4279 tvp = fp->rf_vp;
4278 4280 if (tvp)
4279 4281 VN_HOLD(tvp);
4280 4282 rfs4_dbe_unlock(fp->rf_dbe);
4281 4283
4282 4284 if (tvp) {
4283 4285 /*
4284 4286 * This is va_seq safe because we are not
4285 4287 * manipulating dvp.
4286 4288 */
4287 4289 va.va_mask = AT_NLINK;
4288 4290 if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4289 4291 va.va_nlink == 0) {
4290 4292 /* Remove state on file remove */
4291 4293 if (in_crit) {
4292 4294 nbl_end_crit(vp);
4293 4295 in_crit = 0;
4294 4296 }
4295 4297 rfs4_close_all_state(fp);
4296 4298 }
4297 4299 VN_RELE(tvp);
4298 4300 }
4299 4301 }
4300 4302 }
4301 4303
4302 4304 if (in_crit)
4303 4305 nbl_end_crit(vp);
4304 4306 VN_RELE(vp);
4305 4307
4306 4308 if (fp) {
4307 4309 rfs4_clear_dont_grant(fp);
4308 4310 rfs4_file_rele(fp);
4309 4311 }
4310 4312 if (nm != name)
4311 4313 kmem_free(name, MAXPATHLEN + 1);
4312 4314 kmem_free(nm, len);
4313 4315
4314 4316 if (error) {
4315 4317 *cs->statusp = resp->status = puterrno4(error);
4316 4318 goto out;
4317 4319 }
4318 4320
4319 4321 /*
4320 4322 * Get the initial "after" sequence number, if it fails, set to zero
4321 4323 */
4322 4324 idva.va_mask = AT_SEQ;
4323 4325 if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
4324 4326 idva.va_seq = 0;
4325 4327
4326 4328 /*
4327 4329 * Force modified data and metadata out to stable storage.
4328 4330 */
4329 4331 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
4330 4332
4331 4333 /*
4332 4334 * Get "after" change value, if it fails, simply return the
4333 4335 * before value.
4334 4336 */
4335 4337 adva.va_mask = AT_CTIME|AT_SEQ;
4336 4338 if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
4337 4339 adva.va_ctime = bdva.va_ctime;
4338 4340 adva.va_seq = 0;
4339 4341 }
4340 4342
4341 4343 NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
4342 4344
4343 4345 /*
4344 4346 * The cinfo.atomic = TRUE only if we have
4345 4347 * non-zero va_seq's, and it has incremented by exactly one
4346 4348 * during the VOP_REMOVE/RMDIR and it didn't change during
4347 4349 * the VOP_FSYNC.
4348 4350 */
4349 4351 if (bdva.va_seq && idva.va_seq && adva.va_seq &&
4350 4352 idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
4351 4353 resp->cinfo.atomic = TRUE;
4352 4354 else
4353 4355 resp->cinfo.atomic = FALSE;
4354 4356
4355 4357 *cs->statusp = resp->status = NFS4_OK;
4356 4358
4357 4359 out:
4358 4360 DTRACE_NFSV4_2(op__remove__done, struct compound_state *, cs,
4359 4361 REMOVE4res *, resp);
4360 4362 }
4361 4363
4362 4364 /*
4363 4365 * rename: args: SAVED_FH: from directory, CURRENT_FH: target directory,
4364 4366 * oldname and newname.
4365 4367 * res: status. If success - CURRENT_FH unchanged, return change_info
4366 4368 * for both from and target directories.
4367 4369 */
4368 4370 /* ARGSUSED */
4369 4371 static void
4370 4372 rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4371 4373 struct compound_state *cs)
4372 4374 {
4373 4375 RENAME4args *args = &argop->nfs_argop4_u.oprename;
4374 4376 RENAME4res *resp = &resop->nfs_resop4_u.oprename;
4375 4377 int error;
4376 4378 vnode_t *odvp;
4377 4379 vnode_t *ndvp;
4378 4380 vnode_t *srcvp, *targvp, *tvp;
4379 4381 struct vattr obdva, oidva, oadva;
4380 4382 struct vattr nbdva, nidva, nadva;
4381 4383 char *onm, *nnm;
4382 4384 uint_t olen, nlen;
4383 4385 rfs4_file_t *fp, *sfp;
4384 4386 int in_crit_src, in_crit_targ;
4385 4387 int fp_rele_grant_hold, sfp_rele_grant_hold;
4386 4388 int unlinked;
4387 4389 bslabel_t *clabel;
4388 4390 struct sockaddr *ca;
4389 4391 char *converted_onm = NULL;
4390 4392 char *converted_nnm = NULL;
4391 4393 nfsstat4 status;
4392 4394
4393 4395 DTRACE_NFSV4_2(op__rename__start, struct compound_state *, cs,
4394 4396 RENAME4args *, args);
4395 4397
4396 4398 fp = sfp = NULL;
4397 4399 srcvp = targvp = tvp = NULL;
4398 4400 in_crit_src = in_crit_targ = 0;
4399 4401 fp_rele_grant_hold = sfp_rele_grant_hold = 0;
4400 4402 unlinked = 0;
4401 4403
4402 4404 /* CURRENT_FH: target directory */
4403 4405 ndvp = cs->vp;
4404 4406 if (ndvp == NULL) {
4405 4407 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4406 4408 goto out;
4407 4409 }
4408 4410
4409 4411 /* SAVED_FH: from directory */
4410 4412 odvp = cs->saved_vp;
4411 4413 if (odvp == NULL) {
4412 4414 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4413 4415 goto out;
4414 4416 }
4415 4417
4416 4418 if (cs->access == CS_ACCESS_DENIED) {
4417 4419 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4418 4420 goto out;
4419 4421 }
4420 4422
4421 4423 /*
4422 4424 * If there is an unshared filesystem mounted on this vnode,
4423 4425 * do not allow to rename objects in this directory.
4424 4426 */
4425 4427 if (vn_ismntpt(odvp)) {
4426 4428 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4427 4429 goto out;
4428 4430 }
4429 4431
4430 4432 /*
4431 4433 * If there is an unshared filesystem mounted on this vnode,
4432 4434 * do not allow to rename to this directory.
4433 4435 */
4434 4436 if (vn_ismntpt(ndvp)) {
4435 4437 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4436 4438 goto out;
4437 4439 }
4438 4440
4439 4441 if (odvp->v_type != VDIR || ndvp->v_type != VDIR) {
4440 4442 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4441 4443 goto out;
4442 4444 }
4443 4445
4444 4446 if (cs->saved_exi != cs->exi) {
4445 4447 *cs->statusp = resp->status = NFS4ERR_XDEV;
4446 4448 goto out;
4447 4449 }
4448 4450
4449 4451 status = utf8_dir_verify(&args->oldname);
4450 4452 if (status != NFS4_OK) {
4451 4453 *cs->statusp = resp->status = status;
4452 4454 goto out;
4453 4455 }
4454 4456
4455 4457 status = utf8_dir_verify(&args->newname);
4456 4458 if (status != NFS4_OK) {
4457 4459 *cs->statusp = resp->status = status;
4458 4460 goto out;
4459 4461 }
4460 4462
4461 4463 onm = utf8_to_fn(&args->oldname, &olen, NULL);
4462 4464 if (onm == NULL) {
4463 4465 *cs->statusp = resp->status = NFS4ERR_INVAL;
4464 4466 goto out;
4465 4467 }
4466 4468 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4467 4469 nlen = MAXPATHLEN + 1;
4468 4470 converted_onm = nfscmd_convname(ca, cs->exi, onm, NFSCMD_CONV_INBOUND,
4469 4471 nlen);
4470 4472
4471 4473 if (converted_onm == NULL) {
4472 4474 *cs->statusp = resp->status = NFS4ERR_INVAL;
4473 4475 kmem_free(onm, olen);
4474 4476 goto out;
4475 4477 }
4476 4478
4477 4479 nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4478 4480 if (nnm == NULL) {
4479 4481 *cs->statusp = resp->status = NFS4ERR_INVAL;
4480 4482 if (onm != converted_onm)
4481 4483 kmem_free(converted_onm, MAXPATHLEN + 1);
4482 4484 kmem_free(onm, olen);
4483 4485 goto out;
4484 4486 }
4485 4487 converted_nnm = nfscmd_convname(ca, cs->exi, nnm, NFSCMD_CONV_INBOUND,
4486 4488 MAXPATHLEN + 1);
4487 4489
4488 4490 if (converted_nnm == NULL) {
4489 4491 *cs->statusp = resp->status = NFS4ERR_INVAL;
4490 4492 kmem_free(nnm, nlen);
4491 4493 nnm = NULL;
4492 4494 if (onm != converted_onm)
4493 4495 kmem_free(converted_onm, MAXPATHLEN + 1);
4494 4496 kmem_free(onm, olen);
4495 4497 goto out;
4496 4498 }
4497 4499
4498 4500
4499 4501 if (olen > MAXNAMELEN || nlen > MAXNAMELEN) {
4500 4502 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4501 4503 kmem_free(onm, olen);
4502 4504 kmem_free(nnm, nlen);
4503 4505 goto out;
4504 4506 }
4505 4507
4506 4508
4507 4509 if (rdonly4(req, cs)) {
4508 4510 *cs->statusp = resp->status = NFS4ERR_ROFS;
4509 4511 if (onm != converted_onm)
4510 4512 kmem_free(converted_onm, MAXPATHLEN + 1);
4511 4513 kmem_free(onm, olen);
4512 4514 if (nnm != converted_nnm)
4513 4515 kmem_free(converted_nnm, MAXPATHLEN + 1);
4514 4516 kmem_free(nnm, nlen);
4515 4517 goto out;
4516 4518 }
4517 4519
4518 4520 /* check label of the target dir */
4519 4521 if (is_system_labeled()) {
4520 4522 ASSERT(req->rq_label != NULL);
4521 4523 clabel = req->rq_label;
4522 4524 DTRACE_PROBE2(tx__rfs4__log__info__oprename__clabel, char *,
4523 4525 "got client label from request(1)",
4524 4526 struct svc_req *, req);
4525 4527 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4526 4528 if (!do_rfs_label_check(clabel, ndvp,
4527 4529 EQUALITY_CHECK, cs->exi)) {
4528 4530 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4529 4531 goto err_out;
4530 4532 }
4531 4533 }
4532 4534 }
4533 4535
4534 4536 /*
4535 4537 * Is the source a file and have a delegation?
4536 4538 * We don't need to acquire va_seq before these lookups, if
4537 4539 * it causes an update, cinfo.before will not match, which will
4538 4540 * trigger a cache flush even if atomic is TRUE.
4539 4541 */
4540 4542 if (sfp = rfs4_lookup_and_findfile(odvp, converted_onm, &srcvp,
4541 4543 &error, cs->cr)) {
4542 4544 if (rfs4_check_delegated_byfp(FWRITE, sfp, TRUE, TRUE, TRUE,
4543 4545 NULL)) {
4544 4546 *cs->statusp = resp->status = NFS4ERR_DELAY;
4545 4547 goto err_out;
4546 4548 }
4547 4549 }
4548 4550
4549 4551 if (srcvp == NULL) {
4550 4552 *cs->statusp = resp->status = puterrno4(error);
4551 4553 if (onm != converted_onm)
4552 4554 kmem_free(converted_onm, MAXPATHLEN + 1);
4553 4555 kmem_free(onm, olen);
4554 4556 if (nnm != converted_nnm)
4555 4557 kmem_free(converted_nnm, MAXPATHLEN + 1);
4556 4558 kmem_free(nnm, nlen);
4557 4559 goto out;
4558 4560 }
4559 4561
4560 4562 sfp_rele_grant_hold = 1;
4561 4563
4562 4564 /* Does the destination exist and a file and have a delegation? */
4563 4565 if (fp = rfs4_lookup_and_findfile(ndvp, converted_nnm, &targvp,
4564 4566 NULL, cs->cr)) {
4565 4567 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4566 4568 NULL)) {
4567 4569 *cs->statusp = resp->status = NFS4ERR_DELAY;
4568 4570 goto err_out;
4569 4571 }
4570 4572 }
4571 4573 fp_rele_grant_hold = 1;
4572 4574
4573 4575 /* Check for NBMAND lock on both source and target */
4574 4576 if (nbl_need_check(srcvp)) {
4575 4577 nbl_start_crit(srcvp, RW_READER);
4576 4578 in_crit_src = 1;
4577 4579 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
4578 4580 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4579 4581 goto err_out;
4580 4582 }
4581 4583 }
4582 4584
4583 4585 if (targvp && nbl_need_check(targvp)) {
4584 4586 nbl_start_crit(targvp, RW_READER);
4585 4587 in_crit_targ = 1;
4586 4588 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
4587 4589 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4588 4590 goto err_out;
4589 4591 }
4590 4592 }
4591 4593
4592 4594 /* Get source "before" change value */
4593 4595 obdva.va_mask = AT_CTIME|AT_SEQ;
4594 4596 error = VOP_GETATTR(odvp, &obdva, 0, cs->cr, NULL);
4595 4597 if (!error) {
4596 4598 nbdva.va_mask = AT_CTIME|AT_SEQ;
4597 4599 error = VOP_GETATTR(ndvp, &nbdva, 0, cs->cr, NULL);
4598 4600 }
4599 4601 if (error) {
4600 4602 *cs->statusp = resp->status = puterrno4(error);
4601 4603 goto err_out;
4602 4604 }
4603 4605
4604 4606 NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.before, obdva.va_ctime)
4605 4607 NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.before, nbdva.va_ctime)
4606 4608
4607 4609 error = VOP_RENAME(odvp, converted_onm, ndvp, converted_nnm, cs->cr,
4608 4610 NULL, 0);
4609 4611
4610 4612 /*
4611 4613 * If target existed and was unlinked by VOP_RENAME, state will need
4612 4614 * closed. To avoid deadlock, rfs4_close_all_state will be done after
4613 4615 * any necessary nbl_end_crit on srcvp and tgtvp.
4614 4616 */
4615 4617 if (error == 0 && fp != NULL) {
4616 4618 rfs4_dbe_lock(fp->rf_dbe);
4617 4619 tvp = fp->rf_vp;
4618 4620 if (tvp)
4619 4621 VN_HOLD(tvp);
4620 4622 rfs4_dbe_unlock(fp->rf_dbe);
4621 4623
4622 4624 if (tvp) {
4623 4625 struct vattr va;
4624 4626 va.va_mask = AT_NLINK;
4625 4627
4626 4628 if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4627 4629 va.va_nlink == 0) {
4628 4630 unlinked = 1;
4629 4631
4630 4632 /* DEBUG data */
4631 4633 if ((srcvp == targvp) || (tvp != targvp)) {
4632 4634 cmn_err(CE_WARN, "rfs4_op_rename: "
4633 4635 "srcvp %p, targvp: %p, tvp: %p",
4634 4636 (void *)srcvp, (void *)targvp,
4635 4637 (void *)tvp);
4636 4638 }
4637 4639 } else {
4638 4640 VN_RELE(tvp);
4639 4641 }
4640 4642 }
4641 4643 }
4642 4644 if (error == 0)
4643 4645 vn_renamepath(ndvp, srcvp, nnm, nlen - 1);
4644 4646
4645 4647 if (in_crit_src)
4646 4648 nbl_end_crit(srcvp);
4647 4649 if (srcvp)
4648 4650 VN_RELE(srcvp);
4649 4651 if (in_crit_targ)
4650 4652 nbl_end_crit(targvp);
4651 4653 if (targvp)
4652 4654 VN_RELE(targvp);
4653 4655
4654 4656 if (unlinked) {
4655 4657 ASSERT(fp != NULL);
4656 4658 ASSERT(tvp != NULL);
4657 4659
4658 4660 /* DEBUG data */
4659 4661 if (RW_READ_HELD(&tvp->v_nbllock)) {
4660 4662 cmn_err(CE_WARN, "rfs4_op_rename: "
4661 4663 "RW_READ_HELD(%p)", (void *)tvp);
4662 4664 }
4663 4665
4664 4666 /* The file is gone and so should the state */
4665 4667 rfs4_close_all_state(fp);
4666 4668 VN_RELE(tvp);
4667 4669 }
4668 4670
4669 4671 if (sfp) {
4670 4672 rfs4_clear_dont_grant(sfp);
4671 4673 rfs4_file_rele(sfp);
4672 4674 }
4673 4675 if (fp) {
4674 4676 rfs4_clear_dont_grant(fp);
4675 4677 rfs4_file_rele(fp);
4676 4678 }
4677 4679
4678 4680 if (converted_onm != onm)
4679 4681 kmem_free(converted_onm, MAXPATHLEN + 1);
4680 4682 kmem_free(onm, olen);
4681 4683 if (converted_nnm != nnm)
4682 4684 kmem_free(converted_nnm, MAXPATHLEN + 1);
4683 4685 kmem_free(nnm, nlen);
4684 4686
4685 4687 /*
4686 4688 * Get the initial "after" sequence number, if it fails, set to zero
4687 4689 */
4688 4690 oidva.va_mask = AT_SEQ;
4689 4691 if (VOP_GETATTR(odvp, &oidva, 0, cs->cr, NULL))
4690 4692 oidva.va_seq = 0;
4691 4693
4692 4694 nidva.va_mask = AT_SEQ;
4693 4695 if (VOP_GETATTR(ndvp, &nidva, 0, cs->cr, NULL))
4694 4696 nidva.va_seq = 0;
4695 4697
4696 4698 /*
4697 4699 * Force modified data and metadata out to stable storage.
4698 4700 */
4699 4701 (void) VOP_FSYNC(odvp, 0, cs->cr, NULL);
4700 4702 (void) VOP_FSYNC(ndvp, 0, cs->cr, NULL);
4701 4703
4702 4704 if (error) {
4703 4705 *cs->statusp = resp->status = puterrno4(error);
4704 4706 goto out;
4705 4707 }
4706 4708
4707 4709 /*
4708 4710 * Get "after" change values, if it fails, simply return the
4709 4711 * before value.
4710 4712 */
4711 4713 oadva.va_mask = AT_CTIME|AT_SEQ;
4712 4714 if (VOP_GETATTR(odvp, &oadva, 0, cs->cr, NULL)) {
4713 4715 oadva.va_ctime = obdva.va_ctime;
4714 4716 oadva.va_seq = 0;
4715 4717 }
4716 4718
4717 4719 nadva.va_mask = AT_CTIME|AT_SEQ;
4718 4720 if (VOP_GETATTR(odvp, &nadva, 0, cs->cr, NULL)) {
4719 4721 nadva.va_ctime = nbdva.va_ctime;
4720 4722 nadva.va_seq = 0;
4721 4723 }
4722 4724
4723 4725 NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.after, oadva.va_ctime)
4724 4726 NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.after, nadva.va_ctime)
4725 4727
4726 4728 /*
4727 4729 * The cinfo.atomic = TRUE only if we have
4728 4730 * non-zero va_seq's, and it has incremented by exactly one
4729 4731 * during the VOP_RENAME and it didn't change during the VOP_FSYNC.
4730 4732 */
4731 4733 if (obdva.va_seq && oidva.va_seq && oadva.va_seq &&
4732 4734 oidva.va_seq == (obdva.va_seq + 1) && oidva.va_seq == oadva.va_seq)
4733 4735 resp->source_cinfo.atomic = TRUE;
4734 4736 else
4735 4737 resp->source_cinfo.atomic = FALSE;
4736 4738
4737 4739 if (nbdva.va_seq && nidva.va_seq && nadva.va_seq &&
4738 4740 nidva.va_seq == (nbdva.va_seq + 1) && nidva.va_seq == nadva.va_seq)
4739 4741 resp->target_cinfo.atomic = TRUE;
4740 4742 else
4741 4743 resp->target_cinfo.atomic = FALSE;
4742 4744
4743 4745 #ifdef VOLATILE_FH_TEST
4744 4746 {
4745 4747 extern void add_volrnm_fh(struct exportinfo *, vnode_t *);
4746 4748
4747 4749 /*
4748 4750 * Add the renamed file handle to the volatile rename list
4749 4751 */
4750 4752 if (cs->exi->exi_export.ex_flags & EX_VOLRNM) {
4751 4753 /* file handles may expire on rename */
4752 4754 vnode_t *vp;
4753 4755
4754 4756 nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4755 4757 /*
4756 4758 * Already know that nnm will be a valid string
4757 4759 */
4758 4760 error = VOP_LOOKUP(ndvp, nnm, &vp, NULL, 0, NULL, cs->cr,
4759 4761 NULL, NULL, NULL);
4760 4762 kmem_free(nnm, nlen);
4761 4763 if (!error) {
4762 4764 add_volrnm_fh(cs->exi, vp);
4763 4765 VN_RELE(vp);
4764 4766 }
4765 4767 }
4766 4768 }
4767 4769 #endif /* VOLATILE_FH_TEST */
4768 4770
4769 4771 *cs->statusp = resp->status = NFS4_OK;
4770 4772 out:
4771 4773 DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4772 4774 RENAME4res *, resp);
4773 4775 return;
4774 4776
4775 4777 err_out:
4776 4778 if (onm != converted_onm)
4777 4779 kmem_free(converted_onm, MAXPATHLEN + 1);
4778 4780 if (onm != NULL)
4779 4781 kmem_free(onm, olen);
4780 4782 if (nnm != converted_nnm)
4781 4783 kmem_free(converted_nnm, MAXPATHLEN + 1);
4782 4784 if (nnm != NULL)
4783 4785 kmem_free(nnm, nlen);
4784 4786
4785 4787 if (in_crit_src) nbl_end_crit(srcvp);
4786 4788 if (in_crit_targ) nbl_end_crit(targvp);
4787 4789 if (targvp) VN_RELE(targvp);
4788 4790 if (srcvp) VN_RELE(srcvp);
4789 4791 if (sfp) {
4790 4792 if (sfp_rele_grant_hold) rfs4_clear_dont_grant(sfp);
4791 4793 rfs4_file_rele(sfp);
4792 4794 }
4793 4795 if (fp) {
4794 4796 if (fp_rele_grant_hold) rfs4_clear_dont_grant(fp);
4795 4797 rfs4_file_rele(fp);
4796 4798 }
4797 4799
4798 4800 DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4799 4801 RENAME4res *, resp);
4800 4802 }
4801 4803
4802 4804 /* ARGSUSED */
4803 4805 static void
4804 4806 rfs4_op_renew(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4805 4807 struct compound_state *cs)
4806 4808 {
4807 4809 RENEW4args *args = &argop->nfs_argop4_u.oprenew;
4808 4810 RENEW4res *resp = &resop->nfs_resop4_u.oprenew;
4809 4811 rfs4_client_t *cp;
4810 4812
4811 4813 DTRACE_NFSV4_2(op__renew__start, struct compound_state *, cs,
4812 4814 RENEW4args *, args);
4813 4815
4814 4816 if ((cp = rfs4_findclient_by_id(args->clientid, FALSE)) == NULL) {
4815 4817 *cs->statusp = resp->status =
4816 4818 rfs4_check_clientid(&args->clientid, 0);
4817 4819 goto out;
4818 4820 }
4819 4821
4820 4822 if (rfs4_lease_expired(cp)) {
4821 4823 rfs4_client_rele(cp);
4822 4824 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
4823 4825 goto out;
4824 4826 }
4825 4827
4826 4828 rfs4_update_lease(cp);
4827 4829
4828 4830 mutex_enter(cp->rc_cbinfo.cb_lock);
4829 4831 if (cp->rc_cbinfo.cb_notified_of_cb_path_down == FALSE) {
4830 4832 cp->rc_cbinfo.cb_notified_of_cb_path_down = TRUE;
4831 4833 *cs->statusp = resp->status = NFS4ERR_CB_PATH_DOWN;
4832 4834 } else {
4833 4835 *cs->statusp = resp->status = NFS4_OK;
4834 4836 }
4835 4837 mutex_exit(cp->rc_cbinfo.cb_lock);
4836 4838
4837 4839 rfs4_client_rele(cp);
4838 4840
4839 4841 out:
4840 4842 DTRACE_NFSV4_2(op__renew__done, struct compound_state *, cs,
4841 4843 RENEW4res *, resp);
4842 4844 }
4843 4845
4844 4846 /* ARGSUSED */
4845 4847 static void
4846 4848 rfs4_op_restorefh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
4847 4849 struct compound_state *cs)
4848 4850 {
4849 4851 RESTOREFH4res *resp = &resop->nfs_resop4_u.oprestorefh;
4850 4852
4851 4853 DTRACE_NFSV4_1(op__restorefh__start, struct compound_state *, cs);
4852 4854
4853 4855 /* No need to check cs->access - we are not accessing any object */
4854 4856 if ((cs->saved_vp == NULL) || (cs->saved_fh.nfs_fh4_val == NULL)) {
4855 4857 *cs->statusp = resp->status = NFS4ERR_RESTOREFH;
4856 4858 goto out;
4857 4859 }
4858 4860 if (cs->vp != NULL) {
4859 4861 VN_RELE(cs->vp);
4860 4862 }
4861 4863 cs->vp = cs->saved_vp;
4862 4864 cs->saved_vp = NULL;
4863 4865 cs->exi = cs->saved_exi;
4864 4866 nfs_fh4_copy(&cs->saved_fh, &cs->fh);
4865 4867 *cs->statusp = resp->status = NFS4_OK;
4866 4868 cs->deleg = FALSE;
4867 4869
4868 4870 out:
4869 4871 DTRACE_NFSV4_2(op__restorefh__done, struct compound_state *, cs,
4870 4872 RESTOREFH4res *, resp);
4871 4873 }
4872 4874
4873 4875 /* ARGSUSED */
4874 4876 static void
4875 4877 rfs4_op_savefh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4876 4878 struct compound_state *cs)
4877 4879 {
4878 4880 SAVEFH4res *resp = &resop->nfs_resop4_u.opsavefh;
4879 4881
4880 4882 DTRACE_NFSV4_1(op__savefh__start, struct compound_state *, cs);
4881 4883
4882 4884 /* No need to check cs->access - we are not accessing any object */
4883 4885 if (cs->vp == NULL) {
4884 4886 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4885 4887 goto out;
4886 4888 }
4887 4889 if (cs->saved_vp != NULL) {
4888 4890 VN_RELE(cs->saved_vp);
4889 4891 }
4890 4892 cs->saved_vp = cs->vp;
4891 4893 VN_HOLD(cs->saved_vp);
4892 4894 cs->saved_exi = cs->exi;
4893 4895 /*
4894 4896 * since SAVEFH is fairly rare, don't alloc space for its fh
4895 4897 * unless necessary.
4896 4898 */
4897 4899 if (cs->saved_fh.nfs_fh4_val == NULL) {
4898 4900 cs->saved_fh.nfs_fh4_val = kmem_alloc(NFS4_FHSIZE, KM_SLEEP);
4899 4901 }
4900 4902 nfs_fh4_copy(&cs->fh, &cs->saved_fh);
4901 4903 *cs->statusp = resp->status = NFS4_OK;
4902 4904
4903 4905 out:
4904 4906 DTRACE_NFSV4_2(op__savefh__done, struct compound_state *, cs,
4905 4907 SAVEFH4res *, resp);
4906 4908 }
4907 4909
4908 4910 /*
4909 4911 * rfs4_verify_attr is called when nfsv4 Setattr failed, but we wish to
4910 4912 * return the bitmap of attrs that were set successfully. It is also
4911 4913 * called by Verify/Nverify to test the vattr/vfsstat attrs. It should
4912 4914 * always be called only after rfs4_do_set_attrs().
4913 4915 *
4914 4916 * Verify that the attributes are same as the expected ones. sargp->vap
4915 4917 * and sargp->sbp contain the input attributes as translated from fattr4.
4916 4918 *
4917 4919 * This function verifies only the attrs that correspond to a vattr or
4918 4920 * vfsstat struct. That is because of the extra step needed to get the
4919 4921 * corresponding system structs. Other attributes have already been set or
4920 4922 * verified by do_rfs4_set_attrs.
4921 4923 *
4922 4924 * Return 0 if all attrs match, -1 if some don't, error if error processing.
4923 4925 */
4924 4926 static int
4925 4927 rfs4_verify_attr(struct nfs4_svgetit_arg *sargp,
4926 4928 bitmap4 *resp, struct nfs4_ntov_table *ntovp)
4927 4929 {
4928 4930 int error, ret_error = 0;
4929 4931 int i, k;
4930 4932 uint_t sva_mask = sargp->vap->va_mask;
4931 4933 uint_t vbit;
4932 4934 union nfs4_attr_u *na;
4933 4935 uint8_t *amap;
4934 4936 bool_t getsb = ntovp->vfsstat;
4935 4937
4936 4938 if (sva_mask != 0) {
4937 4939 /*
4938 4940 * Okay to overwrite sargp->vap because we verify based
4939 4941 * on the incoming values.
4940 4942 */
4941 4943 ret_error = VOP_GETATTR(sargp->cs->vp, sargp->vap, 0,
4942 4944 sargp->cs->cr, NULL);
4943 4945 if (ret_error) {
4944 4946 if (resp == NULL)
4945 4947 return (ret_error);
4946 4948 /*
4947 4949 * Must return bitmap of successful attrs
4948 4950 */
4949 4951 sva_mask = 0; /* to prevent checking vap later */
4950 4952 } else {
4951 4953 /*
4952 4954 * Some file systems clobber va_mask. it is probably
4953 4955 * wrong of them to do so, nonethless we practice
4954 4956 * defensive coding.
4955 4957 * See bug id 4276830.
4956 4958 */
4957 4959 sargp->vap->va_mask = sva_mask;
4958 4960 }
4959 4961 }
4960 4962
4961 4963 if (getsb) {
4962 4964 /*
4963 4965 * Now get the superblock and loop on the bitmap, as there is
4964 4966 * no simple way of translating from superblock to bitmap4.
4965 4967 */
4966 4968 ret_error = VFS_STATVFS(sargp->cs->vp->v_vfsp, sargp->sbp);
4967 4969 if (ret_error) {
4968 4970 if (resp == NULL)
4969 4971 goto errout;
4970 4972 getsb = FALSE;
4971 4973 }
4972 4974 }
4973 4975
4974 4976 /*
4975 4977 * Now loop and verify each attribute which getattr returned
4976 4978 * whether it's the same as the input.
4977 4979 */
4978 4980 if (resp == NULL && !getsb && (sva_mask == 0))
4979 4981 goto errout;
4980 4982
4981 4983 na = ntovp->na;
4982 4984 amap = ntovp->amap;
4983 4985 k = 0;
4984 4986 for (i = 0; i < ntovp->attrcnt; i++, na++, amap++) {
4985 4987 k = *amap;
4986 4988 ASSERT(nfs4_ntov_map[k].nval == k);
4987 4989 vbit = nfs4_ntov_map[k].vbit;
4988 4990
4989 4991 /*
4990 4992 * If vattr attribute but VOP_GETATTR failed, or it's
4991 4993 * superblock attribute but VFS_STATVFS failed, skip
4992 4994 */
4993 4995 if (vbit) {
4994 4996 if ((vbit & sva_mask) == 0)
4995 4997 continue;
4996 4998 } else if (!(getsb && nfs4_ntov_map[k].vfsstat)) {
4997 4999 continue;
4998 5000 }
4999 5001 error = (*nfs4_ntov_map[k].sv_getit)(NFS4ATTR_VERIT, sargp, na);
5000 5002 if (resp != NULL) {
5001 5003 if (error)
5002 5004 ret_error = -1; /* not all match */
5003 5005 else /* update response bitmap */
5004 5006 *resp |= nfs4_ntov_map[k].fbit;
5005 5007 continue;
5006 5008 }
5007 5009 if (error) {
5008 5010 ret_error = -1; /* not all match */
5009 5011 break;
5010 5012 }
5011 5013 }
5012 5014 errout:
5013 5015 return (ret_error);
5014 5016 }
5015 5017
5016 5018 /*
5017 5019 * Decode the attribute to be set/verified. If the attr requires a sys op
5018 5020 * (VOP_GETATTR, VFS_VFSSTAT), and the request is to verify, then don't
5019 5021 * call the sv_getit function for it, because the sys op hasn't yet been done.
5020 5022 * Return 0 for success, error code if failed.
5021 5023 *
5022 5024 * Note: the decoded arg is not freed here but in nfs4_ntov_table_free.
5023 5025 */
5024 5026 static int
5025 5027 decode_fattr4_attr(nfs4_attr_cmd_t cmd, struct nfs4_svgetit_arg *sargp,
5026 5028 int k, XDR *xdrp, bitmap4 *resp_bval, union nfs4_attr_u *nap)
5027 5029 {
5028 5030 int error = 0;
5029 5031 bool_t set_later;
5030 5032
5031 5033 sargp->vap->va_mask |= nfs4_ntov_map[k].vbit;
5032 5034
5033 5035 if ((*nfs4_ntov_map[k].xfunc)(xdrp, nap)) {
5034 5036 set_later = nfs4_ntov_map[k].vbit || nfs4_ntov_map[k].vfsstat;
5035 5037 /*
5036 5038 * don't verify yet if a vattr or sb dependent attr,
5037 5039 * because we don't have their sys values yet.
5038 5040 * Will be done later.
5039 5041 */
5040 5042 if (! (set_later && (cmd == NFS4ATTR_VERIT))) {
5041 5043 /*
5042 5044 * ACLs are a special case, since setting the MODE
5043 5045 * conflicts with setting the ACL. We delay setting
5044 5046 * the ACL until all other attributes have been set.
5045 5047 * The ACL gets set in do_rfs4_op_setattr().
5046 5048 */
5047 5049 if (nfs4_ntov_map[k].fbit != FATTR4_ACL_MASK) {
5048 5050 error = (*nfs4_ntov_map[k].sv_getit)(cmd,
5049 5051 sargp, nap);
5050 5052 if (error) {
5051 5053 xdr_free(nfs4_ntov_map[k].xfunc,
5052 5054 (caddr_t)nap);
5053 5055 }
5054 5056 }
5055 5057 }
5056 5058 } else {
5057 5059 #ifdef DEBUG
5058 5060 cmn_err(CE_NOTE, "decode_fattr4_attr: error "
5059 5061 "decoding attribute %d\n", k);
5060 5062 #endif
5061 5063 error = EINVAL;
5062 5064 }
5063 5065 if (!error && resp_bval && !set_later) {
5064 5066 *resp_bval |= nfs4_ntov_map[k].fbit;
5065 5067 }
5066 5068
5067 5069 return (error);
5068 5070 }
5069 5071
5070 5072 /*
5071 5073 * Set vattr based on incoming fattr4 attrs - used by setattr.
5072 5074 * Set response mask. Ignore any values that are not writable vattr attrs.
5073 5075 */
5074 5076 static nfsstat4
5075 5077 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5076 5078 struct nfs4_svgetit_arg *sargp, struct nfs4_ntov_table *ntovp,
5077 5079 nfs4_attr_cmd_t cmd)
5078 5080 {
5079 5081 int error = 0;
5080 5082 int i;
5081 5083 char *attrs = fattrp->attrlist4;
5082 5084 uint32_t attrslen = fattrp->attrlist4_len;
5083 5085 XDR xdr;
5084 5086 nfsstat4 status = NFS4_OK;
5085 5087 vnode_t *vp = cs->vp;
5086 5088 union nfs4_attr_u *na;
5087 5089 uint8_t *amap;
5088 5090
5089 5091 #ifndef lint
5090 5092 /*
5091 5093 * Make sure that maximum attribute number can be expressed as an
5092 5094 * 8 bit quantity.
5093 5095 */
5094 5096 ASSERT(NFS4_MAXNUM_ATTRS <= (UINT8_MAX + 1));
5095 5097 #endif
5096 5098
5097 5099 if (vp == NULL) {
5098 5100 if (resp)
5099 5101 *resp = 0;
5100 5102 return (NFS4ERR_NOFILEHANDLE);
5101 5103 }
5102 5104 if (cs->access == CS_ACCESS_DENIED) {
5103 5105 if (resp)
5104 5106 *resp = 0;
5105 5107 return (NFS4ERR_ACCESS);
5106 5108 }
5107 5109
5108 5110 sargp->op = cmd;
5109 5111 sargp->cs = cs;
5110 5112 sargp->flag = 0; /* may be set later */
5111 5113 sargp->vap->va_mask = 0;
5112 5114 sargp->rdattr_error = NFS4_OK;
5113 5115 sargp->rdattr_error_req = FALSE;
5114 5116 /* sargp->sbp is set by the caller */
5115 5117
5116 5118 xdrmem_create(&xdr, attrs, attrslen, XDR_DECODE);
5117 5119
5118 5120 na = ntovp->na;
5119 5121 amap = ntovp->amap;
5120 5122
5121 5123 /*
5122 5124 * The following loop iterates on the nfs4_ntov_map checking
5123 5125 * if the fbit is set in the requested bitmap.
5124 5126 * If set then we process the arguments using the
5125 5127 * rfs4_fattr4 conversion functions to populate the setattr
5126 5128 * vattr and va_mask. Any settable attrs that are not using vattr
5127 5129 * will be set in this loop.
5128 5130 */
5129 5131 for (i = 0; i < nfs4_ntov_map_size; i++) {
5130 5132 if (!(fattrp->attrmask & nfs4_ntov_map[i].fbit)) {
5131 5133 continue;
5132 5134 }
5133 5135 /*
5134 5136 * If setattr, must be a writable attr.
5135 5137 * If verify/nverify, must be a readable attr.
5136 5138 */
5137 5139 if ((error = (*nfs4_ntov_map[i].sv_getit)(
5138 5140 NFS4ATTR_SUPPORTED, sargp, NULL)) != 0) {
5139 5141 /*
5140 5142 * Client tries to set/verify an
5141 5143 * unsupported attribute, tries to set
5142 5144 * a read only attr or verify a write
5143 5145 * only one - error!
5144 5146 */
5145 5147 break;
5146 5148 }
5147 5149 /*
5148 5150 * Decode the attribute to set/verify
5149 5151 */
5150 5152 error = decode_fattr4_attr(cmd, sargp, nfs4_ntov_map[i].nval,
5151 5153 &xdr, resp ? resp : NULL, na);
5152 5154 if (error)
5153 5155 break;
5154 5156 *amap++ = (uint8_t)nfs4_ntov_map[i].nval;
5155 5157 na++;
5156 5158 (ntovp->attrcnt)++;
5157 5159 if (nfs4_ntov_map[i].vfsstat)
5158 5160 ntovp->vfsstat = TRUE;
5159 5161 }
5160 5162
5161 5163 if (error != 0)
5162 5164 status = (error == ENOTSUP ? NFS4ERR_ATTRNOTSUPP :
5163 5165 puterrno4(error));
5164 5166 /* xdrmem_destroy(&xdrs); */ /* NO-OP */
5165 5167 return (status);
5166 5168 }
5167 5169
5168 5170 static nfsstat4
5169 5171 do_rfs4_op_setattr(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5170 5172 stateid4 *stateid)
5171 5173 {
5172 5174 int error = 0;
5173 5175 struct nfs4_svgetit_arg sarg;
5174 5176 bool_t trunc;
5175 5177
5176 5178 nfsstat4 status = NFS4_OK;
5177 5179 cred_t *cr = cs->cr;
5178 5180 vnode_t *vp = cs->vp;
5179 5181 struct nfs4_ntov_table ntov;
5180 5182 struct statvfs64 sb;
5181 5183 struct vattr bva;
5182 5184 struct flock64 bf;
5183 5185 int in_crit = 0;
5184 5186 uint_t saved_mask = 0;
5185 5187 caller_context_t ct;
5186 5188
5187 5189 *resp = 0;
5188 5190 sarg.sbp = &sb;
5189 5191 sarg.is_referral = B_FALSE;
5190 5192 nfs4_ntov_table_init(&ntov);
5191 5193 status = do_rfs4_set_attrs(resp, fattrp, cs, &sarg, &ntov,
5192 5194 NFS4ATTR_SETIT);
5193 5195 if (status != NFS4_OK) {
5194 5196 /*
5195 5197 * failed set attrs
5196 5198 */
5197 5199 goto done;
5198 5200 }
5199 5201 if ((sarg.vap->va_mask == 0) &&
5200 5202 (! (fattrp->attrmask & FATTR4_ACL_MASK))) {
5201 5203 /*
5202 5204 * no further work to be done
5203 5205 */
5204 5206 goto done;
5205 5207 }
5206 5208
5207 5209 /*
5208 5210 * If we got a request to set the ACL and the MODE, only
5209 5211 * allow changing VSUID, VSGID, and VSVTX. Attempting
5210 5212 * to change any other bits, along with setting an ACL,
5211 5213 * gives NFS4ERR_INVAL.
5212 5214 */
5213 5215 if ((fattrp->attrmask & FATTR4_ACL_MASK) &&
5214 5216 (fattrp->attrmask & FATTR4_MODE_MASK)) {
5215 5217 vattr_t va;
5216 5218
5217 5219 va.va_mask = AT_MODE;
5218 5220 error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
5219 5221 if (error) {
5220 5222 status = puterrno4(error);
5221 5223 goto done;
5222 5224 }
5223 5225 if ((sarg.vap->va_mode ^ va.va_mode) &
5224 5226 ~(VSUID | VSGID | VSVTX)) {
5225 5227 status = NFS4ERR_INVAL;
5226 5228 goto done;
5227 5229 }
5228 5230 }
5229 5231
5230 5232 /* Check stateid only if size has been set */
5231 5233 if (sarg.vap->va_mask & AT_SIZE) {
5232 5234 trunc = (sarg.vap->va_size == 0);
5233 5235 status = rfs4_check_stateid(FWRITE, cs->vp, stateid,
5234 5236 trunc, &cs->deleg, sarg.vap->va_mask & AT_SIZE, &ct);
5235 5237 if (status != NFS4_OK)
5236 5238 goto done;
5237 5239 } else {
5238 5240 ct.cc_sysid = 0;
5239 5241 ct.cc_pid = 0;
5240 5242 ct.cc_caller_id = nfs4_srv_caller_id;
5241 5243 ct.cc_flags = CC_DONTBLOCK;
5242 5244 }
5243 5245
5244 5246 /* XXX start of possible race with delegations */
5245 5247
5246 5248 /*
5247 5249 * We need to specially handle size changes because it is
5248 5250 * possible for the client to create a file with read-only
5249 5251 * modes, but with the file opened for writing. If the client
5250 5252 * then tries to set the file size, e.g. ftruncate(3C),
5251 5253 * fcntl(F_FREESP), the normal access checking done in
5252 5254 * VOP_SETATTR would prevent the client from doing it even though
5253 5255 * it should be allowed to do so. To get around this, we do the
5254 5256 * access checking for ourselves and use VOP_SPACE which doesn't
5255 5257 * do the access checking.
5256 5258 * Also the client should not be allowed to change the file
5257 5259 * size if there is a conflicting non-blocking mandatory lock in
5258 5260 * the region of the change.
5259 5261 */
5260 5262 if (vp->v_type == VREG && (sarg.vap->va_mask & AT_SIZE)) {
5261 5263 u_offset_t offset;
5262 5264 ssize_t length;
5263 5265
5264 5266 /*
5265 5267 * ufs_setattr clears AT_SIZE from vap->va_mask, but
5266 5268 * before returning, sarg.vap->va_mask is used to
5267 5269 * generate the setattr reply bitmap. We also clear
5268 5270 * AT_SIZE below before calling VOP_SPACE. For both
5269 5271 * of these cases, the va_mask needs to be saved here
5270 5272 * and restored after calling VOP_SETATTR.
5271 5273 */
5272 5274 saved_mask = sarg.vap->va_mask;
5273 5275
5274 5276 /*
5275 5277 * Check any possible conflict due to NBMAND locks.
5276 5278 * Get into critical region before VOP_GETATTR, so the
5277 5279 * size attribute is valid when checking conflicts.
5278 5280 */
5279 5281 if (nbl_need_check(vp)) {
5280 5282 nbl_start_crit(vp, RW_READER);
5281 5283 in_crit = 1;
5282 5284 }
5283 5285
5284 5286 bva.va_mask = AT_UID|AT_SIZE;
5285 5287 if (error = VOP_GETATTR(vp, &bva, 0, cr, &ct)) {
5286 5288 status = puterrno4(error);
5287 5289 goto done;
5288 5290 }
5289 5291
5290 5292 if (in_crit) {
5291 5293 if (sarg.vap->va_size < bva.va_size) {
5292 5294 offset = sarg.vap->va_size;
5293 5295 length = bva.va_size - sarg.vap->va_size;
5294 5296 } else {
5295 5297 offset = bva.va_size;
5296 5298 length = sarg.vap->va_size - bva.va_size;
5297 5299 }
5298 5300 if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
5299 5301 &ct)) {
5300 5302 status = NFS4ERR_LOCKED;
5301 5303 goto done;
5302 5304 }
5303 5305 }
5304 5306
5305 5307 if (crgetuid(cr) == bva.va_uid) {
5306 5308 sarg.vap->va_mask &= ~AT_SIZE;
5307 5309 bf.l_type = F_WRLCK;
5308 5310 bf.l_whence = 0;
5309 5311 bf.l_start = (off64_t)sarg.vap->va_size;
5310 5312 bf.l_len = 0;
5311 5313 bf.l_sysid = 0;
5312 5314 bf.l_pid = 0;
5313 5315 error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
5314 5316 (offset_t)sarg.vap->va_size, cr, &ct);
5315 5317 }
5316 5318 }
5317 5319
5318 5320 if (!error && sarg.vap->va_mask != 0)
5319 5321 error = VOP_SETATTR(vp, sarg.vap, sarg.flag, cr, &ct);
5320 5322
5321 5323 /* restore va_mask -- ufs_setattr clears AT_SIZE */
5322 5324 if (saved_mask & AT_SIZE)
5323 5325 sarg.vap->va_mask |= AT_SIZE;
5324 5326
5325 5327 /*
5326 5328 * If an ACL was being set, it has been delayed until now,
5327 5329 * in order to set the mode (via the VOP_SETATTR() above) first.
5328 5330 */
5329 5331 if ((! error) && (fattrp->attrmask & FATTR4_ACL_MASK)) {
5330 5332 int i;
5331 5333
5332 5334 for (i = 0; i < NFS4_MAXNUM_ATTRS; i++)
5333 5335 if (ntov.amap[i] == FATTR4_ACL)
5334 5336 break;
5335 5337 if (i < NFS4_MAXNUM_ATTRS) {
5336 5338 error = (*nfs4_ntov_map[FATTR4_ACL].sv_getit)(
5337 5339 NFS4ATTR_SETIT, &sarg, &ntov.na[i]);
5338 5340 if (error == 0) {
5339 5341 *resp |= FATTR4_ACL_MASK;
5340 5342 } else if (error == ENOTSUP) {
5341 5343 (void) rfs4_verify_attr(&sarg, resp, &ntov);
5342 5344 status = NFS4ERR_ATTRNOTSUPP;
5343 5345 goto done;
5344 5346 }
5345 5347 } else {
5346 5348 NFS4_DEBUG(rfs4_debug,
5347 5349 (CE_NOTE, "do_rfs4_op_setattr: "
5348 5350 "unable to find ACL in fattr4"));
5349 5351 error = EINVAL;
5350 5352 }
5351 5353 }
5352 5354
5353 5355 if (error) {
5354 5356 /* check if a monitor detected a delegation conflict */
5355 5357 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
5356 5358 status = NFS4ERR_DELAY;
5357 5359 else
5358 5360 status = puterrno4(error);
5359 5361
5360 5362 /*
5361 5363 * Set the response bitmap when setattr failed.
5362 5364 * If VOP_SETATTR partially succeeded, test by doing a
5363 5365 * VOP_GETATTR on the object and comparing the data
5364 5366 * to the setattr arguments.
5365 5367 */
5366 5368 (void) rfs4_verify_attr(&sarg, resp, &ntov);
5367 5369 } else {
5368 5370 /*
5369 5371 * Force modified metadata out to stable storage.
5370 5372 */
5371 5373 (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
5372 5374 /*
5373 5375 * Set response bitmap
5374 5376 */
5375 5377 nfs4_vmask_to_nmask_set(sarg.vap->va_mask, resp);
5376 5378 }
5377 5379
5378 5380 /* Return early and already have a NFSv4 error */
5379 5381 done:
5380 5382 /*
5381 5383 * Except for nfs4_vmask_to_nmask_set(), vattr --> fattr
5382 5384 * conversion sets both readable and writeable NFS4 attrs
5383 5385 * for AT_MTIME and AT_ATIME. The line below masks out
5384 5386 * unrequested attrs from the setattr result bitmap. This
5385 5387 * is placed after the done: label to catch the ATTRNOTSUP
5386 5388 * case.
5387 5389 */
5388 5390 *resp &= fattrp->attrmask;
5389 5391
5390 5392 if (in_crit)
5391 5393 nbl_end_crit(vp);
5392 5394
5393 5395 nfs4_ntov_table_free(&ntov, &sarg);
5394 5396
5395 5397 return (status);
5396 5398 }
5397 5399
5398 5400 /* ARGSUSED */
5399 5401 static void
5400 5402 rfs4_op_setattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5401 5403 struct compound_state *cs)
5402 5404 {
5403 5405 SETATTR4args *args = &argop->nfs_argop4_u.opsetattr;
5404 5406 SETATTR4res *resp = &resop->nfs_resop4_u.opsetattr;
5405 5407 bslabel_t *clabel;
5406 5408
5407 5409 DTRACE_NFSV4_2(op__setattr__start, struct compound_state *, cs,
5408 5410 SETATTR4args *, args);
5409 5411
5410 5412 if (cs->vp == NULL) {
5411 5413 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5412 5414 goto out;
5413 5415 }
5414 5416
5415 5417 /*
5416 5418 * If there is an unshared filesystem mounted on this vnode,
5417 5419 * do not allow to setattr on this vnode.
5418 5420 */
5419 5421 if (vn_ismntpt(cs->vp)) {
5420 5422 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5421 5423 goto out;
5422 5424 }
5423 5425
5424 5426 resp->attrsset = 0;
5425 5427
5426 5428 if (rdonly4(req, cs)) {
5427 5429 *cs->statusp = resp->status = NFS4ERR_ROFS;
5428 5430 goto out;
5429 5431 }
5430 5432
5431 5433 /* check label before setting attributes */
5432 5434 if (is_system_labeled()) {
5433 5435 ASSERT(req->rq_label != NULL);
5434 5436 clabel = req->rq_label;
5435 5437 DTRACE_PROBE2(tx__rfs4__log__info__opsetattr__clabel, char *,
5436 5438 "got client label from request(1)",
5437 5439 struct svc_req *, req);
5438 5440 if (!blequal(&l_admin_low->tsl_label, clabel)) {
5439 5441 if (!do_rfs_label_check(clabel, cs->vp,
5440 5442 EQUALITY_CHECK, cs->exi)) {
5441 5443 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5442 5444 goto out;
5443 5445 }
5444 5446 }
5445 5447 }
5446 5448
5447 5449 *cs->statusp = resp->status =
5448 5450 do_rfs4_op_setattr(&resp->attrsset, &args->obj_attributes, cs,
5449 5451 &args->stateid);
5450 5452
5451 5453 out:
5452 5454 DTRACE_NFSV4_2(op__setattr__done, struct compound_state *, cs,
5453 5455 SETATTR4res *, resp);
5454 5456 }
5455 5457
5456 5458 /* ARGSUSED */
5457 5459 static void
5458 5460 rfs4_op_verify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5459 5461 struct compound_state *cs)
5460 5462 {
5461 5463 /*
5462 5464 * verify and nverify are exactly the same, except that nverify
5463 5465 * succeeds when some argument changed, and verify succeeds when
5464 5466 * when none changed.
5465 5467 */
5466 5468
5467 5469 VERIFY4args *args = &argop->nfs_argop4_u.opverify;
5468 5470 VERIFY4res *resp = &resop->nfs_resop4_u.opverify;
5469 5471
5470 5472 int error;
5471 5473 struct nfs4_svgetit_arg sarg;
5472 5474 struct statvfs64 sb;
5473 5475 struct nfs4_ntov_table ntov;
5474 5476
5475 5477 DTRACE_NFSV4_2(op__verify__start, struct compound_state *, cs,
5476 5478 VERIFY4args *, args);
5477 5479
5478 5480 if (cs->vp == NULL) {
5479 5481 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5480 5482 goto out;
5481 5483 }
5482 5484
5483 5485 sarg.sbp = &sb;
5484 5486 sarg.is_referral = B_FALSE;
5485 5487 nfs4_ntov_table_init(&ntov);
5486 5488 resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5487 5489 &sarg, &ntov, NFS4ATTR_VERIT);
5488 5490 if (resp->status != NFS4_OK) {
5489 5491 /*
5490 5492 * do_rfs4_set_attrs will try to verify systemwide attrs,
5491 5493 * so could return -1 for "no match".
5492 5494 */
5493 5495 if (resp->status == -1)
5494 5496 resp->status = NFS4ERR_NOT_SAME;
5495 5497 goto done;
5496 5498 }
5497 5499 error = rfs4_verify_attr(&sarg, NULL, &ntov);
5498 5500 switch (error) {
5499 5501 case 0:
5500 5502 resp->status = NFS4_OK;
5501 5503 break;
5502 5504 case -1:
5503 5505 resp->status = NFS4ERR_NOT_SAME;
5504 5506 break;
5505 5507 default:
5506 5508 resp->status = puterrno4(error);
5507 5509 break;
5508 5510 }
5509 5511 done:
5510 5512 *cs->statusp = resp->status;
5511 5513 nfs4_ntov_table_free(&ntov, &sarg);
5512 5514 out:
5513 5515 DTRACE_NFSV4_2(op__verify__done, struct compound_state *, cs,
5514 5516 VERIFY4res *, resp);
5515 5517 }
5516 5518
5517 5519 /* ARGSUSED */
5518 5520 static void
5519 5521 rfs4_op_nverify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5520 5522 struct compound_state *cs)
5521 5523 {
5522 5524 /*
5523 5525 * verify and nverify are exactly the same, except that nverify
5524 5526 * succeeds when some argument changed, and verify succeeds when
5525 5527 * when none changed.
5526 5528 */
5527 5529
5528 5530 NVERIFY4args *args = &argop->nfs_argop4_u.opnverify;
5529 5531 NVERIFY4res *resp = &resop->nfs_resop4_u.opnverify;
5530 5532
5531 5533 int error;
5532 5534 struct nfs4_svgetit_arg sarg;
5533 5535 struct statvfs64 sb;
5534 5536 struct nfs4_ntov_table ntov;
5535 5537
5536 5538 DTRACE_NFSV4_2(op__nverify__start, struct compound_state *, cs,
5537 5539 NVERIFY4args *, args);
5538 5540
5539 5541 if (cs->vp == NULL) {
5540 5542 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5541 5543 DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5542 5544 NVERIFY4res *, resp);
5543 5545 return;
5544 5546 }
5545 5547 sarg.sbp = &sb;
5546 5548 sarg.is_referral = B_FALSE;
5547 5549 nfs4_ntov_table_init(&ntov);
5548 5550 resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5549 5551 &sarg, &ntov, NFS4ATTR_VERIT);
5550 5552 if (resp->status != NFS4_OK) {
5551 5553 /*
5552 5554 * do_rfs4_set_attrs will try to verify systemwide attrs,
5553 5555 * so could return -1 for "no match".
5554 5556 */
5555 5557 if (resp->status == -1)
5556 5558 resp->status = NFS4_OK;
5557 5559 goto done;
5558 5560 }
5559 5561 error = rfs4_verify_attr(&sarg, NULL, &ntov);
5560 5562 switch (error) {
5561 5563 case 0:
5562 5564 resp->status = NFS4ERR_SAME;
5563 5565 break;
5564 5566 case -1:
5565 5567 resp->status = NFS4_OK;
5566 5568 break;
5567 5569 default:
5568 5570 resp->status = puterrno4(error);
5569 5571 break;
5570 5572 }
5571 5573 done:
5572 5574 *cs->statusp = resp->status;
5573 5575 nfs4_ntov_table_free(&ntov, &sarg);
5574 5576
5575 5577 DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5576 5578 NVERIFY4res *, resp);
5577 5579 }
5578 5580
5579 5581 /*
5580 5582 * XXX - This should live in an NFS header file.
5581 5583 */
5582 5584 #define MAX_IOVECS 12
5583 5585
5584 5586 /* ARGSUSED */
5585 5587 static void
5586 5588 rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5587 5589 struct compound_state *cs)
5588 5590 {
5589 5591 WRITE4args *args = &argop->nfs_argop4_u.opwrite;
5590 5592 WRITE4res *resp = &resop->nfs_resop4_u.opwrite;
5591 5593 int error;
5592 5594 vnode_t *vp;
5593 5595 struct vattr bva;
5594 5596 u_offset_t rlimit;
5595 5597 struct uio uio;
5596 5598 struct iovec iov[MAX_IOVECS];
5597 5599 struct iovec *iovp;
5598 5600 int iovcnt;
5599 5601 int ioflag;
5600 5602 cred_t *savecred, *cr;
5601 5603 bool_t *deleg = &cs->deleg;
5602 5604 nfsstat4 stat;
5603 5605 int in_crit = 0;
5604 5606 caller_context_t ct;
5605 5607 nfs4_srv_t *nsrv4;
5606 5608
5607 5609 DTRACE_NFSV4_2(op__write__start, struct compound_state *, cs,
5608 5610 WRITE4args *, args);
5609 5611
5610 5612 vp = cs->vp;
5611 5613 if (vp == NULL) {
5612 5614 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5613 5615 goto out;
5614 5616 }
5615 5617 if (cs->access == CS_ACCESS_DENIED) {
5616 5618 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5617 5619 goto out;
5618 5620 }
5619 5621
5620 5622 cr = cs->cr;
5621 5623
5622 5624 if ((stat = rfs4_check_stateid(FWRITE, vp, &args->stateid, FALSE,
5623 5625 deleg, TRUE, &ct)) != NFS4_OK) {
5624 5626 *cs->statusp = resp->status = stat;
5625 5627 goto out;
5626 5628 }
5627 5629
5628 5630 /*
5629 5631 * We have to enter the critical region before calling VOP_RWLOCK
5630 5632 * to avoid a deadlock with ufs.
5631 5633 */
5632 5634 if (nbl_need_check(vp)) {
5633 5635 nbl_start_crit(vp, RW_READER);
5634 5636 in_crit = 1;
5635 5637 if (nbl_conflict(vp, NBL_WRITE,
5636 5638 args->offset, args->data_len, 0, &ct)) {
5637 5639 *cs->statusp = resp->status = NFS4ERR_LOCKED;
5638 5640 goto out;
5639 5641 }
5640 5642 }
5641 5643
5642 5644 bva.va_mask = AT_MODE | AT_UID;
5643 5645 error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
5644 5646
5645 5647 /*
5646 5648 * If we can't get the attributes, then we can't do the
5647 5649 * right access checking. So, we'll fail the request.
5648 5650 */
5649 5651 if (error) {
5650 5652 *cs->statusp = resp->status = puterrno4(error);
5651 5653 goto out;
5652 5654 }
5653 5655
5654 5656 if (rdonly4(req, cs)) {
5655 5657 *cs->statusp = resp->status = NFS4ERR_ROFS;
5656 5658 goto out;
5657 5659 }
5658 5660
5659 5661 if (vp->v_type != VREG) {
5660 5662 *cs->statusp = resp->status =
5661 5663 ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
5662 5664 goto out;
5663 5665 }
5664 5666
5665 5667 if (crgetuid(cr) != bva.va_uid &&
5666 5668 (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct))) {
5667 5669 *cs->statusp = resp->status = puterrno4(error);
5668 5670 goto out;
5669 5671 }
5670 5672
5671 5673 if (MANDLOCK(vp, bva.va_mode)) {
5672 5674 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5673 5675 goto out;
5674 5676 }
5675 5677
5676 5678 nsrv4 = nfs4_get_srv();
5677 5679 if (args->data_len == 0) {
5678 5680 *cs->statusp = resp->status = NFS4_OK;
5679 5681 resp->count = 0;
5680 5682 resp->committed = args->stable;
5681 5683 resp->writeverf = nsrv4->write4verf;
5682 5684 goto out;
5683 5685 }
5684 5686
5685 5687 if (args->mblk != NULL) {
5686 5688 mblk_t *m;
5687 5689 uint_t bytes, round_len;
5688 5690
5689 5691 iovcnt = 0;
5690 5692 bytes = 0;
5691 5693 round_len = roundup(args->data_len, BYTES_PER_XDR_UNIT);
5692 5694 for (m = args->mblk;
5693 5695 m != NULL && bytes < round_len;
5694 5696 m = m->b_cont) {
5695 5697 iovcnt++;
5696 5698 bytes += MBLKL(m);
5697 5699 }
5698 5700 #ifdef DEBUG
5699 5701 /* should have ended on an mblk boundary */
5700 5702 if (bytes != round_len) {
5701 5703 printf("bytes=0x%x, round_len=0x%x, req len=0x%x\n",
5702 5704 bytes, round_len, args->data_len);
5703 5705 printf("args=%p, args->mblk=%p, m=%p", (void *)args,
5704 5706 (void *)args->mblk, (void *)m);
5705 5707 ASSERT(bytes == round_len);
5706 5708 }
5707 5709 #endif
5708 5710 if (iovcnt <= MAX_IOVECS) {
5709 5711 iovp = iov;
5710 5712 } else {
5711 5713 iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
5712 5714 }
5713 5715 mblk_to_iov(args->mblk, iovcnt, iovp);
5714 5716 } else if (args->rlist != NULL) {
5715 5717 iovcnt = 1;
5716 5718 iovp = iov;
5717 5719 iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
5718 5720 iovp->iov_len = args->data_len;
5719 5721 } else {
5720 5722 iovcnt = 1;
5721 5723 iovp = iov;
5722 5724 iovp->iov_base = args->data_val;
5723 5725 iovp->iov_len = args->data_len;
5724 5726 }
5725 5727
5726 5728 uio.uio_iov = iovp;
5727 5729 uio.uio_iovcnt = iovcnt;
5728 5730
5729 5731 uio.uio_segflg = UIO_SYSSPACE;
5730 5732 uio.uio_extflg = UIO_COPY_DEFAULT;
5731 5733 uio.uio_loffset = args->offset;
5732 5734 uio.uio_resid = args->data_len;
5733 5735 uio.uio_llimit = curproc->p_fsz_ctl;
5734 5736 rlimit = uio.uio_llimit - args->offset;
5735 5737 if (rlimit < (u_offset_t)uio.uio_resid)
5736 5738 uio.uio_resid = (int)rlimit;
5737 5739
5738 5740 if (args->stable == UNSTABLE4)
5739 5741 ioflag = 0;
5740 5742 else if (args->stable == FILE_SYNC4)
5741 5743 ioflag = FSYNC;
5742 5744 else if (args->stable == DATA_SYNC4)
5743 5745 ioflag = FDSYNC;
5744 5746 else {
5745 5747 if (iovp != iov)
5746 5748 kmem_free(iovp, sizeof (*iovp) * iovcnt);
5747 5749 *cs->statusp = resp->status = NFS4ERR_INVAL;
5748 5750 goto out;
5749 5751 }
5750 5752
5751 5753 /*
5752 5754 * We're changing creds because VM may fault and we need
5753 5755 * the cred of the current thread to be used if quota
5754 5756 * checking is enabled.
5755 5757 */
5756 5758 savecred = curthread->t_cred;
5757 5759 curthread->t_cred = cr;
5758 5760 error = do_io(FWRITE, vp, &uio, ioflag, cr, &ct);
5759 5761 curthread->t_cred = savecred;
5760 5762
5761 5763 if (iovp != iov)
5762 5764 kmem_free(iovp, sizeof (*iovp) * iovcnt);
5763 5765
5764 5766 if (error) {
5765 5767 *cs->statusp = resp->status = puterrno4(error);
5766 5768 goto out;
5767 5769 }
5768 5770
5769 5771 *cs->statusp = resp->status = NFS4_OK;
5770 5772 resp->count = args->data_len - uio.uio_resid;
5771 5773
5772 5774 if (ioflag == 0)
5773 5775 resp->committed = UNSTABLE4;
5774 5776 else
5775 5777 resp->committed = FILE_SYNC4;
5776 5778
5777 5779 resp->writeverf = nsrv4->write4verf;
5778 5780
5779 5781 out:
5780 5782 if (in_crit)
5781 5783 nbl_end_crit(vp);
5782 5784
5783 5785 DTRACE_NFSV4_2(op__write__done, struct compound_state *, cs,
5784 5786 WRITE4res *, resp);
5785 5787 }
5786 5788
5787 5789
5788 5790 /* XXX put in a header file */
5789 5791 extern int sec_svc_getcred(struct svc_req *, cred_t *, caddr_t *, int *);
5790 5792
5791 5793 void
5792 5794 rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi,
5793 5795 struct svc_req *req, cred_t *cr, int *rv)
5794 5796 {
5795 5797 uint_t i;
5796 5798 struct compound_state cs;
5797 5799 nfs4_srv_t *nsrv4;
5798 5800 nfs_export_t *ne = nfs_get_export();
5799 5801
5800 5802 if (rv != NULL)
5801 5803 *rv = 0;
5802 5804 rfs4_init_compound_state(&cs);
5803 5805 /*
5804 5806 * Form a reply tag by copying over the request tag.
5805 5807 */
5806 5808 resp->tag.utf8string_len = args->tag.utf8string_len;
5807 5809 if (args->tag.utf8string_len != 0) {
5808 5810 resp->tag.utf8string_val =
5809 5811 kmem_alloc(args->tag.utf8string_len, KM_SLEEP);
5810 5812 bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
5811 5813 resp->tag.utf8string_len);
5812 5814 } else {
5813 5815 resp->tag.utf8string_val = NULL;
5814 5816 }
5815 5817
5816 5818 cs.statusp = &resp->status;
5817 5819 cs.req = req;
5818 5820 resp->array = NULL;
5819 5821 resp->array_len = 0;
5820 5822
5821 5823 /*
5822 5824 * XXX for now, minorversion should be zero
5823 5825 */
5824 5826 if (args->minorversion != NFS4_MINORVERSION) {
5825 5827 DTRACE_NFSV4_2(compound__start, struct compound_state *,
5826 5828 &cs, COMPOUND4args *, args);
5827 5829 resp->status = NFS4ERR_MINOR_VERS_MISMATCH;
5828 5830 DTRACE_NFSV4_2(compound__done, struct compound_state *,
5829 5831 &cs, COMPOUND4res *, resp);
5830 5832 return;
5831 5833 }
5832 5834
5833 5835 if (args->array_len == 0) {
5834 5836 resp->status = NFS4_OK;
5835 5837 return;
5836 5838 }
5837 5839
5838 5840 ASSERT(exi == NULL);
5839 5841 ASSERT(cr == NULL);
5840 5842
5841 5843 cr = crget();
5842 5844 ASSERT(cr != NULL);
5843 5845
5844 5846 if (sec_svc_getcred(req, cr, &cs.principal, &cs.nfsflavor) == 0) {
5845 5847 DTRACE_NFSV4_2(compound__start, struct compound_state *,
5846 5848 &cs, COMPOUND4args *, args);
5847 5849 crfree(cr);
5848 5850 DTRACE_NFSV4_2(compound__done, struct compound_state *,
5849 5851 &cs, COMPOUND4res *, resp);
5850 5852 svcerr_badcred(req->rq_xprt);
5851 5853 if (rv != NULL)
5852 5854 *rv = 1;
5853 5855 return;
5854 5856 }
5855 5857 resp->array_len = args->array_len;
5856 5858 resp->array = kmem_zalloc(args->array_len * sizeof (nfs_resop4),
5857 5859 KM_SLEEP);
5858 5860
5859 5861 cs.basecr = cr;
5860 5862 nsrv4 = nfs4_get_srv();
5861 5863
5862 5864 DTRACE_NFSV4_2(compound__start, struct compound_state *, &cs,
5863 5865 COMPOUND4args *, args);
5864 5866
5865 5867 /*
5866 5868 * For now, NFS4 compound processing must be protected by
5867 5869 * exported_lock because it can access more than one exportinfo
5868 5870 * per compound and share/unshare can now change multiple
5869 5871 * exinfo structs. The NFS2/3 code only refs 1 exportinfo
5870 5872 * per proc (excluding public exinfo), and exi_count design
5871 5873 * is sufficient to protect concurrent execution of NFS2/3
5872 5874 * ops along with unexport. This lock will be removed as
5873 5875 * part of the NFSv4 phase 2 namespace redesign work.
5874 5876 */
5875 5877 rw_enter(&ne->exported_lock, RW_READER);
5876 5878
5877 5879 /*
5878 5880 * If this is the first compound we've seen, we need to start all
5879 5881 * new instances' grace periods.
5880 5882 */
5881 5883 if (nsrv4->seen_first_compound == 0) {
5882 5884 rfs4_grace_start_new(nsrv4);
5883 5885 /*
5884 5886 * This must be set after rfs4_grace_start_new(), otherwise
|
↓ open down ↓ |
2055 lines elided |
↑ open up ↑ |
5885 5887 * another thread could proceed past here before the former
5886 5888 * is finished.
5887 5889 */
5888 5890 nsrv4->seen_first_compound = 1;
5889 5891 }
5890 5892
5891 5893 for (i = 0; i < args->array_len && cs.cont; i++) {
5892 5894 nfs_argop4 *argop;
5893 5895 nfs_resop4 *resop;
5894 5896 uint_t op;
5897 + kstat_named_t *stat = ne->ne_globals->rfsproccnt[NFS_V4];
5895 5898
5896 5899 argop = &args->array[i];
5897 5900 resop = &resp->array[i];
5898 5901 resop->resop = argop->argop;
5899 5902 op = (uint_t)resop->resop;
5900 5903
5901 5904 if (op < rfsv4disp_cnt) {
5902 5905 /*
5903 5906 * Count the individual ops here; NULL and COMPOUND
5904 5907 * are counted in common_dispatch()
5905 5908 */
5906 - rfsproccnt_v4_ptr[op].value.ui64++;
5909 + stat[op].value.ui64++;
5907 5910
5908 5911 NFS4_DEBUG(rfs4_debug > 1,
5909 5912 (CE_NOTE, "Executing %s", rfs4_op_string[op]));
5910 5913 (*rfsv4disptab[op].dis_proc)(argop, resop, req, &cs);
5911 5914 NFS4_DEBUG(rfs4_debug > 1, (CE_NOTE, "%s returned %d",
5912 5915 rfs4_op_string[op], *cs.statusp));
5913 5916 if (*cs.statusp != NFS4_OK)
5914 5917 cs.cont = FALSE;
5915 5918 } else {
5916 5919 /*
5917 5920 * This is effectively dead code since XDR code
5918 5921 * will have already returned BADXDR if op doesn't
5919 5922 * decode to legal value. This only done for a
5920 5923 * day when XDR code doesn't verify v4 opcodes.
5921 5924 */
5922 5925 op = OP_ILLEGAL;
5923 - rfsproccnt_v4_ptr[OP_ILLEGAL_IDX].value.ui64++;
5926 + stat[OP_ILLEGAL_IDX].value.ui64++;
5924 5927
5925 5928 rfs4_op_illegal(argop, resop, req, &cs);
5926 5929 cs.cont = FALSE;
5927 5930 }
5928 5931
5929 5932 /*
5930 5933 * If not at last op, and if we are to stop, then
5931 5934 * compact the results array.
5932 5935 */
5933 5936 if ((i + 1) < args->array_len && !cs.cont) {
5934 5937 nfs_resop4 *new_res = kmem_alloc(
5935 5938 (i+1) * sizeof (nfs_resop4), KM_SLEEP);
5936 5939 bcopy(resp->array,
5937 5940 new_res, (i+1) * sizeof (nfs_resop4));
5938 5941 kmem_free(resp->array,
5939 5942 args->array_len * sizeof (nfs_resop4));
5940 5943
5941 5944 resp->array_len = i + 1;
5942 5945 resp->array = new_res;
5943 5946 }
5944 5947 }
5945 5948
5946 5949 rw_exit(&ne->exported_lock);
5947 5950
5948 5951 /*
5949 5952 * clear exportinfo and vnode fields from compound_state before dtrace
5950 5953 * probe, to avoid tracing residual values for path and share path.
5951 5954 */
5952 5955 if (cs.vp)
5953 5956 VN_RELE(cs.vp);
5954 5957 if (cs.saved_vp)
5955 5958 VN_RELE(cs.saved_vp);
5956 5959 cs.exi = cs.saved_exi = NULL;
5957 5960 cs.vp = cs.saved_vp = NULL;
5958 5961
5959 5962 DTRACE_NFSV4_2(compound__done, struct compound_state *, &cs,
5960 5963 COMPOUND4res *, resp);
5961 5964
5962 5965 if (cs.saved_fh.nfs_fh4_val)
5963 5966 kmem_free(cs.saved_fh.nfs_fh4_val, NFS4_FHSIZE);
5964 5967
5965 5968 if (cs.basecr)
5966 5969 crfree(cs.basecr);
5967 5970 if (cs.cr)
5968 5971 crfree(cs.cr);
5969 5972 /*
5970 5973 * done with this compound request, free the label
5971 5974 */
5972 5975
5973 5976 if (req->rq_label != NULL) {
5974 5977 kmem_free(req->rq_label, sizeof (bslabel_t));
5975 5978 req->rq_label = NULL;
5976 5979 }
5977 5980 }
5978 5981
5979 5982 /*
5980 5983 * XXX because of what appears to be duplicate calls to rfs4_compound_free
5981 5984 * XXX zero out the tag and array values. Need to investigate why the
5982 5985 * XXX calls occur, but at least prevent the panic for now.
5983 5986 */
5984 5987 void
5985 5988 rfs4_compound_free(COMPOUND4res *resp)
5986 5989 {
5987 5990 uint_t i;
5988 5991
5989 5992 if (resp->tag.utf8string_val) {
5990 5993 UTF8STRING_FREE(resp->tag)
5991 5994 }
5992 5995
5993 5996 for (i = 0; i < resp->array_len; i++) {
5994 5997 nfs_resop4 *resop;
5995 5998 uint_t op;
5996 5999
5997 6000 resop = &resp->array[i];
5998 6001 op = (uint_t)resop->resop;
5999 6002 if (op < rfsv4disp_cnt) {
6000 6003 (*rfsv4disptab[op].dis_resfree)(resop);
6001 6004 }
6002 6005 }
6003 6006 if (resp->array != NULL) {
6004 6007 kmem_free(resp->array, resp->array_len * sizeof (nfs_resop4));
6005 6008 }
6006 6009 }
6007 6010
6008 6011 /*
6009 6012 * Process the value of the compound request rpc flags, as a bit-AND
6010 6013 * of the individual per-op flags (idempotent, allowork, publicfh_ok)
6011 6014 */
6012 6015 void
6013 6016 rfs4_compound_flagproc(COMPOUND4args *args, int *flagp)
6014 6017 {
6015 6018 int i;
6016 6019 int flag = RPC_ALL;
6017 6020
6018 6021 for (i = 0; flag && i < args->array_len; i++) {
6019 6022 uint_t op;
6020 6023
6021 6024 op = (uint_t)args->array[i].argop;
6022 6025
6023 6026 if (op < rfsv4disp_cnt)
6024 6027 flag &= rfsv4disptab[op].dis_flags;
6025 6028 else
6026 6029 flag = 0;
6027 6030 }
6028 6031 *flagp = flag;
6029 6032 }
6030 6033
6031 6034 nfsstat4
6032 6035 rfs4_client_sysid(rfs4_client_t *cp, sysid_t *sp)
6033 6036 {
6034 6037 nfsstat4 e;
6035 6038
6036 6039 rfs4_dbe_lock(cp->rc_dbe);
6037 6040
6038 6041 if (cp->rc_sysidt != LM_NOSYSID) {
6039 6042 *sp = cp->rc_sysidt;
6040 6043 e = NFS4_OK;
6041 6044
6042 6045 } else if ((cp->rc_sysidt = lm_alloc_sysidt()) != LM_NOSYSID) {
6043 6046 *sp = cp->rc_sysidt;
6044 6047 e = NFS4_OK;
6045 6048
6046 6049 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
6047 6050 "rfs4_client_sysid: allocated 0x%x\n", *sp));
6048 6051 } else
6049 6052 e = NFS4ERR_DELAY;
6050 6053
6051 6054 rfs4_dbe_unlock(cp->rc_dbe);
6052 6055 return (e);
6053 6056 }
6054 6057
#if defined(DEBUG) && !defined(lint)
/*
 * Debug helper: log one record-lock request (operation, lock type, range
 * and pid) through cmn_err().  A zero l_len is printed as all ones.
 */
static void
lock_print(char *str, int operation, struct flock64 *flk)
{
	char *opname;
	char *tname;
	longlong_t span;

	switch (operation) {
	case F_GETLK:
		opname = "F_GETLK";
		break;
	case F_SETLK:
		opname = "F_SETLK";
		break;
	case F_SETLK_NBMAND:
		opname = "F_SETLK_NBMAND";
		break;
	default:
		opname = "F_UNKNOWN";
		break;
	}

	switch (flk->l_type) {
	case F_UNLCK:
		tname = "F_UNLCK";
		break;
	case F_RDLCK:
		tname = "F_RDLCK";
		break;
	case F_WRLCK:
		tname = "F_WRLCK";
		break;
	default:
		tname = "F_UNKNOWN";
		break;
	}

	ASSERT(flk->l_whence == 0);

	span = flk->l_len ? (longlong_t)flk->l_len : ~0LL;
	cmn_err(CE_NOTE, "%s: %s, type = %s, off = %llx len = %llx pid = %d",
	    str, opname, tname, (longlong_t)flk->l_start, span, flk->l_pid);
}

/* Call lock_print() only when the debug condition d is set. */
#define	LOCK_PRINT(d, s, t, f)	if (d) lock_print(s, t, f)
#else
/* Non-debug (or lint) builds: lock tracing compiles away. */
#define	LOCK_PRINT(d, s, t, f)
#endif
6091 6094
6092 6095 /*ARGSUSED*/
6093 6096 static bool_t
6094 6097 creds_ok(cred_set_t cr_set, struct svc_req *req, struct compound_state *cs)
6095 6098 {
6096 6099 return (TRUE);
6097 6100 }
6098 6101
6099 6102 /*
6100 6103 * Look up the pathname using the vp in cs as the directory vnode.
6101 6104 * cs->vp will be the vnode for the file on success
6102 6105 */
6103 6106
6104 6107 static nfsstat4
6105 6108 rfs4_lookup(component4 *component, struct svc_req *req,
6106 6109 struct compound_state *cs)
6107 6110 {
6108 6111 char *nm;
6109 6112 uint32_t len;
6110 6113 nfsstat4 status;
6111 6114 struct sockaddr *ca;
6112 6115 char *name;
6113 6116
6114 6117 if (cs->vp == NULL) {
6115 6118 return (NFS4ERR_NOFILEHANDLE);
6116 6119 }
6117 6120 if (cs->vp->v_type != VDIR) {
6118 6121 return (NFS4ERR_NOTDIR);
6119 6122 }
6120 6123
6121 6124 status = utf8_dir_verify(component);
6122 6125 if (status != NFS4_OK)
6123 6126 return (status);
6124 6127
6125 6128 nm = utf8_to_fn(component, &len, NULL);
6126 6129 if (nm == NULL) {
6127 6130 return (NFS4ERR_INVAL);
6128 6131 }
6129 6132
6130 6133 if (len > MAXNAMELEN) {
6131 6134 kmem_free(nm, len);
6132 6135 return (NFS4ERR_NAMETOOLONG);
6133 6136 }
6134 6137
6135 6138 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6136 6139 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6137 6140 MAXPATHLEN + 1);
6138 6141
6139 6142 if (name == NULL) {
6140 6143 kmem_free(nm, len);
6141 6144 return (NFS4ERR_INVAL);
6142 6145 }
6143 6146
6144 6147 status = do_rfs4_op_lookup(name, req, cs);
6145 6148
6146 6149 if (name != nm)
6147 6150 kmem_free(name, MAXPATHLEN + 1);
6148 6151
6149 6152 kmem_free(nm, len);
6150 6153
6151 6154 return (status);
6152 6155 }
6153 6156
6154 6157 static nfsstat4
6155 6158 rfs4_lookupfile(component4 *component, struct svc_req *req,
6156 6159 struct compound_state *cs, uint32_t access, change_info4 *cinfo)
6157 6160 {
6158 6161 nfsstat4 status;
6159 6162 vnode_t *dvp = cs->vp;
6160 6163 vattr_t bva, ava, fva;
6161 6164 int error;
6162 6165
6163 6166 /* Get "before" change value */
6164 6167 bva.va_mask = AT_CTIME|AT_SEQ;
6165 6168 error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6166 6169 if (error)
6167 6170 return (puterrno4(error));
6168 6171
6169 6172 /* rfs4_lookup may VN_RELE directory */
6170 6173 VN_HOLD(dvp);
6171 6174
6172 6175 status = rfs4_lookup(component, req, cs);
6173 6176 if (status != NFS4_OK) {
6174 6177 VN_RELE(dvp);
6175 6178 return (status);
6176 6179 }
6177 6180
6178 6181 /*
6179 6182 * Get "after" change value, if it fails, simply return the
6180 6183 * before value.
6181 6184 */
6182 6185 ava.va_mask = AT_CTIME|AT_SEQ;
6183 6186 if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6184 6187 ava.va_ctime = bva.va_ctime;
6185 6188 ava.va_seq = 0;
6186 6189 }
6187 6190 VN_RELE(dvp);
6188 6191
6189 6192 /*
6190 6193 * Validate the file is a file
6191 6194 */
6192 6195 fva.va_mask = AT_TYPE|AT_MODE;
6193 6196 error = VOP_GETATTR(cs->vp, &fva, 0, cs->cr, NULL);
6194 6197 if (error)
6195 6198 return (puterrno4(error));
6196 6199
6197 6200 if (fva.va_type != VREG) {
6198 6201 if (fva.va_type == VDIR)
6199 6202 return (NFS4ERR_ISDIR);
6200 6203 if (fva.va_type == VLNK)
6201 6204 return (NFS4ERR_SYMLINK);
6202 6205 return (NFS4ERR_INVAL);
6203 6206 }
6204 6207
6205 6208 NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime);
6206 6209 NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6207 6210
6208 6211 /*
6209 6212 * It is undefined if VOP_LOOKUP will change va_seq, so
6210 6213 * cinfo.atomic = TRUE only if we have
6211 6214 * non-zero va_seq's, and they have not changed.
6212 6215 */
6213 6216 if (bva.va_seq && ava.va_seq && ava.va_seq == bva.va_seq)
6214 6217 cinfo->atomic = TRUE;
6215 6218 else
6216 6219 cinfo->atomic = FALSE;
6217 6220
6218 6221 /* Check for mandatory locking */
6219 6222 cs->mandlock = MANDLOCK(cs->vp, fva.va_mode);
6220 6223 return (check_open_access(access, cs, req));
6221 6224 }
6222 6225
6223 6226 static nfsstat4
6224 6227 create_vnode(vnode_t *dvp, char *nm, vattr_t *vap, createmode4 mode,
6225 6228 cred_t *cr, vnode_t **vpp, bool_t *created)
6226 6229 {
6227 6230 int error;
6228 6231 nfsstat4 status = NFS4_OK;
6229 6232 vattr_t va;
6230 6233
6231 6234 tryagain:
6232 6235
6233 6236 /*
6234 6237 * The file open mode used is VWRITE. If the client needs
6235 6238 * some other semantic, then it should do the access checking
6236 6239 * itself. It would have been nice to have the file open mode
6237 6240 * passed as part of the arguments.
6238 6241 */
6239 6242
6240 6243 *created = TRUE;
6241 6244 error = VOP_CREATE(dvp, nm, vap, EXCL, VWRITE, vpp, cr, 0, NULL, NULL);
6242 6245
6243 6246 if (error) {
6244 6247 *created = FALSE;
6245 6248
6246 6249 /*
6247 6250 * If we got something other than file already exists
6248 6251 * then just return this error. Otherwise, we got
6249 6252 * EEXIST. If we were doing a GUARDED create, then
6250 6253 * just return this error. Otherwise, we need to
6251 6254 * make sure that this wasn't a duplicate of an
6252 6255 * exclusive create request.
6253 6256 *
6254 6257 * The assumption is made that a non-exclusive create
6255 6258 * request will never return EEXIST.
6256 6259 */
6257 6260
6258 6261 if (error != EEXIST || mode == GUARDED4) {
6259 6262 status = puterrno4(error);
6260 6263 return (status);
6261 6264 }
6262 6265 error = VOP_LOOKUP(dvp, nm, vpp, NULL, 0, NULL, cr,
6263 6266 NULL, NULL, NULL);
6264 6267
6265 6268 if (error) {
6266 6269 /*
6267 6270 * We couldn't find the file that we thought that
6268 6271 * we just created. So, we'll just try creating
6269 6272 * it again.
6270 6273 */
6271 6274 if (error == ENOENT)
6272 6275 goto tryagain;
6273 6276
6274 6277 status = puterrno4(error);
6275 6278 return (status);
6276 6279 }
6277 6280
6278 6281 if (mode == UNCHECKED4) {
6279 6282 /* existing object must be regular file */
6280 6283 if ((*vpp)->v_type != VREG) {
6281 6284 if ((*vpp)->v_type == VDIR)
6282 6285 status = NFS4ERR_ISDIR;
6283 6286 else if ((*vpp)->v_type == VLNK)
6284 6287 status = NFS4ERR_SYMLINK;
6285 6288 else
6286 6289 status = NFS4ERR_INVAL;
6287 6290 VN_RELE(*vpp);
6288 6291 return (status);
6289 6292 }
6290 6293
6291 6294 return (NFS4_OK);
6292 6295 }
6293 6296
6294 6297 /* Check for duplicate request */
6295 6298 va.va_mask = AT_MTIME;
6296 6299 error = VOP_GETATTR(*vpp, &va, 0, cr, NULL);
6297 6300 if (!error) {
6298 6301 /* We found the file */
6299 6302 const timestruc_t *mtime = &vap->va_mtime;
6300 6303
6301 6304 if (va.va_mtime.tv_sec != mtime->tv_sec ||
6302 6305 va.va_mtime.tv_nsec != mtime->tv_nsec) {
6303 6306 /* but its not our creation */
6304 6307 VN_RELE(*vpp);
6305 6308 return (NFS4ERR_EXIST);
6306 6309 }
6307 6310 *created = TRUE; /* retrans of create == created */
6308 6311 return (NFS4_OK);
6309 6312 }
6310 6313 VN_RELE(*vpp);
6311 6314 return (NFS4ERR_EXIST);
6312 6315 }
6313 6316
6314 6317 return (NFS4_OK);
6315 6318 }
6316 6319
6317 6320 static nfsstat4
6318 6321 check_open_access(uint32_t access, struct compound_state *cs,
6319 6322 struct svc_req *req)
6320 6323 {
6321 6324 int error;
6322 6325 vnode_t *vp;
6323 6326 bool_t readonly;
6324 6327 cred_t *cr = cs->cr;
6325 6328
6326 6329 /* For now we don't allow mandatory locking as per V2/V3 */
6327 6330 if (cs->access == CS_ACCESS_DENIED || cs->mandlock) {
6328 6331 return (NFS4ERR_ACCESS);
6329 6332 }
6330 6333
6331 6334 vp = cs->vp;
6332 6335 ASSERT(cr != NULL && vp->v_type == VREG);
6333 6336
6334 6337 /*
6335 6338 * If the file system is exported read only and we are trying
6336 6339 * to open for write, then return NFS4ERR_ROFS
6337 6340 */
6338 6341
6339 6342 readonly = rdonly4(req, cs);
6340 6343
6341 6344 if ((access & OPEN4_SHARE_ACCESS_WRITE) && readonly)
6342 6345 return (NFS4ERR_ROFS);
6343 6346
6344 6347 if (access & OPEN4_SHARE_ACCESS_READ) {
6345 6348 if ((VOP_ACCESS(vp, VREAD, 0, cr, NULL) != 0) &&
6346 6349 (VOP_ACCESS(vp, VEXEC, 0, cr, NULL) != 0)) {
6347 6350 return (NFS4ERR_ACCESS);
6348 6351 }
6349 6352 }
6350 6353
6351 6354 if (access & OPEN4_SHARE_ACCESS_WRITE) {
6352 6355 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
6353 6356 if (error)
6354 6357 return (NFS4ERR_ACCESS);
6355 6358 }
6356 6359
6357 6360 return (NFS4_OK);
6358 6361 }
6359 6362
6360 6363 static nfsstat4
6361 6364 rfs4_createfile(OPEN4args *args, struct svc_req *req, struct compound_state *cs,
6362 6365 change_info4 *cinfo, bitmap4 *attrset, clientid4 clientid)
6363 6366 {
6364 6367 struct nfs4_svgetit_arg sarg;
6365 6368 struct nfs4_ntov_table ntov;
6366 6369
6367 6370 bool_t ntov_table_init = FALSE;
6368 6371 struct statvfs64 sb;
6369 6372 nfsstat4 status;
6370 6373 vnode_t *vp;
6371 6374 vattr_t bva, ava, iva, cva, *vap;
6372 6375 vnode_t *dvp;
6373 6376 timespec32_t *mtime;
6374 6377 char *nm = NULL;
6375 6378 uint_t buflen;
6376 6379 bool_t created;
6377 6380 bool_t setsize = FALSE;
6378 6381 len_t reqsize;
6379 6382 int error;
6380 6383 bool_t trunc;
6381 6384 caller_context_t ct;
6382 6385 component4 *component;
6383 6386 bslabel_t *clabel;
6384 6387 struct sockaddr *ca;
6385 6388 char *name = NULL;
6386 6389
6387 6390 sarg.sbp = &sb;
6388 6391 sarg.is_referral = B_FALSE;
6389 6392
6390 6393 dvp = cs->vp;
6391 6394
6392 6395 /* Check if the file system is read only */
6393 6396 if (rdonly4(req, cs))
6394 6397 return (NFS4ERR_ROFS);
6395 6398
6396 6399 /* check the label of including directory */
6397 6400 if (is_system_labeled()) {
6398 6401 ASSERT(req->rq_label != NULL);
6399 6402 clabel = req->rq_label;
6400 6403 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
6401 6404 "got client label from request(1)",
6402 6405 struct svc_req *, req);
6403 6406 if (!blequal(&l_admin_low->tsl_label, clabel)) {
6404 6407 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
6405 6408 cs->exi)) {
6406 6409 return (NFS4ERR_ACCESS);
6407 6410 }
6408 6411 }
6409 6412 }
6410 6413
6411 6414 /*
6412 6415 * Get the last component of path name in nm. cs will reference
6413 6416 * the including directory on success.
6414 6417 */
6415 6418 component = &args->open_claim4_u.file;
6416 6419 status = utf8_dir_verify(component);
6417 6420 if (status != NFS4_OK)
6418 6421 return (status);
6419 6422
6420 6423 nm = utf8_to_fn(component, &buflen, NULL);
6421 6424
6422 6425 if (nm == NULL)
6423 6426 return (NFS4ERR_RESOURCE);
6424 6427
6425 6428 if (buflen > MAXNAMELEN) {
6426 6429 kmem_free(nm, buflen);
6427 6430 return (NFS4ERR_NAMETOOLONG);
6428 6431 }
6429 6432
6430 6433 bva.va_mask = AT_TYPE|AT_CTIME|AT_SEQ;
6431 6434 error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6432 6435 if (error) {
6433 6436 kmem_free(nm, buflen);
6434 6437 return (puterrno4(error));
6435 6438 }
6436 6439
6437 6440 if (bva.va_type != VDIR) {
6438 6441 kmem_free(nm, buflen);
6439 6442 return (NFS4ERR_NOTDIR);
6440 6443 }
6441 6444
6442 6445 NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime)
6443 6446
6444 6447 switch (args->mode) {
6445 6448 case GUARDED4:
6446 6449 /*FALLTHROUGH*/
6447 6450 case UNCHECKED4:
6448 6451 nfs4_ntov_table_init(&ntov);
6449 6452 ntov_table_init = TRUE;
6450 6453
6451 6454 *attrset = 0;
6452 6455 status = do_rfs4_set_attrs(attrset,
6453 6456 &args->createhow4_u.createattrs,
6454 6457 cs, &sarg, &ntov, NFS4ATTR_SETIT);
6455 6458
6456 6459 if (status == NFS4_OK && (sarg.vap->va_mask & AT_TYPE) &&
6457 6460 sarg.vap->va_type != VREG) {
6458 6461 if (sarg.vap->va_type == VDIR)
6459 6462 status = NFS4ERR_ISDIR;
6460 6463 else if (sarg.vap->va_type == VLNK)
6461 6464 status = NFS4ERR_SYMLINK;
6462 6465 else
6463 6466 status = NFS4ERR_INVAL;
6464 6467 }
6465 6468
6466 6469 if (status != NFS4_OK) {
6467 6470 kmem_free(nm, buflen);
6468 6471 nfs4_ntov_table_free(&ntov, &sarg);
6469 6472 *attrset = 0;
6470 6473 return (status);
6471 6474 }
6472 6475
6473 6476 vap = sarg.vap;
6474 6477 vap->va_type = VREG;
6475 6478 vap->va_mask |= AT_TYPE;
6476 6479
6477 6480 if ((vap->va_mask & AT_MODE) == 0) {
6478 6481 vap->va_mask |= AT_MODE;
6479 6482 vap->va_mode = (mode_t)0600;
6480 6483 }
6481 6484
6482 6485 if (vap->va_mask & AT_SIZE) {
6483 6486
6484 6487 /* Disallow create with a non-zero size */
6485 6488
6486 6489 if ((reqsize = sarg.vap->va_size) != 0) {
6487 6490 kmem_free(nm, buflen);
6488 6491 nfs4_ntov_table_free(&ntov, &sarg);
6489 6492 *attrset = 0;
6490 6493 return (NFS4ERR_INVAL);
6491 6494 }
6492 6495 setsize = TRUE;
6493 6496 }
6494 6497 break;
6495 6498
6496 6499 case EXCLUSIVE4:
6497 6500 /* prohibit EXCL create of named attributes */
6498 6501 if (dvp->v_flag & V_XATTRDIR) {
6499 6502 kmem_free(nm, buflen);
6500 6503 *attrset = 0;
6501 6504 return (NFS4ERR_INVAL);
6502 6505 }
6503 6506
6504 6507 cva.va_mask = AT_TYPE | AT_MTIME | AT_MODE;
6505 6508 cva.va_type = VREG;
6506 6509 /*
6507 6510 * Ensure no time overflows. Assumes underlying
6508 6511 * filesystem supports at least 32 bits.
6509 6512 * Truncate nsec to usec resolution to allow valid
6510 6513 * compares even if the underlying filesystem truncates.
6511 6514 */
6512 6515 mtime = (timespec32_t *)&args->createhow4_u.createverf;
6513 6516 cva.va_mtime.tv_sec = mtime->tv_sec % TIME32_MAX;
6514 6517 cva.va_mtime.tv_nsec = (mtime->tv_nsec / 1000) * 1000;
6515 6518 cva.va_mode = (mode_t)0;
6516 6519 vap = &cva;
6517 6520
6518 6521 /*
6519 6522 * For EXCL create, attrset is set to the server attr
6520 6523 * used to cache the client's verifier.
6521 6524 */
6522 6525 *attrset = FATTR4_TIME_MODIFY_MASK;
6523 6526 break;
6524 6527 }
6525 6528
6526 6529 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6527 6530 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6528 6531 MAXPATHLEN + 1);
6529 6532
6530 6533 if (name == NULL) {
6531 6534 kmem_free(nm, buflen);
6532 6535 return (NFS4ERR_SERVERFAULT);
6533 6536 }
6534 6537
6535 6538 status = create_vnode(dvp, name, vap, args->mode,
6536 6539 cs->cr, &vp, &created);
6537 6540 if (nm != name)
6538 6541 kmem_free(name, MAXPATHLEN + 1);
6539 6542 kmem_free(nm, buflen);
6540 6543
6541 6544 if (status != NFS4_OK) {
6542 6545 if (ntov_table_init)
6543 6546 nfs4_ntov_table_free(&ntov, &sarg);
6544 6547 *attrset = 0;
6545 6548 return (status);
6546 6549 }
6547 6550
6548 6551 trunc = (setsize && !created);
6549 6552
6550 6553 if (args->mode != EXCLUSIVE4) {
6551 6554 bitmap4 createmask = args->createhow4_u.createattrs.attrmask;
6552 6555
6553 6556 /*
6554 6557 * True verification that object was created with correct
6555 6558 * attrs is impossible. The attrs could have been changed
6556 6559 * immediately after object creation. If attributes did
6557 6560 * not verify, the only recourse for the server is to
6558 6561 * destroy the object. Maybe if some attrs (like gid)
6559 6562 * are set incorrectly, the object should be destroyed;
6560 6563 * however, seems bad as a default policy. Do we really
6561 6564 * want to destroy an object over one of the times not
6562 6565 * verifying correctly? For these reasons, the server
6563 6566 * currently sets bits in attrset for createattrs
6564 6567 * that were set; however, no verification is done.
6565 6568 *
6566 6569 * vmask_to_nmask accounts for vattr bits set on create
6567 6570 * [do_rfs4_set_attrs() only sets resp bits for
6568 6571 * non-vattr/vfs bits.]
6569 6572 * Mask off any bits we set by default so as not to return
6570 6573 * more attrset bits than were requested in createattrs
6571 6574 */
6572 6575 if (created) {
6573 6576 nfs4_vmask_to_nmask(sarg.vap->va_mask, attrset);
6574 6577 *attrset &= createmask;
6575 6578 } else {
6576 6579 /*
6577 6580 * We did not create the vnode (we tried but it
6578 6581 * already existed). In this case, the only createattr
6579 6582 * that the spec allows the server to set is size,
6580 6583 * and even then, it can only be set if it is 0.
6581 6584 */
6582 6585 *attrset = 0;
6583 6586 if (trunc)
6584 6587 *attrset = FATTR4_SIZE_MASK;
6585 6588 }
6586 6589 }
6587 6590 if (ntov_table_init)
6588 6591 nfs4_ntov_table_free(&ntov, &sarg);
6589 6592
6590 6593 /*
6591 6594 * Get the initial "after" sequence number, if it fails,
6592 6595 * set to zero, time to before.
6593 6596 */
6594 6597 iva.va_mask = AT_CTIME|AT_SEQ;
6595 6598 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL)) {
6596 6599 iva.va_seq = 0;
6597 6600 iva.va_ctime = bva.va_ctime;
6598 6601 }
6599 6602
6600 6603 /*
6601 6604 * create_vnode attempts to create the file exclusive,
6602 6605 * if it already exists the VOP_CREATE will fail and
6603 6606 * may not increase va_seq. It is atomic if
6604 6607 * we haven't changed the directory, but if it has changed
6605 6608 * we don't know what changed it.
6606 6609 */
6607 6610 if (!created) {
6608 6611 if (bva.va_seq && iva.va_seq &&
6609 6612 bva.va_seq == iva.va_seq)
6610 6613 cinfo->atomic = TRUE;
6611 6614 else
6612 6615 cinfo->atomic = FALSE;
6613 6616 NFS4_SET_FATTR4_CHANGE(cinfo->after, iva.va_ctime);
6614 6617 } else {
6615 6618 /*
6616 6619 * The entry was created, we need to sync the
6617 6620 * directory metadata.
6618 6621 */
6619 6622 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
6620 6623
6621 6624 /*
6622 6625 * Get "after" change value, if it fails, simply return the
6623 6626 * before value.
6624 6627 */
6625 6628 ava.va_mask = AT_CTIME|AT_SEQ;
6626 6629 if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6627 6630 ava.va_ctime = bva.va_ctime;
6628 6631 ava.va_seq = 0;
6629 6632 }
6630 6633
6631 6634 NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6632 6635
6633 6636 /*
6634 6637 * The cinfo->atomic = TRUE only if we have
6635 6638 * non-zero va_seq's, and it has incremented by exactly one
6636 6639 * during the create_vnode and it didn't
6637 6640 * change during the VOP_FSYNC.
6638 6641 */
6639 6642 if (bva.va_seq && iva.va_seq && ava.va_seq &&
6640 6643 iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
6641 6644 cinfo->atomic = TRUE;
6642 6645 else
6643 6646 cinfo->atomic = FALSE;
6644 6647 }
6645 6648
6646 6649 /* Check for mandatory locking and that the size gets set. */
6647 6650 cva.va_mask = AT_MODE;
6648 6651 if (setsize)
6649 6652 cva.va_mask |= AT_SIZE;
6650 6653
6651 6654 /* Assume the worst */
6652 6655 cs->mandlock = TRUE;
6653 6656
6654 6657 if (VOP_GETATTR(vp, &cva, 0, cs->cr, NULL) == 0) {
6655 6658 cs->mandlock = MANDLOCK(cs->vp, cva.va_mode);
6656 6659
6657 6660 /*
6658 6661 * Truncate the file if necessary; this would be
6659 6662 * the case for create over an existing file.
6660 6663 */
6661 6664
6662 6665 if (trunc) {
6663 6666 int in_crit = 0;
6664 6667 rfs4_file_t *fp;
6665 6668 nfs4_srv_t *nsrv4;
6666 6669 bool_t create = FALSE;
6667 6670
6668 6671 /*
6669 6672 * We are writing over an existing file.
6670 6673 * Check to see if we need to recall a delegation.
6671 6674 */
6672 6675 nsrv4 = nfs4_get_srv();
6673 6676 rfs4_hold_deleg_policy(nsrv4);
6674 6677 if ((fp = rfs4_findfile(vp, NULL, &create)) != NULL) {
6675 6678 if (rfs4_check_delegated_byfp(FWRITE, fp,
6676 6679 (reqsize == 0), FALSE, FALSE, &clientid)) {
6677 6680 rfs4_file_rele(fp);
6678 6681 rfs4_rele_deleg_policy(nsrv4);
6679 6682 VN_RELE(vp);
6680 6683 *attrset = 0;
6681 6684 return (NFS4ERR_DELAY);
6682 6685 }
6683 6686 rfs4_file_rele(fp);
6684 6687 }
6685 6688 rfs4_rele_deleg_policy(nsrv4);
6686 6689
6687 6690 if (nbl_need_check(vp)) {
6688 6691 in_crit = 1;
6689 6692
6690 6693 ASSERT(reqsize == 0);
6691 6694
6692 6695 nbl_start_crit(vp, RW_READER);
6693 6696 if (nbl_conflict(vp, NBL_WRITE, 0,
6694 6697 cva.va_size, 0, NULL)) {
6695 6698 in_crit = 0;
6696 6699 nbl_end_crit(vp);
6697 6700 VN_RELE(vp);
6698 6701 *attrset = 0;
6699 6702 return (NFS4ERR_ACCESS);
6700 6703 }
6701 6704 }
6702 6705 ct.cc_sysid = 0;
6703 6706 ct.cc_pid = 0;
6704 6707 ct.cc_caller_id = nfs4_srv_caller_id;
6705 6708 ct.cc_flags = CC_DONTBLOCK;
6706 6709
6707 6710 cva.va_mask = AT_SIZE;
6708 6711 cva.va_size = reqsize;
6709 6712 (void) VOP_SETATTR(vp, &cva, 0, cs->cr, &ct);
6710 6713 if (in_crit)
6711 6714 nbl_end_crit(vp);
6712 6715 }
6713 6716 }
6714 6717
6715 6718 error = makefh4(&cs->fh, vp, cs->exi);
6716 6719
6717 6720 /*
6718 6721 * Force modified data and metadata out to stable storage.
6719 6722 */
6720 6723 (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
6721 6724
6722 6725 if (error) {
6723 6726 VN_RELE(vp);
6724 6727 *attrset = 0;
6725 6728 return (puterrno4(error));
6726 6729 }
6727 6730
6728 6731 /* if parent dir is attrdir, set namedattr fh flag */
6729 6732 if (dvp->v_flag & V_XATTRDIR)
6730 6733 set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
6731 6734
6732 6735 if (cs->vp)
6733 6736 VN_RELE(cs->vp);
6734 6737
6735 6738 cs->vp = vp;
6736 6739
6737 6740 /*
6738 6741 * if we did not create the file, we will need to check
6739 6742 * the access bits on the file
6740 6743 */
6741 6744
6742 6745 if (!created) {
6743 6746 if (setsize)
6744 6747 args->share_access |= OPEN4_SHARE_ACCESS_WRITE;
6745 6748 status = check_open_access(args->share_access, cs, req);
6746 6749 if (status != NFS4_OK)
6747 6750 *attrset = 0;
6748 6751 }
6749 6752 return (status);
6750 6753 }
6751 6754
6752 6755 /*ARGSUSED*/
6753 6756 static void
6754 6757 rfs4_do_open(struct compound_state *cs, struct svc_req *req,
6755 6758 rfs4_openowner_t *oo, delegreq_t deleg,
6756 6759 uint32_t access, uint32_t deny,
6757 6760 OPEN4res *resp, int deleg_cur)
6758 6761 {
6759 6762 /* XXX Currently not using req */
6760 6763 rfs4_state_t *sp;
6761 6764 rfs4_file_t *fp;
6762 6765 bool_t screate = TRUE;
6763 6766 bool_t fcreate = TRUE;
6764 6767 uint32_t open_a, share_a;
6765 6768 uint32_t open_d, share_d;
6766 6769 rfs4_deleg_state_t *dsp;
6767 6770 sysid_t sysid;
6768 6771 nfsstat4 status;
6769 6772 caller_context_t ct;
6770 6773 int fflags = 0;
6771 6774 int recall = 0;
6772 6775 int err;
6773 6776 int first_open;
6774 6777
6775 6778 /* get the file struct and hold a lock on it during initial open */
6776 6779 fp = rfs4_findfile_withlock(cs->vp, &cs->fh, &fcreate);
6777 6780 if (fp == NULL) {
6778 6781 resp->status = NFS4ERR_RESOURCE;
6779 6782 DTRACE_PROBE1(nfss__e__do__open1, nfsstat4, resp->status);
6780 6783 return;
6781 6784 }
6782 6785
6783 6786 sp = rfs4_findstate_by_owner_file(oo, fp, &screate);
6784 6787 if (sp == NULL) {
6785 6788 resp->status = NFS4ERR_RESOURCE;
6786 6789 DTRACE_PROBE1(nfss__e__do__open2, nfsstat4, resp->status);
6787 6790 /* No need to keep any reference */
6788 6791 rw_exit(&fp->rf_file_rwlock);
6789 6792 rfs4_file_rele(fp);
6790 6793 return;
6791 6794 }
6792 6795
6793 6796 /* try to get the sysid before continuing */
6794 6797 if ((status = rfs4_client_sysid(oo->ro_client, &sysid)) != NFS4_OK) {
6795 6798 resp->status = status;
6796 6799 rfs4_file_rele(fp);
6797 6800 /* Not a fully formed open; "close" it */
6798 6801 if (screate == TRUE)
6799 6802 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6800 6803 rfs4_state_rele(sp);
6801 6804 return;
6802 6805 }
6803 6806
6804 6807 /* Calculate the fflags for this OPEN. */
6805 6808 if (access & OPEN4_SHARE_ACCESS_READ)
6806 6809 fflags |= FREAD;
6807 6810 if (access & OPEN4_SHARE_ACCESS_WRITE)
6808 6811 fflags |= FWRITE;
6809 6812
6810 6813 rfs4_dbe_lock(sp->rs_dbe);
6811 6814
6812 6815 /*
6813 6816 * Calculate the new deny and access mode that this open is adding to
6814 6817 * the file for this open owner;
6815 6818 */
6816 6819 open_d = (deny & ~sp->rs_open_deny);
6817 6820 open_a = (access & ~sp->rs_open_access);
6818 6821
6819 6822 /*
6820 6823 * Calculate the new share access and share deny modes that this open
6821 6824 * is adding to the file for this open owner;
6822 6825 */
6823 6826 share_a = (access & ~sp->rs_share_access);
6824 6827 share_d = (deny & ~sp->rs_share_deny);
6825 6828
6826 6829 first_open = (sp->rs_open_access & OPEN4_SHARE_ACCESS_BOTH) == 0;
6827 6830
6828 6831 /*
6829 6832 * Check to see the client has already sent an open for this
6830 6833 * open owner on this file with the same share/deny modes.
6831 6834 * If so, we don't need to check for a conflict and we don't
6832 6835 * need to add another shrlock. If not, then we need to
6833 6836 * check for conflicts in deny and access before checking for
6834 6837 * conflicts in delegation. We don't want to recall a
6835 6838 * delegation based on an open that will eventually fail based
6836 6839 * on shares modes.
6837 6840 */
6838 6841
6839 6842 if (share_a || share_d) {
6840 6843 if ((err = rfs4_share(sp, access, deny)) != 0) {
6841 6844 rfs4_dbe_unlock(sp->rs_dbe);
6842 6845 resp->status = err;
6843 6846
6844 6847 rfs4_file_rele(fp);
6845 6848 /* Not a fully formed open; "close" it */
6846 6849 if (screate == TRUE)
6847 6850 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6848 6851 rfs4_state_rele(sp);
6849 6852 return;
6850 6853 }
6851 6854 }
6852 6855
6853 6856 rfs4_dbe_lock(fp->rf_dbe);
6854 6857
6855 6858 /*
6856 6859 * Check to see if this file is delegated and if so, if a
6857 6860 * recall needs to be done.
6858 6861 */
6859 6862 if (rfs4_check_recall(sp, access)) {
6860 6863 rfs4_dbe_unlock(fp->rf_dbe);
6861 6864 rfs4_dbe_unlock(sp->rs_dbe);
6862 6865 rfs4_recall_deleg(fp, FALSE, sp->rs_owner->ro_client);
6863 6866 delay(NFS4_DELEGATION_CONFLICT_DELAY);
6864 6867 rfs4_dbe_lock(sp->rs_dbe);
6865 6868
6866 6869 /* if state closed while lock was dropped */
6867 6870 if (sp->rs_closed) {
6868 6871 if (share_a || share_d)
6869 6872 (void) rfs4_unshare(sp);
6870 6873 rfs4_dbe_unlock(sp->rs_dbe);
6871 6874 rfs4_file_rele(fp);
6872 6875 /* Not a fully formed open; "close" it */
6873 6876 if (screate == TRUE)
6874 6877 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6875 6878 rfs4_state_rele(sp);
6876 6879 resp->status = NFS4ERR_OLD_STATEID;
6877 6880 return;
6878 6881 }
6879 6882
6880 6883 rfs4_dbe_lock(fp->rf_dbe);
6881 6884 /* Let's see if the delegation was returned */
6882 6885 if (rfs4_check_recall(sp, access)) {
6883 6886 rfs4_dbe_unlock(fp->rf_dbe);
6884 6887 if (share_a || share_d)
6885 6888 (void) rfs4_unshare(sp);
6886 6889 rfs4_dbe_unlock(sp->rs_dbe);
6887 6890 rfs4_file_rele(fp);
6888 6891 rfs4_update_lease(sp->rs_owner->ro_client);
6889 6892
6890 6893 /* Not a fully formed open; "close" it */
6891 6894 if (screate == TRUE)
6892 6895 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6893 6896 rfs4_state_rele(sp);
6894 6897 resp->status = NFS4ERR_DELAY;
6895 6898 return;
6896 6899 }
6897 6900 }
6898 6901 /*
6899 6902 * the share check passed and any delegation conflict has been
6900 6903 * taken care of, now call vop_open.
6901 6904 * if this is the first open then call vop_open with fflags.
6902 6905 * if not, call vn_open_upgrade with just the upgrade flags.
6903 6906 *
6904 6907 * if the file has been opened already, it will have the current
6905 6908 * access mode in the state struct. if it has no share access, then
6906 6909 * this is a new open.
6907 6910 *
6908 6911 * However, if this is open with CLAIM_DLEGATE_CUR, then don't
6909 6912 * call VOP_OPEN(), just do the open upgrade.
6910 6913 */
6911 6914 if (first_open && !deleg_cur) {
6912 6915 ct.cc_sysid = sysid;
6913 6916 ct.cc_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
6914 6917 ct.cc_caller_id = nfs4_srv_caller_id;
6915 6918 ct.cc_flags = CC_DONTBLOCK;
6916 6919 err = VOP_OPEN(&cs->vp, fflags, cs->cr, &ct);
6917 6920 if (err) {
6918 6921 rfs4_dbe_unlock(fp->rf_dbe);
6919 6922 if (share_a || share_d)
6920 6923 (void) rfs4_unshare(sp);
6921 6924 rfs4_dbe_unlock(sp->rs_dbe);
6922 6925 rfs4_file_rele(fp);
6923 6926
6924 6927 /* Not a fully formed open; "close" it */
6925 6928 if (screate == TRUE)
6926 6929 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6927 6930 rfs4_state_rele(sp);
6928 6931 /* check if a monitor detected a delegation conflict */
6929 6932 if (err == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
6930 6933 resp->status = NFS4ERR_DELAY;
6931 6934 else
6932 6935 resp->status = NFS4ERR_SERVERFAULT;
6933 6936 return;
6934 6937 }
6935 6938 } else { /* open upgrade */
6936 6939 /*
6937 6940 * calculate the fflags for the new mode that is being added
6938 6941 * by this upgrade.
6939 6942 */
6940 6943 fflags = 0;
6941 6944 if (open_a & OPEN4_SHARE_ACCESS_READ)
6942 6945 fflags |= FREAD;
6943 6946 if (open_a & OPEN4_SHARE_ACCESS_WRITE)
6944 6947 fflags |= FWRITE;
6945 6948 vn_open_upgrade(cs->vp, fflags);
6946 6949 }
6947 6950 sp->rs_open_access |= access;
6948 6951 sp->rs_open_deny |= deny;
6949 6952
6950 6953 if (open_d & OPEN4_SHARE_DENY_READ)
6951 6954 fp->rf_deny_read++;
6952 6955 if (open_d & OPEN4_SHARE_DENY_WRITE)
6953 6956 fp->rf_deny_write++;
6954 6957 fp->rf_share_deny |= deny;
6955 6958
6956 6959 if (open_a & OPEN4_SHARE_ACCESS_READ)
6957 6960 fp->rf_access_read++;
6958 6961 if (open_a & OPEN4_SHARE_ACCESS_WRITE)
6959 6962 fp->rf_access_write++;
6960 6963 fp->rf_share_access |= access;
6961 6964
6962 6965 /*
6963 6966 * Check for delegation here. if the deleg argument is not
6964 6967 * DELEG_ANY, then this is a reclaim from a client and
6965 6968 * we must honor the delegation requested. If necessary we can
6966 6969 * set the recall flag.
6967 6970 */
6968 6971
6969 6972 dsp = rfs4_grant_delegation(deleg, sp, &recall);
6970 6973
6971 6974 cs->deleg = (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE);
6972 6975
6973 6976 next_stateid(&sp->rs_stateid);
6974 6977
6975 6978 resp->stateid = sp->rs_stateid.stateid;
6976 6979
6977 6980 rfs4_dbe_unlock(fp->rf_dbe);
6978 6981 rfs4_dbe_unlock(sp->rs_dbe);
6979 6982
6980 6983 if (dsp) {
6981 6984 rfs4_set_deleg_response(dsp, &resp->delegation, NULL, recall);
6982 6985 rfs4_deleg_state_rele(dsp);
6983 6986 }
6984 6987
6985 6988 rfs4_file_rele(fp);
6986 6989 rfs4_state_rele(sp);
6987 6990
6988 6991 resp->status = NFS4_OK;
6989 6992 }
6990 6993
6991 6994 /*ARGSUSED*/
6992 6995 static void
6993 6996 rfs4_do_opennull(struct compound_state *cs, struct svc_req *req,
6994 6997 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6995 6998 {
6996 6999 change_info4 *cinfo = &resp->cinfo;
6997 7000 bitmap4 *attrset = &resp->attrset;
6998 7001
6999 7002 if (args->opentype == OPEN4_NOCREATE)
7000 7003 resp->status = rfs4_lookupfile(&args->open_claim4_u.file,
7001 7004 req, cs, args->share_access, cinfo);
7002 7005 else {
7003 7006 /* inhibit delegation grants during exclusive create */
7004 7007
7005 7008 if (args->mode == EXCLUSIVE4)
7006 7009 rfs4_disable_delegation();
7007 7010
7008 7011 resp->status = rfs4_createfile(args, req, cs, cinfo, attrset,
7009 7012 oo->ro_client->rc_clientid);
7010 7013 }
7011 7014
7012 7015 if (resp->status == NFS4_OK) {
7013 7016
7014 7017 /* cs->vp cs->fh now reference the desired file */
7015 7018
7016 7019 rfs4_do_open(cs, req, oo,
7017 7020 oo->ro_need_confirm ? DELEG_NONE : DELEG_ANY,
7018 7021 args->share_access, args->share_deny, resp, 0);
7019 7022
7020 7023 /*
7021 7024 * If rfs4_createfile set attrset, we must
7022 7025 * clear this attrset before the response is copied.
7023 7026 */
7024 7027 if (resp->status != NFS4_OK && resp->attrset) {
7025 7028 resp->attrset = 0;
7026 7029 }
7027 7030 }
7028 7031 else
7029 7032 *cs->statusp = resp->status;
7030 7033
7031 7034 if (args->mode == EXCLUSIVE4)
7032 7035 rfs4_enable_delegation();
7033 7036 }
7034 7037
7035 7038 /*ARGSUSED*/
7036 7039 static void
7037 7040 rfs4_do_openprev(struct compound_state *cs, struct svc_req *req,
7038 7041 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7039 7042 {
7040 7043 change_info4 *cinfo = &resp->cinfo;
7041 7044 vattr_t va;
7042 7045 vtype_t v_type = cs->vp->v_type;
7043 7046 int error = 0;
7044 7047
7045 7048 /* Verify that we have a regular file */
7046 7049 if (v_type != VREG) {
7047 7050 if (v_type == VDIR)
7048 7051 resp->status = NFS4ERR_ISDIR;
7049 7052 else if (v_type == VLNK)
7050 7053 resp->status = NFS4ERR_SYMLINK;
7051 7054 else
7052 7055 resp->status = NFS4ERR_INVAL;
7053 7056 return;
7054 7057 }
7055 7058
7056 7059 va.va_mask = AT_MODE|AT_UID;
7057 7060 error = VOP_GETATTR(cs->vp, &va, 0, cs->cr, NULL);
7058 7061 if (error) {
7059 7062 resp->status = puterrno4(error);
7060 7063 return;
7061 7064 }
7062 7065
7063 7066 cs->mandlock = MANDLOCK(cs->vp, va.va_mode);
7064 7067
7065 7068 /*
7066 7069 * Check if we have access to the file, Note the the file
7067 7070 * could have originally been open UNCHECKED or GUARDED
7068 7071 * with mode bits that will now fail, but there is nothing
7069 7072 * we can really do about that except in the case that the
7070 7073 * owner of the file is the one requesting the open.
7071 7074 */
7072 7075 if (crgetuid(cs->cr) != va.va_uid) {
7073 7076 resp->status = check_open_access(args->share_access, cs, req);
7074 7077 if (resp->status != NFS4_OK) {
7075 7078 return;
7076 7079 }
7077 7080 }
7078 7081
7079 7082 /*
7080 7083 * cinfo on a CLAIM_PREVIOUS is undefined, initialize to zero
7081 7084 */
7082 7085 cinfo->before = 0;
7083 7086 cinfo->after = 0;
7084 7087 cinfo->atomic = FALSE;
7085 7088
7086 7089 rfs4_do_open(cs, req, oo,
7087 7090 NFS4_DELEG4TYPE2REQTYPE(args->open_claim4_u.delegate_type),
7088 7091 args->share_access, args->share_deny, resp, 0);
7089 7092 }
7090 7093
7091 7094 static void
7092 7095 rfs4_do_opendelcur(struct compound_state *cs, struct svc_req *req,
7093 7096 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7094 7097 {
7095 7098 int error;
7096 7099 nfsstat4 status;
7097 7100 stateid4 stateid =
7098 7101 args->open_claim4_u.delegate_cur_info.delegate_stateid;
7099 7102 rfs4_deleg_state_t *dsp;
7100 7103
7101 7104 /*
7102 7105 * Find the state info from the stateid and confirm that the
7103 7106 * file is delegated. If the state openowner is the same as
7104 7107 * the supplied openowner we're done. If not, get the file
7105 7108 * info from the found state info. Use that file info to
7106 7109 * create the state for this lock owner. Note solaris doen't
7107 7110 * really need the pathname to find the file. We may want to
7108 7111 * lookup the pathname and make sure that the vp exist and
7109 7112 * matches the vp in the file structure. However it is
7110 7113 * possible that the pathname nolonger exists (local process
7111 7114 * unlinks the file), so this may not be that useful.
7112 7115 */
7113 7116
7114 7117 status = rfs4_get_deleg_state(&stateid, &dsp);
7115 7118 if (status != NFS4_OK) {
7116 7119 resp->status = status;
7117 7120 return;
7118 7121 }
7119 7122
7120 7123 ASSERT(dsp->rds_finfo->rf_dinfo.rd_dtype != OPEN_DELEGATE_NONE);
7121 7124
7122 7125 /*
7123 7126 * New lock owner, create state. Since this was probably called
7124 7127 * in response to a CB_RECALL we set deleg to DELEG_NONE
7125 7128 */
7126 7129
7127 7130 ASSERT(cs->vp != NULL);
7128 7131 VN_RELE(cs->vp);
7129 7132 VN_HOLD(dsp->rds_finfo->rf_vp);
7130 7133 cs->vp = dsp->rds_finfo->rf_vp;
7131 7134
7132 7135 if (error = makefh4(&cs->fh, cs->vp, cs->exi)) {
7133 7136 rfs4_deleg_state_rele(dsp);
7134 7137 *cs->statusp = resp->status = puterrno4(error);
7135 7138 return;
7136 7139 }
7137 7140
7138 7141 /* Mark progress for delegation returns */
7139 7142 dsp->rds_finfo->rf_dinfo.rd_time_lastwrite = gethrestime_sec();
7140 7143 rfs4_deleg_state_rele(dsp);
7141 7144 rfs4_do_open(cs, req, oo, DELEG_NONE,
7142 7145 args->share_access, args->share_deny, resp, 1);
7143 7146 }
7144 7147
7145 7148 /*ARGSUSED*/
7146 7149 static void
7147 7150 rfs4_do_opendelprev(struct compound_state *cs, struct svc_req *req,
7148 7151 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7149 7152 {
7150 7153 /*
7151 7154 * Lookup the pathname, it must already exist since this file
7152 7155 * was delegated.
7153 7156 *
7154 7157 * Find the file and state info for this vp and open owner pair.
7155 7158 * check that they are in fact delegated.
7156 7159 * check that the state access and deny modes are the same.
7157 7160 *
7158 7161 * Return the delgation possibly seting the recall flag.
7159 7162 */
7160 7163 rfs4_file_t *fp;
7161 7164 rfs4_state_t *sp;
7162 7165 bool_t create = FALSE;
7163 7166 bool_t dcreate = FALSE;
7164 7167 rfs4_deleg_state_t *dsp;
7165 7168 nfsace4 *ace;
7166 7169
7167 7170 /* Note we ignore oflags */
7168 7171 resp->status = rfs4_lookupfile(&args->open_claim4_u.file_delegate_prev,
7169 7172 req, cs, args->share_access, &resp->cinfo);
7170 7173
7171 7174 if (resp->status != NFS4_OK) {
7172 7175 return;
7173 7176 }
7174 7177
7175 7178 /* get the file struct and hold a lock on it during initial open */
7176 7179 fp = rfs4_findfile_withlock(cs->vp, NULL, &create);
7177 7180 if (fp == NULL) {
7178 7181 resp->status = NFS4ERR_RESOURCE;
7179 7182 DTRACE_PROBE1(nfss__e__do_opendelprev1, nfsstat4, resp->status);
7180 7183 return;
7181 7184 }
7182 7185
7183 7186 sp = rfs4_findstate_by_owner_file(oo, fp, &create);
7184 7187 if (sp == NULL) {
7185 7188 resp->status = NFS4ERR_SERVERFAULT;
7186 7189 DTRACE_PROBE1(nfss__e__do_opendelprev2, nfsstat4, resp->status);
7187 7190 rw_exit(&fp->rf_file_rwlock);
7188 7191 rfs4_file_rele(fp);
7189 7192 return;
7190 7193 }
7191 7194
7192 7195 rfs4_dbe_lock(sp->rs_dbe);
7193 7196 rfs4_dbe_lock(fp->rf_dbe);
7194 7197 if (args->share_access != sp->rs_share_access ||
7195 7198 args->share_deny != sp->rs_share_deny ||
7196 7199 sp->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
7197 7200 NFS4_DEBUG(rfs4_debug,
7198 7201 (CE_NOTE, "rfs4_do_opendelprev: state mixup"));
7199 7202 rfs4_dbe_unlock(fp->rf_dbe);
7200 7203 rfs4_dbe_unlock(sp->rs_dbe);
7201 7204 rfs4_file_rele(fp);
7202 7205 rfs4_state_rele(sp);
7203 7206 resp->status = NFS4ERR_SERVERFAULT;
7204 7207 return;
7205 7208 }
7206 7209 rfs4_dbe_unlock(fp->rf_dbe);
7207 7210 rfs4_dbe_unlock(sp->rs_dbe);
7208 7211
7209 7212 dsp = rfs4_finddeleg(sp, &dcreate);
7210 7213 if (dsp == NULL) {
7211 7214 rfs4_state_rele(sp);
7212 7215 rfs4_file_rele(fp);
7213 7216 resp->status = NFS4ERR_SERVERFAULT;
7214 7217 return;
7215 7218 }
7216 7219
7217 7220 next_stateid(&sp->rs_stateid);
7218 7221
7219 7222 resp->stateid = sp->rs_stateid.stateid;
7220 7223
7221 7224 resp->delegation.delegation_type = dsp->rds_dtype;
7222 7225
7223 7226 if (dsp->rds_dtype == OPEN_DELEGATE_READ) {
7224 7227 open_read_delegation4 *rv =
7225 7228 &resp->delegation.open_delegation4_u.read;
7226 7229
7227 7230 rv->stateid = dsp->rds_delegid.stateid;
7228 7231 rv->recall = FALSE; /* no policy in place to set to TRUE */
7229 7232 ace = &rv->permissions;
7230 7233 } else {
7231 7234 open_write_delegation4 *rv =
7232 7235 &resp->delegation.open_delegation4_u.write;
7233 7236
7234 7237 rv->stateid = dsp->rds_delegid.stateid;
7235 7238 rv->recall = FALSE; /* no policy in place to set to TRUE */
7236 7239 ace = &rv->permissions;
7237 7240 rv->space_limit.limitby = NFS_LIMIT_SIZE;
7238 7241 rv->space_limit.nfs_space_limit4_u.filesize = UINT64_MAX;
7239 7242 }
7240 7243
7241 7244 /* XXX For now */
7242 7245 ace->type = ACE4_ACCESS_ALLOWED_ACE_TYPE;
7243 7246 ace->flag = 0;
7244 7247 ace->access_mask = 0;
7245 7248 ace->who.utf8string_len = 0;
7246 7249 ace->who.utf8string_val = 0;
7247 7250
7248 7251 rfs4_deleg_state_rele(dsp);
7249 7252 rfs4_state_rele(sp);
7250 7253 rfs4_file_rele(fp);
7251 7254 }
7252 7255
/*
 * Result of a sequence id check (see rfs4_check_seqid()).
 */
typedef enum {
	NFS4_CHKSEQ_OKAY,	/* 0: request carries the next expected seqid */
	NFS4_CHKSEQ_REPLAY,	/* 1: same seqid and same op as the last one */
	NFS4_CHKSEQ_BAD		/* 2: anything else */
} rfs4_chkseq_t;
7258 7261
7259 7262 /*
7260 7263 * Generic function for sequence number checks.
7261 7264 */
7262 7265 static rfs4_chkseq_t
7263 7266 rfs4_check_seqid(seqid4 seqid, nfs_resop4 *lastop,
7264 7267 seqid4 rqst_seq, nfs_resop4 *resop, bool_t copyres)
7265 7268 {
7266 7269 /* Same sequence ids and matching operations? */
7267 7270 if (seqid == rqst_seq && resop->resop == lastop->resop) {
7268 7271 if (copyres == TRUE) {
7269 7272 rfs4_free_reply(resop);
7270 7273 rfs4_copy_reply(resop, lastop);
7271 7274 }
7272 7275 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
7273 7276 "Replayed SEQID %d\n", seqid));
7274 7277 return (NFS4_CHKSEQ_REPLAY);
7275 7278 }
7276 7279
7277 7280 /* If the incoming sequence is not the next expected then it is bad */
7278 7281 if (rqst_seq != seqid + 1) {
7279 7282 if (rqst_seq == seqid) {
7280 7283 NFS4_DEBUG(rfs4_debug,
7281 7284 (CE_NOTE, "BAD SEQID: Replayed sequence id "
7282 7285 "but last op was %d current op is %d\n",
7283 7286 lastop->resop, resop->resop));
7284 7287 return (NFS4_CHKSEQ_BAD);
7285 7288 }
7286 7289 NFS4_DEBUG(rfs4_debug,
7287 7290 (CE_NOTE, "BAD SEQID: got %u expecting %u\n",
7288 7291 rqst_seq, seqid));
7289 7292 return (NFS4_CHKSEQ_BAD);
7290 7293 }
7291 7294
7292 7295 /* Everything okay -- next expected */
7293 7296 return (NFS4_CHKSEQ_OKAY);
7294 7297 }
7295 7298
7296 7299
7297 7300 static rfs4_chkseq_t
7298 7301 rfs4_check_open_seqid(seqid4 seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7299 7302 {
7300 7303 rfs4_chkseq_t rc;
7301 7304
7302 7305 rfs4_dbe_lock(op->ro_dbe);
7303 7306 rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply, seqid, resop,
7304 7307 TRUE);
7305 7308 rfs4_dbe_unlock(op->ro_dbe);
7306 7309
7307 7310 if (rc == NFS4_CHKSEQ_OKAY)
7308 7311 rfs4_update_lease(op->ro_client);
7309 7312
7310 7313 return (rc);
7311 7314 }
7312 7315
7313 7316 static rfs4_chkseq_t
7314 7317 rfs4_check_olo_seqid(seqid4 olo_seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7315 7318 {
7316 7319 rfs4_chkseq_t rc;
7317 7320
7318 7321 rfs4_dbe_lock(op->ro_dbe);
7319 7322 rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply,
7320 7323 olo_seqid, resop, FALSE);
7321 7324 rfs4_dbe_unlock(op->ro_dbe);
7322 7325
7323 7326 return (rc);
7324 7327 }
7325 7328
7326 7329 static rfs4_chkseq_t
7327 7330 rfs4_check_lock_seqid(seqid4 seqid, rfs4_lo_state_t *lsp, nfs_resop4 *resop)
7328 7331 {
7329 7332 rfs4_chkseq_t rc = NFS4_CHKSEQ_OKAY;
7330 7333
7331 7334 rfs4_dbe_lock(lsp->rls_dbe);
7332 7335 if (!lsp->rls_skip_seqid_check)
7333 7336 rc = rfs4_check_seqid(lsp->rls_seqid, &lsp->rls_reply, seqid,
7334 7337 resop, TRUE);
7335 7338 rfs4_dbe_unlock(lsp->rls_dbe);
7336 7339
7337 7340 return (rc);
7338 7341 }
7339 7342
7340 7343 static void
7341 7344 rfs4_op_open(nfs_argop4 *argop, nfs_resop4 *resop,
7342 7345 struct svc_req *req, struct compound_state *cs)
7343 7346 {
7344 7347 OPEN4args *args = &argop->nfs_argop4_u.opopen;
7345 7348 OPEN4res *resp = &resop->nfs_resop4_u.opopen;
7346 7349 open_owner4 *owner = &args->owner;
7347 7350 open_claim_type4 claim = args->claim;
7348 7351 rfs4_client_t *cp;
7349 7352 rfs4_openowner_t *oo;
7350 7353 bool_t create;
7351 7354 bool_t replay = FALSE;
7352 7355 int can_reclaim;
7353 7356
7354 7357 DTRACE_NFSV4_2(op__open__start, struct compound_state *, cs,
7355 7358 OPEN4args *, args);
7356 7359
7357 7360 if (cs->vp == NULL) {
7358 7361 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7359 7362 goto end;
7360 7363 }
7361 7364
7362 7365 /*
7363 7366 * Need to check clientid and lease expiration first based on
7364 7367 * error ordering and incrementing sequence id.
7365 7368 */
7366 7369 cp = rfs4_findclient_by_id(owner->clientid, FALSE);
7367 7370 if (cp == NULL) {
7368 7371 *cs->statusp = resp->status =
7369 7372 rfs4_check_clientid(&owner->clientid, 0);
7370 7373 goto end;
7371 7374 }
7372 7375
7373 7376 if (rfs4_lease_expired(cp)) {
7374 7377 rfs4_client_close(cp);
7375 7378 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7376 7379 goto end;
7377 7380 }
7378 7381 can_reclaim = cp->rc_can_reclaim;
7379 7382
7380 7383 /*
7381 7384 * Find the open_owner for use from this point forward. Take
7382 7385 * care in updating the sequence id based on the type of error
7383 7386 * being returned.
7384 7387 */
7385 7388 retry:
7386 7389 create = TRUE;
7387 7390 oo = rfs4_findopenowner(owner, &create, args->seqid);
7388 7391 if (oo == NULL) {
7389 7392 *cs->statusp = resp->status = NFS4ERR_RESOURCE;
7390 7393 rfs4_client_rele(cp);
7391 7394 goto end;
7392 7395 }
7393 7396
7394 7397 /* Hold off access to the sequence space while the open is done */
7395 7398 rfs4_sw_enter(&oo->ro_sw);
7396 7399
7397 7400 /*
7398 7401 * If the open_owner existed before at the server, then check
7399 7402 * the sequence id.
7400 7403 */
7401 7404 if (!create && !oo->ro_postpone_confirm) {
7402 7405 switch (rfs4_check_open_seqid(args->seqid, oo, resop)) {
7403 7406 case NFS4_CHKSEQ_BAD:
7404 7407 if ((args->seqid > oo->ro_open_seqid) &&
7405 7408 oo->ro_need_confirm) {
7406 7409 rfs4_free_opens(oo, TRUE, FALSE);
7407 7410 rfs4_sw_exit(&oo->ro_sw);
7408 7411 rfs4_openowner_rele(oo);
7409 7412 goto retry;
7410 7413 }
7411 7414 resp->status = NFS4ERR_BAD_SEQID;
7412 7415 goto out;
7413 7416 case NFS4_CHKSEQ_REPLAY: /* replay of previous request */
7414 7417 replay = TRUE;
7415 7418 goto out;
7416 7419 default:
7417 7420 break;
7418 7421 }
7419 7422
7420 7423 /*
7421 7424 * Sequence was ok and open owner exists
7422 7425 * check to see if we have yet to see an
7423 7426 * open_confirm.
7424 7427 */
7425 7428 if (oo->ro_need_confirm) {
7426 7429 rfs4_free_opens(oo, TRUE, FALSE);
7427 7430 rfs4_sw_exit(&oo->ro_sw);
7428 7431 rfs4_openowner_rele(oo);
7429 7432 goto retry;
7430 7433 }
7431 7434 }
7432 7435 /* Grace only applies to regular-type OPENs */
7433 7436 if (rfs4_clnt_in_grace(cp) &&
7434 7437 (claim == CLAIM_NULL || claim == CLAIM_DELEGATE_CUR)) {
7435 7438 *cs->statusp = resp->status = NFS4ERR_GRACE;
7436 7439 goto out;
7437 7440 }
7438 7441
7439 7442 /*
7440 7443 * If previous state at the server existed then can_reclaim
7441 7444 * will be set. If not reply NFS4ERR_NO_GRACE to the
7442 7445 * client.
7443 7446 */
7444 7447 if (rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS && !can_reclaim) {
7445 7448 *cs->statusp = resp->status = NFS4ERR_NO_GRACE;
7446 7449 goto out;
7447 7450 }
7448 7451
7449 7452
7450 7453 /*
7451 7454 * Reject the open if the client has missed the grace period
7452 7455 */
7453 7456 if (!rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS) {
7454 7457 *cs->statusp = resp->status = NFS4ERR_NO_GRACE;
7455 7458 goto out;
7456 7459 }
7457 7460
7458 7461 /* Couple of up-front bookkeeping items */
7459 7462 if (oo->ro_need_confirm) {
7460 7463 /*
7461 7464 * If this is a reclaim OPEN then we should not ask
7462 7465 * for a confirmation of the open_owner per the
7463 7466 * protocol specification.
7464 7467 */
7465 7468 if (claim == CLAIM_PREVIOUS)
7466 7469 oo->ro_need_confirm = FALSE;
7467 7470 else
7468 7471 resp->rflags |= OPEN4_RESULT_CONFIRM;
7469 7472 }
7470 7473 resp->rflags |= OPEN4_RESULT_LOCKTYPE_POSIX;
7471 7474
7472 7475 /*
7473 7476 * If there is an unshared filesystem mounted on this vnode,
7474 7477 * do not allow to open/create in this directory.
7475 7478 */
7476 7479 if (vn_ismntpt(cs->vp)) {
7477 7480 *cs->statusp = resp->status = NFS4ERR_ACCESS;
7478 7481 goto out;
7479 7482 }
7480 7483
7481 7484 /*
7482 7485 * access must READ, WRITE, or BOTH. No access is invalid.
7483 7486 * deny can be READ, WRITE, BOTH, or NONE.
7484 7487 * bits not defined for access/deny are invalid.
7485 7488 */
7486 7489 if (! (args->share_access & OPEN4_SHARE_ACCESS_BOTH) ||
7487 7490 (args->share_access & ~OPEN4_SHARE_ACCESS_BOTH) ||
7488 7491 (args->share_deny & ~OPEN4_SHARE_DENY_BOTH)) {
7489 7492 *cs->statusp = resp->status = NFS4ERR_INVAL;
7490 7493 goto out;
7491 7494 }
7492 7495
7493 7496
7494 7497 /*
7495 7498 * make sure attrset is zero before response is built.
7496 7499 */
7497 7500 resp->attrset = 0;
7498 7501
7499 7502 switch (claim) {
7500 7503 case CLAIM_NULL:
7501 7504 rfs4_do_opennull(cs, req, args, oo, resp);
7502 7505 break;
7503 7506 case CLAIM_PREVIOUS:
7504 7507 rfs4_do_openprev(cs, req, args, oo, resp);
7505 7508 break;
7506 7509 case CLAIM_DELEGATE_CUR:
7507 7510 rfs4_do_opendelcur(cs, req, args, oo, resp);
7508 7511 break;
7509 7512 case CLAIM_DELEGATE_PREV:
7510 7513 rfs4_do_opendelprev(cs, req, args, oo, resp);
7511 7514 break;
7512 7515 default:
7513 7516 resp->status = NFS4ERR_INVAL;
7514 7517 break;
7515 7518 }
7516 7519
7517 7520 out:
7518 7521 rfs4_client_rele(cp);
7519 7522
7520 7523 /* Catch sequence id handling here to make it a little easier */
7521 7524 switch (resp->status) {
7522 7525 case NFS4ERR_BADXDR:
7523 7526 case NFS4ERR_BAD_SEQID:
7524 7527 case NFS4ERR_BAD_STATEID:
7525 7528 case NFS4ERR_NOFILEHANDLE:
7526 7529 case NFS4ERR_RESOURCE:
7527 7530 case NFS4ERR_STALE_CLIENTID:
7528 7531 case NFS4ERR_STALE_STATEID:
7529 7532 /*
7530 7533 * The protocol states that if any of these errors are
7531 7534 * being returned, the sequence id should not be
7532 7535 * incremented. Any other return requires an
7533 7536 * increment.
7534 7537 */
7535 7538 break;
7536 7539 default:
7537 7540 /* Always update the lease in this case */
7538 7541 rfs4_update_lease(oo->ro_client);
7539 7542
7540 7543 /* Regular response - copy the result */
7541 7544 if (!replay)
7542 7545 rfs4_update_open_resp(oo, resop, &cs->fh);
7543 7546
7544 7547 /*
7545 7548 * REPLAY case: Only if the previous response was OK
7546 7549 * do we copy the filehandle. If not OK, no
7547 7550 * filehandle to copy.
7548 7551 */
7549 7552 if (replay == TRUE &&
7550 7553 resp->status == NFS4_OK &&
7551 7554 oo->ro_reply_fh.nfs_fh4_val) {
7552 7555 /*
7553 7556 * If this is a replay, we must restore the
7554 7557 * current filehandle/vp to that of what was
7555 7558 * returned originally. Try our best to do
7556 7559 * it.
7557 7560 */
7558 7561 nfs_fh4_fmt_t *fh_fmtp =
7559 7562 (nfs_fh4_fmt_t *)oo->ro_reply_fh.nfs_fh4_val;
7560 7563
7561 7564 cs->exi = checkexport4(&fh_fmtp->fh4_fsid,
7562 7565 (fid_t *)&fh_fmtp->fh4_xlen, NULL);
7563 7566
7564 7567 if (cs->exi == NULL) {
7565 7568 resp->status = NFS4ERR_STALE;
7566 7569 goto finish;
7567 7570 }
7568 7571
7569 7572 VN_RELE(cs->vp);
7570 7573
7571 7574 cs->vp = nfs4_fhtovp(&oo->ro_reply_fh, cs->exi,
7572 7575 &resp->status);
7573 7576
7574 7577 if (cs->vp == NULL)
7575 7578 goto finish;
7576 7579
7577 7580 nfs_fh4_copy(&oo->ro_reply_fh, &cs->fh);
7578 7581 }
7579 7582
7580 7583 /*
7581 7584 * If this was a replay, no need to update the
7582 7585 * sequence id. If the open_owner was not created on
7583 7586 * this pass, then update. The first use of an
7584 7587 * open_owner will not bump the sequence id.
7585 7588 */
7586 7589 if (replay == FALSE && !create)
7587 7590 rfs4_update_open_sequence(oo);
7588 7591 /*
7589 7592 * If the client is receiving an error and the
7590 7593 * open_owner needs to be confirmed, there is no way
7591 7594 * to notify the client of this fact ignoring the fact
7592 7595 * that the server has no method of returning a
7593 7596 * stateid to confirm. Therefore, the server needs to
7594 7597 * mark this open_owner in a way as to avoid the
7595 7598 * sequence id checking the next time the client uses
7596 7599 * this open_owner.
7597 7600 */
7598 7601 if (resp->status != NFS4_OK && oo->ro_need_confirm)
7599 7602 oo->ro_postpone_confirm = TRUE;
7600 7603 /*
7601 7604 * If OK response then clear the postpone flag and
7602 7605 * reset the sequence id to keep in sync with the
7603 7606 * client.
7604 7607 */
7605 7608 if (resp->status == NFS4_OK && oo->ro_postpone_confirm) {
7606 7609 oo->ro_postpone_confirm = FALSE;
7607 7610 oo->ro_open_seqid = args->seqid;
7608 7611 }
7609 7612 break;
7610 7613 }
7611 7614
7612 7615 finish:
7613 7616 *cs->statusp = resp->status;
7614 7617
7615 7618 rfs4_sw_exit(&oo->ro_sw);
7616 7619 rfs4_openowner_rele(oo);
7617 7620
7618 7621 end:
7619 7622 DTRACE_NFSV4_2(op__open__done, struct compound_state *, cs,
7620 7623 OPEN4res *, resp);
7621 7624 }
7622 7625
7623 7626 /*ARGSUSED*/
7624 7627 void
7625 7628 rfs4_op_open_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
7626 7629 struct svc_req *req, struct compound_state *cs)
7627 7630 {
7628 7631 OPEN_CONFIRM4args *args = &argop->nfs_argop4_u.opopen_confirm;
7629 7632 OPEN_CONFIRM4res *resp = &resop->nfs_resop4_u.opopen_confirm;
7630 7633 rfs4_state_t *sp;
7631 7634 nfsstat4 status;
7632 7635
7633 7636 DTRACE_NFSV4_2(op__open__confirm__start, struct compound_state *, cs,
7634 7637 OPEN_CONFIRM4args *, args);
7635 7638
7636 7639 if (cs->vp == NULL) {
7637 7640 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7638 7641 goto out;
7639 7642 }
7640 7643
7641 7644 if (cs->vp->v_type != VREG) {
7642 7645 *cs->statusp = resp->status =
7643 7646 cs->vp->v_type == VDIR ? NFS4ERR_ISDIR : NFS4ERR_INVAL;
7644 7647 return;
7645 7648 }
7646 7649
7647 7650 status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7648 7651 if (status != NFS4_OK) {
7649 7652 *cs->statusp = resp->status = status;
7650 7653 goto out;
7651 7654 }
7652 7655
7653 7656 /* Ensure specified filehandle matches */
7654 7657 if (cs->vp != sp->rs_finfo->rf_vp) {
7655 7658 rfs4_state_rele(sp);
7656 7659 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7657 7660 goto out;
7658 7661 }
7659 7662
7660 7663 /* hold off other access to open_owner while we tinker */
7661 7664 rfs4_sw_enter(&sp->rs_owner->ro_sw);
7662 7665
7663 7666 switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7664 7667 case NFS4_CHECK_STATEID_OKAY:
7665 7668 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7666 7669 resop) != 0) {
7667 7670 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7668 7671 break;
7669 7672 }
7670 7673 /*
7671 7674 * If it is the appropriate stateid and determined to
7672 7675 * be "OKAY" then this means that the stateid does not
7673 7676 * need to be confirmed and the client is in error for
7674 7677 * sending an OPEN_CONFIRM.
7675 7678 */
7676 7679 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7677 7680 break;
7678 7681 case NFS4_CHECK_STATEID_OLD:
7679 7682 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7680 7683 break;
7681 7684 case NFS4_CHECK_STATEID_BAD:
7682 7685 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7683 7686 break;
7684 7687 case NFS4_CHECK_STATEID_EXPIRED:
7685 7688 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7686 7689 break;
7687 7690 case NFS4_CHECK_STATEID_CLOSED:
7688 7691 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7689 7692 break;
7690 7693 case NFS4_CHECK_STATEID_REPLAY:
7691 7694 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7692 7695 resop)) {
7693 7696 case NFS4_CHKSEQ_OKAY:
7694 7697 /*
7695 7698 * This is replayed stateid; if seqid matches
7696 7699 * next expected, then client is using wrong seqid.
7697 7700 */
7698 7701 /* fall through */
7699 7702 case NFS4_CHKSEQ_BAD:
7700 7703 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7701 7704 break;
7702 7705 case NFS4_CHKSEQ_REPLAY:
7703 7706 /*
7704 7707 * Note this case is the duplicate case so
7705 7708 * resp->status is already set.
7706 7709 */
7707 7710 *cs->statusp = resp->status;
7708 7711 rfs4_update_lease(sp->rs_owner->ro_client);
7709 7712 break;
7710 7713 }
7711 7714 break;
7712 7715 case NFS4_CHECK_STATEID_UNCONFIRMED:
7713 7716 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7714 7717 resop) != NFS4_CHKSEQ_OKAY) {
7715 7718 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7716 7719 break;
7717 7720 }
7718 7721 *cs->statusp = resp->status = NFS4_OK;
7719 7722
7720 7723 next_stateid(&sp->rs_stateid);
7721 7724 resp->open_stateid = sp->rs_stateid.stateid;
7722 7725 sp->rs_owner->ro_need_confirm = FALSE;
7723 7726 rfs4_update_lease(sp->rs_owner->ro_client);
7724 7727 rfs4_update_open_sequence(sp->rs_owner);
7725 7728 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7726 7729 break;
7727 7730 default:
7728 7731 ASSERT(FALSE);
7729 7732 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7730 7733 break;
7731 7734 }
7732 7735 rfs4_sw_exit(&sp->rs_owner->ro_sw);
7733 7736 rfs4_state_rele(sp);
7734 7737
7735 7738 out:
7736 7739 DTRACE_NFSV4_2(op__open__confirm__done, struct compound_state *, cs,
7737 7740 OPEN_CONFIRM4res *, resp);
7738 7741 }
7739 7742
7740 7743 /*ARGSUSED*/
7741 7744 void
7742 7745 rfs4_op_open_downgrade(nfs_argop4 *argop, nfs_resop4 *resop,
7743 7746 struct svc_req *req, struct compound_state *cs)
7744 7747 {
7745 7748 OPEN_DOWNGRADE4args *args = &argop->nfs_argop4_u.opopen_downgrade;
7746 7749 OPEN_DOWNGRADE4res *resp = &resop->nfs_resop4_u.opopen_downgrade;
7747 7750 uint32_t access = args->share_access;
7748 7751 uint32_t deny = args->share_deny;
7749 7752 nfsstat4 status;
7750 7753 rfs4_state_t *sp;
7751 7754 rfs4_file_t *fp;
7752 7755 int fflags = 0;
7753 7756
7754 7757 DTRACE_NFSV4_2(op__open__downgrade__start, struct compound_state *, cs,
7755 7758 OPEN_DOWNGRADE4args *, args);
7756 7759
7757 7760 if (cs->vp == NULL) {
7758 7761 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7759 7762 goto out;
7760 7763 }
7761 7764
7762 7765 if (cs->vp->v_type != VREG) {
7763 7766 *cs->statusp = resp->status = NFS4ERR_INVAL;
7764 7767 return;
7765 7768 }
7766 7769
7767 7770 status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7768 7771 if (status != NFS4_OK) {
7769 7772 *cs->statusp = resp->status = status;
7770 7773 goto out;
7771 7774 }
7772 7775
7773 7776 /* Ensure specified filehandle matches */
7774 7777 if (cs->vp != sp->rs_finfo->rf_vp) {
7775 7778 rfs4_state_rele(sp);
7776 7779 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7777 7780 goto out;
7778 7781 }
7779 7782
7780 7783 /* hold off other access to open_owner while we tinker */
7781 7784 rfs4_sw_enter(&sp->rs_owner->ro_sw);
7782 7785
7783 7786 switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7784 7787 case NFS4_CHECK_STATEID_OKAY:
7785 7788 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7786 7789 resop) != NFS4_CHKSEQ_OKAY) {
7787 7790 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7788 7791 goto end;
7789 7792 }
7790 7793 break;
7791 7794 case NFS4_CHECK_STATEID_OLD:
7792 7795 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7793 7796 goto end;
7794 7797 case NFS4_CHECK_STATEID_BAD:
7795 7798 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7796 7799 goto end;
7797 7800 case NFS4_CHECK_STATEID_EXPIRED:
7798 7801 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7799 7802 goto end;
7800 7803 case NFS4_CHECK_STATEID_CLOSED:
7801 7804 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7802 7805 goto end;
7803 7806 case NFS4_CHECK_STATEID_UNCONFIRMED:
7804 7807 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7805 7808 goto end;
7806 7809 case NFS4_CHECK_STATEID_REPLAY:
7807 7810 /* Check the sequence id for the open owner */
7808 7811 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7809 7812 resop)) {
7810 7813 case NFS4_CHKSEQ_OKAY:
7811 7814 /*
7812 7815 * This is replayed stateid; if seqid matches
7813 7816 * next expected, then client is using wrong seqid.
7814 7817 */
7815 7818 /* fall through */
7816 7819 case NFS4_CHKSEQ_BAD:
7817 7820 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7818 7821 goto end;
7819 7822 case NFS4_CHKSEQ_REPLAY:
7820 7823 /*
7821 7824 * Note this case is the duplicate case so
7822 7825 * resp->status is already set.
7823 7826 */
7824 7827 *cs->statusp = resp->status;
7825 7828 rfs4_update_lease(sp->rs_owner->ro_client);
7826 7829 goto end;
7827 7830 }
7828 7831 break;
7829 7832 default:
7830 7833 ASSERT(FALSE);
7831 7834 break;
7832 7835 }
7833 7836
7834 7837 rfs4_dbe_lock(sp->rs_dbe);
7835 7838 /*
7836 7839 * Check that the new access modes and deny modes are valid.
7837 7840 * Check that no invalid bits are set.
7838 7841 */
7839 7842 if ((access & ~(OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) ||
7840 7843 (deny & ~(OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE))) {
7841 7844 *cs->statusp = resp->status = NFS4ERR_INVAL;
7842 7845 rfs4_update_open_sequence(sp->rs_owner);
7843 7846 rfs4_dbe_unlock(sp->rs_dbe);
7844 7847 goto end;
7845 7848 }
7846 7849
7847 7850 /*
7848 7851 * The new modes must be a subset of the current modes and
7849 7852 * the access must specify at least one mode. To test that
7850 7853 * the new mode is a subset of the current modes we bitwise
7851 7854 * AND them together and check that the result equals the new
7852 7855 * mode. For example:
7853 7856 * New mode, access == R and current mode, sp->rs_open_access == RW
7854 7857 * access & sp->rs_open_access == R == access, so the new access mode
7855 7858 * is valid. Consider access == RW, sp->rs_open_access = R
7856 7859 * access & sp->rs_open_access == R != access, so the new access mode
7857 7860 * is invalid.
7858 7861 */
7859 7862 if ((access & sp->rs_open_access) != access ||
7860 7863 (deny & sp->rs_open_deny) != deny ||
7861 7864 (access &
7862 7865 (OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) == 0) {
7863 7866 *cs->statusp = resp->status = NFS4ERR_INVAL;
7864 7867 rfs4_update_open_sequence(sp->rs_owner);
7865 7868 rfs4_dbe_unlock(sp->rs_dbe);
7866 7869 goto end;
7867 7870 }
7868 7871
7869 7872 /*
7870 7873 * Release any share locks associated with this stateID.
7871 7874 * Strictly speaking, this violates the spec because the
7872 7875 * spec effectively requires that open downgrade be atomic.
7873 7876 * At present, fs_shrlock does not have this capability.
7874 7877 */
7875 7878 (void) rfs4_unshare(sp);
7876 7879
7877 7880 status = rfs4_share(sp, access, deny);
7878 7881 if (status != NFS4_OK) {
7879 7882 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7880 7883 rfs4_update_open_sequence(sp->rs_owner);
7881 7884 rfs4_dbe_unlock(sp->rs_dbe);
7882 7885 goto end;
7883 7886 }
7884 7887
7885 7888 fp = sp->rs_finfo;
7886 7889 rfs4_dbe_lock(fp->rf_dbe);
7887 7890
7888 7891 /*
7889 7892 * If the current mode has deny read and the new mode
7890 7893 * does not, decrement the number of deny read mode bits
7891 7894 * and if it goes to zero turn off the deny read bit
7892 7895 * on the file.
7893 7896 */
7894 7897 if ((sp->rs_open_deny & OPEN4_SHARE_DENY_READ) &&
7895 7898 (deny & OPEN4_SHARE_DENY_READ) == 0) {
7896 7899 fp->rf_deny_read--;
7897 7900 if (fp->rf_deny_read == 0)
7898 7901 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
7899 7902 }
7900 7903
7901 7904 /*
7902 7905 * If the current mode has deny write and the new mode
7903 7906 * does not, decrement the number of deny write mode bits
7904 7907 * and if it goes to zero turn off the deny write bit
7905 7908 * on the file.
7906 7909 */
7907 7910 if ((sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) &&
7908 7911 (deny & OPEN4_SHARE_DENY_WRITE) == 0) {
7909 7912 fp->rf_deny_write--;
7910 7913 if (fp->rf_deny_write == 0)
7911 7914 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
7912 7915 }
7913 7916
7914 7917 /*
7915 7918 * If the current mode has access read and the new mode
7916 7919 * does not, decrement the number of access read mode bits
7917 7920 * and if it goes to zero turn off the access read bit
7918 7921 * on the file. set fflags to FREAD for the call to
7919 7922 * vn_open_downgrade().
7920 7923 */
7921 7924 if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) &&
7922 7925 (access & OPEN4_SHARE_ACCESS_READ) == 0) {
7923 7926 fp->rf_access_read--;
7924 7927 if (fp->rf_access_read == 0)
7925 7928 fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
7926 7929 fflags |= FREAD;
7927 7930 }
7928 7931
7929 7932 /*
7930 7933 * If the current mode has access write and the new mode
7931 7934 * does not, decrement the number of access write mode bits
7932 7935 * and if it goes to zero turn off the access write bit
7933 7936 * on the file. set fflags to FWRITE for the call to
7934 7937 * vn_open_downgrade().
7935 7938 */
7936 7939 if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) &&
7937 7940 (access & OPEN4_SHARE_ACCESS_WRITE) == 0) {
7938 7941 fp->rf_access_write--;
7939 7942 if (fp->rf_access_write == 0)
7940 7943 fp->rf_share_deny &= ~OPEN4_SHARE_ACCESS_WRITE;
7941 7944 fflags |= FWRITE;
7942 7945 }
7943 7946
7944 7947 /* Check that the file is still accessible */
7945 7948 ASSERT(fp->rf_share_access);
7946 7949
7947 7950 rfs4_dbe_unlock(fp->rf_dbe);
7948 7951
7949 7952 /* now set the new open access and deny modes */
7950 7953 sp->rs_open_access = access;
7951 7954 sp->rs_open_deny = deny;
7952 7955
7953 7956 /*
7954 7957 * we successfully downgraded the share lock, now we need to downgrade
7955 7958 * the open. it is possible that the downgrade was only for a deny
7956 7959 * mode and we have nothing else to do.
7957 7960 */
7958 7961 if ((fflags & (FREAD|FWRITE)) != 0)
7959 7962 vn_open_downgrade(cs->vp, fflags);
7960 7963
7961 7964 /* Update the stateid */
7962 7965 next_stateid(&sp->rs_stateid);
7963 7966 resp->open_stateid = sp->rs_stateid.stateid;
7964 7967
7965 7968 rfs4_dbe_unlock(sp->rs_dbe);
7966 7969
7967 7970 *cs->statusp = resp->status = NFS4_OK;
7968 7971 /* Update the lease */
7969 7972 rfs4_update_lease(sp->rs_owner->ro_client);
7970 7973 /* And the sequence */
7971 7974 rfs4_update_open_sequence(sp->rs_owner);
7972 7975 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7973 7976
7974 7977 end:
7975 7978 rfs4_sw_exit(&sp->rs_owner->ro_sw);
7976 7979 rfs4_state_rele(sp);
7977 7980 out:
7978 7981 DTRACE_NFSV4_2(op__open__downgrade__done, struct compound_state *, cs,
7979 7982 OPEN_DOWNGRADE4res *, resp);
7980 7983 }
7981 7984
/*
 * memstr: locate the NUL-terminated string s2 inside the first n bytes of
 * the buffer s1.  The buffer itself does not need to be NUL-terminated.
 *
 * Returns a pointer to the first byte of the first match, or NULL when s2
 * does not occur entirely within the n bytes.
 */
static void *
memstr(const void *s1, const char *s2, size_t n)
{
	size_t needle_len = strlen(s2);
	char *cursor = (char *)s1;
	size_t remaining;

	/* Stop as soon as fewer bytes remain than the needle needs. */
	for (remaining = n; remaining >= needle_len; remaining--, cursor++) {
		if (bcmp(cursor, s2, needle_len) == 0)
			return (cursor);
	}

	return (NULL);
}
7997 8000
7998 8001 /*
7999 8002 * The logic behind this function is detailed in the NFSv4 RFC in the
8000 8003 * SETCLIENTID operation description under IMPLEMENTATION. Refer to
8001 8004 * that section for explicit guidance to server behavior for
8002 8005 * SETCLIENTID.
8003 8006 */
8004 8007 void
8005 8008 rfs4_op_setclientid(nfs_argop4 *argop, nfs_resop4 *resop,
8006 8009 struct svc_req *req, struct compound_state *cs)
8007 8010 {
8008 8011 SETCLIENTID4args *args = &argop->nfs_argop4_u.opsetclientid;
8009 8012 SETCLIENTID4res *res = &resop->nfs_resop4_u.opsetclientid;
8010 8013 rfs4_client_t *cp, *newcp, *cp_confirmed, *cp_unconfirmed;
8011 8014 rfs4_clntip_t *ci;
8012 8015 bool_t create;
8013 8016 char *addr, *netid;
8014 8017 int len;
8015 8018
8016 8019 DTRACE_NFSV4_2(op__setclientid__start, struct compound_state *, cs,
8017 8020 SETCLIENTID4args *, args);
8018 8021 retry:
8019 8022 newcp = cp_confirmed = cp_unconfirmed = NULL;
8020 8023
8021 8024 /*
8022 8025 * Save the caller's IP address
8023 8026 */
8024 8027 args->client.cl_addr =
8025 8028 (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
8026 8029
8027 8030 /*
8028 8031 * Record if it is a Solaris client that cannot handle referrals.
8029 8032 */
8030 8033 if (memstr(args->client.id_val, "Solaris", args->client.id_len) &&
8031 8034 !memstr(args->client.id_val, "+referrals", args->client.id_len)) {
8032 8035 /* Add a "yes, it's downrev" record */
8033 8036 create = TRUE;
8034 8037 ci = rfs4_find_clntip(args->client.cl_addr, &create);
8035 8038 ASSERT(ci != NULL);
8036 8039 rfs4_dbe_rele(ci->ri_dbe);
8037 8040 } else {
8038 8041 /* Remove any previous record */
8039 8042 rfs4_invalidate_clntip(args->client.cl_addr);
8040 8043 }
8041 8044
8042 8045 /*
8043 8046 * In search of an EXISTING client matching the incoming
8044 8047 * request to establish a new client identifier at the server
8045 8048 */
8046 8049 create = TRUE;
8047 8050 cp = rfs4_findclient(&args->client, &create, NULL);
8048 8051
8049 8052 /* Should never happen */
8050 8053 ASSERT(cp != NULL);
8051 8054
8052 8055 if (cp == NULL) {
8053 8056 *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8054 8057 goto out;
8055 8058 }
8056 8059
8057 8060 /*
8058 8061 * Easiest case. Client identifier is newly created and is
8059 8062 * unconfirmed. Also note that for this case, no other
8060 8063 * entries exist for the client identifier. Nothing else to
8061 8064 * check. Just setup the response and respond.
8062 8065 */
8063 8066 if (create) {
8064 8067 *cs->statusp = res->status = NFS4_OK;
8065 8068 res->SETCLIENTID4res_u.resok4.clientid = cp->rc_clientid;
8066 8069 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8067 8070 cp->rc_confirm_verf;
8068 8071 /* Setup callback information; CB_NULL confirmation later */
8069 8072 rfs4_client_setcb(cp, &args->callback, args->callback_ident);
8070 8073
8071 8074 rfs4_client_rele(cp);
8072 8075 goto out;
8073 8076 }
8074 8077
8075 8078 /*
8076 8079 * An existing, confirmed client may exist but it may not have
8077 8080 * been active for at least one lease period. If so, then
8078 8081 * "close" the client and create a new client identifier
8079 8082 */
8080 8083 if (rfs4_lease_expired(cp)) {
8081 8084 rfs4_client_close(cp);
8082 8085 goto retry;
8083 8086 }
8084 8087
8085 8088 if (cp->rc_need_confirm == TRUE)
8086 8089 cp_unconfirmed = cp;
8087 8090 else
8088 8091 cp_confirmed = cp;
8089 8092
8090 8093 cp = NULL;
8091 8094
8092 8095 /*
8093 8096 * We have a confirmed client, now check for an
8094 8097 * unconfimred entry
8095 8098 */
8096 8099 if (cp_confirmed) {
8097 8100 /* If creds don't match then client identifier is inuse */
8098 8101 if (!creds_ok(cp_confirmed->rc_cr_set, req, cs)) {
8099 8102 rfs4_cbinfo_t *cbp;
8100 8103 /*
8101 8104 * Some one else has established this client
8102 8105 * id. Try and say * who they are. We will use
8103 8106 * the call back address supplied by * the
8104 8107 * first client.
8105 8108 */
8106 8109 *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8107 8110
8108 8111 addr = netid = NULL;
8109 8112
8110 8113 cbp = &cp_confirmed->rc_cbinfo;
8111 8114 if (cbp->cb_callback.cb_location.r_addr &&
8112 8115 cbp->cb_callback.cb_location.r_netid) {
8113 8116 cb_client4 *cbcp = &cbp->cb_callback;
8114 8117
8115 8118 len = strlen(cbcp->cb_location.r_addr)+1;
8116 8119 addr = kmem_alloc(len, KM_SLEEP);
8117 8120 bcopy(cbcp->cb_location.r_addr, addr, len);
8118 8121 len = strlen(cbcp->cb_location.r_netid)+1;
8119 8122 netid = kmem_alloc(len, KM_SLEEP);
8120 8123 bcopy(cbcp->cb_location.r_netid, netid, len);
8121 8124 }
8122 8125
8123 8126 res->SETCLIENTID4res_u.client_using.r_addr = addr;
8124 8127 res->SETCLIENTID4res_u.client_using.r_netid = netid;
8125 8128
8126 8129 rfs4_client_rele(cp_confirmed);
8127 8130 }
8128 8131
8129 8132 /*
8130 8133 * Confirmed, creds match, and verifier matches; must
8131 8134 * be an update of the callback info
8132 8135 */
8133 8136 if (cp_confirmed->rc_nfs_client.verifier ==
8134 8137 args->client.verifier) {
8135 8138 /* Setup callback information */
8136 8139 rfs4_client_setcb(cp_confirmed, &args->callback,
8137 8140 args->callback_ident);
8138 8141
8139 8142 /* everything okay -- move ahead */
8140 8143 *cs->statusp = res->status = NFS4_OK;
8141 8144 res->SETCLIENTID4res_u.resok4.clientid =
8142 8145 cp_confirmed->rc_clientid;
8143 8146
8144 8147 /* update the confirm_verifier and return it */
8145 8148 rfs4_client_scv_next(cp_confirmed);
8146 8149 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8147 8150 cp_confirmed->rc_confirm_verf;
8148 8151
8149 8152 rfs4_client_rele(cp_confirmed);
8150 8153 goto out;
8151 8154 }
8152 8155
8153 8156 /*
8154 8157 * Creds match but the verifier doesn't. Must search
8155 8158 * for an unconfirmed client that would be replaced by
8156 8159 * this request.
8157 8160 */
8158 8161 create = FALSE;
8159 8162 cp_unconfirmed = rfs4_findclient(&args->client, &create,
8160 8163 cp_confirmed);
8161 8164 }
8162 8165
8163 8166 /*
8164 8167 * At this point, we have taken care of the brand new client
8165 8168 * struct, INUSE case, update of an existing, and confirmed
8166 8169 * client struct.
8167 8170 */
8168 8171
8169 8172 /*
8170 8173 * check to see if things have changed while we originally
8171 8174 * picked up the client struct. If they have, then return and
8172 8175 * retry the processing of this SETCLIENTID request.
8173 8176 */
8174 8177 if (cp_unconfirmed) {
8175 8178 rfs4_dbe_lock(cp_unconfirmed->rc_dbe);
8176 8179 if (!cp_unconfirmed->rc_need_confirm) {
8177 8180 rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8178 8181 rfs4_client_rele(cp_unconfirmed);
8179 8182 if (cp_confirmed)
8180 8183 rfs4_client_rele(cp_confirmed);
8181 8184 goto retry;
8182 8185 }
8183 8186 /* do away with the old unconfirmed one */
8184 8187 rfs4_dbe_invalidate(cp_unconfirmed->rc_dbe);
8185 8188 rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8186 8189 rfs4_client_rele(cp_unconfirmed);
8187 8190 cp_unconfirmed = NULL;
8188 8191 }
8189 8192
8190 8193 /*
8191 8194 * This search will temporarily hide the confirmed client
8192 8195 * struct while a new client struct is created as the
8193 8196 * unconfirmed one.
8194 8197 */
8195 8198 create = TRUE;
8196 8199 newcp = rfs4_findclient(&args->client, &create, cp_confirmed);
8197 8200
8198 8201 ASSERT(newcp != NULL);
8199 8202
8200 8203 if (newcp == NULL) {
8201 8204 *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8202 8205 rfs4_client_rele(cp_confirmed);
8203 8206 goto out;
8204 8207 }
8205 8208
8206 8209 /*
8207 8210 * If one was not created, then a similar request must be in
8208 8211 * process so release and start over with this one
8209 8212 */
8210 8213 if (create != TRUE) {
8211 8214 rfs4_client_rele(newcp);
8212 8215 if (cp_confirmed)
8213 8216 rfs4_client_rele(cp_confirmed);
8214 8217 goto retry;
8215 8218 }
8216 8219
8217 8220 *cs->statusp = res->status = NFS4_OK;
8218 8221 res->SETCLIENTID4res_u.resok4.clientid = newcp->rc_clientid;
8219 8222 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8220 8223 newcp->rc_confirm_verf;
8221 8224 /* Setup callback information; CB_NULL confirmation later */
8222 8225 rfs4_client_setcb(newcp, &args->callback, args->callback_ident);
8223 8226
8224 8227 newcp->rc_cp_confirmed = cp_confirmed;
8225 8228
8226 8229 rfs4_client_rele(newcp);
8227 8230
8228 8231 out:
8229 8232 DTRACE_NFSV4_2(op__setclientid__done, struct compound_state *, cs,
8230 8233 SETCLIENTID4res *, res);
8231 8234 }
8232 8235
8233 8236 /*ARGSUSED*/
8234 8237 void
8235 8238 rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
8236 8239 struct svc_req *req, struct compound_state *cs)
8237 8240 {
8238 8241 SETCLIENTID_CONFIRM4args *args =
8239 8242 &argop->nfs_argop4_u.opsetclientid_confirm;
8240 8243 SETCLIENTID_CONFIRM4res *res =
8241 8244 &resop->nfs_resop4_u.opsetclientid_confirm;
8242 8245 rfs4_client_t *cp, *cptoclose = NULL;
8243 8246 nfs4_srv_t *nsrv4;
8244 8247
8245 8248 DTRACE_NFSV4_2(op__setclientid__confirm__start,
8246 8249 struct compound_state *, cs,
8247 8250 SETCLIENTID_CONFIRM4args *, args);
8248 8251
8249 8252 nsrv4 = nfs4_get_srv();
8250 8253 *cs->statusp = res->status = NFS4_OK;
8251 8254
8252 8255 cp = rfs4_findclient_by_id(args->clientid, TRUE);
8253 8256
8254 8257 if (cp == NULL) {
8255 8258 *cs->statusp = res->status =
8256 8259 rfs4_check_clientid(&args->clientid, 1);
8257 8260 goto out;
8258 8261 }
8259 8262
8260 8263 if (!creds_ok(cp, req, cs)) {
8261 8264 *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8262 8265 rfs4_client_rele(cp);
8263 8266 goto out;
8264 8267 }
8265 8268
8266 8269 /* If the verifier doesn't match, the record doesn't match */
8267 8270 if (cp->rc_confirm_verf != args->setclientid_confirm) {
8268 8271 *cs->statusp = res->status = NFS4ERR_STALE_CLIENTID;
8269 8272 rfs4_client_rele(cp);
8270 8273 goto out;
8271 8274 }
8272 8275
8273 8276 rfs4_dbe_lock(cp->rc_dbe);
8274 8277 cp->rc_need_confirm = FALSE;
8275 8278 if (cp->rc_cp_confirmed) {
8276 8279 cptoclose = cp->rc_cp_confirmed;
8277 8280 cptoclose->rc_ss_remove = 1;
8278 8281 cp->rc_cp_confirmed = NULL;
8279 8282 }
8280 8283
8281 8284 /*
8282 8285 * Update the client's associated server instance, if it's changed
8283 8286 * since the client was created.
8284 8287 */
8285 8288 if (rfs4_servinst(cp) != nsrv4->nfs4_cur_servinst)
8286 8289 rfs4_servinst_assign(nsrv4, cp, nsrv4->nfs4_cur_servinst);
8287 8290
8288 8291 /*
8289 8292 * Record clientid in stable storage.
8290 8293 * Must be done after server instance has been assigned.
8291 8294 */
8292 8295 rfs4_ss_clid(nsrv4, cp);
8293 8296
8294 8297 rfs4_dbe_unlock(cp->rc_dbe);
8295 8298
8296 8299 if (cptoclose)
8297 8300 /* don't need to rele, client_close does it */
8298 8301 rfs4_client_close(cptoclose);
8299 8302
8300 8303 /* If needed, initiate CB_NULL call for callback path */
8301 8304 rfs4_deleg_cb_check(cp);
8302 8305 rfs4_update_lease(cp);
8303 8306
8304 8307 /*
8305 8308 * Check to see if client can perform reclaims
8306 8309 */
8307 8310 rfs4_ss_chkclid(nsrv4, cp);
8308 8311
8309 8312 rfs4_client_rele(cp);
8310 8313
8311 8314 out:
8312 8315 DTRACE_NFSV4_2(op__setclientid__confirm__done,
8313 8316 struct compound_state *, cs,
8314 8317 SETCLIENTID_CONFIRM4 *, res);
8315 8318 }
8316 8319
8317 8320
8318 8321 /*ARGSUSED*/
8319 8322 void
8320 8323 rfs4_op_close(nfs_argop4 *argop, nfs_resop4 *resop,
8321 8324 struct svc_req *req, struct compound_state *cs)
8322 8325 {
8323 8326 CLOSE4args *args = &argop->nfs_argop4_u.opclose;
8324 8327 CLOSE4res *resp = &resop->nfs_resop4_u.opclose;
8325 8328 rfs4_state_t *sp;
8326 8329 nfsstat4 status;
8327 8330
8328 8331 DTRACE_NFSV4_2(op__close__start, struct compound_state *, cs,
8329 8332 CLOSE4args *, args);
8330 8333
8331 8334 if (cs->vp == NULL) {
8332 8335 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8333 8336 goto out;
8334 8337 }
8335 8338
8336 8339 status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_INVALID);
8337 8340 if (status != NFS4_OK) {
8338 8341 *cs->statusp = resp->status = status;
8339 8342 goto out;
8340 8343 }
8341 8344
8342 8345 /* Ensure specified filehandle matches */
8343 8346 if (cs->vp != sp->rs_finfo->rf_vp) {
8344 8347 rfs4_state_rele(sp);
8345 8348 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8346 8349 goto out;
8347 8350 }
8348 8351
8349 8352 /* hold off other access to open_owner while we tinker */
8350 8353 rfs4_sw_enter(&sp->rs_owner->ro_sw);
8351 8354
8352 8355 switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
8353 8356 case NFS4_CHECK_STATEID_OKAY:
8354 8357 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8355 8358 resop) != NFS4_CHKSEQ_OKAY) {
8356 8359 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8357 8360 goto end;
8358 8361 }
8359 8362 break;
8360 8363 case NFS4_CHECK_STATEID_OLD:
8361 8364 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8362 8365 goto end;
8363 8366 case NFS4_CHECK_STATEID_BAD:
8364 8367 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8365 8368 goto end;
8366 8369 case NFS4_CHECK_STATEID_EXPIRED:
8367 8370 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8368 8371 goto end;
8369 8372 case NFS4_CHECK_STATEID_CLOSED:
8370 8373 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8371 8374 goto end;
8372 8375 case NFS4_CHECK_STATEID_UNCONFIRMED:
8373 8376 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8374 8377 goto end;
8375 8378 case NFS4_CHECK_STATEID_REPLAY:
8376 8379 /* Check the sequence id for the open owner */
8377 8380 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8378 8381 resop)) {
8379 8382 case NFS4_CHKSEQ_OKAY:
8380 8383 /*
8381 8384 * This is replayed stateid; if seqid matches
8382 8385 * next expected, then client is using wrong seqid.
8383 8386 */
8384 8387 /* FALL THROUGH */
8385 8388 case NFS4_CHKSEQ_BAD:
8386 8389 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8387 8390 goto end;
8388 8391 case NFS4_CHKSEQ_REPLAY:
8389 8392 /*
8390 8393 * Note this case is the duplicate case so
8391 8394 * resp->status is already set.
8392 8395 */
8393 8396 *cs->statusp = resp->status;
8394 8397 rfs4_update_lease(sp->rs_owner->ro_client);
8395 8398 goto end;
8396 8399 }
8397 8400 break;
8398 8401 default:
8399 8402 ASSERT(FALSE);
8400 8403 break;
8401 8404 }
8402 8405
8403 8406 rfs4_dbe_lock(sp->rs_dbe);
8404 8407
8405 8408 /* Update the stateid. */
8406 8409 next_stateid(&sp->rs_stateid);
8407 8410 resp->open_stateid = sp->rs_stateid.stateid;
8408 8411
8409 8412 rfs4_dbe_unlock(sp->rs_dbe);
8410 8413
8411 8414 rfs4_update_lease(sp->rs_owner->ro_client);
8412 8415 rfs4_update_open_sequence(sp->rs_owner);
8413 8416 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8414 8417
8415 8418 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
8416 8419
8417 8420 *cs->statusp = resp->status = status;
8418 8421
8419 8422 end:
8420 8423 rfs4_sw_exit(&sp->rs_owner->ro_sw);
8421 8424 rfs4_state_rele(sp);
8422 8425 out:
8423 8426 DTRACE_NFSV4_2(op__close__done, struct compound_state *, cs,
8424 8427 CLOSE4res *, resp);
8425 8428 }
8426 8429
8427 8430 /*
8428 8431 * Manage the counts on the file struct and close all file locks
8429 8432 */
8430 8433 /*ARGSUSED*/
8431 8434 void
8432 8435 rfs4_release_share_lock_state(rfs4_state_t *sp, cred_t *cr,
8433 8436 bool_t close_of_client)
8434 8437 {
8435 8438 rfs4_file_t *fp = sp->rs_finfo;
8436 8439 rfs4_lo_state_t *lsp;
8437 8440 int fflags = 0;
8438 8441
8439 8442 /*
8440 8443 * If this call is part of the larger closing down of client
8441 8444 * state then it is just easier to release all locks
8442 8445 * associated with this client instead of going through each
8443 8446 * individual file and cleaning locks there.
8444 8447 */
8445 8448 if (close_of_client) {
8446 8449 if (sp->rs_owner->ro_client->rc_unlksys_completed == FALSE &&
8447 8450 !list_is_empty(&sp->rs_lostatelist) &&
8448 8451 sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID) {
8449 8452 /* Is the PxFS kernel module loaded? */
8450 8453 if (lm_remove_file_locks != NULL) {
8451 8454 int new_sysid;
8452 8455
8453 8456 /* Encode the cluster nodeid in new sysid */
8454 8457 new_sysid = sp->rs_owner->ro_client->rc_sysidt;
8455 8458 lm_set_nlmid_flk(&new_sysid);
8456 8459
8457 8460 /*
8458 8461 * This PxFS routine removes file locks for a
8459 8462 * client over all nodes of a cluster.
8460 8463 */
8461 8464 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
8462 8465 "lm_remove_file_locks(sysid=0x%x)\n",
8463 8466 new_sysid));
8464 8467 (*lm_remove_file_locks)(new_sysid);
8465 8468 } else {
8466 8469 struct flock64 flk;
8467 8470
8468 8471 /* Release all locks for this client */
8469 8472 flk.l_type = F_UNLKSYS;
8470 8473 flk.l_whence = 0;
8471 8474 flk.l_start = 0;
8472 8475 flk.l_len = 0;
8473 8476 flk.l_sysid =
8474 8477 sp->rs_owner->ro_client->rc_sysidt;
8475 8478 flk.l_pid = 0;
8476 8479 (void) VOP_FRLOCK(sp->rs_finfo->rf_vp, F_SETLK,
8477 8480 &flk, F_REMOTELOCK | FREAD | FWRITE,
8478 8481 (u_offset_t)0, NULL, CRED(), NULL);
8479 8482 }
8480 8483
8481 8484 sp->rs_owner->ro_client->rc_unlksys_completed = TRUE;
8482 8485 }
8483 8486 }
8484 8487
8485 8488 /*
8486 8489 * Release all locks on this file by this lock owner or at
8487 8490 * least mark the locks as having been released
8488 8491 */
8489 8492 for (lsp = list_head(&sp->rs_lostatelist); lsp != NULL;
8490 8493 lsp = list_next(&sp->rs_lostatelist, lsp)) {
8491 8494 lsp->rls_locks_cleaned = TRUE;
8492 8495
8493 8496 /* Was this already taken care of above? */
8494 8497 if (!close_of_client &&
8495 8498 sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8496 8499 (void) cleanlocks(sp->rs_finfo->rf_vp,
8497 8500 lsp->rls_locker->rl_pid,
8498 8501 lsp->rls_locker->rl_client->rc_sysidt);
8499 8502 }
8500 8503
8501 8504 /*
8502 8505 * Release any shrlocks associated with this open state ID.
8503 8506 * This must be done before the rfs4_state gets marked closed.
8504 8507 */
8505 8508 if (sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8506 8509 (void) rfs4_unshare(sp);
8507 8510
8508 8511 if (sp->rs_open_access) {
8509 8512 rfs4_dbe_lock(fp->rf_dbe);
8510 8513
8511 8514 /*
8512 8515 * Decrement the count for each access and deny bit that this
8513 8516 * state has contributed to the file.
8514 8517 * If the file counts go to zero
8515 8518 * clear the appropriate bit in the appropriate mask.
8516 8519 */
8517 8520 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) {
8518 8521 fp->rf_access_read--;
8519 8522 fflags |= FREAD;
8520 8523 if (fp->rf_access_read == 0)
8521 8524 fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
8522 8525 }
8523 8526 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) {
8524 8527 fp->rf_access_write--;
8525 8528 fflags |= FWRITE;
8526 8529 if (fp->rf_access_write == 0)
8527 8530 fp->rf_share_access &=
8528 8531 ~OPEN4_SHARE_ACCESS_WRITE;
8529 8532 }
8530 8533 if (sp->rs_open_deny & OPEN4_SHARE_DENY_READ) {
8531 8534 fp->rf_deny_read--;
8532 8535 if (fp->rf_deny_read == 0)
8533 8536 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
8534 8537 }
8535 8538 if (sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) {
8536 8539 fp->rf_deny_write--;
8537 8540 if (fp->rf_deny_write == 0)
8538 8541 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
8539 8542 }
8540 8543
8541 8544 (void) VOP_CLOSE(fp->rf_vp, fflags, 1, (offset_t)0, cr, NULL);
8542 8545
8543 8546 rfs4_dbe_unlock(fp->rf_dbe);
8544 8547
8545 8548 sp->rs_open_access = 0;
8546 8549 sp->rs_open_deny = 0;
8547 8550 }
8548 8551 }
8549 8552
8550 8553 /*
8551 8554 * lock_denied: Fill in a LOCK4deneid structure given an flock64 structure.
8552 8555 */
8553 8556 static nfsstat4
8554 8557 lock_denied(LOCK4denied *dp, struct flock64 *flk)
8555 8558 {
8556 8559 rfs4_lockowner_t *lo;
8557 8560 rfs4_client_t *cp;
8558 8561 uint32_t len;
8559 8562
8560 8563 lo = rfs4_findlockowner_by_pid(flk->l_pid);
8561 8564 if (lo != NULL) {
8562 8565 cp = lo->rl_client;
8563 8566 if (rfs4_lease_expired(cp)) {
8564 8567 rfs4_lockowner_rele(lo);
8565 8568 rfs4_dbe_hold(cp->rc_dbe);
8566 8569 rfs4_client_close(cp);
8567 8570 return (NFS4ERR_EXPIRED);
8568 8571 }
8569 8572 dp->owner.clientid = lo->rl_owner.clientid;
8570 8573 len = lo->rl_owner.owner_len;
8571 8574 dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8572 8575 bcopy(lo->rl_owner.owner_val, dp->owner.owner_val, len);
8573 8576 dp->owner.owner_len = len;
8574 8577 rfs4_lockowner_rele(lo);
8575 8578 goto finish;
8576 8579 }
8577 8580
8578 8581 /*
8579 8582 * Its not a NFS4 lock. We take advantage that the upper 32 bits
8580 8583 * of the client id contain the boot time for a NFS4 lock. So we
8581 8584 * fabricate and identity by setting clientid to the sysid, and
8582 8585 * the lock owner to the pid.
8583 8586 */
8584 8587 dp->owner.clientid = flk->l_sysid;
8585 8588 len = sizeof (pid_t);
8586 8589 dp->owner.owner_len = len;
8587 8590 dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8588 8591 bcopy(&flk->l_pid, dp->owner.owner_val, len);
8589 8592 finish:
8590 8593 dp->offset = flk->l_start;
8591 8594 dp->length = flk->l_len;
8592 8595
8593 8596 if (flk->l_type == F_RDLCK)
8594 8597 dp->locktype = READ_LT;
8595 8598 else if (flk->l_type == F_WRLCK)
8596 8599 dp->locktype = WRITE_LT;
8597 8600 else
8598 8601 return (NFS4ERR_INVAL); /* no mapping from POSIX ltype to v4 */
8599 8602
8600 8603 return (NFS4_OK);
8601 8604 }
8602 8605
8603 8606 /*
8604 8607 * The NFSv4.0 LOCK operation does not support the blocking lock (at the
8605 8608 * NFSv4.0 protocol level) so the client needs to resend the LOCK request in a
8606 8609 * case the lock is denied by the NFSv4.0 server. NFSv4.0 clients are prepared
8607 8610 * for that (obviously); they are sending the LOCK requests with some delays
8608 8611 * between the attempts. See nfs4frlock() and nfs4_block_and_wait() for the
8609 8612 * locking and delay implementation at the client side.
8610 8613 *
8611 8614 * To make the life of the clients easier, the NFSv4.0 server tries to do some
8612 8615 * fast retries on its own (the for loop below) in a hope the lock will be
8613 8616 * available soon. And if not, the client won't need to resend the LOCK
8614 8617 * requests so fast to check the lock availability. This basically saves some
8615 8618 * network traffic and tries to make sure the client gets the lock ASAP.
8616 8619 */
8617 8620 static int
8618 8621 setlock(vnode_t *vp, struct flock64 *flock, int flag, cred_t *cred)
8619 8622 {
8620 8623 int error;
8621 8624 struct flock64 flk;
8622 8625 int i;
8623 8626 clock_t delaytime;
8624 8627 int cmd;
8625 8628 int spin_cnt = 0;
8626 8629
8627 8630 cmd = nbl_need_check(vp) ? F_SETLK_NBMAND : F_SETLK;
8628 8631 retry:
8629 8632 delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
8630 8633
8631 8634 for (i = 0; i < rfs4_maxlock_tries; i++) {
8632 8635 LOCK_PRINT(rfs4_debug, "setlock", cmd, flock);
8633 8636 error = VOP_FRLOCK(vp, cmd,
8634 8637 flock, flag, (u_offset_t)0, NULL, cred, NULL);
8635 8638
8636 8639 if (error != EAGAIN && error != EACCES)
8637 8640 break;
8638 8641
8639 8642 if (i < rfs4_maxlock_tries - 1) {
8640 8643 delay(delaytime);
8641 8644 delaytime *= 2;
8642 8645 }
8643 8646 }
8644 8647
8645 8648 if (error == EAGAIN || error == EACCES) {
8646 8649 /* Get the owner of the lock */
8647 8650 flk = *flock;
8648 8651 LOCK_PRINT(rfs4_debug, "setlock", F_GETLK, &flk);
8649 8652 if (VOP_FRLOCK(vp, F_GETLK, &flk, flag, 0, NULL, cred,
8650 8653 NULL) == 0) {
8651 8654 /*
8652 8655 * There's a race inherent in the current VOP_FRLOCK
8653 8656 * design where:
8654 8657 * a: "other guy" takes a lock that conflicts with a
8655 8658 * lock we want
8656 8659 * b: we attempt to take our lock (non-blocking) and
8657 8660 * the attempt fails.
8658 8661 * c: "other guy" releases the conflicting lock
8659 8662 * d: we ask what lock conflicts with the lock we want,
8660 8663 * getting F_UNLCK (no lock blocks us)
8661 8664 *
8662 8665 * If we retry the non-blocking lock attempt in this
8663 8666 * case (restart at step 'b') there's some possibility
8664 8667 * that many such attempts might fail. However a test
8665 8668 * designed to actually provoke this race shows that
8666 8669 * the vast majority of cases require no retry, and
8667 8670 * only a few took as many as three retries. Here's
8668 8671 * the test outcome:
8669 8672 *
8670 8673 * number of retries how many times we needed
8671 8674 * that many retries
8672 8675 * 0 79461
8673 8676 * 1 862
8674 8677 * 2 49
8675 8678 * 3 5
8676 8679 *
8677 8680 * Given those empirical results, we arbitrarily limit
8678 8681 * the retry count to ten.
8679 8682 *
8680 8683 * If we actually make to ten retries and give up,
8681 8684 * nothing catastrophic happens, but we're unable to
8682 8685 * return the information about the conflicting lock to
8683 8686 * the NFS client. That's an acceptable trade off vs.
8684 8687 * letting this retry loop run forever.
8685 8688 */
8686 8689 if (flk.l_type == F_UNLCK) {
8687 8690 if (spin_cnt++ < 10) {
8688 8691 /* No longer locked, retry */
8689 8692 goto retry;
8690 8693 }
8691 8694 } else {
8692 8695 *flock = flk;
8693 8696 LOCK_PRINT(rfs4_debug, "setlock(blocking lock)",
8694 8697 F_GETLK, &flk);
8695 8698 }
8696 8699 }
8697 8700 }
8698 8701
8699 8702 return (error);
8700 8703 }
8701 8704
8702 8705 /*ARGSUSED*/
8703 8706 static nfsstat4
8704 8707 rfs4_do_lock(rfs4_lo_state_t *lsp, nfs_lock_type4 locktype,
8705 8708 offset4 offset, length4 length, cred_t *cred, nfs_resop4 *resop)
8706 8709 {
8707 8710 nfsstat4 status;
8708 8711 rfs4_lockowner_t *lo = lsp->rls_locker;
8709 8712 rfs4_state_t *sp = lsp->rls_state;
8710 8713 struct flock64 flock;
8711 8714 int16_t ltype;
8712 8715 int flag;
8713 8716 int error;
8714 8717 sysid_t sysid;
8715 8718 LOCK4res *lres;
8716 8719 vnode_t *vp;
8717 8720
8718 8721 if (rfs4_lease_expired(lo->rl_client)) {
8719 8722 return (NFS4ERR_EXPIRED);
8720 8723 }
8721 8724
8722 8725 if ((status = rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
8723 8726 return (status);
8724 8727
8725 8728 /* Check for zero length. To lock to end of file use all ones for V4 */
8726 8729 if (length == 0)
8727 8730 return (NFS4ERR_INVAL);
8728 8731 else if (length == (length4)(~0))
8729 8732 length = 0; /* Posix to end of file */
8730 8733
8731 8734 retry:
8732 8735 rfs4_dbe_lock(sp->rs_dbe);
8733 8736 if (sp->rs_closed == TRUE) {
8734 8737 rfs4_dbe_unlock(sp->rs_dbe);
8735 8738 return (NFS4ERR_OLD_STATEID);
8736 8739 }
8737 8740
8738 8741 if (resop->resop != OP_LOCKU) {
8739 8742 switch (locktype) {
8740 8743 case READ_LT:
8741 8744 case READW_LT:
8742 8745 if ((sp->rs_share_access
8743 8746 & OPEN4_SHARE_ACCESS_READ) == 0) {
8744 8747 rfs4_dbe_unlock(sp->rs_dbe);
8745 8748
8746 8749 return (NFS4ERR_OPENMODE);
8747 8750 }
8748 8751 ltype = F_RDLCK;
8749 8752 break;
8750 8753 case WRITE_LT:
8751 8754 case WRITEW_LT:
8752 8755 if ((sp->rs_share_access
8753 8756 & OPEN4_SHARE_ACCESS_WRITE) == 0) {
8754 8757 rfs4_dbe_unlock(sp->rs_dbe);
8755 8758
8756 8759 return (NFS4ERR_OPENMODE);
8757 8760 }
8758 8761 ltype = F_WRLCK;
8759 8762 break;
8760 8763 }
8761 8764 } else
8762 8765 ltype = F_UNLCK;
8763 8766
8764 8767 flock.l_type = ltype;
8765 8768 flock.l_whence = 0; /* SEEK_SET */
8766 8769 flock.l_start = offset;
8767 8770 flock.l_len = length;
8768 8771 flock.l_sysid = sysid;
8769 8772 flock.l_pid = lsp->rls_locker->rl_pid;
8770 8773
8771 8774 /* Note that length4 is uint64_t but l_len and l_start are off64_t */
8772 8775 if (flock.l_len < 0 || flock.l_start < 0) {
8773 8776 rfs4_dbe_unlock(sp->rs_dbe);
8774 8777 return (NFS4ERR_INVAL);
8775 8778 }
8776 8779
8777 8780 /*
8778 8781 * N.B. FREAD has the same value as OPEN4_SHARE_ACCESS_READ and
8779 8782 * FWRITE has the same value as OPEN4_SHARE_ACCESS_WRITE.
8780 8783 */
8781 8784 flag = (int)sp->rs_share_access | F_REMOTELOCK;
8782 8785
8783 8786 vp = sp->rs_finfo->rf_vp;
8784 8787 VN_HOLD(vp);
8785 8788
8786 8789 /*
8787 8790 * We need to unlock sp before we call the underlying filesystem to
8788 8791 * acquire the file lock.
8789 8792 */
8790 8793 rfs4_dbe_unlock(sp->rs_dbe);
8791 8794
8792 8795 error = setlock(vp, &flock, flag, cred);
8793 8796
8794 8797 /*
8795 8798 * Make sure the file is still open. In a case the file was closed in
8796 8799 * the meantime, clean the lock we acquired using the setlock() call
8797 8800 * above, and return the appropriate error.
8798 8801 */
8799 8802 rfs4_dbe_lock(sp->rs_dbe);
8800 8803 if (sp->rs_closed == TRUE) {
8801 8804 cleanlocks(vp, lsp->rls_locker->rl_pid, sysid);
8802 8805 rfs4_dbe_unlock(sp->rs_dbe);
8803 8806
8804 8807 VN_RELE(vp);
8805 8808
8806 8809 return (NFS4ERR_OLD_STATEID);
8807 8810 }
8808 8811 rfs4_dbe_unlock(sp->rs_dbe);
8809 8812
8810 8813 VN_RELE(vp);
8811 8814
8812 8815 if (error == 0) {
8813 8816 rfs4_dbe_lock(lsp->rls_dbe);
8814 8817 next_stateid(&lsp->rls_lockid);
8815 8818 rfs4_dbe_unlock(lsp->rls_dbe);
8816 8819 }
8817 8820
8818 8821 /*
8819 8822 * N.B. We map error values to nfsv4 errors. This is differrent
8820 8823 * than puterrno4 routine.
8821 8824 */
8822 8825 switch (error) {
8823 8826 case 0:
8824 8827 status = NFS4_OK;
8825 8828 break;
8826 8829 case EAGAIN:
8827 8830 case EACCES: /* Old value */
8828 8831 /* Can only get here if op is OP_LOCK */
8829 8832 ASSERT(resop->resop == OP_LOCK);
8830 8833 lres = &resop->nfs_resop4_u.oplock;
8831 8834 status = NFS4ERR_DENIED;
8832 8835 if (lock_denied(&lres->LOCK4res_u.denied, &flock)
8833 8836 == NFS4ERR_EXPIRED)
8834 8837 goto retry;
8835 8838 break;
8836 8839 case ENOLCK:
8837 8840 status = NFS4ERR_DELAY;
8838 8841 break;
8839 8842 case EOVERFLOW:
8840 8843 status = NFS4ERR_INVAL;
8841 8844 break;
8842 8845 case EINVAL:
8843 8846 status = NFS4ERR_NOTSUPP;
8844 8847 break;
8845 8848 default:
8846 8849 status = NFS4ERR_SERVERFAULT;
8847 8850 break;
8848 8851 }
8849 8852
8850 8853 return (status);
8851 8854 }
8852 8855
8853 8856 /*ARGSUSED*/
8854 8857 void
8855 8858 rfs4_op_lock(nfs_argop4 *argop, nfs_resop4 *resop,
8856 8859 struct svc_req *req, struct compound_state *cs)
8857 8860 {
8858 8861 LOCK4args *args = &argop->nfs_argop4_u.oplock;
8859 8862 LOCK4res *resp = &resop->nfs_resop4_u.oplock;
8860 8863 nfsstat4 status;
8861 8864 stateid4 *stateid;
8862 8865 rfs4_lockowner_t *lo;
8863 8866 rfs4_client_t *cp;
8864 8867 rfs4_state_t *sp = NULL;
8865 8868 rfs4_lo_state_t *lsp = NULL;
8866 8869 bool_t ls_sw_held = FALSE;
8867 8870 bool_t create = TRUE;
8868 8871 bool_t lcreate = TRUE;
8869 8872 bool_t dup_lock = FALSE;
8870 8873 int rc;
8871 8874
8872 8875 DTRACE_NFSV4_2(op__lock__start, struct compound_state *, cs,
8873 8876 LOCK4args *, args);
8874 8877
8875 8878 if (cs->vp == NULL) {
8876 8879 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8877 8880 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8878 8881 cs, LOCK4res *, resp);
8879 8882 return;
8880 8883 }
8881 8884
8882 8885 if (args->locker.new_lock_owner) {
8883 8886 /* Create a new lockowner for this instance */
8884 8887 open_to_lock_owner4 *olo = &args->locker.locker4_u.open_owner;
8885 8888
8886 8889 NFS4_DEBUG(rfs4_debug, (CE_NOTE, "Creating new lock owner"));
8887 8890
8888 8891 stateid = &olo->open_stateid;
8889 8892 status = rfs4_get_state(stateid, &sp, RFS4_DBS_VALID);
8890 8893 if (status != NFS4_OK) {
8891 8894 NFS4_DEBUG(rfs4_debug,
8892 8895 (CE_NOTE, "Get state failed in lock %d", status));
8893 8896 *cs->statusp = resp->status = status;
8894 8897 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8895 8898 cs, LOCK4res *, resp);
8896 8899 return;
8897 8900 }
8898 8901
8899 8902 /* Ensure specified filehandle matches */
8900 8903 if (cs->vp != sp->rs_finfo->rf_vp) {
8901 8904 rfs4_state_rele(sp);
8902 8905 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8903 8906 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8904 8907 cs, LOCK4res *, resp);
8905 8908 return;
8906 8909 }
8907 8910
8908 8911 /* hold off other access to open_owner while we tinker */
8909 8912 rfs4_sw_enter(&sp->rs_owner->ro_sw);
8910 8913
8911 8914 switch (rc = rfs4_check_stateid_seqid(sp, stateid)) {
8912 8915 case NFS4_CHECK_STATEID_OLD:
8913 8916 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8914 8917 goto end;
8915 8918 case NFS4_CHECK_STATEID_BAD:
8916 8919 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8917 8920 goto end;
8918 8921 case NFS4_CHECK_STATEID_EXPIRED:
8919 8922 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8920 8923 goto end;
8921 8924 case NFS4_CHECK_STATEID_UNCONFIRMED:
8922 8925 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8923 8926 goto end;
8924 8927 case NFS4_CHECK_STATEID_CLOSED:
8925 8928 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8926 8929 goto end;
8927 8930 case NFS4_CHECK_STATEID_OKAY:
8928 8931 case NFS4_CHECK_STATEID_REPLAY:
8929 8932 switch (rfs4_check_olo_seqid(olo->open_seqid,
8930 8933 sp->rs_owner, resop)) {
8931 8934 case NFS4_CHKSEQ_OKAY:
8932 8935 if (rc == NFS4_CHECK_STATEID_OKAY)
8933 8936 break;
8934 8937 /*
8935 8938 * This is replayed stateid; if seqid
8936 8939 * matches next expected, then client
8937 8940 * is using wrong seqid.
8938 8941 */
8939 8942 /* FALLTHROUGH */
8940 8943 case NFS4_CHKSEQ_BAD:
8941 8944 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8942 8945 goto end;
8943 8946 case NFS4_CHKSEQ_REPLAY:
8944 8947 /* This is a duplicate LOCK request */
8945 8948 dup_lock = TRUE;
8946 8949
8947 8950 /*
8948 8951 * For a duplicate we do not want to
8949 8952 * create a new lockowner as it should
8950 8953 * already exist.
8951 8954 * Turn off the lockowner create flag.
8952 8955 */
8953 8956 lcreate = FALSE;
8954 8957 }
8955 8958 break;
8956 8959 }
8957 8960
8958 8961 lo = rfs4_findlockowner(&olo->lock_owner, &lcreate);
8959 8962 if (lo == NULL) {
8960 8963 NFS4_DEBUG(rfs4_debug,
8961 8964 (CE_NOTE, "rfs4_op_lock: no lock owner"));
8962 8965 *cs->statusp = resp->status = NFS4ERR_RESOURCE;
8963 8966 goto end;
8964 8967 }
8965 8968
8966 8969 lsp = rfs4_findlo_state_by_owner(lo, sp, &create);
8967 8970 if (lsp == NULL) {
8968 8971 rfs4_update_lease(sp->rs_owner->ro_client);
8969 8972 /*
8970 8973 * Only update theh open_seqid if this is not
8971 8974 * a duplicate request
8972 8975 */
8973 8976 if (dup_lock == FALSE) {
8974 8977 rfs4_update_open_sequence(sp->rs_owner);
8975 8978 }
8976 8979
8977 8980 NFS4_DEBUG(rfs4_debug,
8978 8981 (CE_NOTE, "rfs4_op_lock: no state"));
8979 8982 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
8980 8983 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8981 8984 rfs4_lockowner_rele(lo);
8982 8985 goto end;
8983 8986 }
8984 8987
8985 8988 /*
8986 8989 * This is the new_lock_owner branch and the client is
8987 8990 * supposed to be associating a new lock_owner with
8988 8991 * the open file at this point. If we find that a
8989 8992 * lock_owner/state association already exists and a
8990 8993 * successful LOCK request was returned to the client,
8991 8994 * an error is returned to the client since this is
8992 8995 * not appropriate. The client should be using the
8993 8996 * existing lock_owner branch.
8994 8997 */
8995 8998 if (dup_lock == FALSE && create == FALSE) {
8996 8999 if (lsp->rls_lock_completed == TRUE) {
8997 9000 *cs->statusp =
8998 9001 resp->status = NFS4ERR_BAD_SEQID;
8999 9002 rfs4_lockowner_rele(lo);
9000 9003 goto end;
9001 9004 }
9002 9005 }
9003 9006
9004 9007 rfs4_update_lease(sp->rs_owner->ro_client);
9005 9008
9006 9009 /*
9007 9010 * Only update theh open_seqid if this is not
9008 9011 * a duplicate request
9009 9012 */
9010 9013 if (dup_lock == FALSE) {
9011 9014 rfs4_update_open_sequence(sp->rs_owner);
9012 9015 }
9013 9016
9014 9017 /*
9015 9018 * If this is a duplicate lock request, just copy the
9016 9019 * previously saved reply and return.
9017 9020 */
9018 9021 if (dup_lock == TRUE) {
9019 9022 /* verify that lock_seqid's match */
9020 9023 if (lsp->rls_seqid != olo->lock_seqid) {
9021 9024 NFS4_DEBUG(rfs4_debug,
9022 9025 (CE_NOTE, "rfs4_op_lock: Dup-Lock seqid bad"
9023 9026 "lsp->seqid=%d old->seqid=%d",
9024 9027 lsp->rls_seqid, olo->lock_seqid));
9025 9028 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9026 9029 } else {
9027 9030 rfs4_copy_reply(resop, &lsp->rls_reply);
9028 9031 /*
9029 9032 * Make sure to copy the just
9030 9033 * retrieved reply status into the
9031 9034 * overall compound status
9032 9035 */
9033 9036 *cs->statusp = resp->status;
9034 9037 }
9035 9038 rfs4_lockowner_rele(lo);
9036 9039 goto end;
9037 9040 }
9038 9041
9039 9042 rfs4_dbe_lock(lsp->rls_dbe);
9040 9043
9041 9044 /* Make sure to update the lock sequence id */
9042 9045 lsp->rls_seqid = olo->lock_seqid;
9043 9046
9044 9047 NFS4_DEBUG(rfs4_debug,
9045 9048 (CE_NOTE, "Lock seqid established as %d", lsp->rls_seqid));
9046 9049
9047 9050 /*
9048 9051 * This is used to signify the newly created lockowner
9049 9052 * stateid and its sequence number. The checks for
9050 9053 * sequence number and increment don't occur on the
9051 9054 * very first lock request for a lockowner.
9052 9055 */
9053 9056 lsp->rls_skip_seqid_check = TRUE;
9054 9057
9055 9058 /* hold off other access to lsp while we tinker */
9056 9059 rfs4_sw_enter(&lsp->rls_sw);
9057 9060 ls_sw_held = TRUE;
9058 9061
9059 9062 rfs4_dbe_unlock(lsp->rls_dbe);
9060 9063
9061 9064 rfs4_lockowner_rele(lo);
9062 9065 } else {
9063 9066 stateid = &args->locker.locker4_u.lock_owner.lock_stateid;
9064 9067 /* get lsp and hold the lock on the underlying file struct */
9065 9068 if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE))
9066 9069 != NFS4_OK) {
9067 9070 *cs->statusp = resp->status = status;
9068 9071 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9069 9072 cs, LOCK4res *, resp);
9070 9073 return;
9071 9074 }
9072 9075 create = FALSE; /* We didn't create lsp */
9073 9076
9074 9077 /* Ensure specified filehandle matches */
9075 9078 if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
9076 9079 rfs4_lo_state_rele(lsp, TRUE);
9077 9080 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9078 9081 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9079 9082 cs, LOCK4res *, resp);
9080 9083 return;
9081 9084 }
9082 9085
9083 9086 /* hold off other access to lsp while we tinker */
9084 9087 rfs4_sw_enter(&lsp->rls_sw);
9085 9088 ls_sw_held = TRUE;
9086 9089
9087 9090 switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
9088 9091 /*
9089 9092 * The stateid looks like it was okay (expected to be
9090 9093 * the next one)
9091 9094 */
9092 9095 case NFS4_CHECK_STATEID_OKAY:
9093 9096 /*
9094 9097 * The sequence id is now checked. Determine
9095 9098 * if this is a replay or if it is in the
9096 9099 * expected (next) sequence. In the case of a
9097 9100 * replay, there are two replay conditions
9098 9101 * that may occur. The first is the normal
9099 9102 * condition where a LOCK is done with a
9100 9103 * NFS4_OK response and the stateid is
9101 9104 * updated. That case is handled below when
9102 9105 * the stateid is identified as a REPLAY. The
9103 9106 * second is the case where an error is
9104 9107 * returned, like NFS4ERR_DENIED, and the
9105 9108 * sequence number is updated but the stateid
9106 9109 * is not updated. This second case is dealt
9107 9110 * with here. So it may seem odd that the
9108 9111 * stateid is okay but the sequence id is a
9109 9112 * replay but it is okay.
9110 9113 */
9111 9114 switch (rfs4_check_lock_seqid(
9112 9115 args->locker.locker4_u.lock_owner.lock_seqid,
9113 9116 lsp, resop)) {
9114 9117 case NFS4_CHKSEQ_REPLAY:
9115 9118 if (resp->status != NFS4_OK) {
9116 9119 /*
9117 9120 * Here is our replay and need
9118 9121 * to verify that the last
9119 9122 * response was an error.
9120 9123 */
9121 9124 *cs->statusp = resp->status;
9122 9125 goto end;
9123 9126 }
9124 9127 /*
9125 9128 * This is done since the sequence id
9126 9129 * looked like a replay but it didn't
9127 9130 * pass our check so a BAD_SEQID is
9128 9131 * returned as a result.
9129 9132 */
9130 9133 /*FALLTHROUGH*/
9131 9134 case NFS4_CHKSEQ_BAD:
9132 9135 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9133 9136 goto end;
9134 9137 case NFS4_CHKSEQ_OKAY:
9135 9138 /* Everything looks okay move ahead */
9136 9139 break;
9137 9140 }
9138 9141 break;
9139 9142 case NFS4_CHECK_STATEID_OLD:
9140 9143 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9141 9144 goto end;
9142 9145 case NFS4_CHECK_STATEID_BAD:
9143 9146 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9144 9147 goto end;
9145 9148 case NFS4_CHECK_STATEID_EXPIRED:
9146 9149 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9147 9150 goto end;
9148 9151 case NFS4_CHECK_STATEID_CLOSED:
9149 9152 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9150 9153 goto end;
9151 9154 case NFS4_CHECK_STATEID_REPLAY:
9152 9155 switch (rfs4_check_lock_seqid(
9153 9156 args->locker.locker4_u.lock_owner.lock_seqid,
9154 9157 lsp, resop)) {
9155 9158 case NFS4_CHKSEQ_OKAY:
9156 9159 /*
9157 9160 * This is a replayed stateid; if
9158 9161 * seqid matches the next expected,
9159 9162 * then client is using wrong seqid.
9160 9163 */
9161 9164 case NFS4_CHKSEQ_BAD:
9162 9165 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9163 9166 goto end;
9164 9167 case NFS4_CHKSEQ_REPLAY:
9165 9168 rfs4_update_lease(lsp->rls_locker->rl_client);
9166 9169 *cs->statusp = status = resp->status;
9167 9170 goto end;
9168 9171 }
9169 9172 break;
9170 9173 default:
9171 9174 ASSERT(FALSE);
9172 9175 break;
9173 9176 }
9174 9177
9175 9178 rfs4_update_lock_sequence(lsp);
9176 9179 rfs4_update_lease(lsp->rls_locker->rl_client);
9177 9180 }
9178 9181
9179 9182 /*
9180 9183 * NFS4 only allows locking on regular files, so
9181 9184 * verify type of object.
9182 9185 */
9183 9186 if (cs->vp->v_type != VREG) {
9184 9187 if (cs->vp->v_type == VDIR)
9185 9188 status = NFS4ERR_ISDIR;
9186 9189 else
9187 9190 status = NFS4ERR_INVAL;
9188 9191 goto out;
9189 9192 }
9190 9193
9191 9194 cp = lsp->rls_state->rs_owner->ro_client;
9192 9195
9193 9196 if (rfs4_clnt_in_grace(cp) && !args->reclaim) {
9194 9197 status = NFS4ERR_GRACE;
9195 9198 goto out;
9196 9199 }
9197 9200
9198 9201 if (rfs4_clnt_in_grace(cp) && args->reclaim && !cp->rc_can_reclaim) {
9199 9202 status = NFS4ERR_NO_GRACE;
9200 9203 goto out;
9201 9204 }
9202 9205
9203 9206 if (!rfs4_clnt_in_grace(cp) && args->reclaim) {
9204 9207 status = NFS4ERR_NO_GRACE;
9205 9208 goto out;
9206 9209 }
9207 9210
9208 9211 if (lsp->rls_state->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE)
9209 9212 cs->deleg = TRUE;
9210 9213
9211 9214 status = rfs4_do_lock(lsp, args->locktype,
9212 9215 args->offset, args->length, cs->cr, resop);
9213 9216
9214 9217 out:
9215 9218 lsp->rls_skip_seqid_check = FALSE;
9216 9219
9217 9220 *cs->statusp = resp->status = status;
9218 9221
9219 9222 if (status == NFS4_OK) {
9220 9223 resp->LOCK4res_u.lock_stateid = lsp->rls_lockid.stateid;
9221 9224 lsp->rls_lock_completed = TRUE;
9222 9225 }
9223 9226 /*
9224 9227 * Only update the "OPEN" response here if this was a new
9225 9228 * lock_owner
9226 9229 */
9227 9230 if (sp)
9228 9231 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
9229 9232
9230 9233 rfs4_update_lock_resp(lsp, resop);
9231 9234
9232 9235 end:
9233 9236 if (lsp) {
9234 9237 if (ls_sw_held)
9235 9238 rfs4_sw_exit(&lsp->rls_sw);
9236 9239 /*
9237 9240 * If an sp obtained, then the lsp does not represent
9238 9241 * a lock on the file struct.
9239 9242 */
9240 9243 if (sp != NULL)
9241 9244 rfs4_lo_state_rele(lsp, FALSE);
9242 9245 else
9243 9246 rfs4_lo_state_rele(lsp, TRUE);
9244 9247 }
9245 9248 if (sp) {
9246 9249 rfs4_sw_exit(&sp->rs_owner->ro_sw);
9247 9250 rfs4_state_rele(sp);
9248 9251 }
9249 9252
9250 9253 DTRACE_NFSV4_2(op__lock__done, struct compound_state *, cs,
9251 9254 LOCK4res *, resp);
9252 9255 }
9253 9256
9254 9257 /* free function for LOCK/LOCKT */
9255 9258 static void
9256 9259 lock_denied_free(nfs_resop4 *resop)
9257 9260 {
9258 9261 LOCK4denied *dp = NULL;
9259 9262
9260 9263 switch (resop->resop) {
9261 9264 case OP_LOCK:
9262 9265 if (resop->nfs_resop4_u.oplock.status == NFS4ERR_DENIED)
9263 9266 dp = &resop->nfs_resop4_u.oplock.LOCK4res_u.denied;
9264 9267 break;
9265 9268 case OP_LOCKT:
9266 9269 if (resop->nfs_resop4_u.oplockt.status == NFS4ERR_DENIED)
9267 9270 dp = &resop->nfs_resop4_u.oplockt.denied;
9268 9271 break;
9269 9272 default:
9270 9273 break;
9271 9274 }
9272 9275
9273 9276 if (dp)
9274 9277 kmem_free(dp->owner.owner_val, dp->owner.owner_len);
9275 9278 }
9276 9279
9277 9280 /*ARGSUSED*/
9278 9281 void
9279 9282 rfs4_op_locku(nfs_argop4 *argop, nfs_resop4 *resop,
9280 9283 struct svc_req *req, struct compound_state *cs)
9281 9284 {
9282 9285 LOCKU4args *args = &argop->nfs_argop4_u.oplocku;
9283 9286 LOCKU4res *resp = &resop->nfs_resop4_u.oplocku;
9284 9287 nfsstat4 status;
9285 9288 stateid4 *stateid = &args->lock_stateid;
9286 9289 rfs4_lo_state_t *lsp;
9287 9290
9288 9291 DTRACE_NFSV4_2(op__locku__start, struct compound_state *, cs,
9289 9292 LOCKU4args *, args);
9290 9293
9291 9294 if (cs->vp == NULL) {
9292 9295 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9293 9296 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9294 9297 LOCKU4res *, resp);
9295 9298 return;
9296 9299 }
9297 9300
9298 9301 if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE)) != NFS4_OK) {
9299 9302 *cs->statusp = resp->status = status;
9300 9303 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9301 9304 LOCKU4res *, resp);
9302 9305 return;
9303 9306 }
9304 9307
9305 9308 /* Ensure specified filehandle matches */
9306 9309 if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
9307 9310 rfs4_lo_state_rele(lsp, TRUE);
9308 9311 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9309 9312 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9310 9313 LOCKU4res *, resp);
9311 9314 return;
9312 9315 }
9313 9316
9314 9317 /* hold off other access to lsp while we tinker */
9315 9318 rfs4_sw_enter(&lsp->rls_sw);
9316 9319
9317 9320 switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
9318 9321 case NFS4_CHECK_STATEID_OKAY:
9319 9322 if (rfs4_check_lock_seqid(args->seqid, lsp, resop)
9320 9323 != NFS4_CHKSEQ_OKAY) {
9321 9324 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9322 9325 goto end;
9323 9326 }
9324 9327 break;
9325 9328 case NFS4_CHECK_STATEID_OLD:
9326 9329 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9327 9330 goto end;
9328 9331 case NFS4_CHECK_STATEID_BAD:
9329 9332 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9330 9333 goto end;
9331 9334 case NFS4_CHECK_STATEID_EXPIRED:
9332 9335 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9333 9336 goto end;
9334 9337 case NFS4_CHECK_STATEID_CLOSED:
9335 9338 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9336 9339 goto end;
9337 9340 case NFS4_CHECK_STATEID_REPLAY:
9338 9341 switch (rfs4_check_lock_seqid(args->seqid, lsp, resop)) {
9339 9342 case NFS4_CHKSEQ_OKAY:
9340 9343 /*
9341 9344 * This is a replayed stateid; if
9342 9345 * seqid matches the next expected,
9343 9346 * then client is using wrong seqid.
9344 9347 */
9345 9348 case NFS4_CHKSEQ_BAD:
9346 9349 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9347 9350 goto end;
9348 9351 case NFS4_CHKSEQ_REPLAY:
9349 9352 rfs4_update_lease(lsp->rls_locker->rl_client);
9350 9353 *cs->statusp = status = resp->status;
9351 9354 goto end;
9352 9355 }
9353 9356 break;
9354 9357 default:
9355 9358 ASSERT(FALSE);
9356 9359 break;
9357 9360 }
9358 9361
9359 9362 rfs4_update_lock_sequence(lsp);
9360 9363 rfs4_update_lease(lsp->rls_locker->rl_client);
9361 9364
9362 9365 /*
9363 9366 * NFS4 only allows locking on regular files, so
9364 9367 * verify type of object.
9365 9368 */
9366 9369 if (cs->vp->v_type != VREG) {
9367 9370 if (cs->vp->v_type == VDIR)
9368 9371 status = NFS4ERR_ISDIR;
9369 9372 else
9370 9373 status = NFS4ERR_INVAL;
9371 9374 goto out;
9372 9375 }
9373 9376
9374 9377 if (rfs4_clnt_in_grace(lsp->rls_state->rs_owner->ro_client)) {
9375 9378 status = NFS4ERR_GRACE;
9376 9379 goto out;
9377 9380 }
9378 9381
9379 9382 status = rfs4_do_lock(lsp, args->locktype,
9380 9383 args->offset, args->length, cs->cr, resop);
9381 9384
9382 9385 out:
9383 9386 *cs->statusp = resp->status = status;
9384 9387
9385 9388 if (status == NFS4_OK)
9386 9389 resp->lock_stateid = lsp->rls_lockid.stateid;
9387 9390
9388 9391 rfs4_update_lock_resp(lsp, resop);
9389 9392
9390 9393 end:
9391 9394 rfs4_sw_exit(&lsp->rls_sw);
9392 9395 rfs4_lo_state_rele(lsp, TRUE);
9393 9396
9394 9397 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9395 9398 LOCKU4res *, resp);
9396 9399 }
9397 9400
9398 9401 /*
9399 9402 * LOCKT is a best effort routine, the client can not be guaranteed that
9400 9403 * the status return is still in effect by the time the reply is received.
9401 9404 * They are numerous race conditions in this routine, but we are not required
9402 9405 * and can not be accurate.
9403 9406 */
9404 9407 /*ARGSUSED*/
9405 9408 void
9406 9409 rfs4_op_lockt(nfs_argop4 *argop, nfs_resop4 *resop,
9407 9410 struct svc_req *req, struct compound_state *cs)
9408 9411 {
9409 9412 LOCKT4args *args = &argop->nfs_argop4_u.oplockt;
9410 9413 LOCKT4res *resp = &resop->nfs_resop4_u.oplockt;
9411 9414 rfs4_lockowner_t *lo;
9412 9415 rfs4_client_t *cp;
9413 9416 bool_t create = FALSE;
9414 9417 struct flock64 flk;
9415 9418 int error;
9416 9419 int flag = FREAD | FWRITE;
9417 9420 int ltype;
9418 9421 length4 posix_length;
9419 9422 sysid_t sysid;
9420 9423 pid_t pid;
9421 9424
9422 9425 DTRACE_NFSV4_2(op__lockt__start, struct compound_state *, cs,
9423 9426 LOCKT4args *, args);
9424 9427
9425 9428 if (cs->vp == NULL) {
9426 9429 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9427 9430 goto out;
9428 9431 }
9429 9432
9430 9433 /*
9431 9434 * NFS4 only allows locking on regular files, so
9432 9435 * verify type of object.
9433 9436 */
9434 9437 if (cs->vp->v_type != VREG) {
9435 9438 if (cs->vp->v_type == VDIR)
9436 9439 *cs->statusp = resp->status = NFS4ERR_ISDIR;
9437 9440 else
9438 9441 *cs->statusp = resp->status = NFS4ERR_INVAL;
9439 9442 goto out;
9440 9443 }
9441 9444
9442 9445 /*
9443 9446 * Check out the clientid to ensure the server knows about it
9444 9447 * so that we correctly inform the client of a server reboot.
9445 9448 */
9446 9449 if ((cp = rfs4_findclient_by_id(args->owner.clientid, FALSE))
9447 9450 == NULL) {
9448 9451 *cs->statusp = resp->status =
9449 9452 rfs4_check_clientid(&args->owner.clientid, 0);
9450 9453 goto out;
9451 9454 }
9452 9455 if (rfs4_lease_expired(cp)) {
9453 9456 rfs4_client_close(cp);
9454 9457 /*
9455 9458 * Protocol doesn't allow returning NFS4ERR_STALE as
9456 9459 * other operations do on this check so STALE_CLIENTID
9457 9460 * is returned instead
9458 9461 */
9459 9462 *cs->statusp = resp->status = NFS4ERR_STALE_CLIENTID;
9460 9463 goto out;
9461 9464 }
9462 9465
9463 9466 if (rfs4_clnt_in_grace(cp) && !(cp->rc_can_reclaim)) {
9464 9467 *cs->statusp = resp->status = NFS4ERR_GRACE;
9465 9468 rfs4_client_rele(cp);
9466 9469 goto out;
9467 9470 }
9468 9471 rfs4_client_rele(cp);
9469 9472
9470 9473 resp->status = NFS4_OK;
9471 9474
9472 9475 switch (args->locktype) {
9473 9476 case READ_LT:
9474 9477 case READW_LT:
9475 9478 ltype = F_RDLCK;
9476 9479 break;
9477 9480 case WRITE_LT:
9478 9481 case WRITEW_LT:
9479 9482 ltype = F_WRLCK;
9480 9483 break;
9481 9484 }
9482 9485
9483 9486 posix_length = args->length;
9484 9487 /* Check for zero length. To lock to end of file use all ones for V4 */
9485 9488 if (posix_length == 0) {
9486 9489 *cs->statusp = resp->status = NFS4ERR_INVAL;
9487 9490 goto out;
9488 9491 } else if (posix_length == (length4)(~0)) {
9489 9492 posix_length = 0; /* Posix to end of file */
9490 9493 }
9491 9494
9492 9495 /* Find or create a lockowner */
9493 9496 lo = rfs4_findlockowner(&args->owner, &create);
9494 9497
9495 9498 if (lo) {
9496 9499 pid = lo->rl_pid;
9497 9500 if ((resp->status =
9498 9501 rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
9499 9502 goto err;
9500 9503 } else {
9501 9504 pid = 0;
9502 9505 sysid = lockt_sysid;
9503 9506 }
9504 9507 retry:
9505 9508 flk.l_type = ltype;
9506 9509 flk.l_whence = 0; /* SEEK_SET */
9507 9510 flk.l_start = args->offset;
9508 9511 flk.l_len = posix_length;
9509 9512 flk.l_sysid = sysid;
9510 9513 flk.l_pid = pid;
9511 9514 flag |= F_REMOTELOCK;
9512 9515
9513 9516 LOCK_PRINT(rfs4_debug, "rfs4_op_lockt", F_GETLK, &flk);
9514 9517
9515 9518 /* Note that length4 is uint64_t but l_len and l_start are off64_t */
9516 9519 if (flk.l_len < 0 || flk.l_start < 0) {
9517 9520 resp->status = NFS4ERR_INVAL;
9518 9521 goto err;
9519 9522 }
9520 9523 error = VOP_FRLOCK(cs->vp, F_GETLK, &flk, flag, (u_offset_t)0,
9521 9524 NULL, cs->cr, NULL);
9522 9525
9523 9526 /*
9524 9527 * N.B. We map error values to nfsv4 errors. This is differrent
9525 9528 * than puterrno4 routine.
9526 9529 */
9527 9530 switch (error) {
9528 9531 case 0:
9529 9532 if (flk.l_type == F_UNLCK)
9530 9533 resp->status = NFS4_OK;
9531 9534 else {
9532 9535 if (lock_denied(&resp->denied, &flk) == NFS4ERR_EXPIRED)
9533 9536 goto retry;
9534 9537 resp->status = NFS4ERR_DENIED;
9535 9538 }
9536 9539 break;
9537 9540 case EOVERFLOW:
9538 9541 resp->status = NFS4ERR_INVAL;
9539 9542 break;
9540 9543 case EINVAL:
9541 9544 resp->status = NFS4ERR_NOTSUPP;
9542 9545 break;
9543 9546 default:
9544 9547 cmn_err(CE_WARN, "rfs4_op_lockt: unexpected errno (%d)",
9545 9548 error);
9546 9549 resp->status = NFS4ERR_SERVERFAULT;
9547 9550 break;
9548 9551 }
9549 9552
9550 9553 err:
9551 9554 if (lo)
9552 9555 rfs4_lockowner_rele(lo);
9553 9556 *cs->statusp = resp->status;
9554 9557 out:
9555 9558 DTRACE_NFSV4_2(op__lockt__done, struct compound_state *, cs,
9556 9559 LOCKT4res *, resp);
9557 9560 }
9558 9561
9559 9562 int
9560 9563 rfs4_share(rfs4_state_t *sp, uint32_t access, uint32_t deny)
9561 9564 {
9562 9565 int err;
9563 9566 int cmd;
9564 9567 vnode_t *vp;
9565 9568 struct shrlock shr;
9566 9569 struct shr_locowner shr_loco;
9567 9570 int fflags = 0;
9568 9571
9569 9572 ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9570 9573 ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9571 9574
9572 9575 if (sp->rs_closed)
9573 9576 return (NFS4ERR_OLD_STATEID);
9574 9577
9575 9578 vp = sp->rs_finfo->rf_vp;
9576 9579 ASSERT(vp);
9577 9580
9578 9581 shr.s_access = shr.s_deny = 0;
9579 9582
9580 9583 if (access & OPEN4_SHARE_ACCESS_READ) {
9581 9584 fflags |= FREAD;
9582 9585 shr.s_access |= F_RDACC;
9583 9586 }
9584 9587 if (access & OPEN4_SHARE_ACCESS_WRITE) {
9585 9588 fflags |= FWRITE;
9586 9589 shr.s_access |= F_WRACC;
9587 9590 }
9588 9591 ASSERT(shr.s_access);
9589 9592
9590 9593 if (deny & OPEN4_SHARE_DENY_READ)
9591 9594 shr.s_deny |= F_RDDNY;
9592 9595 if (deny & OPEN4_SHARE_DENY_WRITE)
9593 9596 shr.s_deny |= F_WRDNY;
9594 9597
9595 9598 shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9596 9599 shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9597 9600 shr_loco.sl_pid = shr.s_pid;
9598 9601 shr_loco.sl_id = shr.s_sysid;
9599 9602 shr.s_owner = (caddr_t)&shr_loco;
9600 9603 shr.s_own_len = sizeof (shr_loco);
9601 9604
9602 9605 cmd = nbl_need_check(vp) ? F_SHARE_NBMAND : F_SHARE;
9603 9606
9604 9607 err = VOP_SHRLOCK(vp, cmd, &shr, fflags, CRED(), NULL);
9605 9608 if (err != 0) {
9606 9609 if (err == EAGAIN)
9607 9610 err = NFS4ERR_SHARE_DENIED;
9608 9611 else
9609 9612 err = puterrno4(err);
9610 9613 return (err);
9611 9614 }
9612 9615
9613 9616 sp->rs_share_access |= access;
9614 9617 sp->rs_share_deny |= deny;
9615 9618
9616 9619 return (0);
9617 9620 }
9618 9621
9619 9622 int
9620 9623 rfs4_unshare(rfs4_state_t *sp)
9621 9624 {
9622 9625 int err;
9623 9626 struct shrlock shr;
9624 9627 struct shr_locowner shr_loco;
9625 9628
9626 9629 ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9627 9630
9628 9631 if (sp->rs_closed || sp->rs_share_access == 0)
9629 9632 return (0);
9630 9633
9631 9634 ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9632 9635 ASSERT(sp->rs_finfo->rf_vp);
9633 9636
9634 9637 shr.s_access = shr.s_deny = 0;
9635 9638 shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9636 9639 shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9637 9640 shr_loco.sl_pid = shr.s_pid;
9638 9641 shr_loco.sl_id = shr.s_sysid;
9639 9642 shr.s_owner = (caddr_t)&shr_loco;
9640 9643 shr.s_own_len = sizeof (shr_loco);
9641 9644
9642 9645 err = VOP_SHRLOCK(sp->rs_finfo->rf_vp, F_UNSHARE, &shr, 0, CRED(),
9643 9646 NULL);
9644 9647 if (err != 0) {
9645 9648 err = puterrno4(err);
9646 9649 return (err);
9647 9650 }
9648 9651
9649 9652 sp->rs_share_access = 0;
9650 9653 sp->rs_share_deny = 0;
9651 9654
9652 9655 return (0);
9653 9656
9654 9657 }
9655 9658
9656 9659 static int
9657 9660 rdma_setup_read_data4(READ4args *args, READ4res *rok)
9658 9661 {
9659 9662 struct clist *wcl;
9660 9663 count4 count = rok->data_len;
9661 9664 int wlist_len;
9662 9665
9663 9666 wcl = args->wlist;
9664 9667 if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
9665 9668 return (FALSE);
9666 9669 }
9667 9670 wcl = args->wlist;
9668 9671 rok->wlist_len = wlist_len;
9669 9672 rok->wlist = wcl;
9670 9673 return (TRUE);
9671 9674 }
9672 9675
/*
 * Tunable to disable server referrals.  When non-zero,
 * vn_is_nfs_reparse() returns B_FALSE without examining the vnode.
 */
int rfs4_no_referrals = 0;
9675 9678
9676 9679 /*
9677 9680 * Find an NFS record in reparse point data.
9678 9681 * Returns 0 for success and <0 or an errno value on failure.
9679 9682 */
9680 9683 int
9681 9684 vn_find_nfs_record(vnode_t *vp, nvlist_t **nvlp, char **svcp, char **datap)
9682 9685 {
9683 9686 int err;
9684 9687 char *stype, *val;
9685 9688 nvlist_t *nvl;
9686 9689 nvpair_t *curr;
9687 9690
9688 9691 if ((nvl = reparse_init()) == NULL)
9689 9692 return (-1);
9690 9693
9691 9694 if ((err = reparse_vnode_parse(vp, nvl)) != 0) {
9692 9695 reparse_free(nvl);
9693 9696 return (err);
9694 9697 }
9695 9698
9696 9699 curr = NULL;
9697 9700 while ((curr = nvlist_next_nvpair(nvl, curr)) != NULL) {
9698 9701 if ((stype = nvpair_name(curr)) == NULL) {
9699 9702 reparse_free(nvl);
9700 9703 return (-2);
9701 9704 }
9702 9705 if (strncasecmp(stype, "NFS", 3) == 0)
9703 9706 break;
9704 9707 }
9705 9708
9706 9709 if ((curr == NULL) ||
9707 9710 (nvpair_value_string(curr, &val))) {
9708 9711 reparse_free(nvl);
9709 9712 return (-3);
9710 9713 }
9711 9714 *nvlp = nvl;
9712 9715 *svcp = stype;
9713 9716 *datap = val;
9714 9717 return (0);
9715 9718 }
9716 9719
9717 9720 int
9718 9721 vn_is_nfs_reparse(vnode_t *vp, cred_t *cr)
9719 9722 {
9720 9723 nvlist_t *nvl;
9721 9724 char *s, *d;
9722 9725
9723 9726 if (rfs4_no_referrals != 0)
9724 9727 return (B_FALSE);
9725 9728
9726 9729 if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9727 9730 return (B_FALSE);
9728 9731
9729 9732 if (vn_find_nfs_record(vp, &nvl, &s, &d) != 0)
9730 9733 return (B_FALSE);
9731 9734
9732 9735 reparse_free(nvl);
9733 9736
9734 9737 return (B_TRUE);
9735 9738 }
9736 9739
9737 9740 /*
9738 9741 * There is a user-level copy of this routine in ref_subr.c.
9739 9742 * Changes should be kept in sync.
9740 9743 */
9741 9744 static int
9742 9745 nfs4_create_components(char *path, component4 *comp4)
9743 9746 {
9744 9747 int slen, plen, ncomp;
9745 9748 char *ori_path, *nxtc, buf[MAXNAMELEN];
9746 9749
9747 9750 if (path == NULL)
9748 9751 return (0);
9749 9752
9750 9753 plen = strlen(path) + 1; /* include the terminator */
9751 9754 ori_path = path;
9752 9755 ncomp = 0;
9753 9756
9754 9757 /* count number of components in the path */
9755 9758 for (nxtc = path; nxtc < ori_path + plen; nxtc++) {
9756 9759 if (*nxtc == '/' || *nxtc == '\0' || *nxtc == '\n') {
9757 9760 if ((slen = nxtc - path) == 0) {
9758 9761 path = nxtc + 1;
9759 9762 continue;
9760 9763 }
9761 9764
9762 9765 if (comp4 != NULL) {
9763 9766 bcopy(path, buf, slen);
9764 9767 buf[slen] = '\0';
9765 9768 (void) str_to_utf8(buf, &comp4[ncomp]);
9766 9769 }
9767 9770
9768 9771 ncomp++; /* 1 valid component */
9769 9772 path = nxtc + 1;
9770 9773 }
9771 9774 if (*nxtc == '\0' || *nxtc == '\n')
9772 9775 break;
9773 9776 }
9774 9777
9775 9778 return (ncomp);
9776 9779 }
9777 9780
9778 9781 /*
9779 9782 * There is a user-level copy of this routine in ref_subr.c.
9780 9783 * Changes should be kept in sync.
9781 9784 */
9782 9785 static int
9783 9786 make_pathname4(char *path, pathname4 *pathname)
9784 9787 {
9785 9788 int ncomp;
9786 9789 component4 *comp4;
9787 9790
9788 9791 if (pathname == NULL)
9789 9792 return (0);
9790 9793
9791 9794 if (path == NULL) {
9792 9795 pathname->pathname4_val = NULL;
9793 9796 pathname->pathname4_len = 0;
9794 9797 return (0);
9795 9798 }
9796 9799
9797 9800 /* count number of components to alloc buffer */
9798 9801 if ((ncomp = nfs4_create_components(path, NULL)) == 0) {
9799 9802 pathname->pathname4_val = NULL;
9800 9803 pathname->pathname4_len = 0;
9801 9804 return (0);
9802 9805 }
9803 9806 comp4 = kmem_zalloc(ncomp * sizeof (component4), KM_SLEEP);
9804 9807
9805 9808 /* copy components into allocated buffer */
9806 9809 ncomp = nfs4_create_components(path, comp4);
9807 9810
9808 9811 pathname->pathname4_val = comp4;
9809 9812 pathname->pathname4_len = ncomp;
9810 9813
9811 9814 return (ncomp);
9812 9815 }
9813 9816
9814 9817 #define xdr_fs_locations4 xdr_fattr4_fs_locations
9815 9818
9816 9819 fs_locations4 *
9817 9820 fetch_referral(vnode_t *vp, cred_t *cr)
9818 9821 {
9819 9822 nvlist_t *nvl;
9820 9823 char *stype, *sdata;
9821 9824 fs_locations4 *result;
9822 9825 char buf[1024];
9823 9826 size_t bufsize;
9824 9827 XDR xdr;
9825 9828 int err;
9826 9829
9827 9830 /*
9828 9831 * Check attrs to ensure it's a reparse point
9829 9832 */
9830 9833 if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9831 9834 return (NULL);
9832 9835
9833 9836 /*
9834 9837 * Look for an NFS record and get the type and data
9835 9838 */
9836 9839 if (vn_find_nfs_record(vp, &nvl, &stype, &sdata) != 0)
9837 9840 return (NULL);
9838 9841
9839 9842 /*
9840 9843 * With the type and data, upcall to get the referral
9841 9844 */
9842 9845 bufsize = sizeof (buf);
9843 9846 bzero(buf, sizeof (buf));
9844 9847 err = reparse_kderef((const char *)stype, (const char *)sdata,
9845 9848 buf, &bufsize);
9846 9849 reparse_free(nvl);
9847 9850
9848 9851 DTRACE_PROBE4(nfs4serv__func__referral__upcall,
9849 9852 char *, stype, char *, sdata, char *, buf, int, err);
9850 9853 if (err) {
9851 9854 cmn_err(CE_NOTE,
9852 9855 "reparsed daemon not running: unable to get referral (%d)",
9853 9856 err);
9854 9857 return (NULL);
9855 9858 }
9856 9859
9857 9860 /*
9858 9861 * We get an XDR'ed record back from the kderef call
9859 9862 */
9860 9863 xdrmem_create(&xdr, buf, bufsize, XDR_DECODE);
9861 9864 result = kmem_alloc(sizeof (fs_locations4), KM_SLEEP);
9862 9865 err = xdr_fs_locations4(&xdr, result);
9863 9866 XDR_DESTROY(&xdr);
9864 9867 if (err != TRUE) {
9865 9868 DTRACE_PROBE1(nfs4serv__func__referral__upcall__xdrfail,
9866 9869 int, err);
9867 9870 return (NULL);
9868 9871 }
9869 9872
9870 9873 /*
9871 9874 * Look at path to recover fs_root, ignoring the leading '/'
9872 9875 */
9873 9876 (void) make_pathname4(vp->v_path, &result->fs_root);
9874 9877
9875 9878 return (result);
9876 9879 }
9877 9880
9878 9881 char *
9879 9882 build_symlink(vnode_t *vp, cred_t *cr, size_t *strsz)
9880 9883 {
9881 9884 fs_locations4 *fsl;
9882 9885 fs_location4 *fs;
9883 9886 char *server, *path, *symbuf;
9884 9887 static char *prefix = "/net/";
9885 9888 int i, size, npaths;
9886 9889 uint_t len;
9887 9890
9888 9891 /* Get the referral */
9889 9892 if ((fsl = fetch_referral(vp, cr)) == NULL)
9890 9893 return (NULL);
9891 9894
9892 9895 /* Deal with only the first location and first server */
9893 9896 fs = &fsl->locations_val[0];
9894 9897 server = utf8_to_str(&fs->server_val[0], &len, NULL);
9895 9898 if (server == NULL) {
9896 9899 rfs4_free_fs_locations4(fsl);
9897 9900 kmem_free(fsl, sizeof (fs_locations4));
9898 9901 return (NULL);
9899 9902 }
9900 9903
9901 9904 /* Figure out size for "/net/" + host + /path/path/path + NULL */
9902 9905 size = strlen(prefix) + len;
9903 9906 for (i = 0; i < fs->rootpath.pathname4_len; i++)
9904 9907 size += fs->rootpath.pathname4_val[i].utf8string_len + 1;
9905 9908
9906 9909 /* Allocate the symlink buffer and fill it */
9907 9910 symbuf = kmem_zalloc(size, KM_SLEEP);
9908 9911 (void) strcat(symbuf, prefix);
9909 9912 (void) strcat(symbuf, server);
9910 9913 kmem_free(server, len);
9911 9914
9912 9915 npaths = 0;
9913 9916 for (i = 0; i < fs->rootpath.pathname4_len; i++) {
9914 9917 path = utf8_to_str(&fs->rootpath.pathname4_val[i], &len, NULL);
9915 9918 if (path == NULL)
9916 9919 continue;
9917 9920 (void) strcat(symbuf, "/");
9918 9921 (void) strcat(symbuf, path);
9919 9922 npaths++;
9920 9923 kmem_free(path, len);
9921 9924 }
9922 9925
9923 9926 rfs4_free_fs_locations4(fsl);
9924 9927 kmem_free(fsl, sizeof (fs_locations4));
9925 9928
9926 9929 if (strsz != NULL)
9927 9930 *strsz = size;
9928 9931 return (symbuf);
9929 9932 }
9930 9933
9931 9934 /*
9932 9935 * Check to see if we have a downrev Solaris client, so that we
9933 9936 * can send it a symlink instead of a referral.
9934 9937 */
9935 9938 int
9936 9939 client_is_downrev(struct svc_req *req)
9937 9940 {
9938 9941 struct sockaddr *ca;
9939 9942 rfs4_clntip_t *ci;
9940 9943 bool_t create = FALSE;
9941 9944 int is_downrev;
9942 9945
9943 9946 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
9944 9947 ASSERT(ca);
9945 9948 ci = rfs4_find_clntip(ca, &create);
9946 9949 if (ci == NULL)
9947 9950 return (0);
9948 9951 is_downrev = ci->ri_no_referrals;
9949 9952 rfs4_dbe_rele(ci->ri_dbe);
9950 9953 return (is_downrev);
9951 9954 }
9952 9955
/*
 * Do the main work of handling HA-NFSv4 Resource Group failover on
 * Sun Cluster.
 * We need to detect whether any RG admin paths have been added or removed,
 * and adjust resources accordingly.
 * Currently we're using a very inefficient algorithm, ~ 2 * O(n**2). In
 * order to scale, the list and array of paths need to be held in more
 * suitable data structures.
 *
 * The function works in two passes over nsrv4->dss_pathlist (walked as a
 * circular list) and the rfs4_dss_newpaths[] array of
 * rfs4_dss_numnewpaths entries:
 *
 *   1. Every currently-served path (other than NFS4_DSS_VAR_DIR) that is
 *      not in the new array is unlinked from the list, detached from its
 *      server instance and freed.
 *   2. Every new path not matched by a currently-served path is collected;
 *      if any were collected, a new server instance is created for them,
 *      their stable-storage state is read, and all grace periods are
 *      reset.
 */
static void
hanfsv4_failover(nfs4_srv_t *nsrv4)
{
	int i, start_grace, numadded_paths = 0;
	char **added_paths = NULL;
	rfs4_dss_path_t *dss_path;

	/*
	 * Note: currently, dss_pathlist cannot be NULL, since
	 * it will always include an entry for NFS4_DSS_VAR_DIR. If we
	 * make the latter dynamically specified too, the following will
	 * need to be adjusted.
	 */

	/*
	 * First, look for removed paths: RGs that have been failed-over
	 * away from this node.
	 * Walk the "currently-serving" dss_pathlist and, for each
	 * path, check if it is on the "passed-in" rfs4_dss_newpaths array
	 * from nfsd. If not, that RG path has been removed.
	 *
	 * Note that nfsd has sorted rfs4_dss_newpaths for us, and removed
	 * any duplicates.
	 */
	dss_path = nsrv4->dss_pathlist;
	do {
		int found = 0;
		char *path = dss_path->path;

		/* used only for non-HA so may not be removed */
		if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
			dss_path = dss_path->next;
			/* `continue' re-evaluates the do/while condition */
			continue;
		}

		for (i = 0; i < rfs4_dss_numnewpaths; i++) {
			int cmpret;
			char *newpath = rfs4_dss_newpaths[i];

			/*
			 * Since nfsd has sorted rfs4_dss_newpaths for us,
			 * once the return from strcmp is negative we know
			 * we've passed the point where "path" should be,
			 * and can stop searching: "path" has been removed.
			 */
			cmpret = strcmp(path, newpath);
			if (cmpret < 0)
				break;
			if (cmpret == 0) {
				found = 1;
				break;
			}
		}

		if (found == 0) {
			unsigned index = dss_path->index;
			rfs4_servinst_t *sip = dss_path->sip;
			/* saved now: dss_path is freed below */
			rfs4_dss_path_t *path_next = dss_path->next;

			/*
			 * This path has been removed.
			 * We must clear out the servinst reference to
			 * it, since it's now owned by another
			 * node: we should not attempt to touch it.
			 */
			ASSERT(dss_path == sip->dss_paths[index]);
			sip->dss_paths[index] = NULL;

			/*
			 * remove from "currently-serving" list, and destroy
			 *
			 * NOTE(review): if the entry being removed were the
			 * one nsrv4->dss_pathlist points at, the loop test
			 * below would compare against a freed element.
			 * Presumably the list head is always the
			 * NFS4_DSS_VAR_DIR entry, which is never removed -
			 * confirm.
			 */
			remque(dss_path);
			/* allow for NUL */
			kmem_free(dss_path->path, strlen(dss_path->path) + 1);
			kmem_free(dss_path, sizeof (rfs4_dss_path_t));

			dss_path = path_next;
		} else {
			/* path was found; not removed */
			dss_path = dss_path->next;
		}
	} while (dss_path != nsrv4->dss_pathlist);

	/*
	 * Now, look for added paths: RGs that have been failed-over
	 * to this node.
	 * Walk the "passed-in" rfs4_dss_newpaths array from nfsd and,
	 * for each path, check if it is on the "currently-serving"
	 * dss_pathlist. If not, that RG path has been added.
	 *
	 * Note: we don't do duplicate detection here; nfsd does that for us.
	 *
	 * Note: numadded_paths <= rfs4_dss_numnewpaths, which gives us
	 * an upper bound for the size needed for added_paths[numadded_paths].
	 */

	/* probably more space than we need, but guaranteed to be enough */
	if (rfs4_dss_numnewpaths > 0) {
		size_t sz = rfs4_dss_numnewpaths * sizeof (char *);
		added_paths = kmem_zalloc(sz, KM_SLEEP);
	}

	/* walk the "passed-in" rfs4_dss_newpaths array from nfsd */
	for (i = 0; i < rfs4_dss_numnewpaths; i++) {
		int found = 0;
		char *newpath = rfs4_dss_newpaths[i];

		dss_path = nsrv4->dss_pathlist;
		do {
			char *path = dss_path->path;

			/* used only for non-HA */
			if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
				dss_path = dss_path->next;
				continue;
			}

			/*
			 * NOTE(review): only strlen(path) bytes are
			 * compared, so a new path that merely begins with a
			 * currently-served path counts as already present -
			 * confirm this prefix match is intended.
			 */
			if (strncmp(path, newpath, strlen(path)) == 0) {
				found = 1;
				break;
			}

			dss_path = dss_path->next;
		} while (dss_path != nsrv4->dss_pathlist);

		if (found == 0) {
			/*
			 * Only the pointer is recorded; the string itself
			 * still belongs to rfs4_dss_newpaths[].
			 */
			added_paths[numadded_paths] = newpath;
			numadded_paths++;
		}
	}

	/* did we find any added paths? */
	if (numadded_paths > 0) {

		/* create a new server instance, and start its grace period */
		start_grace = 1;
		/* CSTYLED */
		rfs4_servinst_create(nsrv4, start_grace, numadded_paths, added_paths);

		/* read in the stable storage state from these paths */
		rfs4_dss_readstate(nsrv4, numadded_paths, added_paths);

		/*
		 * Multiple failovers during a grace period will cause
		 * clients of the same resource group to be partitioned
		 * into different server instances, with different
		 * grace periods.  Since clients of the same resource
		 * group must be subject to the same grace period,
		 * we need to reset all currently active grace periods.
		 */
		rfs4_grace_reset_all(nsrv4);
	}

	/* free only the pointer array, with the size it was allocated at */
	if (rfs4_dss_numnewpaths > 0)
		kmem_free(added_paths, rfs4_dss_numnewpaths * sizeof (char *));
}
|
↓ open down ↓ |
4182 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX